scmkl.run
```python
import anndata as ad
import numpy as np
import time
import tracemalloc

from scmkl.train_model import train_model
from scmkl.test import predict, find_selected_groups


def run(adata: ad.AnnData, alpha_list: np.ndarray,
        metrics: list | None = None,
        return_probs: bool = False) -> dict:
    """
    Wrapper function for training and testing with multiple alpha
    values. Returns metrics, predictions, group weights, and resource
    usage.

    Parameters
    ----------
    adata : ad.AnnData
        A processed `ad.AnnData` with `'Z_train'`, `'Z_test'`, and
        `'group_dict'` keys in `adata.uns`.

    alpha_list : np.ndarray
        Sparsity values to create models with. Alpha refers to the
        penalty parameter in Group Lasso. Larger alphas force group
        weights to shrink towards zero while smaller alphas apply a
        lesser penalty to kernel weights. Values that are too large
        will result in models that weight all groups as zero.

    metrics : list[str]
        Metrics that should be calculated on predictions. Options are
        `['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']`.
        When set to `None`, all metrics are calculated.

    return_probs : bool
        When `True`, predicted class probabilities are also returned
        under the `'Probabilities'` key of the results.

    Returns
    -------
    results : dict
        Results with keys and values:

        `'Metrics'` (dict):
            A nested dictionary as `[alpha][metric] = value`.

        `'Group_names'` (list):
            Group names used in the model(s).

        `'Selected_groups'` (dict):
            A nested dictionary as `[alpha] = np.array([nonzero_groups])`.
            Nonzero groups are groups that had a kernel weight above zero.

        `'Norms'` (dict):
            A nested dictionary as `[alpha] = group_norms`, where each
            entry is the norm of a group's kernel weights. The order of
            `group_norms` is respective to `'Group_names'` values.

        `'Observed'` (np.ndarray):
            An array of ground truth cell labels from the test set.

        `'Predictions'` (dict):
            A nested dictionary as `[alpha] = predicted_classes`,
            aligned with `'Observed'` for each `alpha`.

        `'Test_indices'` (np.ndarray):
            Indices of samples respective to `adata` used in the test
            set.

        `'Models'` (dict):
            A nested dictionary where `[alpha] = celer.GroupLasso`
            object for `alpha`.

        `'Train_time'` (dict):
            A nested dictionary as `[alpha] = seconds` spent training
            the model for `alpha`.

        `'RAM_usage'` (str):
            Peak memory usage in GB recorded while training the models.

        `'Probabilities'` (dict):
            A nested dictionary as `[alpha] = predicted_probabilities`;
            populated only when `return_probs` is `True`.

    Examples
    --------
    >>> results = scmkl.run(adata = adata,
    ...                     alpha_list = np.array([0.05, 0.1, 0.5]))
    >>> results.keys()
    dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions',
               'Observed', 'Test_indices', 'Group_names', 'Models',
               'Train_time', 'RAM_usage', 'Probabilities'])
    >>>
    >>> # keys of 'Metrics' are the alpha values
    >>> results['Metrics'].keys()
    dict_keys([0.05, 0.1, 0.5])
    >>>
    >>> results['Metrics'][0.05]
    {'AUROC': 0.9859,
     'Accuracy': 0.945,
     'F1-Score': 0.9452736318407959,
     'Precision': 0.9405940594059405,
     'Recall': 0.95}
    """
    if metrics is None:
        metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']

    # Initializing variables to capture metrics
    group_names = list(adata.uns['group_dict'].keys())
    preds = {}
    group_norms = {}
    mets_dict = {}
    selected_groups = {}
    train_time = {}
    models = {}
    probs = {}

    # Each group's kernel is approximated with 2 * D features, so each
    # group owns a block of 2 * D kernel weights.
    D = adata.uns['D']

    # Generating models for each alpha and outputs
    tracemalloc.start()
    for alpha in alpha_list:
        print(f'  Evaluating model. Alpha: {alpha}', flush=True)

        train_start = time.time()

        adata = train_model(adata, group_size=2 * D, alpha=alpha)

        if return_probs:
            alpha_res = predict(adata,
                                metrics=metrics,
                                return_probs=return_probs)
            preds[alpha], mets_dict[alpha], probs[alpha] = alpha_res

        else:
            alpha_res = predict(adata,
                                metrics=metrics,
                                return_probs=return_probs)
            preds[alpha], mets_dict[alpha] = alpha_res

        selected_groups[alpha] = find_selected_groups(adata)

        # Norm of each group's block of kernel weights
        kernel_weights = adata.uns['model'].coef_
        group_norms[alpha] = [
            np.linalg.norm(kernel_weights[i * 2 * D : (i + 1) * 2 * D - 1])
            for i in np.arange(len(group_names))
        ]

        models[alpha] = adata.uns['model']

        train_end = time.time()
        train_time[alpha] = train_end - train_start

    # Combining results into one object
    results = {}
    results['Metrics'] = mets_dict
    results['Selected_groups'] = selected_groups
    results['Norms'] = group_norms
    results['Predictions'] = preds
    results['Observed'] = adata.obs['labels'].iloc[adata.uns['test_indices']]
    results['Test_indices'] = adata.uns['test_indices']
    results['Group_names'] = group_names
    results['Models'] = models
    results['Train_time'] = train_time
    results['RAM_usage'] = f'{tracemalloc.get_traced_memory()[1] / 1e9} GB'
    results['Probabilities'] = probs

    return results
```
Wrapper function for training and testing with multiple alpha values. Returns metrics, predictions, group weights, and resource usage.
Parameters
- adata (ad.AnnData):
    A processed `ad.AnnData` with `'Z_train'`, `'Z_test'`, and `'group_dict'` keys in `adata.uns`.

- alpha_list (np.ndarray):
    Sparsity values to create models with. Alpha refers to the penalty parameter in Group Lasso. Larger alphas force group weights to shrink towards zero, while smaller alphas apply a lesser penalty to kernel weights. Values that are too large will result in models that weight all groups as zero (a short sketch of building such a sweep follows this list).

- metrics (list[str]):
    Metrics that should be calculated on predictions. Options are `['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']`. When set to `None`, all metrics are calculated.

- return_probs (bool):
    When `True`, predicted class probabilities are also returned under the `'Probabilities'` key of the results. Defaults to `False`.
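The sketch below is only illustrative: it builds a small descending alpha sweep of the kind used in the Examples section so that one call compares sparser (larger alpha) and denser (smaller alpha) models. The specific values here are assumptions, not recommendations.

```python
import numpy as np

# Hypothetical sweep: larger alphas drive more group weights to exactly zero,
# so the fitted models range from sparse to dense as alpha decreases.
alpha_list = np.array([0.5, 0.1, 0.05])

# results = scmkl.run(adata, alpha_list)  # adata prepared as described above
```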
Returns
results (dict):
    Results with keys and values:

    - `'Metrics'` (dict): A nested dictionary as `[alpha][metric] = value`.
    - `'Group_names'` (list): Group names used in the model(s).
    - `'Selected_groups'` (dict): A nested dictionary as `[alpha] = np.array([nonzero_groups])`. Nonzero groups are groups that had a kernel weight above zero.
    - `'Norms'` (dict): A nested dictionary as `[alpha] = group_norms`, where each entry is the norm of a group's kernel weights. The order of `group_norms` is respective to `'Group_names'` values.
    - `'Observed'` (np.ndarray): An array of ground truth cell labels from the test set.
    - `'Predictions'` (dict): A nested dictionary as `[alpha] = predicted_classes`, aligned with `'Observed'` for each `alpha`.
    - `'Test_indices'` (np.ndarray): Indices of samples respective to `adata` used in the test set.
    - `'Models'` (dict): A nested dictionary where `[alpha] = celer.GroupLasso` object for `alpha`.
    - `'Train_time'` (dict): A nested dictionary as `[alpha] = seconds` spent training the model for `alpha`.
    - `'RAM_usage'` (str): Peak memory usage in GB recorded while training the models.
    - `'Probabilities'` (dict): A nested dictionary as `[alpha] = predicted_probabilities`; populated only when `return_probs` is `True`. A sketch of navigating this structure follows this list.
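As a minimal sketch of working with this structure, the snippet below picks the best-performing alpha and inspects its group weights. It assumes `results` was produced by `scmkl.run` as documented above; `best_alpha`, `selected`, and `norms` are hypothetical variable names used only for illustration.

```python
# Assumes `results` is the dictionary returned by scmkl.run().
alphas = list(results['Metrics'].keys())

# Alpha whose model scored the highest test-set AUROC.
best_alpha = max(alphas, key=lambda a: results['Metrics'][a]['AUROC'])

# Groups with nonzero kernel weights for that model.
selected = results['Selected_groups'][best_alpha]

# Kernel weight norms paired with their group names
# ('Norms' entries are ordered to match 'Group_names').
norms = dict(zip(results['Group_names'], results['Norms'][best_alpha]))

print(f"Best alpha: {best_alpha}")
print(f"Selected groups: {selected}")
print(f"Training time (s): {results['Train_time'][best_alpha]:.1f}")
```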
Examples
>>> results = scmkl.run(adata = adata,
... alpha_list = np.array([0.05, 0.1, 0.5]))
>>> results.keys()
dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions',
           'Observed', 'Test_indices', 'Group_names', 'Models',
           'Train_time', 'RAM_usage', 'Probabilities'])
>>>
>>> # keys of 'Metrics' are the alpha values
>>> results['Metrics'].keys()
dict_keys([0.05, 0.1, 0.5])
>>>
>>> results['Metrics'][0.05]
{'AUROC': 0.9859,
'Accuracy': 0.945,
'F1-Score': 0.9452736318407959,
'Precision': 0.9405940594059405,
'Recall': 0.95}
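The follow-up sketch below shows where class probabilities and resource bookkeeping land in the results when `return_probs=True`. It assumes `adata` has been prepared as described under Parameters; the exact structure of each probability entry is determined by `scmkl.test.predict` and is not documented here.

```python
import numpy as np
import scmkl

results = scmkl.run(adata,
                    alpha_list=np.array([0.05, 0.1, 0.5]),
                    return_probs=True)

# One probability entry per alpha; its exact structure comes from
# scmkl.test.predict.
for alpha, prob in results['Probabilities'].items():
    print(alpha, type(prob))

print(results['Train_time'])  # training seconds per alpha
print(results['RAM_usage'])   # a string like '<peak GB> GB' (peak traced memory)
```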