scmkl.run
import anndata as ad
import numpy as np
import time
import tracemalloc

from scmkl.train_model import train_model
from scmkl.test import predict, find_selected_groups


def run(adata: ad.AnnData, alpha_list: np.ndarray,
        metrics: list | None = None,
        return_probs: bool = False) -> dict:
    """
    Wrapper function for training and testing with multiple alpha
    values. Returns metrics, predictions, group weights, and resource
    usage.

    Parameters
    ----------
    adata : ad.AnnData
        A processed `ad.AnnData` with `'Z_train'`, `'Z_test'`, and
        `'group_dict'` keys in `adata.uns`.

    alpha_list : np.ndarray
        Sparsity values to create models with. Alpha refers to the
        penalty parameter in Group Lasso. Larger alphas force group
        weights to shrink towards zero while smaller alphas apply a
        lesser penalty to kernel weights. Values too large will result
        in models that weight all groups as zero.

    metrics : list[str]
        Metrics that should be calculated on predictions. Options are
        `['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']`.
        When set to `None`, all metrics are calculated.

    return_probs : bool
        When `True`, class probabilities for the test samples are also
        returned under the `'Probabilities'` key.

    Returns
    -------
    results : dict
        Results with keys and values:

        `'Metrics'` (dict):
            A nested dictionary as `[alpha][metric] = value`.

        `'Group_names'` (list):
            Group names used in the model(s).

        `'Selected_groups'` (dict):
            A dictionary as `[alpha] = np.array([nonzero_groups])`.
            Nonzero groups are groups that had a kernel weight above
            zero.

        `'Norms'` (dict):
            A dictionary as `[alpha] = [group_norms]`, one kernel
            weight norm per group, ordered respective to
            `'Group_names'` values.

        `'Observed'` (np.ndarray):
            An array of ground truth cell labels from the test set.

        `'Predictions'` (dict):
            A dictionary as `[alpha] = predicted_class`, respective to
            `'Observed'` for `alpha`.

        `'Test_indices'` (np.ndarray):
            Indices of samples in `adata` used in the test set.

        `'Models'` (dict):
            A dictionary where `[alpha] = celer.GroupLasso` object for
            `alpha`.

        `'Train_time'` (dict):
            A dictionary as `[alpha] = training time in seconds`.

        `'RAM_usage'` (str):
            Peak memory usage in GB recorded while training models
            across all alphas.

        `'Probabilities'` (dict):
            A dictionary as `[alpha] = class probabilities` for the
            test samples when `return_probs` is `True`; empty
            otherwise.

    Examples
    --------
    >>> results = scmkl.run(adata = adata,
    ...                     alpha_list = np.array([0.05, 0.1, 0.5]))
    >>> results.keys()
    dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions',
    ...        'Observed', 'Test_indices', 'Group_names', 'Models',
    ...        'Train_time', 'RAM_usage', 'Probabilities'])
    >>>
    >>> # alpha values
    >>> results['Metrics'].keys()
    dict_keys([0.05, 0.1, 0.5])
    >>>
    >>> results['Metrics'][0.05]
    {'AUROC': 0.9859,
     'Accuracy': 0.945,
     'F1-Score': 0.9452736318407959,
     'Precision': 0.9405940594059405,
     'Recall': 0.95}
    """
    if metrics is None:
        metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']

    # Initializing variables to capture metrics
    group_names = list(adata.uns['group_dict'].keys())
    preds = {}
    group_norms = {}
    mets_dict = {}
    selected_groups = {}
    train_time = {}
    models = {}
    probs = {}

    D = adata.uns['D']

    # Generating models for each alpha and outputs
    tracemalloc.start()
    for alpha in alpha_list:

        print(f'  Evaluating model. Alpha: {alpha}', flush=True)

        train_start = time.time()

        adata = train_model(adata, group_size=2 * D, alpha=alpha)

        # Evaluate on the test set; predict() also returns class
        # probabilities when return_probs is True
        alpha_res = predict(adata, metrics=metrics,
                            return_probs=return_probs)

        if return_probs:
            preds[alpha], mets_dict[alpha], probs[alpha] = alpha_res
        else:
            preds[alpha], mets_dict[alpha] = alpha_res

        selected_groups[alpha] = find_selected_groups(adata)

        # Each group contributes a block of 2 * D kernel weights; store
        # the norm of each block
        kernel_weights = adata.uns['model'].coef_
        group_norms[alpha] = [
            np.linalg.norm(kernel_weights[i * 2 * D : (i + 1) * 2 * D - 1])
            for i in np.arange(len(group_names))
        ]

        models[alpha] = adata.uns['model']

        train_end = time.time()
        train_time[alpha] = train_end - train_start

    # Combining results into one object
    results = {}
    results['Metrics'] = mets_dict
    results['Selected_groups'] = selected_groups
    results['Norms'] = group_norms
    results['Predictions'] = preds
    results['Observed'] = adata.obs['labels'].iloc[adata.uns['test_indices']]
    results['Test_indices'] = adata.uns['test_indices']
    results['Group_names'] = group_names
    results['Models'] = models
    results['Train_time'] = train_time
    results['RAM_usage'] = f'{tracemalloc.get_traced_memory()[1] / 1e9} GB'
    results['Probabilities'] = probs

    return results
Wrapper function for training and testing with multiple alpha values. Returns metrics, predictions, group weights, and resource usage.
Parameters
- adata (ad.AnnData): A processed `ad.AnnData` with `'Z_train'`, `'Z_test'`, and `'group_dict'` keys in `adata.uns`.
- alpha_list (np.ndarray): Sparsity values to create models with. Alpha refers to the penalty parameter in Group Lasso. Larger alphas force group weights to shrink towards zero while smaller alphas apply a lesser penalty to kernel weights. Values too large will result in models that weight all groups as zero.
- metrics (list[str]): Metrics that should be calculated on predictions. Options are `['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']`. When set to `None`, all metrics are calculated.
- return_probs (bool): When `True`, class probabilities for the test samples are also returned under the `'Probabilities'` key. A short sketch of constructing these arguments follows this list.
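As a minimal sketch of how these arguments might be assembled (the alpha values and metric subset below are illustrative only, and `adata` is assumed to be an AnnData already processed as described above):

import numpy as np
import scmkl

# Illustrative sparsity values; useful ranges depend on the data
alpha_list = np.array([0.05, 0.1, 0.5])

# Compute only a subset of the supported metrics and also return
# class probabilities for the test samples
results = scmkl.run(adata, alpha_list,
                    metrics=['AUROC', 'F1-Score'],
                    return_probs=True)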
Returns
- results (dict): Results with keys and values:
  - `'Metrics'` (dict): A nested dictionary as `[alpha][metric] = value`.
  - `'Group_names'` (list): Group names used in the model(s).
  - `'Selected_groups'` (dict): A dictionary as `[alpha] = np.array([nonzero_groups])`. Nonzero groups are groups that had a kernel weight above zero.
  - `'Norms'` (dict): A dictionary as `[alpha] = [group_norms]`, one kernel weight norm per group, ordered respective to `'Group_names'` values.
  - `'Observed'` (np.ndarray): An array of ground truth cell labels from the test set.
  - `'Predictions'` (dict): A dictionary as `[alpha] = predicted_class`, respective to `'Observed'` for `alpha`.
  - `'Test_indices'` (np.ndarray): Indices of samples in `adata` used in the test set.
  - `'Models'` (dict): A dictionary where `[alpha] = celer.GroupLasso` object for `alpha`.
  - `'Train_time'` (dict): A dictionary as `[alpha] = training time in seconds`.
  - `'RAM_usage'` (str): Peak memory usage in GB recorded while training models across all alphas.
  - `'Probabilities'` (dict): A dictionary as `[alpha] = class probabilities` for the test samples when `return_probs` is `True`; empty otherwise.

A short sketch of traversing this structure follows the list.
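As a rough illustration of traversing this structure (assuming `results` came from a call like the one in the Examples section below and that `'AUROC'` was among the computed metrics; `summary` and `best_alpha` are illustrative names):

# Summarize each model: AUROC, number of selected groups, training time
summary = {
    alpha: {
        'AUROC': results['Metrics'][alpha]['AUROC'],
        'n_selected': len(results['Selected_groups'][alpha]),
        'train_time_s': results['Train_time'][alpha],
    }
    for alpha in results['Metrics']
}

# Alpha whose model scored the highest AUROC on the test set
best_alpha = max(summary, key=lambda a: summary[a]['AUROC'])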
Examples
>>> results = scmkl.run(adata = adata,
... alpha_list = np.array([0.05, 0.1, 0.5]))
>>> results.keys()
dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions',
... 'Observed', 'Test_indices', 'Group_names', 'Models',
... 'Train_time', 'RAM_usage', 'Probabilities'])
>>>
>>> # alpha values
>>> results['Metrics'].keys()
dict_keys([0.05, 0.1, 0.5])
>>>
>>> results['Metrics'][0.05]
{'AUROC': 0.9859,
'Accuracy': 0.945,
'F1-Score': 0.9452736318407959,
'Precision': 0.9405940594059405,
'Recall': 0.95}
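Building on the example above, the per-group kernel weight norms for one alpha can be paired with `'Group_names'` to rank groups by how strongly the model weighted them. A minimal sketch, assuming `results` was produced as shown (`ranked` is an illustrative name):

import numpy as np

alpha = 0.05
norms = np.asarray(results['Norms'][alpha])
names = np.asarray(results['Group_names'])

# Sort groups by kernel weight norm, largest first
order = np.argsort(norms)[::-1]
ranked = list(zip(names[order], norms[order]))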