scmkl.run

import anndata as ad
import numpy as np
import time
import tracemalloc

from scmkl.train_model import train_model
from scmkl.test import predict, find_selected_groups


def run(adata: ad.AnnData, alpha_list: np.ndarray, 
        metrics: list | None = None, return_probs=False) -> dict:
    '''
    Wrapper function for training and testing with multiple alpha 
    values. Returns metrics, predictions, group weights, and resource 
    usage.

    Parameters
    ----------
    **adata** : *AnnData* 
        > A processed *AnnData* with `'Z_train'`, `'Z_test'`, and 
        `'group_dict'` keys in `adata.uns`.
    
    **alpha_list** : *np.ndarray* 
        > `alpha` values to create models with. Alpha is the penalty 
        parameter in Group Lasso: larger alphas force group weights 
        to shrink towards 0, while smaller alphas apply a weaker 
        penalty to kernel weights.

    **metrics** : *list[str]*
        > Which metrics should be calculated on predictions. Options 
        are ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']. 
        When set to `None`, all metrics are calculated.

    **return_probs** : *bool*
        > When `True`, class probabilities from `predict()` are 
        returned under the `'Probabilities'` key of the results.
    
    Returns
    -------
    **results** : *dict*
    > With keys and values: 

    > `'Metrics'` : a nested dictionary as `[alpha][metric]` = value.
    
    > `'Selected_groups'` : a dictionary as `[alpha]` = array of 
        groups with nonzero weights.

    > `'Norms'` : a dictionary as `[alpha]` = array of the kernel 
        weight norm for each group, ordered respective to 
        'Group_names'.

    > `'Predictions'` : a dictionary as `[alpha]` = predicted class 
        respective to 'Observed' for that `alpha`.

    > `'Observed'` : an array of ground truth cell labels from the
        test set.

    > `'Test_indices'` : indices of samples in adata used in the 
        test set.

    > `'Group_names'` : an array of group names respective to each
        array in 'Norms'.

    > `'Models'` : a dictionary where `[alpha]` = Celer Group Lasso
        object for that `alpha`.

    > `'Train_time'` : a dictionary as `[alpha]` = training and 
        evaluation time in seconds for that `alpha`.

    > `'RAM_usage'` : peak memory usage recorded after training 
        models for each `alpha`.

    > `'Probabilities'` : a dictionary as `[alpha]` = class 
        probabilities; empty unless `return_probs` is `True`.

    Examples
    --------
    >>> results = scmkl.run(adata = adata, 
    ...                     alpha_list = np.array([0.05, 0.1, 0.5]))
    >>> results.keys()
    dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions', 
    ...        'Observed', 'Test_indices', 'Group_names', 'Models', 
    ...        'Train_time', 'RAM_usage', 'Probabilities'])
    >>>
    >>> # List of alpha values
    >>> results['Metrics'].keys()
    dict_keys([0.05, 0.1, 0.5])
    >>>
    >>> results['Metrics'][0.05]
    {'AUROC': 0.9859,
    'Accuracy': 0.945,
    'F1-Score': 0.9452736318407959,
    'Precision': 0.9405940594059405,
    'Recall': 0.95}
    '''
    if metrics is None:
        metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']

    # Initializing variables to capture metrics
    group_names = list(adata.uns['group_dict'].keys())
    preds = {}
    group_norms = {}
    mets_dict = {}
    selected_groups = {}
    train_time = {}
    models = {}
    probs = {}

    # Kernel approximation dimension; each group spans 2 * D columns in Z
    D = adata.uns['D']

    # Generating models for each alpha and outputs
    for alpha in alpha_list:

        print(f'  Evaluating model. Alpha: {alpha}', flush = True)

        train_start = time.time()

        adata = train_model(adata, group_size = 2 * D, alpha = alpha)

        alpha_res = predict(adata, 
                            metrics = metrics,
                            return_probs = return_probs)

        if return_probs:
            preds[alpha], mets_dict[alpha], probs[alpha] = alpha_res
        else:
            preds[alpha], mets_dict[alpha] = alpha_res

        selected_groups[alpha] = find_selected_groups(adata)

        # L2 norm of the 2 * D kernel weights belonging to each group
        # (slice ends are exclusive, so the full group is captured)
        kernel_weights = adata.uns['model'].coef_
        group_norms[alpha] = [
            np.linalg.norm(kernel_weights[i * 2 * D : (i + 1) * 2 * D])
            for i in range(len(group_names))
            ]

        models[alpha] = adata.uns['model']

        train_end = time.time()
        train_time[alpha] = train_end - train_start

    # Combining results into one object
    results = {}
    results['Metrics'] = mets_dict
    results['Selected_groups'] = selected_groups
    results['Norms'] = group_norms
    results['Predictions'] = preds
    results['Observed'] = adata.obs['labels'].iloc[adata.uns['test_indices']]
    results['Test_indices'] = adata.uns['test_indices']
    results['Group_names'] = group_names
    results['Models'] = models
    results['Train_time'] = train_time
    # Peak traced memory in bytes; tracemalloc tracing must be active
    results['RAM_usage'] = f'{tracemalloc.get_traced_memory()[1] / 1e9} GB'
    results['Probabilities'] = probs

    return results
def run(adata: anndata._core.anndata.AnnData, alpha_list: numpy.ndarray, metrics: list | None = None, return_probs=False) -> dict:

Wrapper function for training and testing with multiple alpha values. Returns metrics, predictions, group weights, and resource usage.

Parameters

adata : AnnData

A processed AnnData with 'Z_train', 'Z_test', and 'group_dict' keys in adata.uns.

alpha_list : np.ndarray

alpha values to create models with. Alpha is the penalty parameter in Group Lasso: larger alphas force group weights to shrink towards 0, while smaller alphas apply a weaker penalty to kernel weights (see the sketch after this parameter list).

metrics : list[str]

Which metrics should be calculated on predictions. Options are ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 'Recall']. When set to None, all metrics are calculated.

return_probs : bool

When True, class probabilities from predict() are returned under the 'Probabilities' key of the results.
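
A quick sketch of typical parameter setup (illustrative values only; this assumes adata has already been processed upstream so that adata.uns carries the keys listed above):

>>> import numpy as np
>>> # confirm the required keys are present before running
>>> assert {'Z_train', 'Z_test', 'group_dict'} <= set(adata.uns.keys())
>>> # a log-spaced alpha sweep; these values are illustrative, not defaults
>>> alpha_list = np.round(np.logspace(-2, 0, 5), 3)
>>> # evaluate only a subset of the supported metrics
>>> results = scmkl.run(adata, alpha_list, metrics = ['AUROC', 'F1-Score'])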

Returns

results : dict

With keys and values:

'Metrics' : a nested dictionary as [alpha][metric] = value.

'Selected_groups' : a dictionary as [alpha] = array of groups with nonzero weights.

'Norms' : a dictionary as [alpha] = array of the kernel weight norm for each group, ordered respective to 'Group_names' (see the ranking sketch after the Examples).

'Predictions' : a dictionary as [alpha] = predicted class respective to 'Observed' for that alpha.

'Observed' : an array of ground truth cell labels from the test set.

'Test_indices' : indices of samples in adata used in the test set.

'Group_names' : an array of group names respective to each array in 'Norms'.

'Models' : a dictionary where [alpha] = Celer Group Lasso object for that alpha.

'Train_time' : a dictionary as [alpha] = training and evaluation time in seconds for that alpha.

'RAM_usage' : peak memory usage recorded after training models for each alpha.

'Probabilities' : a dictionary as [alpha] = class probabilities; empty unless return_probs is True.
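
Note that 'RAM_usage' is read from tracemalloc.get_traced_memory(), which reports zero unless tracing is active. Whether the caller or the library starts tracing is not shown in this source, so starting it explicitly is a safe, minimal sketch:

>>> import tracemalloc
>>> tracemalloc.start()  # without tracing, peak memory reads as 0
>>> results = scmkl.run(adata, alpha_list)
>>> results['RAM_usage']  # peak traced memory since start(), formatted in GB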

Examples

>>> results = scmkl.run(adata = adata, 
...                     alpha_list = np.array([0.05, 0.1, 0.5]))
>>> results.keys()
dict_keys(['Metrics', 'Selected_groups', 'Norms', 'Predictions', 
...        'Observed', 'Test_indices', 'Group_names', 'Models', 
...        'Train_time', 'RAM_usage', 'Probabilities'])
>>>
>>> # List of alpha values
>>> results['Metrics'].keys()
dict_keys([0.05, 0.1, 0.5])
>>>
>>> results['Metrics'][0.05]
{'AUROC': 0.9859,
'Accuracy': 0.945,
'F1-Score': 0.9452736318407959,
'Precision': 0.9405940594059405,
'Recall': 0.95}
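
Building on the example above, a short illustrative sketch pairing 'Norms' with 'Group_names' to rank groups by kernel weight norm for one alpha (the alpha value is assumed from the example):

>>> import numpy as np
>>> norms = np.asarray(results['Norms'][0.05])
>>> names = np.asarray(results['Group_names'])
>>> order = np.argsort(norms)[::-1]  # largest norm first
>>> list(zip(names[order], norms[order]))[:3]  # top three groups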