scmkl.test
import numpy as np
import sklearn.metrics as skm


def predict(adata, metrics = None, return_probs = False):
    '''
    Return predicted labels for the test cells and, optionally, any
    of AUROC, Accuracy, F1 Score, Precision, Recall and the class
    probabilities.

    **If `adata.uns['labeled_test']` is `False`, metrics cannot be
    computed; a warning is printed and no metrics are returned.**

    Parameters
    ----------
    **adata** : *AnnData*
        > Has keys `'model'`, `'Z_test'`, `'train_indices'` and
        `'labeled_test'` in `adata.uns` (plus `'test_indices'` when
        the test data is labeled) and a `'labels'` column in
        `adata.obs`.

    **metrics** : *list[str]* | *None*
        > Which metrics to calculate on the predicted values. Options
        are `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and
        `'Recall'`.

    **return_probs** : *bool*
        > If `True`, will return a dictionary with class probabilities.

    Returns
    -------
    **y_pred** : *np.ndarray*
        > Predicted cell classes. Returned on its own when no metrics
        are computed and `return_probs` is `False`.

    **metrics_dict** : *dict*
        > Only returned when metrics are computed. Contains `'AUROC'`,
        `'Accuracy'`, `'F1-Score'`, `'Precision'`, and/or `'Recall'`
        keys depending on metrics argument.

    **probs** : *dict*
        > Only returned (as the last element) when `return_probs` is
        `True`. Maps each of the two training classes to its
        per-cell probability.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
    ...            'Recall']
    >>> adata = scmkl.train_model(adata, metrics = metrics)
    >>>
    >>> y_pred, metrics_dict = scmkl.predict(adata, metrics = metrics)
    >>> metrics_dict.keys()
    dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
    '''
    X_test = adata.uns['Z_test']

    allowed_mets = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']

    # Asserting all input metrics are valid
    if metrics is not None:
        mets_allowed = [metric in allowed_mets for metric in metrics]
        assert all(mets_allowed), ("Unknown metric provided. Must be None, "
                                   f"or one or more of {allowed_mets}")

    # Capturing class labels; np.unique sorts them, and the first class
    # is the one treated as positive throughout this function
    train_idx = adata.uns['train_indices']
    classes = np.unique(adata.obs['labels'].iloc[train_idx].to_numpy())

    # Sigmoid function to force probabilities into [0,1]
    probabilities = 1 / (1 + np.exp(-adata.uns['model'].predict(X_test)))

    # Convert numerical probabilities into binary phenotype
    y_pred = np.array(np.repeat(classes[1], X_test.shape[0]),
                      dtype = 'object')
    y_pred[np.round(probabilities, 0).astype(int) == 1] = classes[0]

    y_test = None
    if not adata.uns['labeled_test']:
        if metrics is not None:
            print("WARNING: Cannot calculate classification metrics "
                  "for unlabeled test data")
            metrics = None
    else:
        y_test = adata.obs['labels'].iloc[adata.uns['test_indices']]
        y_test = y_test.to_numpy()
        assert X_test.shape[0] == len(y_test), ("X rows and length of y must "
                                                "be equal")

    metric_dict = {}

    # Metrics are only touched when requested; previously a None value
    # reached the membership tests below whenever return_probs was True
    # and raised a TypeError.
    if metrics is not None:
        p_cl = classes[0]

        if 'AUROC' in metrics:
            # roc_curve needs numeric targets: 1 marks the positive class
            y = np.zeros((len(y_test)))
            y[y_test == p_cl] = 1
            fpr, tpr, _ = skm.roc_curve(y, probabilities)
            metric_dict['AUROC'] = skm.auc(fpr, tpr)
        if 'Accuracy' in metrics:
            metric_dict['Accuracy'] = np.mean(y_test == y_pred)
        if 'F1-Score' in metrics:
            metric_dict['F1-Score'] = skm.f1_score(y_test, y_pred,
                                                   pos_label = p_cl)
        if 'Precision' in metrics:
            metric_dict['Precision'] = skm.precision_score(y_test, y_pred,
                                                           pos_label = p_cl)
        if 'Recall' in metrics:
            metric_dict['Recall'] = skm.recall_score(y_test, y_pred,
                                                     pos_label = p_cl)

    if return_probs:
        probs = {classes[0] : probabilities,
                 classes[1] : 1 - probabilities}
        if metrics is not None:
            return y_pred, metric_dict, probs
        return y_pred, probs

    if metrics is not None:
        return y_pred, metric_dict
    return y_pred


def find_selected_groups(adata) -> np.ndarray:
    '''
    Find feature groups selected by the model during training. A
    group is selected when the L2 norm of the model coefficients
    belonging to it is non-0.

    Parameters
    ----------
    **adata** : *AnnData*
        > Has *celer.GroupLasso* object in `adata.uns['model']` and
        the group names as the keys of `adata.uns['group_dict']`.

    Returns
    -------
    **selected_groups** : *np.ndarray*
        > Array containing the names of the groups with nonzero kernel
        weights.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> adata = scmkl.train_model(adata)
    >>>
    >>> selected_groups = scmkl.find_selected_groups(adata)
    >>> selected_groups
    array(['HALLMARK_ESTROGEN_RESPONSE_EARLY', 'HALLMARK_HYPOXIA'],
          dtype='<U32')
    '''
    model = adata.uns['model']
    coefficients = model.coef_
    group_size = model.get_params()['groups']
    group_names = list(adata.uns['group_dict'].keys())

    # 'groups' is either one size shared by all groups or a collection
    # holding the coefficient indices of each group
    explicit_groups = isinstance(group_size, (list, set, np.ndarray, tuple))

    selected_groups = []
    for i, group in enumerate(group_names):
        if explicit_groups:
            group_cols = group_size[i]
        else:
            # Half-open slice covering all group_size coefficients of
            # group i (the previous arange bound dropped the last one)
            group_cols = slice(i * group_size, (i + 1) * group_size)

        # Only include the group if the model weights are not all 0
        if np.linalg.norm(coefficients[group_cols]) != 0:
            selected_groups.append(group)

    return np.array(selected_groups)
def predict(adata, metrics = None, return_probs = False):
    '''
    Return predicted labels for the test cells and, optionally, any
    of AUROC, Accuracy, F1 Score, Precision, Recall and the class
    probabilities.

    **If `adata.uns['labeled_test']` is `False`, metrics cannot be
    computed; a warning is printed and no metrics are returned.**

    Parameters
    ----------
    **adata** : *AnnData*
        > Has keys `'model'`, `'Z_test'`, `'train_indices'` and
        `'labeled_test'` in `adata.uns` (plus `'test_indices'` when
        the test data is labeled) and a `'labels'` column in
        `adata.obs`.

    **metrics** : *list[str]* | *None*
        > Which metrics to calculate on the predicted values. Options
        are `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and
        `'Recall'`.

    **return_probs** : *bool*
        > If `True`, will return a dictionary with class probabilities.

    Returns
    -------
    **y_pred** : *np.ndarray*
        > Predicted cell classes. Returned on its own when no metrics
        are computed and `return_probs` is `False`.

    **metrics_dict** : *dict*
        > Only returned when metrics are computed. Contains `'AUROC'`,
        `'Accuracy'`, `'F1-Score'`, `'Precision'`, and/or `'Recall'`
        keys depending on metrics argument.

    **probs** : *dict*
        > Only returned (as the last element) when `return_probs` is
        `True`. Maps each of the two training classes to its
        per-cell probability.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
    ...            'Recall']
    >>> adata = scmkl.train_model(adata, metrics = metrics)
    >>>
    >>> y_pred, metrics_dict = scmkl.predict(adata, metrics = metrics)
    >>> metrics_dict.keys()
    dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
    '''
    X_test = adata.uns['Z_test']

    allowed_mets = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']

    # Asserting all input metrics are valid
    if metrics is not None:
        mets_allowed = [metric in allowed_mets for metric in metrics]
        assert all(mets_allowed), ("Unknown metric provided. Must be None, "
                                   f"or one or more of {allowed_mets}")

    # Capturing class labels; np.unique sorts them, and the first class
    # is the one treated as positive throughout this function
    train_idx = adata.uns['train_indices']
    classes = np.unique(adata.obs['labels'].iloc[train_idx].to_numpy())

    # Sigmoid function to force probabilities into [0,1]
    probabilities = 1 / (1 + np.exp(-adata.uns['model'].predict(X_test)))

    # Convert numerical probabilities into binary phenotype
    y_pred = np.array(np.repeat(classes[1], X_test.shape[0]),
                      dtype = 'object')
    y_pred[np.round(probabilities, 0).astype(int) == 1] = classes[0]

    y_test = None
    if not adata.uns['labeled_test']:
        if metrics is not None:
            print("WARNING: Cannot calculate classification metrics "
                  "for unlabeled test data")
            metrics = None
    else:
        y_test = adata.obs['labels'].iloc[adata.uns['test_indices']]
        y_test = y_test.to_numpy()
        assert X_test.shape[0] == len(y_test), ("X rows and length of y must "
                                                "be equal")

    metric_dict = {}

    # Metrics are only touched when requested; previously a None value
    # reached the membership tests below whenever return_probs was True
    # and raised a TypeError.
    if metrics is not None:
        p_cl = classes[0]

        if 'AUROC' in metrics:
            # roc_curve needs numeric targets: 1 marks the positive class
            y = np.zeros((len(y_test)))
            y[y_test == p_cl] = 1
            fpr, tpr, _ = skm.roc_curve(y, probabilities)
            metric_dict['AUROC'] = skm.auc(fpr, tpr)
        if 'Accuracy' in metrics:
            metric_dict['Accuracy'] = np.mean(y_test == y_pred)
        if 'F1-Score' in metrics:
            metric_dict['F1-Score'] = skm.f1_score(y_test, y_pred,
                                                   pos_label = p_cl)
        if 'Precision' in metrics:
            metric_dict['Precision'] = skm.precision_score(y_test, y_pred,
                                                           pos_label = p_cl)
        if 'Recall' in metrics:
            metric_dict['Recall'] = skm.recall_score(y_test, y_pred,
                                                     pos_label = p_cl)

    if return_probs:
        probs = {classes[0] : probabilities,
                 classes[1] : 1 - probabilities}
        if metrics is not None:
            return y_pred, metric_dict, probs
        return y_pred, probs

    if metrics is not None:
        return y_pred, metric_dict
    return y_pred
Function to return predicted labels and calculate any of AUROC, Accuracy, F1 Score, Precision, Recall for a classification.
**If the `labeled_test` flag is set to `False`, metrics cannot be computed.**
Parameters
adata : AnnData
Has keys `'model'`, `'Z_train'`, and `'Z_test'` in `adata.uns`.
metrics : list[str] | None
Which metrics to calculate on the predicted values. Options are `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and `'Recall'`.
return_probs : bool
If `True`, will return a dictionary with class probabilities.
Returns
y_pred : np.ndarray
Predicted cell classes.
metrics_dict : dict
Contains `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and/or `'Recall'` keys depending on the `metrics` argument.
probs : dict
If `return_probs` is `True`, will return a dictionary with probabilities for each class in `y_test`.
Examples
>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
... 'Recall']
>>> adata = scmkl.train_model(adata, metrics = metrics)
>>>
>>> y_pred, metrics_dict = scmkl.predict(adata, metrics = metrics)
>>> metrics_dict.keys()
dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
def find_selected_groups(adata) -> np.ndarray:
    '''
    Find feature groups selected by the model during training. A
    group is selected when the L2 norm of the model coefficients
    belonging to it is non-0.

    Parameters
    ----------
    **adata** : *AnnData*
        > Has *celer.GroupLasso* object in `adata.uns['model']` and
        the group names as the keys of `adata.uns['group_dict']`.

    Returns
    -------
    **selected_groups** : *np.ndarray*
        > Array containing the names of the groups with nonzero kernel
        weights.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> adata = scmkl.train_model(adata)
    >>>
    >>> selected_groups = scmkl.find_selected_groups(adata)
    >>> selected_groups
    array(['HALLMARK_ESTROGEN_RESPONSE_EARLY', 'HALLMARK_HYPOXIA'],
          dtype='<U32')
    '''
    model = adata.uns['model']
    coefficients = model.coef_
    group_size = model.get_params()['groups']
    group_names = list(adata.uns['group_dict'].keys())

    # 'groups' is either one size shared by all groups or a collection
    # holding the coefficient indices of each group
    explicit_groups = isinstance(group_size, (list, set, np.ndarray, tuple))

    selected_groups = []
    for i, group in enumerate(group_names):
        if explicit_groups:
            group_cols = group_size[i]
        else:
            # Half-open slice covering all group_size coefficients of
            # group i (the previous arange bound dropped the last one)
            group_cols = slice(i * group_size, (i + 1) * group_size)

        # Only include the group if the model weights are not all 0
        if np.linalg.norm(coefficients[group_cols]) != 0:
            selected_groups.append(group)

    return np.array(selected_groups)
Find feature groups selected by the model during training. If feature weight assigned by the model is non-0, then the group containing that feature is selected.
Parameters
adata : AnnData
Has a *celer.GroupLasso* object in `adata.uns['model']`.
Returns
selected_groups : np.ndarray
Array containing the names of the groups with nonzero kernel weights.
Examples
>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> adata = scmkl.train_model(adata)
>>>
>>> selected_groups = scmkl.find_selected_groups(adata)
>>> selected_groups
np.ndarray(['HALLMARK_ESTROGEN_RESPONSE_EARLY',
'HALLMARK_HYPOXIA'])