scmkl.test
import numpy as np
import sklearn.metrics as skm
import anndata as ad


def predict(adata: ad.AnnData, metrics: list | None=None,
            return_probs: bool=False):
    """
    Predict test-set labels with the trained model and optionally
    calculate any of AUROC, Accuracy, F1 Score, Precision, and Recall.

    **If the labeled_test flag in `adata` is set to `False`,
    metrics cannot be computed.**

    Parameters
    ----------
    adata : ad.AnnData
        Has keys `'model'`, `'Z_test'`, `'train_indices'`,
        `'test_indices'`, and `'labeled_test'` in `adata.uns`, and a
        `'labels'` column in `adata.obs`.

    metrics : list[str] | None
        Which metrics to calculate on the predicted values. Options
        are `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and
        `'Recall'`. If `None`, no metrics are calculated.

    return_probs : bool
        If `True`, will return a dictionary with class probabilities.

    Returns
    -------
    y_pred : np.ndarray
        Predicted cell classes. Returned alone when `metrics` is
        `None` and `return_probs` is `False`.

    metrics_dict : dict
        Only returned when `metrics` is not `None` (and the test data
        is labeled). Contains `'AUROC'`, `'Accuracy'`, `'F1-Score'`,
        `'Precision'`, and/or `'Recall'` keys depending on metrics
        argument.

    probs : dict
        Only returned when `return_probs` is `True`. Maps each of the
        two training classes to its predicted probabilities.

    Raises
    ------
    AssertionError
        If `metrics` contains an unknown metric name, or if the number
        of rows in `Z_test` differs from the number of test labels.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
    ...            'Recall']
    >>> adata = scmkl.train_model(adata, metrics = metrics)
    >>>
    >>> y_pred, metrics_dict = scmkl.predict(adata, metrics = metrics)
    >>> metrics_dict.keys()
    dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
    """
    X_test = adata.uns['Z_test']

    allowed_mets = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']

    # Asserting all input metrics are valid
    if metrics is not None:
        mets_allowed = [metric in allowed_mets for metric in metrics]
        assert all(mets_allowed), ("Unknown metric provided. Must be None, "
                                   f"or one or more of {allowed_mets}")

    # Capturing class labels; only classes[0] and classes[1] are used
    # below, i.e. the classification is treated as binary
    train_idx = adata.uns['train_indices']
    classes = np.unique(adata.obs['labels'].iloc[train_idx].to_numpy())

    # Sigmoid function to force probabilities into [0,1]
    probabilities = 1/(1 + np.exp(-adata.uns['model'].predict(X_test)))

    # Convert numerical probabilities into binary phenotype:
    # classes[0] is the positive class, classes[1] the default
    y_pred = np.array(np.repeat(classes[1], X_test.shape[0]),
                      dtype = 'object')
    y_pred[np.round(probabilities, 0).astype(int) == 1] = classes[0]

    if not adata.uns['labeled_test']:
        if metrics is not None:
            print("WARNING: Cannot calculate classification metrics "
                  "for unlabeled test data")
            metrics = None
    else:
        y_test = adata.obs['labels'].iloc[adata.uns['test_indices']]
        y_test = y_test.to_numpy()
        assert X_test.shape[0] == len(y_test), ("X rows and length of y must "
                                                "be equal")

        # Binary 0/1 encoding of the true labels (1 == classes[0]),
        # matching the orientation of `probabilities` for the ROC curve
        y = np.zeros((len(y_test)))
        y[y_test == classes[0]] = 1

    if metrics is None and not return_probs:
        return y_pred

    # Calculate and save metrics given in metrics. Guarded so that
    # requesting only probabilities (metrics is None) does not fail on
    # the membership tests below.
    metric_dict = {}
    if metrics is not None:
        p_cl = classes[0]
        if 'AUROC' in metrics:
            fpr, tpr, _ = skm.roc_curve(y, probabilities)
            metric_dict['AUROC'] = skm.auc(fpr, tpr)
        if 'Accuracy' in metrics:
            metric_dict['Accuracy'] = np.mean(y_test == y_pred)
        if 'F1-Score' in metrics:
            metric_dict['F1-Score'] = skm.f1_score(y_test, y_pred,
                                                   pos_label = p_cl)
        if 'Precision' in metrics:
            metric_dict['Precision'] = skm.precision_score(y_test, y_pred,
                                                           pos_label = p_cl)
        if 'Recall' in metrics:
            metric_dict['Recall'] = skm.recall_score(y_test, y_pred,
                                                     pos_label = p_cl)

    if return_probs:
        probs = {classes[0] : probabilities,
                 classes[1] : 1 - probabilities}
        if metrics is not None:
            return y_pred, metric_dict, probs
        return y_pred, probs

    return y_pred, metric_dict


def find_selected_groups(adata: ad.AnnData) -> np.ndarray:
    """
    Find feature groups selected by the model during training. A group
    is selected when the L2 norm of its model coefficients is non-0.

    Parameters
    ----------
    adata : ad.AnnData
        Has `celer.GroupLasso` object in `adata.uns['model']` and the
        group names as keys of `adata.uns['group_dict']`.

    Returns
    -------
    selected_groups : np.ndarray
        Array containing the names of the groups with nonzero kernel
        weights.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> adata = scmkl.train_model(adata)
    >>>
    >>> selected_groups = scmkl.find_selected_groups(adata)
    >>> selected_groups
    array(['HALLMARK_ESTROGEN_RESPONSE_EARLY', 'HALLMARK_HYPOXIA'],
          dtype='<U32')
    """
    model = adata.uns['model']
    coefficients = model.coef_
    groups = model.get_params()['groups']
    group_names = np.array(list(adata.uns['group_dict'].keys()))

    selected_groups = []
    # Running offset, only used when `groups` is a flat list of sizes
    offset = 0

    # Loop over the model weights associated with each group and calculate
    # the L2 norm
    for i, group in enumerate(group_names):
        if not isinstance(groups, (list, tuple, np.ndarray)):
            # Single int: contiguous blocks of `groups` features each.
            # np.arange excludes its stop value, so the stop is the
            # first column of the *next* group.
            group_cols = np.arange(i * groups, (i + 1) * groups)
        elif isinstance(groups[i], (int, np.integer)):
            # Flat list of ints: contiguous groups, entry i is a size
            group_cols = np.arange(offset, offset + groups[i])
            offset += groups[i]
        else:
            # List of lists: entry i holds the feature indices directly
            group_cols = groups[i]

        group_norm = np.linalg.norm(coefficients[group_cols])

        # Only include the group if the model weights are > 0
        if group_norm != 0:
            selected_groups.append(group)

    return np.array(selected_groups)
def
predict( adata: anndata._core.anndata.AnnData, metrics: list | None = None, return_probs: bool = False):
def predict(adata: ad.AnnData, metrics: list | None=None,
            return_probs: bool=False):
    """
    Predict test-set labels with the trained model and optionally
    calculate any of AUROC, Accuracy, F1 Score, Precision, and Recall.

    **If the labeled_test flag in `adata` is set to `False`,
    metrics cannot be computed.**

    Parameters
    ----------
    adata : ad.AnnData
        Has keys `'model'`, `'Z_test'`, `'train_indices'`,
        `'test_indices'`, and `'labeled_test'` in `adata.uns`, and a
        `'labels'` column in `adata.obs`.

    metrics : list[str] | None
        Which metrics to calculate on the predicted values. Options
        are `'AUROC'`, `'Accuracy'`, `'F1-Score'`, `'Precision'`, and
        `'Recall'`. If `None`, no metrics are calculated.

    return_probs : bool
        If `True`, will return a dictionary with class probabilities.

    Returns
    -------
    y_pred : np.ndarray
        Predicted cell classes. Returned alone when `metrics` is
        `None` and `return_probs` is `False`.

    metrics_dict : dict
        Only returned when `metrics` is not `None` (and the test data
        is labeled). Contains `'AUROC'`, `'Accuracy'`, `'F1-Score'`,
        `'Precision'`, and/or `'Recall'` keys depending on metrics
        argument.

    probs : dict
        Only returned when `return_probs` is `True`. Maps each of the
        two training classes to its predicted probabilities.

    Raises
    ------
    AssertionError
        If `metrics` contains an unknown metric name, or if the number
        of rows in `Z_test` differs from the number of test labels.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
    ...            'Recall']
    >>> adata = scmkl.train_model(adata, metrics = metrics)
    >>>
    >>> y_pred, metrics_dict = scmkl.predict(adata, metrics = metrics)
    >>> metrics_dict.keys()
    dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
    """
    X_test = adata.uns['Z_test']

    allowed_mets = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']

    # Asserting all input metrics are valid
    if metrics is not None:
        mets_allowed = [metric in allowed_mets for metric in metrics]
        assert all(mets_allowed), ("Unknown metric provided. Must be None, "
                                   f"or one or more of {allowed_mets}")

    # Capturing class labels; only classes[0] and classes[1] are used
    # below, i.e. the classification is treated as binary
    train_idx = adata.uns['train_indices']
    classes = np.unique(adata.obs['labels'].iloc[train_idx].to_numpy())

    # Sigmoid function to force probabilities into [0,1]
    probabilities = 1/(1 + np.exp(-adata.uns['model'].predict(X_test)))

    # Convert numerical probabilities into binary phenotype:
    # classes[0] is the positive class, classes[1] the default
    y_pred = np.array(np.repeat(classes[1], X_test.shape[0]),
                      dtype = 'object')
    y_pred[np.round(probabilities, 0).astype(int) == 1] = classes[0]

    if not adata.uns['labeled_test']:
        if metrics is not None:
            print("WARNING: Cannot calculate classification metrics "
                  "for unlabeled test data")
            metrics = None
    else:
        y_test = adata.obs['labels'].iloc[adata.uns['test_indices']]
        y_test = y_test.to_numpy()
        assert X_test.shape[0] == len(y_test), ("X rows and length of y must "
                                                "be equal")

        # Binary 0/1 encoding of the true labels (1 == classes[0]),
        # matching the orientation of `probabilities` for the ROC curve
        y = np.zeros((len(y_test)))
        y[y_test == classes[0]] = 1

    if metrics is None and not return_probs:
        return y_pred

    # Calculate and save metrics given in metrics. Guarded so that
    # requesting only probabilities (metrics is None) does not fail on
    # the membership tests below.
    metric_dict = {}
    if metrics is not None:
        p_cl = classes[0]
        if 'AUROC' in metrics:
            fpr, tpr, _ = skm.roc_curve(y, probabilities)
            metric_dict['AUROC'] = skm.auc(fpr, tpr)
        if 'Accuracy' in metrics:
            metric_dict['Accuracy'] = np.mean(y_test == y_pred)
        if 'F1-Score' in metrics:
            metric_dict['F1-Score'] = skm.f1_score(y_test, y_pred,
                                                   pos_label = p_cl)
        if 'Precision' in metrics:
            metric_dict['Precision'] = skm.precision_score(y_test, y_pred,
                                                           pos_label = p_cl)
        if 'Recall' in metrics:
            metric_dict['Recall'] = skm.recall_score(y_test, y_pred,
                                                     pos_label = p_cl)

    if return_probs:
        probs = {classes[0] : probabilities,
                 classes[1] : 1 - probabilities}
        if metrics is not None:
            return y_pred, metric_dict, probs
        return y_pred, probs

    return y_pred, metric_dict
Function to return predicted labels and calculate any of AUROC, Accuracy, F1 Score, Precision, Recall for a classification.
If labeled_test flag in adata is set to False,
metrics cannot be computed.
Parameters
- adata (ad.AnnData):
Has keys 'model', 'Z_train', and 'Z_test' in adata.uns.
- metrics (list[str] | None):
Which metrics to calculate on the predicted values. Options are 'AUROC', 'Accuracy', 'F1-Score', 'Precision', and 'Recall'. If None, no metrics are calculated and only the predicted labels (plus class probabilities, if requested) are returned.
- return_probs (bool):
If True, will return a dictionary with class probabilities.
Returns
- y_pred (np.ndarray): Predicted cell classes.
- metrics_dict (dict):
Contains 'AUROC', 'Accuracy', 'F1-Score', 'Precision', and/or 'Recall' keys depending on metrics argument.
- probs (dict):
If return_probs is True, will return a dictionary with probabilities for each class in y_test.
Examples
>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision',
... 'Recall']
>>> adata = scmkl.train_model(adata, metrics = metrics)
>>>
>>> metrics_dict = scmkl.predict(adata)
>>> metrics_dict.keys()
dict_keys(['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])
def
find_selected_groups(adata: anndata._core.anndata.AnnData) -> numpy.ndarray:
def find_selected_groups(adata: ad.AnnData) -> np.ndarray:
    """
    Find feature groups selected by the model during training. A group
    is selected when the L2 norm of its model coefficients is non-0.

    Parameters
    ----------
    adata : ad.AnnData
        Has `celer.GroupLasso` object in `adata.uns['model']` and the
        group names as keys of `adata.uns['group_dict']`.

    Returns
    -------
    selected_groups : np.ndarray
        Array containing the names of the groups with nonzero kernel
        weights.

    Examples
    --------
    >>> adata = scmkl.estimate_sigma(adata)
    >>> adata = scmkl.calculate_z(adata)
    >>> adata = scmkl.train_model(adata)
    >>>
    >>> selected_groups = scmkl.find_selected_groups(adata)
    >>> selected_groups
    array(['HALLMARK_ESTROGEN_RESPONSE_EARLY', 'HALLMARK_HYPOXIA'],
          dtype='<U32')
    """
    model = adata.uns['model']
    coefficients = model.coef_
    groups = model.get_params()['groups']
    group_names = np.array(list(adata.uns['group_dict'].keys()))

    selected_groups = []
    # Running offset, only used when `groups` is a flat list of sizes
    offset = 0

    # Loop over the model weights associated with each group and calculate
    # the L2 norm
    for i, group in enumerate(group_names):
        if not isinstance(groups, (list, tuple, np.ndarray)):
            # Single int: contiguous blocks of `groups` features each.
            # np.arange excludes its stop value, so the stop is the
            # first column of the *next* group.
            group_cols = np.arange(i * groups, (i + 1) * groups)
        elif isinstance(groups[i], (int, np.integer)):
            # Flat list of ints: contiguous groups, entry i is a size
            group_cols = np.arange(offset, offset + groups[i])
            offset += groups[i]
        else:
            # List of lists: entry i holds the feature indices directly
            group_cols = groups[i]

        group_norm = np.linalg.norm(coefficients[group_cols])

        # Only include the group if the model weights are > 0
        if group_norm != 0:
            selected_groups.append(group)

    return np.array(selected_groups)
Find feature groups selected by the model during training. If feature weight assigned by the model is non-0, then the group containing that feature is selected.
Parameters
- adata (ad.AnnData):
Has celer.GroupLasso object in adata.uns['model'].
Returns
- selected_groups (np.ndarray): Array containing the names of the groups with nonzero kernel weights.
Examples
>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> adata = scmkl.train_model(adata)
>>>
>>> selected_groups = scmkl.find_selected_groups(adata)
>>> selected_groups
np.ndarray(['HALLMARK_ESTROGEN_RESPONSE_EARLY',
'HALLMARK_HYPOXIA'])