scmkl.one_v_rest
import gc

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

from scmkl.run import run
from scmkl.calculate_z import calculate_z
from scmkl.multimodal_processing import multimodal_processing
from scmkl._checks import _check_adatas


def _eval_labels(cell_labels: np.ndarray, train_indices: np.ndarray,
                 test_indices: np.ndarray) -> np.ndarray:
    """
    Takes an array of multiclass cell labels and returns a unique array
    of cell labels to test for.

    Parameters
    ----------
    cell_labels : np.ndarray
        Cell labels that correspond to an AnnData object.

    train_indices : np.ndarray
        Indices for the training samples in an AnnData object.

    test_indices : np.ndarray
        Indices for the testing samples in an AnnData object.

    Returns
    -------
    uniq_labels : np.ndarray
        Returns a numpy array of unique cell labels present in both
        the training and testing samples, to be iterated through
        during one versus all setups.

    Raises
    ------
    AssertionError
        If the training and testing samples share no labels.
    """
    # Converting to an array so integer indices are always positional,
    # even when a pandas Series with a non-integer index is passed
    cell_labels = np.asarray(cell_labels)

    train_uniq_labels = np.unique(cell_labels[train_indices])
    test_uniq_labels = np.unique(cell_labels[test_indices])

    # Getting only labels in both training and testing sets
    uniq_labels = np.intersect1d(train_uniq_labels, test_uniq_labels)

    # Raised explicitly (rather than with `assert`) so the check is
    # not stripped when Python runs with -O
    if len(uniq_labels) == 0:
        raise AssertionError("There are no common labels between cells "
                             "in the training and testing samples")

    return uniq_labels


def get_prob_table(results : dict, alpha: float):
    """
    Takes a results dictionary with class and probabilities keys and
    returns a table of probabilities for each class and the most
    probable class for each cell.

    Parameters
    ----------
    results : dict
        A nested dictionary that contains a dictionary for each class
        containing probabilities for each cell class. Entries that are
        not dictionaries with a `'Probabilities'` key (such as the
        summary entries added by `scmkl.one_v_rest()`) are ignored.

    alpha : float
        A float for which model probabilities should be evaluated
        for.

    Returns
    -------
    prob_table : pd.DataFrame
        Each column is a cell class and the elements are the
        class probability outputs from the model.

    pred_class : list[str]
        The most probable cell class for each row of `prob_table`
        (the test cells when called from `scmkl.one_v_rest()`). Ties
        are reported as the tied class names joined by `' and '`.

    low_conf : np.ndarray
        A bool array where `True`, sample max probability is less than
        0.5.
    """
    class_probs = {
        class_: res['Probabilities'][alpha][class_]
        for class_, res in results.items()
        if isinstance(res, dict) and 'Probabilities' in res
    }
    prob_table = pd.DataFrame(class_probs)

    # Nothing to reduce over when there are no rows or no columns
    if prob_table.empty:
        return prob_table, [], np.array([], dtype=np.bool_)

    values = prob_table.to_numpy(dtype=float)
    row_maxes = values.max(axis=1)
    is_max = values == row_maxes[:, np.newaxis]
    class_names = prob_table.columns.to_numpy()

    pred_class = []
    for mask in is_max:
        winners = class_names[mask]
        if len(winners) > 1:
            pred_class.append(" and ".join(winners))
        else:
            pred_class.append(winners[0])

    # Direct comparison; rounding would also flag a max of exactly 0.5
    low_conf = row_maxes < 0.5

    return prob_table, pred_class, low_conf


def per_model_summary(results: dict, uniq_labels: np.ndarray | list | tuple,
                      alpha: float) -> pd.DataFrame:
    """
    Takes the results dictionary from `scmkl.one_v_rest()` and builds a
    summary dataframe showing metrics for each model generated from
    the runs.

    Parameters
    ----------
    results : dict
        Results from `scmkl.one_v_rest()`.

    uniq_labels : array_like
        Unique cell classes from the runs.

    alpha : float
        The alpha for creating the summary from.

    Returns
    -------
    summary_df : pd.DataFrame
        Dataframe with classes on rows and metrics as cols, followed
        by a `'Class'` column.
    """
    # Metrics available are taken from the first class's results
    avail_mets = list(results[uniq_labels[0]]['Metrics'][alpha])

    summary_df = {
        met: [results[lab]['Metrics'][alpha][met] for lab in uniq_labels]
        for met in avail_mets
    }
    summary_df['Class'] = uniq_labels

    return pd.DataFrame(summary_df)


def get_class_train(train_indices: np.ndarray,
                    cell_labels: np.ndarray | list | pd.Series,
                    seed_obj: np.random._generator.Generator,
                    other_factor: float = 1.5) -> dict:
    """
    This function returns a dict with each entry being a set of
    training indices for each cell class to be used in
    `scmkl.one_v_rest()`.

    Parameters
    ----------
    train_indices : np.ndarray
        The indices in the `ad.AnnData` object of samples available to
        train on.

    cell_labels : array_like
        The identity of all cells in the anndata object.

    seed_obj : np.random._generator.Generator
        The seed object used to randomly sample non-target samples.

    other_factor : float
        The ratio of cells to sample for the other class for each
        model. For example, if classifying B cells with 100 B cells in
        training, if `other_factor=1`, 100 cells that are not B cells
        will be trained on with the B cells.

    Returns
    -------
    train_idx : dict
        Keys are cell classes and values are the train indices to
        train scmkl that include both target and non-target samples.
        Classes with no training samples map to an empty array.
    """
    labels = np.asarray(cell_labels)
    train_indices = np.asarray(train_indices)
    train_labels = labels[train_indices]

    train_idx = dict()

    # np.unique gives a sorted, deterministic order so that draws from
    # `seed_obj` are reproducible (set order depends on string hashing)
    for lab in np.unique(labels):
        # Mapping back to AnnData indices, not positions in the subset
        target_pos = train_indices[train_labels == lab]
        other_pos = np.setdiff1d(train_indices, target_pos)

        # Cannot sample more non-target cells than are available
        n_samples = min(int(other_factor*target_pos.shape[0]),
                        other_pos.shape[0])

        other_pos = seed_obj.choice(other_pos, n_samples, replace=False)

        train_idx[lab] = np.concatenate([target_pos, other_pos])

    return train_idx


def one_v_rest(adatas : list, names : list, alpha_list : np.ndarray,
               tfidf : list, batches: int=10, batch_size: int=100,
               force_balance: bool=False, other_factor: float=1.0) -> dict:
    """
    For each cell class, creates model(s) comparing that class to all
    others. Then, predicts on the testing data using `scmkl.run()`.
    Only labels in both training and testing will be run.

    Parameters
    ----------
    adatas : list[AnnData]
        List of `ad.AnnData` objects created by `create_adata()`
        where each `ad.AnnData` is one modality and composed of both
        training and testing samples. Requires that `'train_indices'`
        and `'test_indices'` are the same between all `ad.AnnData`s.

    names : list[str]
        String variables that describe each modality respective to
        `adatas` for labeling.

    alpha_list : np.ndarray | float
        An array of alpha values to create each model with or a float
        to run with a single alpha.

    tfidf : list[bool]
        If element `i` is `True`, `adatas[i]` will be TF-IDF
        normalized.

    batches : int
        The number of batches to use for the distance calculation.
        This will average the result of `batches` distance calculations
        of `batch_size` randomly sampled cells. More batches will
        converge to population distance values at the cost of
        scalability.

    batch_size : int
        The number of cells to include per batch for distance
        calculations. Higher batch size will converge to population
        distance values at the cost of scalability.
        If `batches*batch_size > num_training_cells`,
        `batch_size` will be reduced to
        `int(num_training_cells / batches)`.

    force_balance : bool
        If `True`, training sets will be balanced to reduce class label
        imbalance. Defaults to `False`.

    other_factor : float
        The ratio of cells to sample for the other class for each
        model. For example, if classifying B cells with 100 B cells in
        training, if `other_factor=1`, 100 cells that are not B cells
        will be trained on with the B cells.

    Returns
    -------
    results : dict
        Contains keys for each cell class with results from cell class
        versus all other samples. See `scmkl.run()` for further
        details. Also includes `'Per_model_summary'`, `'Classes'`,
        `'Probability_table'`, `'Predicted_class'`, `'Truth_labels'`,
        `'Low_confidence'`, `'Macro_F1-Score'` and, when
        `force_balance` is `True`, `'Training_indices'`.

    Examples
    --------
    >>> adata = scmkl.create_adata(X = data_mat,
    ...                            feature_names = gene_names,
    ...                            group_dict = group_dict)
    >>>
    >>> results = scmkl.one_v_rest(adatas = [adata], names = ['rna'],
    ...                           alpha_list = np.array([0.05, 0.1]),
    ...                           tfidf = [False])
    >>>
    >>> results.keys()
    dict_keys(['B cells', 'Monocytes', 'Dendritic cells', ...])
    """
    # Formatting checks ensuring all adata elements are
    # AnnData objects and train/test indices are all the same
    _check_adatas(adatas, check_obs = True, check_uns = True)

    # Extracting train and test indices
    train_indices = adatas[0].uns['train_indices']
    test_indices = adatas[0].uns['test_indices']

    # Checking and capturing cell labels; converted to an array so the
    # integer indices are applied positionally
    uniq_labels = _eval_labels(
        cell_labels = np.asarray(adatas[0].obs['labels']),
        train_indices = train_indices,
        test_indices = test_indices)

    # Calculating Z matrices, method depends on whether there are multiple
    # adatas (modalities)
    if (len(adatas) == 1) and ('Z_train' not in adatas[0].uns.keys()):
        # `adata` is not bound yet here; the single modality is adatas[0]
        adata = calculate_z(adatas[0], n_features = 5000,
                            batches=batches, batch_size=batch_size)
    elif len(adatas) > 1:
        adata = multimodal_processing(adatas = adatas,
                                      names = names,
                                      tfidf = tfidf,
                                      batches=batches,
                                      batch_size=batch_size)
    else:
        adata = adatas[0].copy()

    del adatas
    gc.collect()

    # Initializing for capturing model outputs
    results = dict()

    # Capturing cell labels before overwriting
    cell_labels = np.array(adata.obs['labels'].copy())

    # Capturing balanced training indices for each class
    if force_balance:
        train_idx = get_class_train(adata.uns['train_indices'],
                                    cell_labels,
                                    adata.uns['seed_obj'],
                                    other_factor)

    for label in uniq_labels:

        print(f"Comparing {label} to other types", flush = True)
        cur_labels = cell_labels.copy()
        cur_labels[cell_labels != label] = 'other'

        # Replacing cell labels for current cell type vs rest
        adata.obs['labels'] = cur_labels

        if force_balance:
            adata.uns['train_indices'] = train_idx[label]

        # Running scMKL
        results[label] = run(adata, alpha_list, return_probs = True)

    # Getting final predictions at the smallest alpha
    alpha = np.min(alpha_list)
    prob_table, pred_class, low_conf = get_prob_table(results, alpha)
    truth_labels = cell_labels[adata.uns['test_indices']]
    macro_f1 = f1_score(truth_labels, pred_class, average='macro')

    model_summary = per_model_summary(results, uniq_labels, alpha)

    results['Per_model_summary'] = model_summary
    results['Classes'] = uniq_labels
    results['Probability_table'] = prob_table
    results['Predicted_class'] = pred_class
    results['Truth_labels'] = truth_labels
    results['Low_confidence'] = low_conf
    results['Macro_F1-Score'] = macro_f1

    if force_balance:
        results['Training_indices'] = train_idx

    return results
def
get_prob_table(results: dict, alpha: float):
def get_prob_table(results : dict, alpha: float):
    """
    Takes a results dictionary with class and probabilities keys and
    returns a table of probabilities for each class and the most
    probable class for each cell.

    Parameters
    ----------
    results : dict
        A nested dictionary that contains a dictionary for each class
        containing probabilities for each cell class. Entries that are
        not dictionaries with a `'Probabilities'` key (such as the
        summary entries added by `scmkl.one_v_rest()`) are ignored.

    alpha : float
        A float for which model probabilities should be evaluated
        for.

    Returns
    -------
    prob_table : pd.DataFrame
        Each column is a cell class and the elements are the
        class probability outputs from the model.

    pred_class : list[str]
        The most probable cell class for each row of `prob_table`
        (the test cells when called from `scmkl.one_v_rest()`). Ties
        are reported as the tied class names joined by `' and '`.

    low_conf : np.ndarray
        A bool array where `True`, sample max probability is less than
        0.5.
    """
    class_probs = {
        class_: res['Probabilities'][alpha][class_]
        for class_, res in results.items()
        if isinstance(res, dict) and 'Probabilities' in res
    }
    prob_table = pd.DataFrame(class_probs)

    # Nothing to reduce over when there are no rows or no columns
    if prob_table.empty:
        return prob_table, [], np.array([], dtype=np.bool_)

    values = prob_table.to_numpy(dtype=float)
    row_maxes = values.max(axis=1)
    is_max = values == row_maxes[:, np.newaxis]
    class_names = prob_table.columns.to_numpy()

    pred_class = []
    for mask in is_max:
        winners = class_names[mask]
        if len(winners) > 1:
            pred_class.append(" and ".join(winners))
        else:
            pred_class.append(winners[0])

    # Direct comparison; rounding would also flag a max of exactly 0.5
    low_conf = row_maxes < 0.5

    return prob_table, pred_class, low_conf
Takes a results dictionary with class and probabilities keys and returns a table of probabilities for each class and the most probable class for each cell.
Parameters
- results (dict): A nested dictionary that contains a dictionary for each class containing probabilities for each cell class.
- alpha (float): A float for which model probabilities should be evaluated for.
Returns
- prob_table (pd.DataFrame): Each column is a cell class and the elements are the class probability outputs from the model.
- pred_class (list[str]): The most probable cell class for each cell in the probability table (the test set cells when called from scmkl.one_v_rest).
- low_conf (list[bool]):
A bool list where
True
, sample max probability is less than 0.5.
def
per_model_summary( results: dict, uniq_labels: numpy.ndarray | list | tuple, alpha: float) -> pandas.core.frame.DataFrame:
112def per_model_summary(results: dict, uniq_labels: np.ndarray | list | tuple, 113 alpha: float) -> pd.DataFrame: 114 """ 115 Takes the results dictionary from `scmkl.one_v_rest()` and adds a 116 summary dataframe show metrics for each model generated from the 117 runs. 118 119 Parameters 120 ---------- 121 results : dict 122 Results from `scmkl.one_v_rest()`. 123 124 uniq_labels : array_like 125 Unique cell classes from the runs. 126 127 alpha : float 128 The alpha for creating the summary from. 129 130 Returns 131 ------- 132 summary_df : pd.DataFrame 133 Dataframe with classes on rows and metrics as cols. 134 """ 135 # Getting metrics availible in results 136 avail_mets = list(results[uniq_labels[0]]['Metrics'][alpha]) 137 138 summary_df = {metric : list() 139 for metric in avail_mets} 140 summary_df['Class'] = uniq_labels 141 142 for lab in summary_df['Class']: 143 for met in avail_mets: 144 val = results[lab]['Metrics'][alpha][met] 145 summary_df[met].append(val) 146 147 return pd.DataFrame(summary_df)
Takes the results dictionary from scmkl.one_v_rest
and builds a
summary dataframe showing metrics for each model generated from the
runs.
Parameters
- results (dict):
Results from
scmkl.one_v_rest
. - uniq_labels (array_like): Unique cell classes from the runs.
- alpha (float): The alpha for creating the summary from.
Returns
- summary_df (pd.DataFrame): Dataframe with classes on rows and metrics as cols.
def
get_class_train( train_indices: numpy.ndarray, cell_labels: numpy.ndarray | list | pandas.core.series.Series, seed_obj: numpy.random._generator.Generator, other_factor=1.5):
150def get_class_train(train_indices: np.ndarray, 151 cell_labels: np.ndarray | list | pd.Series, 152 seed_obj: np.random._generator.Generator, 153 other_factor = 1.5): 154 """ 155 This function returns a dict with each entry being a set of 156 training indices for each cell class to be used in 157 `scmkl.one_v_rest()`. 158 159 Parameters 160 ---------- 161 train_indices : np.ndarray 162 The indices in the `ad.AnnData` object of samples availible to 163 train on. 164 165 cell_labels : array_like 166 The identity of all cells in the anndata object. 167 168 seed_obj : np.random._generator.Generator 169 The seed object used to randomly sample non-target samples. 170 171 other_factor : float 172 The ratio of cells to sample for the other class for each 173 model. For example, if classifying B cells with 100 B cells in 174 training, if `other_factor=1`, 100 cells that are not B cells 175 will be trained on with the B cells. 176 177 Returns 178 ------- 179 train_idx : dict 180 Keys are cell classes and values are the train indices to 181 train scmkl that include both target and non-target samples. 182 """ 183 uniq_labels = set(cell_labels) 184 train_idx = dict() 185 186 for lab in uniq_labels: 187 target_pos = np.where(lab == cell_labels[train_indices])[0] 188 overlap = np.isin(target_pos, train_indices) 189 190 target_pos = target_pos[overlap] 191 other_pos = np.setdiff1d(train_indices, target_pos) 192 193 if (other_factor*target_pos.shape[0]) <= other_pos.shape[0]: 194 n_samples = int(other_factor*target_pos.shape[0]) 195 else: 196 n_samples = other_pos.shape[0] 197 198 other_pos = seed_obj.choice(other_pos, n_samples, False) 199 200 lab_train = np.concatenate([target_pos, other_pos]) 201 train_idx[lab] = lab_train.copy() 202 203 return train_idx
This function returns a dict with each entry being a set of
training indices for each cell class to be used in
scmkl.one_v_rest
.
Parameters
- train_indices (np.ndarray):
The indices in the
ad.AnnData
object of samples available to train on. - cell_labels (array_like): The identity of all cells in the anndata object.
- seed_obj (np.random._generator.Generator): The seed object used to randomly sample non-target samples.
- other_factor (float):
The ratio of cells to sample for the other class for each
model. For example, if classifying B cells with 100 B cells in
training, if
other_factor=1
, 100 cells that are not B cells will be trained on with the B cells.
Returns
- train_idx (dict): Keys are cell classes and values are the train indices to train scmkl that include both target and non-target samples.
def
one_v_rest( adatas: list, names: list, alpha_list: numpy.ndarray, tfidf: list, batches: int = 10, batch_size: int = 100, force_balance: bool = False, other_factor: float = 1.0) -> dict:
def one_v_rest(adatas : list, names : list, alpha_list : np.ndarray,
               tfidf : list, batches: int=10, batch_size: int=100,
               force_balance: bool=False, other_factor: float=1.0) -> dict:
    """
    For each cell class, creates model(s) comparing that class to all
    others. Then, predicts on the testing data using `scmkl.run()`.
    Only labels in both training and testing will be run.

    Parameters
    ----------
    adatas : list[AnnData]
        List of `ad.AnnData` objects created by `create_adata()`
        where each `ad.AnnData` is one modality and composed of both
        training and testing samples. Requires that `'train_indices'`
        and `'test_indices'` are the same between all `ad.AnnData`s.

    names : list[str]
        String variables that describe each modality respective to
        `adatas` for labeling.

    alpha_list : np.ndarray | float
        An array of alpha values to create each model with or a float
        to run with a single alpha.

    tfidf : list[bool]
        If element `i` is `True`, `adatas[i]` will be TF-IDF
        normalized.

    batches : int
        The number of batches to use for the distance calculation.
        This will average the result of `batches` distance calculations
        of `batch_size` randomly sampled cells. More batches will
        converge to population distance values at the cost of
        scalability.

    batch_size : int
        The number of cells to include per batch for distance
        calculations. Higher batch size will converge to population
        distance values at the cost of scalability.
        If `batches*batch_size > num_training_cells`,
        `batch_size` will be reduced to
        `int(num_training_cells / batches)`.

    force_balance : bool
        If `True`, training sets will be balanced to reduce class label
        imbalance. Defaults to `False`.

    other_factor : float
        The ratio of cells to sample for the other class for each
        model. For example, if classifying B cells with 100 B cells in
        training, if `other_factor=1`, 100 cells that are not B cells
        will be trained on with the B cells.

    Returns
    -------
    results : dict
        Contains keys for each cell class with results from cell class
        versus all other samples. See `scmkl.run()` for further
        details. Also includes `'Per_model_summary'`, `'Classes'`,
        `'Probability_table'`, `'Predicted_class'`, `'Truth_labels'`,
        `'Low_confidence'`, `'Macro_F1-Score'` and, when
        `force_balance` is `True`, `'Training_indices'`.

    Examples
    --------
    >>> adata = scmkl.create_adata(X = data_mat,
    ...                            feature_names = gene_names,
    ...                            group_dict = group_dict)
    >>>
    >>> results = scmkl.one_v_rest(adatas = [adata], names = ['rna'],
    ...                           alpha_list = np.array([0.05, 0.1]),
    ...                           tfidf = [False])
    >>>
    >>> results.keys()
    dict_keys(['B cells', 'Monocytes', 'Dendritic cells', ...])
    """
    # Formatting checks ensuring all adata elements are
    # AnnData objects and train/test indices are all the same
    _check_adatas(adatas, check_obs = True, check_uns = True)

    # Extracting train and test indices
    train_indices = adatas[0].uns['train_indices']
    test_indices = adatas[0].uns['test_indices']

    # Checking and capturing cell labels; converted to an array so the
    # integer indices are applied positionally
    uniq_labels = _eval_labels(
        cell_labels = np.asarray(adatas[0].obs['labels']),
        train_indices = train_indices,
        test_indices = test_indices)

    # Calculating Z matrices, method depends on whether there are multiple
    # adatas (modalities)
    if (len(adatas) == 1) and ('Z_train' not in adatas[0].uns.keys()):
        # `adata` is not bound yet here; the single modality is adatas[0]
        adata = calculate_z(adatas[0], n_features = 5000,
                            batches=batches, batch_size=batch_size)
    elif len(adatas) > 1:
        adata = multimodal_processing(adatas = adatas,
                                      names = names,
                                      tfidf = tfidf,
                                      batches=batches,
                                      batch_size=batch_size)
    else:
        adata = adatas[0].copy()

    del adatas
    gc.collect()

    # Initializing for capturing model outputs
    results = dict()

    # Capturing cell labels before overwriting
    cell_labels = np.array(adata.obs['labels'].copy())

    # Capturing balanced training indices for each class
    if force_balance:
        train_idx = get_class_train(adata.uns['train_indices'],
                                    cell_labels,
                                    adata.uns['seed_obj'],
                                    other_factor)

    for label in uniq_labels:

        print(f"Comparing {label} to other types", flush = True)
        cur_labels = cell_labels.copy()
        cur_labels[cell_labels != label] = 'other'

        # Replacing cell labels for current cell type vs rest
        adata.obs['labels'] = cur_labels

        if force_balance:
            adata.uns['train_indices'] = train_idx[label]

        # Running scMKL
        results[label] = run(adata, alpha_list, return_probs = True)

    # Getting final predictions at the smallest alpha
    alpha = np.min(alpha_list)
    prob_table, pred_class, low_conf = get_prob_table(results, alpha)
    truth_labels = cell_labels[adata.uns['test_indices']]
    macro_f1 = f1_score(truth_labels, pred_class, average='macro')

    model_summary = per_model_summary(results, uniq_labels, alpha)

    results['Per_model_summary'] = model_summary
    results['Classes'] = uniq_labels
    results['Probability_table'] = prob_table
    results['Predicted_class'] = pred_class
    results['Truth_labels'] = truth_labels
    results['Low_confidence'] = low_conf
    results['Macro_F1-Score'] = macro_f1

    if force_balance:
        results['Training_indices'] = train_idx

    return results
For each cell class, creates model(s) comparing that class to all
others. Then, predicts on the testing data using scmkl.run
.
Only labels in both training and testing will be run.
Parameters
- adatas (list[AnnData]):
List of
ad.AnnData
objects created by create_adata()
where each ad.AnnData
is one modality and composed of both training and testing samples. Requires that 'train_indices'
and 'test_indices'
are the same between all ad.AnnData
s. - names (list[str]):
String variables that describe each modality respective to
adatas
for labeling. - alpha_list (np.ndarray | float): An array of alpha values to create each model with or a float to run with a single alpha.
- tfidf (list[bool]):
If element
i
is True
, adatas[i]
will be TF-IDF normalized. - batches (int):
The number of batches to use for the distance calculation.
This will average the result of
batches
distance calculations of batch_size
randomly sampled cells. More batches will converge to population distance values at the cost of scalability. - batch_size (int):
The number of cells to include per batch for distance
calculations. Higher batch size will converge to population
distance values at the cost of scalability.
If
batches*batch_size > num_training_cells
, batch_size
will be reduced to int(num_training_cells / batches)
. - force_balance (bool):
If
True
, training sets will be balanced to reduce class label imbalance. Defaults to False
. - other_factor (float):
The ratio of cells to sample for the other class for each
model. For example, if classifying B cells with 100 B cells in
training, if
other_factor=1
, 100 cells that are not B cells will be trained on with the B cells.
Returns
- results (dict):
Contains keys for each cell class with results from cell class
versus all other samples. See
scmkl.run
for further details. Will also include a probabilities table with the predictions from each model.
Examples
>>> adata = scmkl.create_adata(X = data_mat,
... feature_names = gene_names,
... group_dict = group_dict)
>>>
>>> results = scmkl.one_v_rest(adatas = [adata], names = ['rna'],
... alpha_list = np.array([0.05, 0.1]),
... tfidf = [False])
>>>
>>> results.keys()
dict_keys(['B cells', 'Monocytes', 'Dendritic cells', ...])