scmkl.train_model

 1import numpy as np
 2import celer
 3import anndata as ad
 4
 5
 6def train_model(adata: ad.AnnData, group_size: int | None=None, alpha:float=0.9):
 7    """
 8    Fit a grouplasso model to the provided data.
 9
10    Parameters
11    ----------
12    adata : ad.AnnData 
13        Has `'Z_train'` and `'Z_test'` keys in `.uns.keys()`.
14
15    group_size : None | int
16        Argument describing how the features are grouped. If `None`, 
17        `2 * adata.uns['D']` will be used. For more information see 
18        [celer documentation](https://mathurinm.github.io/celer/
19        generated/celer.GroupLasso.html).
20            
21    alpha : float
22        Group Lasso regularization coefficient, is a floating point 
23        value controlling model solution sparsity. Must be a positive 
24        float. The smaller the value, the more feature groups will be 
25        selected in the trained model.
26    
27    Returns
28    -------
29    adata : ad.AnnData 
30        Trained model accessible with `adata.uns['model']`.
31
32    Examples
33    --------
34    >>> adata = scmkl.estimate_sigma(adata)
35    >>> adata = scmkl.calculate_z(adata)
36    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
37    ...            'Recall']
38    >>> d = scmkl.calculate_d(adata.shape[0])
39    >>> group_size = 2 * d
40    >>> adata = scmkl.train_model(adata, group_size)
41    >>>
42    >>> 'model' in adata.uns.keys()
43    True
44
45    See Also
46    --------
47    celer :
48        https://mathurinm.github.io/celer/generated/celer.GroupLasso.html
49    """
50    assert alpha > 0, 'Alpha must be positive'
51
52    if group_size is None:
53        group_size = 2*adata.uns['D']
54
55    y_train = adata.obs['labels'].iloc[adata.uns['train_indices']]
56    X_train = adata.uns['Z_train'][adata.uns['train_indices']]
57
58    cell_labels = np.unique(y_train)
59
60    # This is a regression algorithm. We need to make the labels 'continuous' 
61    # for classification, but they will remain binary. Casts training labels 
62    # to array of -1,1
63    train_labels = np.ones(y_train.shape)
64    train_labels[y_train == cell_labels[1]] = -1
65
66    # Alphamax is a calculation to regularize the effect of alpha across 
67    # different data sets
68    alphamax = np.max(np.abs(X_train.T.dot(train_labels)))
69    alphamax /= X_train.shape[0] 
70    alphamax *= alpha
71
72    # Instantiate celer Group Lasso Regression Model Object
73    model = celer.GroupLasso(groups = group_size, alpha = alphamax)
74
75    # Fit model using training data
76    model.fit(X_train, train_labels.ravel())
77
78    adata.uns['model'] = model
79    return adata
def train_model( adata: anndata._core.anndata.AnnData, group_size: int | None = None, alpha: float = 0.9):
 7def train_model(adata: ad.AnnData, group_size: int | None=None, alpha:float=0.9):
 8    """
 9    Fit a grouplasso model to the provided data.
10
11    Parameters
12    ----------
13    adata : ad.AnnData 
14        Has `'Z_train'` and `'Z_test'` keys in `.uns.keys()`.
15
16    group_size : None | int
17        Argument describing how the features are grouped. If `None`, 
18        `2 * adata.uns['D']` will be used. For more information see 
19        [celer documentation](https://mathurinm.github.io/celer/
20        generated/celer.GroupLasso.html).
21            
22    alpha : float
23        Group Lasso regularization coefficient, is a floating point 
24        value controlling model solution sparsity. Must be a positive 
25        float. The smaller the value, the more feature groups will be 
26        selected in the trained model.
27    
28    Returns
29    -------
30    adata : ad.AnnData 
31        Trained model accessible with `adata.uns['model']`.
32
33    Examples
34    --------
35    >>> adata = scmkl.estimate_sigma(adata)
36    >>> adata = scmkl.calculate_z(adata)
37    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
38    ...            'Recall']
39    >>> d = scmkl.calculate_d(adata.shape[0])
40    >>> group_size = 2 * d
41    >>> adata = scmkl.train_model(adata, group_size)
42    >>>
43    >>> 'model' in adata.uns.keys()
44    True
45
46    See Also
47    --------
48    celer :
49        https://mathurinm.github.io/celer/generated/celer.GroupLasso.html
50    """
51    assert alpha > 0, 'Alpha must be positive'
52
53    if group_size is None:
54        group_size = 2*adata.uns['D']
55
56    y_train = adata.obs['labels'].iloc[adata.uns['train_indices']]
57    X_train = adata.uns['Z_train'][adata.uns['train_indices']]
58
59    cell_labels = np.unique(y_train)
60
61    # This is a regression algorithm. We need to make the labels 'continuous' 
62    # for classification, but they will remain binary. Casts training labels 
63    # to array of -1,1
64    train_labels = np.ones(y_train.shape)
65    train_labels[y_train == cell_labels[1]] = -1
66
67    # Alphamax is a calculation to regularize the effect of alpha across 
68    # different data sets
69    alphamax = np.max(np.abs(X_train.T.dot(train_labels)))
70    alphamax /= X_train.shape[0] 
71    alphamax *= alpha
72
73    # Instantiate celer Group Lasso Regression Model Object
74    model = celer.GroupLasso(groups = group_size, alpha = alphamax)
75
76    # Fit model using training data
77    model.fit(X_train, train_labels.ravel())
78
79    adata.uns['model'] = model
80    return adata

Fit a Group Lasso model to the provided data.

Parameters
  • adata (ad.AnnData): Has 'Z_train' and 'Z_test' keys in .uns.keys().
  • group_size (None | int): Argument describing how the features are grouped. If None, 2 * adata.uns['D'] will be used. For more information see celer documentation.
  • alpha (float): Group Lasso regularization coefficient: a floating point value controlling the sparsity of the model solution. Must be a positive float. The smaller the value, the more feature groups will be selected in the trained model.
Returns
  • adata (ad.AnnData): Trained model accessible with adata.uns['model'].
Examples
>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
...            'Recall']
>>> d = scmkl.calculate_d(adata.shape[0])
>>> group_size = 2 * d
>>> adata = scmkl.train_model(adata, group_size)
>>>
>>> 'model' in adata.uns.keys()
True
See Also

celer: https://mathurinm.github.io/celer/generated/celer.GroupLasso.html