scmkl.train_model

View Source

 1import numpy as np
 2import celer
 3
 4
 5def train_model(adata, group_size = 1, alpha = 0.9):
 6    '''
 7    Fit a grouplasso model to the provided data.
 8
 9    Parameters
10    ----------
11    **adata** : *AnnData* 
12        > Has `'Z_train'` and `'Z_test'` keys in `adata.uns`.
13
14    **group_size** : *int* 
15        > Argument describing how the features are grouped. Should be
16        `2 * D`. For more information see celer documentation. 
17            
18    **alpha** : *float*
19        > Group Lasso regularization coefficient. alpha is a floating 
20        point value controlling model solution sparsity. Must be a 
21        positive float. The smaller the value, the more feature groups 
22        will be selected in the trained model.
23    
24    Returns
25    -------
26    **adata** : *AnnData* 
27        > Trained model accessible with `adata.uns['model']`.
28
29    Examples
30    --------
31    >>> adata = scmkl.estimate_sigma(adata)
32    >>> adata = scmkl.calculate_z(adata)
33    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
34    ...            'Recall']
35    >>> d = scmkl.calculate_d(adata.shape[0])
36    >>> group_size = 2 * d
37    >>> adata = scmkl.train_model(adata, group_size)
38    >>>
39    >>> 'model' in adata.uns.keys()
40    True
41
42    See Also
43    --------
44    celer documentation :
45    https://mathurinm.github.io/celer/generated/celer.GroupLasso.html
46    '''
47    assert alpha > 0, 'Alpha must be positive'
48
49    y_train = adata.obs['labels'].iloc[adata.uns['train_indices']]
50    X_train = adata.uns['Z_train']
51
52    cell_labels = np.unique(y_train)
53
54    # This is a regression algorithm. We need to make the labels 'continuous' 
55    # for classification, but they will remain binary. Casts training labels 
56    # to array of -1,1
57    train_labels = np.ones(y_train.shape)
58    train_labels[y_train == cell_labels[1]] = -1
59
60    # Alphamax is a calculation to regularize the effect of alpha across 
61    # different data sets
62    alphamax = np.max(np.abs(X_train.T.dot(train_labels)))
63    alphamax /= X_train.shape[0] 
64    alphamax *= alpha
65
66    # Instantiate celer Group Lasso Regression Model Object
67    model = celer.GroupLasso(groups = group_size, alpha = alphamax)
68
69    # Fit model using training data
70    model.fit(X_train, train_labels.ravel())
71
72    adata.uns['model'] = model
73    return adata

def train_model(adata, group_size=1, alpha=0.9): View Source

 6def train_model(adata, group_size = 1, alpha = 0.9):
 7    '''
 8    Fit a grouplasso model to the provided data.
 9
10    Parameters
11    ----------
12    **adata** : *AnnData* 
13        > Has `'Z_train'` and `'Z_test'` keys in `adata.uns`.
14
15    **group_size** : *int* 
16        > Argument describing how the features are grouped. Should be
17        `2 * D`. For more information see celer documentation. 
18            
19    **alpha** : *float*
20        > Group Lasso regularization coefficient. alpha is a floating 
21        point value controlling model solution sparsity. Must be a 
22        positive float. The smaller the value, the more feature groups 
23        will be selected in the trained model.
24    
25    Returns
26    -------
27    **adata** : *AnnData* 
28        > Trained model accessible with `adata.uns['model']`.
29
30    Examples
31    --------
32    >>> adata = scmkl.estimate_sigma(adata)
33    >>> adata = scmkl.calculate_z(adata)
34    >>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
35    ...            'Recall']
36    >>> d = scmkl.calculate_d(adata.shape[0])
37    >>> group_size = 2 * d
38    >>> adata = scmkl.train_model(adata, group_size)
39    >>>
40    >>> 'model' in adata.uns.keys()
41    True
42
43    See Also
44    --------
45    celer documentation :
46    https://mathurinm.github.io/celer/generated/celer.GroupLasso.html
47    '''
48    assert alpha > 0, 'Alpha must be positive'
49
50    y_train = adata.obs['labels'].iloc[adata.uns['train_indices']]
51    X_train = adata.uns['Z_train']
52
53    cell_labels = np.unique(y_train)
54
55    # This is a regression algorithm. We need to make the labels 'continuous' 
56    # for classification, but they will remain binary. Casts training labels 
57    # to array of -1,1
58    train_labels = np.ones(y_train.shape)
59    train_labels[y_train == cell_labels[1]] = -1
60
61    # Alphamax is a calculation to regularize the effect of alpha across 
62    # different data sets
63    alphamax = np.max(np.abs(X_train.T.dot(train_labels)))
64    alphamax /= X_train.shape[0] 
65    alphamax *= alpha
66
67    # Instantiate celer Group Lasso Regression Model Object
68    model = celer.GroupLasso(groups = group_size, alpha = alphamax)
69
70    # Fit model using training data
71    model.fit(X_train, train_labels.ravel())
72
73    adata.uns['model'] = model
74    return adata

Fit a grouplasso model to the provided data.

Parameters

adata : AnnData

Has 'Z_train' and 'Z_test' keys in adata.uns. Training additionally reads adata.uns['train_indices'] and adata.obs['labels'].

group_size : int

Argument describing how the features are grouped. Should be 2 * D. For more information see celer documentation.

alpha : float

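Group Lasso regularization coefficient. alpha is a floating point value controlling model solution sparsity. Must be a positive float. The smaller the value, the more feature groups will be selected in the trained model. The value is multiplied by a data-derived maximum (the largest absolute inner product between a feature column and the training labels, divided by the number of training rows) before it is passed to celer.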

Returns

adata : AnnData

Trained model accessible with adata.uns['model'].

Examples

>>> adata = scmkl.estimate_sigma(adata)
>>> adata = scmkl.calculate_z(adata)
>>> metrics = ['AUROC', 'F1-Score', 'Accuracy', 'Precision', 
...            'Recall']
>>> d = scmkl.calculate_d(adata.shape[0])
>>> group_size = 2 * d
>>> adata = scmkl.train_model(adata, group_size)
>>>
>>> 'model' in adata.uns.keys()
True