scmkl.optimize_sparsity

import numpy as np
import anndata as ad

from scmkl.train_model import train_model
from scmkl.test import find_selected_groups


def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None,
                      starting_alpha=1.9, increment=0.2, target=1, n_iter=10):
    """
    Iteratively train a group lasso model and update alpha to find the
    parameter yielding the desired sparsity.

    Parameters
    ----------
    adata : ad.AnnData
        `ad.AnnData` with `'Z_train'` and `'Z_test'` in
        `adata.uns.keys()`.

    group_size : None | int
        Argument describing how the features are grouped. If `None`,
        `2 * adata.uns['D']` will be used. For more information see
        [celer documentation](https://mathurinm.github.io/celer/
        generated/celer.GroupLasso.html).

    starting_alpha : float
        The alpha value to start the search at.

    increment : float
        Amount to adjust alpha by between iterations.

    target : int
        The desired number of groups selected by the model.

    n_iter : int
        The maximum number of iterations to run.

    Returns
    -------
    sparsity_dict : dict
        Tested alpha values as keys and the number of selected groups
        as values.

    alpha : float
        The alpha value yielding the number of selected groups closest
        to the target.

    Examples
    --------
    >>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, 
    ...                                                target = 1)
    >>>
    >>> alpha
    0.01

    See Also
    --------
    celer.GroupLasso : https://mathurinm.github.io/celer/
    """
    assert increment > 0 and increment < starting_alpha, ("Choose a positive "
                                                          "increment less "
                                                          "than alpha")
    assert target > 0 and isinstance(target, int), ("Choose an integer "
                                                    "target number of groups "
                                                    "that is greater than 0")
    assert n_iter > 0 and isinstance(n_iter, int), ("Choose an integer "
                                                    "number of iterations "
                                                    "that is greater than 0")

    if group_size is None:
        group_size = adata.uns['D'] * 2

    sparsity_dict = {}
    alpha = starting_alpha

    for _ in np.arange(n_iter):
        adata = train_model(adata, group_size, alpha)
        num_selected = len(find_selected_groups(adata))

        sparsity_dict[np.round(alpha, 4)] = num_selected

        if num_selected < target:
            # Decreasing alpha will increase the number of selected groups
            if np.round(alpha - increment, 4) in sparsity_dict:
                # Make the increment smaller so the search cannot bounce
                # back and forth between the same alpha values
                increment /= 2
            # Ensures that alpha will never be negative
            alpha = np.max([alpha - increment, 1e-3])

        elif num_selected > target:
            if np.round(alpha + increment, 4) in sparsity_dict:
                increment /= 2

            alpha += increment

        elif num_selected == target:
            break

    # Find the alpha that minimizes the difference between the target and
    # observed number of selected groups
    spar_idx = np.argmin([np.abs(selected - target)
                          for selected in sparsity_dict.values()])
    optimal_alpha = list(sparsity_dict.keys())[spar_idx]

    return sparsity_dict, optimal_alpha
def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None, starting_alpha=1.9, increment=0.2, target=1, n_iter=10):

Iteratively train a group lasso model and update alpha to find the parameter yielding the desired sparsity.

Parameters
  • adata (ad.AnnData): ad.AnnData with 'Z_train' and 'Z_test' in adata.uns.keys().
  • group_size (None | int): Argument describing how the features are grouped. If None, 2 * adata.uns['D'] will be used. For more information see celer documentation.
  • starting_alpha (float): The alpha value to start the search at.
  • increment (float): Amount to adjust alpha by between iterations.
  • target (int): The desired number of groups selected by the model.
  • n_iter (int): The maximum number of iterations to run.
Returns
  • sparsity_dict (dict): Tested alpha values as keys and the number of selected groups as values.
  • alpha (float): The alpha value yielding the number of selected groups closest to the target.
Examples
>>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, 
...                                                target = 1)
>>>
>>> alpha
0.01
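
The chosen alpha can be fed straight back into model training. A sketch of that follow-up step, assuming `adata` was prepared as above (so `'Z_train'` and `'Z_test'` are in `adata.uns`) and that `train_model` is exported at the package level as `scmkl.train_model`:

>>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, target = 1)
>>> group_size = 2 * adata.uns['D']   # the default used during the search
>>> # same call made inside the search loop, now at the chosen alpha
>>> adata = scmkl.train_model(adata, group_size, alpha)
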
See Also

celer.GroupLasso: https://mathurinm.github.io/celer/
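
The search strategy itself is independent of scmkl: it is a small adaptive walk over alpha that halves its step whenever it would revisit a value, then reports the alpha whose selected-group count came closest to the target. A minimal, self-contained sketch of that logic (not part of scmkl), where `count_selected` is a toy stand-in for fitting the group lasso and counting the groups it keeps:

import numpy as np

def search_alpha(count_selected, target=1, starting_alpha=1.9,
                 increment=0.2, n_iter=10):
    # `count_selected` maps an alpha value to the number of groups a
    # model would select at that penalty strength.
    sparsity_dict = {}
    alpha = starting_alpha

    for _ in range(n_iter):
        num_selected = count_selected(alpha)
        sparsity_dict[round(alpha, 4)] = num_selected

        if num_selected < target:
            # Too sparse: relax the penalty. Halve the step if the next
            # alpha was already visited, so the search cannot oscillate.
            if round(alpha - increment, 4) in sparsity_dict:
                increment /= 2
            alpha = max(alpha - increment, 1e-3)
        elif num_selected > target:
            # Not sparse enough: strengthen the penalty.
            if round(alpha + increment, 4) in sparsity_dict:
                increment /= 2
            alpha += increment
        else:
            break

    # Report the alpha whose group count landed closest to the target
    best = np.argmin([abs(n - target) for n in sparsity_dict.values()])
    return sparsity_dict, list(sparsity_dict)[best]

# Monotone toy model: fewer groups survive as the penalty grows
history, best_alpha = search_alpha(lambda a: max(0, int(5 - 3 * a)), target=1)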