scmkl.optimize_sparsity

import numpy as np
import anndata as ad

from scmkl.train_model import train_model
from scmkl.test import find_selected_groups


def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None,
                      starting_alpha=1.9, increment=0.2, target=1, n_iter=10):
    """
    Iteratively train a group lasso model and update alpha to find the
    parameter yielding the desired sparsity.

    Parameters
    ----------
    adata : ad.AnnData
        `ad.AnnData` with `'Z_train'` and `'Z_test'` in
        `adata.uns.keys()`.

    group_size : None | int
        Argument describing how the features are grouped. If `None`,
        `2 * adata.uns['D']` will be used. For more information see
        [celer documentation](https://mathurinm.github.io/celer/
        generated/celer.GroupLasso.html).

    starting_alpha : float
        The alpha value to start the search at.

    increment : float
        Amount to adjust alpha by between iterations.

    target : int
        The desired number of groups selected by the model.

    n_iter : int
        The maximum number of iterations to run.

    Returns
    -------
    sparsity_dict : dict
        Tested alpha values as keys and the number of selected groups
        as values.

    alpha : float
        The alpha value yielding the number of selected groups closest
        to the target.

    Examples
    --------
    >>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, 
    ...                                                target = 1)
    >>>
    >>> alpha
    0.01

    See Also
    --------
    celer.GroupLasso : https://mathurinm.github.io/celer/
    """
    assert increment > 0 and increment < starting_alpha, ("Choose a positive "
                                                          "increment less "
                                                          "than alpha")
    assert target > 0 and isinstance(target, int), ("Choose an integer "
                                                    "target number of groups "
                                                    "that is greater than 0")
    assert n_iter > 0 and isinstance(n_iter, int), ("Choose an integer "
                                                    "number of iterations "
                                                    "that is greater than 0")

    if group_size is None:
        group_size = adata.uns['D'] * 2

    sparsity_dict = {}
    alpha = starting_alpha

    for _ in np.arange(n_iter):
        adata = train_model(adata, group_size, alpha)
        num_selected = len(find_selected_groups(adata))

        sparsity_dict[np.round(alpha, 4)] = num_selected

        if num_selected < target:
            # Decreasing alpha will increase the number of selected groups
            if np.round(alpha - increment, 4) in sparsity_dict:
                # Make the increment smaller so the search cannot bounce
                # back and forth between the same alpha values
                increment /= 2
            # Ensures that alpha will never be negative
            alpha = np.max([alpha - increment, 1e-3])

        elif num_selected > target:
            if np.round(alpha + increment, 4) in sparsity_dict:
                increment /= 2

            alpha += increment

        elif num_selected == target:
            break

    # Find the alpha that minimizes the difference between the target and
    # observed number of selected groups
    spar_idx = np.argmin([np.abs(selected - target)
                          for selected in sparsity_dict.values()])
    optimal_alpha = list(sparsity_dict.keys())[spar_idx]

    return sparsity_dict, optimal_alpha
def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None, starting_alpha=1.9, increment=0.2, target=1, n_iter=10):

Iteratively train a group lasso model and update alpha to find the parameter yielding the desired sparsity.

Parameters
  • adata (ad.AnnData): ad.AnnData with 'Z_train' and 'Z_test' in adata.uns.keys().
  • group_size (None | int): Argument describing how the features are grouped. If None, 2 * adata.uns['D'] will be used. For more information see celer documentation.
  • starting_alpha (float): The alpha value to start the search at.
  • increment (float): Amount to adjust alpha by between iterations.
  • target (int): The desired number of groups selected by the model.
  • n_iter (int): The maximum number of iterations to run.
Returns
  • sparsity_dict (dict): Tested alpha values as keys and the number of selected groups as values.
  • alpha (float): The alpha value yielding the number of selected groups closest to the target.
Examples
>>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, 
...                                                target = 1)
>>>
>>> alpha
0.01
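
The chosen alpha can be fed straight back into model training. A sketch of that follow-up step, assuming `adata` was prepared as above (so `'Z_train'` and `'Z_test'` are in `adata.uns`) and that `train_model` is exported at the package level as `scmkl.train_model`:

>>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata, target = 1)
>>> group_size = 2 * adata.uns['D']   # the default used during the search
>>> # same call made inside the search loop, now at the chosen alpha
>>> adata = scmkl.train_model(adata, group_size, alpha)
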
See Also

celer.GroupLasso: https://mathurinm.github.io/celer/
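
The search strategy itself is independent of scmkl: it is a small adaptive walk over alpha that halves its step whenever it would revisit a value, then reports the alpha whose selected-group count came closest to the target. A minimal, self-contained sketch of that logic (not part of scmkl), where `count_selected` is a toy stand-in for fitting the group lasso and counting the groups it keeps:

import numpy as np

def search_alpha(count_selected, target=1, starting_alpha=1.9,
                 increment=0.2, n_iter=10):
    # `count_selected` maps an alpha value to the number of groups a
    # model would select at that penalty strength.
    sparsity_dict = {}
    alpha = starting_alpha

    for _ in range(n_iter):
        num_selected = count_selected(alpha)
        sparsity_dict[round(alpha, 4)] = num_selected

        if num_selected < target:
            # Too sparse: relax the penalty. Halve the step if the next
            # alpha was already visited, so the search cannot oscillate.
            if round(alpha - increment, 4) in sparsity_dict:
                increment /= 2
            alpha = max(alpha - increment, 1e-3)
        elif num_selected > target:
            # Not sparse enough: strengthen the penalty.
            if round(alpha + increment, 4) in sparsity_dict:
                increment /= 2
            alpha += increment
        else:
            break

    # Report the alpha whose group count landed closest to the target
    best = np.argmin([abs(n - target) for n in sparsity_dict.values()])
    return sparsity_dict, list(sparsity_dict)[best]

# Monotone toy model: fewer groups survive as the penalty grows
history, best_alpha = search_alpha(lambda a: max(0, int(5 - 3 * a)), target=1)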