scmkl.optimize_sparsity
import numpy as np
import anndata as ad

from scmkl.train_model import train_model
from scmkl.test import find_selected_groups


def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None,
                      starting_alpha=1.9, increment=0.2, target=1,
                      n_iter=10):
    """
    Iteratively train a grouplasso model and update alpha to find the
    parameter yielding the desired sparsity.

    Parameters
    ----------
    adata : ad.AnnData
        `ad.AnnData` with `'Z_train'` and `'Z_test'` in
        `adata.uns.keys()`.

    group_size : None | int
        Argument describing how the features are grouped. If `None`,
        `2 * adata.uns['D']` will be used. For more information see
        [celer documentation](https://mathurinm.github.io/celer/generated/celer.GroupLasso.html).

    starting_alpha : float
        The alpha value to start the search at.

    increment : float
        Amount to adjust alpha by between iterations. It is halved
        whenever the next step would land on an alpha that has already
        been tested.

    target : int
        The desired number of groups selected by the model.

    n_iter : int
        The maximum number of iterations to run.

    Returns
    -------
    sparsity_dict : dict
        Tested alpha (rounded to 4 decimals) as keys and the number of
        selected groups as the values.

    alpha : float
        The alpha value yielding the number of selected groups closest
        to the target. On ties, the alpha tested first is returned.

    Examples
    --------
    >>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata,
    ...                                                target = 1)
    >>>
    >>> alpha
    0.01

    See Also
    --------
    celer.GroupLasso : https://mathurinm.github.io/celer/
    """
    # Validation stays as `assert` so existing callers keep seeing
    # AssertionError. The type check runs first so a non-numeric argument
    # fails with the intended message instead of a TypeError.
    assert 0 < increment < starting_alpha, ("Choose a positive "
                                            "increment less "
                                            "than alpha")
    assert isinstance(target, int) and target > 0, ("Choose an integer "
                                                    "target number of groups "
                                                    "that is greater than 0")
    assert isinstance(n_iter, int) and n_iter > 0, ("Choose an integer "
                                                    "number of iterations "
                                                    "that is greater than 0")

    if group_size is None:
        group_size = adata.uns['D'] * 2

    sparsity_dict = {}
    alpha = starting_alpha
    step = increment

    for _ in range(n_iter):
        adata = train_model(adata, group_size, alpha)
        num_selected = len(find_selected_groups(adata))

        sparsity_dict[np.round(alpha, 4)] = num_selected

        if num_selected == target:
            break

        if num_selected < target:
            # Decreasing alpha will increase the number of selected pathways.
            # The candidate is rounded exactly like the stored keys;
            # comparing the raw float (e.g. 2.1 - 0.2 == 1.9000000000000001)
            # would miss the key 1.9 and let the search oscillate.
            if np.round(alpha - step, 4) in sparsity_dict:
                # Make the step smaller so the model can't go back and
                # forth between alpha values
                step /= 2
            # Ensures that alpha will never be negative
            alpha = max(alpha - step, 1e-3)
        else:
            if np.round(alpha + step, 4) in sparsity_dict:
                step /= 2
            alpha += step

    # Find the alpha that minimizes the difference between target and
    # observed number of selected groups (first tested alpha wins ties)
    optimal_alpha = min(sparsity_dict,
                        key=lambda tested: abs(sparsity_dict[tested] - target))

    return sparsity_dict, optimal_alpha
def
optimize_sparsity( adata: anndata._core.anndata.AnnData, group_size: int | None = None, starting_alpha=1.9, increment=0.2, target=1, n_iter=10):
def optimize_sparsity(adata: ad.AnnData, group_size: int | None = None,
                      starting_alpha=1.9, increment=0.2, target=1,
                      n_iter=10):
    """
    Iteratively train a grouplasso model and update alpha to find the
    parameter yielding the desired sparsity.

    Parameters
    ----------
    adata : ad.AnnData
        `ad.AnnData` with `'Z_train'` and `'Z_test'` in
        `adata.uns.keys()`.

    group_size : None | int
        Argument describing how the features are grouped. If `None`,
        `2 * adata.uns['D']` will be used. For more information see
        [celer documentation](https://mathurinm.github.io/celer/generated/celer.GroupLasso.html).

    starting_alpha : float
        The alpha value to start the search at.

    increment : float
        Amount to adjust alpha by between iterations. It is halved
        whenever the next step would land on an alpha that has already
        been tested.

    target : int
        The desired number of groups selected by the model.

    n_iter : int
        The maximum number of iterations to run.

    Returns
    -------
    sparsity_dict : dict
        Tested alpha (rounded to 4 decimals) as keys and the number of
        selected groups as the values.

    alpha : float
        The alpha value yielding the number of selected groups closest
        to the target. On ties, the alpha tested first is returned.

    Examples
    --------
    >>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata,
    ...                                                target = 1)
    >>>
    >>> alpha
    0.01

    See Also
    --------
    celer.GroupLasso : https://mathurinm.github.io/celer/
    """
    # Validation stays as `assert` so existing callers keep seeing
    # AssertionError. The type check runs first so a non-numeric argument
    # fails with the intended message instead of a TypeError.
    assert 0 < increment < starting_alpha, ("Choose a positive "
                                            "increment less "
                                            "than alpha")
    assert isinstance(target, int) and target > 0, ("Choose an integer "
                                                    "target number of groups "
                                                    "that is greater than 0")
    assert isinstance(n_iter, int) and n_iter > 0, ("Choose an integer "
                                                    "number of iterations "
                                                    "that is greater than 0")

    if group_size is None:
        group_size = adata.uns['D'] * 2

    sparsity_dict = {}
    alpha = starting_alpha
    step = increment

    for _ in range(n_iter):
        adata = train_model(adata, group_size, alpha)
        num_selected = len(find_selected_groups(adata))

        sparsity_dict[np.round(alpha, 4)] = num_selected

        if num_selected == target:
            break

        if num_selected < target:
            # Decreasing alpha will increase the number of selected pathways.
            # The candidate is rounded exactly like the stored keys;
            # comparing the raw float (e.g. 2.1 - 0.2 == 1.9000000000000001)
            # would miss the key 1.9 and let the search oscillate.
            if np.round(alpha - step, 4) in sparsity_dict:
                # Make the step smaller so the model can't go back and
                # forth between alpha values
                step /= 2
            # Ensures that alpha will never be negative
            alpha = max(alpha - step, 1e-3)
        else:
            if np.round(alpha + step, 4) in sparsity_dict:
                step /= 2
            alpha += step

    # Find the alpha that minimizes the difference between target and
    # observed number of selected groups (first tested alpha wins ties)
    optimal_alpha = min(sparsity_dict,
                        key=lambda tested: abs(sparsity_dict[tested] - target))

    return sparsity_dict, optimal_alpha
Iteratively train a grouplasso model and update alpha to find the parameter yielding the desired sparsity.
Parameters
- adata (ad.AnnData): `ad.AnnData` with `'Z_train'` and `'Z_test'` in `adata.uns.keys()`.
- group_size (None | int): Argument describing how the features are grouped. If `None`, `2 * adata.uns['D']` will be used. For more information see the [celer documentation](https://mathurinm.github.io/celer/generated/celer.GroupLasso.html).
- starting_alpha (float): The alpha value to start the search at.
- increment (float): Amount to adjust alpha by between iterations.
- target (int): The desired number of groups selected by the model.
- n_iter (int): The maximum number of iterations to run.
Returns
- sparsity_dict (dict): Tested alpha as keys and the number of selected groups as the values.
- alpha (float): The alpha value yielding the number of selected groups closest to the target.
Examples
>>> sparsity_dict, alpha = scmkl.optimize_sparsity(adata,
... target = 1)
>>>
>>> alpha
0.01
See Also
celer.GroupLasso: https://mathurinm.github.io/celer/