scmkl.plotting
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

from plotnine import *
from sklearn.metrics import confusion_matrix


def plot_conf_mat(results, title = '', cmap = None, normalize = True,
                  alpha = None, save = None) -> None:
    '''
    Creates a confusion matrix from the output of scMKL.

    Parameters
    ----------
    **results** : *dict*
        > The output from either `scmkl.run()` or `scmkl.one_v_rest()`
        containing results from scMKL.

    **title** : *str*
        > The text to display at the top of the matrix.

    **cmap** : *matplotlib.colors.LinearSegmentedColormap*
        > The gradient of the values displayed from matplotlib.pyplot.
        If *None*, `'Purples'` is used; see the matplotlib color map
        reference for more information.

    **normalize** : *bool*
        > If *False*, plot the raw numbers. If *True*, plot the
        proportions.

    **alpha** : *None* | *float*
        > Alpha that the matrix should be created for. If `results` is
        from `scmkl.one_v_rest()`, this is ignored. If *None*, the
        smallest alpha will be used.

    **save** : *None* | *str*
        > File path to save the plot. If *None*, the plot is not saved.

    Returns
    -------
    *None*

    Examples
    --------
    >>> # Running scmkl and capturing results
    >>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
    >>>
    >>> from matplotlib.pyplot import get_cmap
    >>>
    >>> scmkl.plot_conf_mat(results, title = '', cmap = get_cmap('Blues'))

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    '''
    # Determining type of results
    if ('Observed' in results.keys()) and ('Metrics' in results.keys()):
        multi_class = False
        names = np.unique(results['Observed'])
    else:
        multi_class = True
        names = np.unique(results['Truth_labels'])

    if multi_class:
        cm = confusion_matrix(y_true = results['Truth_labels'],
                              y_pred = results['Predicted_class'],
                              labels = names)
    else:
        min_alpha = np.min(list(results['Metrics'].keys()))
        alpha = alpha if alpha is not None else min_alpha
        cm = confusion_matrix(y_true = results['Observed'],
                              y_pred = results['Predictions'][alpha],
                              labels = names)

    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Purples')

    if normalize:
        # Divide each row (true class) by its total to get proportions
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(names))
    plt.xticks(tick_marks, names, rotation=45)
    plt.yticks(tick_marks, names)

    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    acc_label = 'Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'
    acc_label = acc_label.format(accuracy, misclass)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel(acc_label)

    # Save before plt.show() so the figure is not cleared before writing
    if save is not None:
        plt.savefig(save)

    plt.show()


def plot_metric(summary_df : pd.DataFrame, alpha_star = None, color = 'red'):
    '''
    Takes a data frame of model metrics and, optionally, alpha star and
    creates a scatter plot of the given metric against alpha values.

    Parameters
    ----------
    **summary_df** : *pd.DataFrame*
        > A data frame created by `scmkl.get_summary()`.

    **alpha_star** : *None* | *float*
        > If not *None*, a label will be added marking alpha_star, the
        model parameter found to perform best in cross validation on
        the training data. Can be calculated with
        `scmkl.optimize_alpha()`.

    **color** : *str*
        > Color of the points on the plot.

    Returns
    -------
    **metric_plot** : *plotnine.ggplot*
        > A plot with alpha values on the x-axis and the metric on the
        y-axis.

    Examples
    --------
    >>> results = scmkl.run(adata, alpha_list)
    >>> summary_df = scmkl.get_summary(results)
    >>> metric_plot = scmkl.plot_metric(summary_df)
    >>>
    >>> metric_plot.save('scMKL_performance.png')
    '''
    # Capturing metric from summary_df
    metric_options = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']
    metric = np.intersect1d(metric_options, summary_df.columns)[0]

    alpha_list = np.unique(summary_df['Alpha'])

    # Calculating alpha_star y_pos if present
    if alpha_star is not None:
        best_rows = summary_df['Alpha'] == alpha_star
        alpha_star_metric = float(summary_df[best_rows][metric].iloc[0])

        metric_plot = (ggplot(summary_df, aes(x = 'Alpha', y = metric))
                       + geom_point(fill = color, color = color)
                       + theme_classic()
                       + ylim(0.6, 1)
                       + scale_x_reverse(breaks = alpha_list)
                       + annotate('text', x = alpha_star,
                                  y = alpha_star_metric - 0.04,
                                  label = '|\nAlpha\nStar')
                       )

    else:
        metric_plot = (ggplot(summary_df, aes(x = 'Alpha', y = metric))
                       + geom_point(fill = color, color = color)
                       + theme_classic()
                       + ylim(0.6, 1)
                       + scale_x_reverse(breaks = alpha_list))

    return metric_plot
```
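As a quick illustration of the `normalize = True` branch above, the confusion matrix counts are divided row-wise by each true class's total, so every row of the plotted matrix sums to 1. A minimal sketch with toy counts (the values here are made up purely for illustration):

```python
import numpy as np

# Toy 2x2 confusion matrix: rows are true labels, columns are predictions
cm = np.array([[8, 2],
               [1, 9]], dtype=float)

# Same row-wise normalization used by plot_conf_mat(normalize=True)
cm_norm = cm / cm.sum(axis=1)[:, np.newaxis]

print(cm_norm)              # [[0.8 0.2]
                            #  [0.1 0.9]]
print(cm_norm.sum(axis=1))  # each row sums to 1.0
```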
`plot_conf_mat(results, title = '', cmap = None, normalize = True, alpha = None, save = None) -> None`
Creates a confusion matrix from the output of scMKL.

Parameters
----------
**results** : *dict*
> The output from either `scmkl.run()` or `scmkl.one_v_rest()` containing results from scMKL.

**title** : *str*
> The text to display at the top of the matrix.

**cmap** : *matplotlib.colors.LinearSegmentedColormap*
> The gradient of the values displayed from matplotlib.pyplot. If *None*, `'Purples'` is used; see the matplotlib color map reference for more information.

**normalize** : *bool*
> If *False*, plot the raw numbers. If *True*, plot the proportions.

**alpha** : *None* | *float*
> Alpha that the matrix should be created for. If `results` is from `scmkl.one_v_rest()`, this is ignored. If *None*, the smallest alpha will be used.

**save** : *None* | *str*
> File path to save the plot. If *None*, the plot is not saved.

Returns
-------
*None*

Examples
--------
>>> # Running scmkl and capturing results
>>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
>>>
>>> from matplotlib.pyplot import get_cmap
>>>
>>> scmkl.plot_conf_mat(results, title = '', cmap = get_cmap('Blues'))

Citation
--------
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
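A further sketch of the raw-count view written to disk; it assumes `adata` and `alpha_list` have been prepared as in the example above, and the title text and output path are arbitrary placeholders:

>>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
>>> # Plot raw counts instead of proportions and save the figure
>>> scmkl.plot_conf_mat(results, title = 'scMKL predictions',
...                     normalize = False, save = 'conf_mat.png')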
`plot_metric(summary_df : pd.DataFrame, alpha_star = None, color = 'red')`
Takes a data frame of model metrics and, optionally, alpha star, and creates a scatter plot of the given metric against alpha values.

Parameters
----------
**summary_df** : *pd.DataFrame*
> A data frame created by `scmkl.get_summary()`.

**alpha_star** : *None* | *float*
> If not *None*, a label will be added marking alpha_star, the model parameter found to perform best in cross validation on the training data. Can be calculated with `scmkl.optimize_alpha()`.

**color** : *str*
> Color of the points on the plot.

Returns
-------
**metric_plot** : *plotnine.ggplot*
> A plot with alpha values on the x-axis and the metric on the y-axis.

Examples
--------
>>> results = scmkl.run(adata, alpha_list)
>>> summary_df = scmkl.get_summary(results)
>>> metric_plot = scmkl.plot_metric(summary_df)
>>>
>>> metric_plot.save('scMKL_performance.png')
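A sketch of the `alpha_star` annotation in use. The call to `scmkl.optimize_alpha()` is referenced above, but the arguments passed to it here are assumptions for illustration only, and the point color is arbitrary:

>>> # alpha_star: the alpha chosen by cross validation (arguments assumed)
>>> alpha_star = scmkl.optimize_alpha(adata)
>>> results = scmkl.run(adata, alpha_list)
>>> summary_df = scmkl.get_summary(results)
>>> metric_plot = scmkl.plot_metric(summary_df, alpha_star = alpha_star,
...                                 color = 'navy')
>>> metric_plot.save('scMKL_performance.png')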