scmkl.plotting

View Source

  1import numpy as np
  2import pandas as pd
  3import matplotlib.pyplot as plt
  4import itertools
  5
  6from plotnine import *
  7from sklearn.metrics import confusion_matrix
  8
  9
 10def plot_conf_mat(results, title = '', cmap = None, normalize = True,
 11                          alpha = None, save = None) -> None:
 12    '''
 13    Creates a confusion matrix from the output of scMKL.
 14
 15    Parameters
 16    ----------
 17    **results** : *dict*
 18        > The output from either scmkl.run() or scmkl.one_v_rest()
 19        containing results from scMKL.
 20
 21    **title** : *str*
 22        > The text to display at the top of the matrix.
 23
 24    **cmap** : *matplotlib.colors.LinearSegmentedColormap*
 25        > The gradient of the values displayed from matplotlib.pyplot.
 26        If *None*, `'Purples'` is used see matplotlib color map 
 27        reference for more information. 
 28
 29    **normalize** : *bool*
 30        > If False, plot the raw numbers. If True, plot the 
 31        proportions.
 32
 33    **alpha** : *None* | *float*
 34        > Alpha that matrix should be created for. If `results` is from
 35        `scmkl.one_v_all()`, this is ignored. If *None*, smallest alpha
 36        will be used.
 37
 38    **save** : *None* | *str*
 39        > File path to save plot. If *None*, plot is not saved.
 40
 41    Returns
 42    -------
 43    *None*
 44    
 45    Examples
 46    --------
 47    >>> # Running scmkl and capturing results
 48    >>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
 49    >>> 
 50    >>> from matplotlib.pyplot import get_cmap
 51    >>> 
 52    >>> scmkl.plot_conf_mat(results, title = '', cmap = get_cmap('Blues'))
 53
 54    Citiation
 55    ---------
 56    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
 57    '''
 58    # Determining type of results
 59    if ('Observed' in results.keys()) and ('Metrics' in results.keys()):
 60        multi_class = False
 61        names = np.unique(results['Observed'])
 62    else:
 63        multi_class = True
 64        names = np.unique(results['Truth_labels'])
 65
 66    if multi_class:
 67        cm = confusion_matrix(y_true = results['Truth_labels'], 
 68                              y_pred = results['Predicted_class'], 
 69                              labels = names)
 70    else:
 71        min_alpha = np.min(list(results['Metrics'].keys()))
 72        alpha = alpha if alpha != None else min_alpha
 73        cm = confusion_matrix(y_true = results['Observed'],
 74                              y_pred = results['Predictions'][alpha],
 75                              labels = names)
 76
 77    accuracy = np.trace(cm) / float(np.sum(cm))
 78    misclass = 1 - accuracy
 79
 80    if cmap is None:
 81        cmap = plt.get_cmap('Purples')
 82
 83    if normalize:
 84        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
 85
 86    plt.figure(figsize=(8, 6))
 87    plt.imshow(cm, interpolation='nearest', cmap=cmap)
 88    plt.title(title)
 89    plt.colorbar()
 90
 91    tick_marks = np.arange(len(names))
 92    plt.xticks(tick_marks, names, rotation=45)
 93    plt.yticks(tick_marks, names)
 94
 95
 96    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
 97    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
 98        if normalize:
 99            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
100                     horizontalalignment="center",
101                     color="white" if cm[i, j] > thresh else "black")
102        else:
103            plt.text(j, i, "{:,}".format(cm[i, j]),
104                     horizontalalignment="center",
105                     color="white" if cm[i, j] > thresh else "black")
106
107    acc_label = 'Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'
108    acc_label = acc_label.format(accuracy, misclass)
109
110    plt.tight_layout()
111    plt.ylabel('True label')
112    plt.xlabel(acc_label)
113    plt.show()
114
115    if save != None:
116        plt.savefig(save)
117
118
def plot_metric(summary_df : pd.DataFrame, alpha_star = None, color = 'red'):
    '''
    Takes a data frame of model metrics and, optionally, alpha star and
    creates a scatter plot of the given metric against alpha values.

    Parameters
    ----------
    **summary_df** : *pd.DataFrame*
        > A data frame created by `scmkl.get_summary()`. Must contain
        an `'Alpha'` column and one of the metric columns `'AUROC'`,
        `'Accuracy'`, `'F1-Score'`, `'Precision'`, or `'Recall'`.

    **alpha_star** : *None* | *float*
        > If not *None*, a label will be added for tuned alpha_star
        being optimal model parameter for performance from cross
        validation on the training data. Can be calculated with
        `scmkl.optimize_alpha()`.

    **color** : *str*
        > Color to make points on plot.

    Returns
    -------
    **metric_plot** : *plotnine.ggplot*
        > A plot with alpha values on x-axis and metric on y-axis.

    Raises
    ------
    *ValueError*
        > If `summary_df` has none of the supported metric columns, or
        if `alpha_star` is given but is not a value in
        `summary_df['Alpha']`.

    Examples
    --------
    >>> results = scmkl.run(adata, alpha_list)
    >>> summary_df = scmkl.get_summary(results)
    >>> metric_plot = scmkl.plot_metric(summary_df)
    >>>
    >>> metric_plot.save('scMKL_performance.png')
    '''
    # Capturing metric from summary_df; when several metric columns are
    # present, the alphabetically first one is plotted
    metric_options = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']
    present_metrics = np.intersect1d(metric_options, summary_df.columns)
    if len(present_metrics) == 0:
        msg = 'summary_df must contain one of the metric columns: {}'
        raise ValueError(msg.format(', '.join(metric_options)))
    metric = str(present_metrics[0])

    alpha_list = np.unique(summary_df['Alpha'])

    # Calculating alpha_star y_pos if present
    alpha_star_metric = None
    if alpha_star is not None:
        star_values = summary_df.loc[summary_df['Alpha'] == alpha_star, metric]
        if star_values.empty:
            msg = "alpha_star = {} is not a value in summary_df['Alpha']"
            raise ValueError(msg.format(alpha_star))
        # Taking the first match explicitly; float() on a whole Series is
        # deprecated in pandas and fails when several rows match
        alpha_star_metric = float(star_values.iloc[0])

    metric_plot = (ggplot(summary_df, aes(x = 'Alpha', y = metric))
                   + geom_point(fill = color, color = color)
                   + theme_classic()
                   + ylim(0.6, 1)
                   + scale_x_reverse(breaks = alpha_list))

    if alpha_star_metric is not None:
        # Label sits just below the point for the tuned alpha
        metric_plot = metric_plot + annotate('text', x = alpha_star,
                                             y = alpha_star_metric - 0.04,
                                             label='|\nAlpha\nStar')

    return metric_plot

def plot_conf_mat( results, title='', cmap=None, normalize=True, alpha=None, save=None) -> None: View Source

 11def plot_conf_mat(results, title = '', cmap = None, normalize = True,
 12                          alpha = None, save = None) -> None:
 13    '''
 14    Creates a confusion matrix from the output of scMKL.
 15
 16    Parameters
 17    ----------
 18    **results** : *dict*
 19        > The output from either scmkl.run() or scmkl.one_v_rest()
 20        containing results from scMKL.
 21
 22    **title** : *str*
 23        > The text to display at the top of the matrix.
 24
 25    **cmap** : *matplotlib.colors.LinearSegmentedColormap*
 26        > The gradient of the values displayed from matplotlib.pyplot.
 27        If *None*, `'Purples'` is used see matplotlib color map 
 28        reference for more information. 
 29
 30    **normalize** : *bool*
 31        > If False, plot the raw numbers. If True, plot the 
 32        proportions.
 33
 34    **alpha** : *None* | *float*
 35        > Alpha that matrix should be created for. If `results` is from
 36        `scmkl.one_v_all()`, this is ignored. If *None*, smallest alpha
 37        will be used.
 38
 39    **save** : *None* | *str*
 40        > File path to save plot. If *None*, plot is not saved.
 41
 42    Returns
 43    -------
 44    *None*
 45    
 46    Examples
 47    --------
 48    >>> # Running scmkl and capturing results
 49    >>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
 50    >>> 
 51    >>> from matplotlib.pyplot import get_cmap
 52    >>> 
 53    >>> scmkl.plot_conf_mat(results, title = '', cmap = get_cmap('Blues'))
 54
 55    Citiation
 56    ---------
 57    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
 58    '''
 59    # Determining type of results
 60    if ('Observed' in results.keys()) and ('Metrics' in results.keys()):
 61        multi_class = False
 62        names = np.unique(results['Observed'])
 63    else:
 64        multi_class = True
 65        names = np.unique(results['Truth_labels'])
 66
 67    if multi_class:
 68        cm = confusion_matrix(y_true = results['Truth_labels'], 
 69                              y_pred = results['Predicted_class'], 
 70                              labels = names)
 71    else:
 72        min_alpha = np.min(list(results['Metrics'].keys()))
 73        alpha = alpha if alpha != None else min_alpha
 74        cm = confusion_matrix(y_true = results['Observed'],
 75                              y_pred = results['Predictions'][alpha],
 76                              labels = names)
 77
 78    accuracy = np.trace(cm) / float(np.sum(cm))
 79    misclass = 1 - accuracy
 80
 81    if cmap is None:
 82        cmap = plt.get_cmap('Purples')
 83
 84    if normalize:
 85        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
 86
 87    plt.figure(figsize=(8, 6))
 88    plt.imshow(cm, interpolation='nearest', cmap=cmap)
 89    plt.title(title)
 90    plt.colorbar()
 91
 92    tick_marks = np.arange(len(names))
 93    plt.xticks(tick_marks, names, rotation=45)
 94    plt.yticks(tick_marks, names)
 95
 96
 97    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
 98    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
 99        if normalize:
100            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
101                     horizontalalignment="center",
102                     color="white" if cm[i, j] > thresh else "black")
103        else:
104            plt.text(j, i, "{:,}".format(cm[i, j]),
105                     horizontalalignment="center",
106                     color="white" if cm[i, j] > thresh else "black")
107
108    acc_label = 'Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'
109    acc_label = acc_label.format(accuracy, misclass)
110
111    plt.tight_layout()
112    plt.ylabel('True label')
113    plt.xlabel(acc_label)
114    plt.show()
115
116    if save != None:
117        plt.savefig(save)

Creates a confusion matrix from the output of scMKL.

Parameters

results : dict

The output from either scmkl.run or scmkl.one_v_rest containing results from scMKL.

title : str

The text to display at the top of the matrix.

cmap : matplotlib.colors.LinearSegmentedColormap

The gradient of the values displayed from matplotlib.pyplot. If None, 'Purples' is used; see the matplotlib color map reference for more information.

normalize : bool

If False, plot the raw numbers. If True, plot the proportions.

alpha : None | float

Alpha that the matrix should be created for. If results is from scmkl.one_v_rest(), this is ignored. If None, the smallest alpha will be used.

save : None | str

File path to save plot. If None, plot is not saved.

Returns

None

Examples

>>> # Running scmkl and capturing results
>>> results = scmkl.run(adata = adata, alpha_list = alpha_list)
>>> 
>>> from matplotlib.pyplot import get_cmap
>>> 
>>> scmkl.plot_conf_mat(results, title = '', cmap = get_cmap('Blues'))

Citation

http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

def plot_metric( summary_df: pandas.core.frame.DataFrame, alpha_star=None, color='red'): View Source

def plot_metric(summary_df : pd.DataFrame, alpha_star = None, color = 'red'):
    '''
    Takes a data frame of model metrics and, optionally, alpha star and
    creates a scatter plot of the given metric against alpha values.

    Parameters
    ----------
    **summary_df** : *pd.DataFrame*
        > A data frame created by `scmkl.get_summary()`. Must contain
        an `'Alpha'` column and one of the metric columns `'AUROC'`,
        `'Accuracy'`, `'F1-Score'`, `'Precision'`, or `'Recall'`.

    **alpha_star** : *None* | *float*
        > If not *None*, a label will be added for tuned alpha_star
        being optimal model parameter for performance from cross
        validation on the training data. Can be calculated with
        `scmkl.optimize_alpha()`.

    **color** : *str*
        > Color to make points on plot.

    Returns
    -------
    **metric_plot** : *plotnine.ggplot*
        > A plot with alpha values on x-axis and metric on y-axis.

    Raises
    ------
    *ValueError*
        > If `summary_df` has none of the supported metric columns, or
        if `alpha_star` is given but is not a value in
        `summary_df['Alpha']`.

    Examples
    --------
    >>> results = scmkl.run(adata, alpha_list)
    >>> summary_df = scmkl.get_summary(results)
    >>> metric_plot = scmkl.plot_metric(summary_df)
    >>>
    >>> metric_plot.save('scMKL_performance.png')
    '''
    # Capturing metric from summary_df; when several metric columns are
    # present, the alphabetically first one is plotted
    metric_options = ['AUROC', 'Accuracy', 'F1-Score', 'Precision', 'Recall']
    present_metrics = np.intersect1d(metric_options, summary_df.columns)
    if len(present_metrics) == 0:
        msg = 'summary_df must contain one of the metric columns: {}'
        raise ValueError(msg.format(', '.join(metric_options)))
    metric = str(present_metrics[0])

    alpha_list = np.unique(summary_df['Alpha'])

    # Calculating alpha_star y_pos if present
    alpha_star_metric = None
    if alpha_star is not None:
        star_values = summary_df.loc[summary_df['Alpha'] == alpha_star, metric]
        if star_values.empty:
            msg = "alpha_star = {} is not a value in summary_df['Alpha']"
            raise ValueError(msg.format(alpha_star))
        # Taking the first match explicitly; float() on a whole Series is
        # deprecated in pandas and fails when several rows match
        alpha_star_metric = float(star_values.iloc[0])

    metric_plot = (ggplot(summary_df, aes(x = 'Alpha', y = metric))
                   + geom_point(fill = color, color = color)
                   + theme_classic()
                   + ylim(0.6, 1)
                   + scale_x_reverse(breaks = alpha_list))

    if alpha_star_metric is not None:
        # Label sits just below the point for the tuned alpha
        metric_plot = metric_plot + annotate('text', x = alpha_star,
                                             y = alpha_star_metric - 0.04,
                                             label='|\nAlpha\nStar')

    return metric_plot

Takes a data frame of model metrics and, optionally, alpha star, and creates a scatter plot of the given metric against alpha values.

Parameters

summary_df : pd.DataFrame

A data frame created by scmkl.get_summary().

alpha_star : None | float

If not None, a label will be added marking the tuned alpha_star, the optimal model parameter for performance from cross validation on the training data. Can be calculated with scmkl.optimize_alpha.

color : str

Color to make points on plot.

Returns

metric_plot : plotnine.ggplot

A plot with alpha values on x-axis and metric on y-axis.

Examples

>>> results = scmkl.run(adata, alpha_list)
>>> summary_df = scmkl.get_summary(results)
>>> metric_plot = plot_metric(summary_df)
>>>
>>> metric_plot.save('scMKL_performance.png')