Source code for lib.utils

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, \
                            roc_auc_score

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import logging

from sklearn.metrics import precision_recall_curve
from plotly.subplots import make_subplots
from plotly import graph_objects as go

def plot_confusion_matrix(cm, labels, suptitle='Confusion Matrix'):
    """
    Show a confusion matrix twice, side by side, in a single figure.

    Thin wrapper around ``_subplot_cm``: the left panel is drawn with
    ``normalize=False`` and the right panel with ``normalize=True``. The two
    panels share their y axis and the figure is displayed with ``plt.show()``.

    :type cm: array
    :param cm: confusion matrix array
    :type labels: list
    :param labels: list containing label strings
    :type suptitle: string
    :param suptitle: figure title, defaults to Confusion Matrix
    """
    figure, axes = plt.subplots(1, 2, sharey=True)
    figure.suptitle(suptitle)

    # First axis: raw matrix; second axis: normalized matrix.
    for axis, normalized in zip(axes, (False, True)):
        _subplot_cm(cm, labels, figure, axis, normalize=normalized)

    plt.show()
def _subplot_cm(cm, classes, fig, ax, normalize=False, title=None):
    """
    Draw a confusion matrix on ``ax`` as an annotated image.

    Normalization can be applied by setting ``normalize=True``; every row is
    then divided by its own sum. Rows whose sum is zero (a true class with no
    samples) are kept as zeros instead of becoming NaN, so the colour
    threshold and the cell annotations stay meaningful.

    :type cm: array
    :param cm: confusion matrix array (anything ``np.asarray`` accepts)
    :type classes: list
    :param classes: list containing label strings
    :type fig: figure
    :param fig: figure that owns ``ax``; its ``tight_layout()`` is called
        once the drawing is done
    :type ax: axes
    :param ax: axes the matrix is drawn on
    :type normalize: boolean
    :param normalize: normalize by rows
    :type title: string
    :param title: plot title, defaults to None (a title describing the
        normalization mode is then used)
    """
    if not title:
        title = 'Normalized' if normalize else 'Without normalization'

    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; the pre-filled zeros
        # in `out` are what remains for empty rows (no NaN, no warning).
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    ax.imshow(cm, interpolation='nearest')
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        xticklabels=classes,
        yticklabels=classes,
        title=title,
        ylabel='True label',
        xlabel='Predicted label')
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Integer counts are printed as integers; anything else (normalized
    # values, or float counts) would make the 'd' format raise.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells above half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for (i, j), value in np.ndenumerate(cm):
        ax.text(j, i, format(value, fmt),
                ha="center", va="center",
                color="white" if value > thresh else "black")

    fig.tight_layout()
def Evaluate(true_label, predicted_label, predicted_prob, labels):
    """
    Plot the confusion matrix and return accuracy, weighted F1 and ROC AUC.

    :type true_label: array
    :param true_label: ground truth values
    :type predicted_label: array
    :param predicted_label: predicted values
    :type predicted_prob: array
    :param predicted_prob: probability for each predicted class; a
        two-column array is reduced to its second column before being
        scored (binary case)
    :type labels: list
    :param labels: list containing label strings
    :return: one-row frame with the columns ``Accuracy``,
        ``F1 Score Weighted`` and ``ROC AUC``
    :rtype: pandas.DataFrame
    """
    cm = confusion_matrix(true_label, predicted_label)
    acc = accuracy_score(true_label, predicted_label)

    # The value is reported under 'F1 Score Weighted', so it has to be the
    # support-weighted average rather than the macro average.
    f1_weighted = f1_score(true_label, predicted_label, average='weighted')

    # For binary targets roc_auc_score expects 1-D scores of the class with
    # the greater label; a two-column probability array would raise.
    scores = np.asarray(predicted_prob)
    if scores.ndim == 2 and scores.shape[1] == 2:
        scores = scores[:, 1]
    roc_auc = roc_auc_score(true_label, scores, multi_class='ovr')

    plot_confusion_matrix(cm, labels)

    return pd.DataFrame(
        [[acc, f1_weighted, roc_auc]],
        columns=['Accuracy', 'F1 Score Weighted', 'ROC AUC'])
def plot_precision_recall(y_true, preds_proba):
    """
    Plot precision recall curves.

    The returned figure has two panels: precision against recall on the
    left, and precision and recall each plotted against the decision
    threshold on the right.

    :type y_true: array
    :param y_true: ground truth values
    :type preds_proba: array
    :param preds_proba: probability for positive predicted class
    :return: figure holding both panels (it is not shown by this function)
    """
    # calculate model precision-recall curve
    precision, recall, threshold = precision_recall_curve(y_true, preds_proba)

    # plot the model precision-recall curve
    fig = make_subplots(
        1, 2,
        subplot_titles=("Recall x Precision", "Recall and Precision Curves"))

    # The left panel holds a single curve, so its legend entry is hidden.
    # Setting it here replaces a name lookup that compared against
    # 'Precision x Recall' and therefore never matched this trace.
    fig.add_trace(
        go.Scatter(
            x=recall,
            y=precision,
            name='Recall x Precision',
            showlegend=False,
        ),
        row=1, col=1)

    # precision and recall are sliced with [:-1] so that they line up with
    # the threshold array on the x axis.
    fig.add_trace(
        go.Scatter(
            x=threshold,
            y=precision[:-1],
            name='Precision',
        ),
        row=1, col=2)
    fig.add_trace(
        go.Scatter(
            x=threshold,
            y=recall[:-1],
            name='Recall',
        ),
        row=1, col=2)

    fig.update_yaxes(title_text="Precision", row=1, col=1)
    fig.update_xaxes(title_text="Recall", row=1, col=1)
    fig.update_xaxes(title_text="Threshold", row=1, col=2)

    return fig
def arg_nearest(array, value):
    """
    Return the position of the element that lies closest to ``value``.

    The input is converted with ``np.asarray`` and the index of the smallest
    absolute difference is returned (``argmin`` without an axis, so the index
    refers to the flattened array).

    :type array: array
    :param array: numpy array
    :type value: float
    :param value: desired value
    :return: index
    :rtype: int
    """
    distances = np.absolute(np.asarray(array) - value)
    return distances.argmin()