How to plot an ROC curve in Python
Here are two ways you may try, assuming your model
is an sklearn predictor:
import sklearn.metrics as metrics

# calculate the fpr and tpr for all thresholds of the classification
probs = model.predict_proba(X_test)
preds = probs[:, 1]  # keep only column 1 of predict_proba; these scores feed roc_curve
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)

# method I: plt
import matplotlib.pyplot as plt

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # dashed red diagonal from (0, 0) to (1, 1)
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# method II: ggplot
import pandas as pd  # required for pd.DataFrame below; the original snippet never imported it
from ggplot import *

df = pd.DataFrame(dict(fpr=fpr, tpr=tpr))
ggplot(df, aes(x='fpr', y='tpr')) + geom_line() + geom_abline(linetype='dashed')
or try
# Alternative ggplot rendering: the ROC line drawn over a translucent filled
# area, with the AUC value shown in the plot title.
roc_title = "ROC Curve w/ AUC = %s" % str(roc_auc)
(
    ggplot(df, aes(x='fpr', ymin=0, ymax='tpr'))
    + geom_line(aes(y='tpr'))
    + geom_area(alpha=0.2)
    + ggtitle(roc_title)
)
This is the simplest way to plot an ROC curve, given a set of ground truth labels and predicted probabilities. The best part is that it plots the ROC curve for ALL classes, so you get multiple neat-looking curves as well.
import scikitplot as skplt
import matplotlib.pyplot as plt

# Placeholders: replace each ellipsis with your own data before running.
y_true = ...    # ground truth labels
y_probas = ...  # predicted probabilities generated by sklearn classifier

# NOTE(review): newer scikit-plot releases appear to deprecate plot_roc_curve
# in favor of skplt.metrics.plot_roc -- confirm against the installed version.
skplt.metrics.plot_roc_curve(y_true, y_probas)
plt.show()
Here's a sample curve generated by plot_roc_curve. I used the sample digits dataset from scikit-learn so there are 10 classes. Notice that one ROC curve is plotted for each class.
Disclaimer: Note that this uses the scikit-plot library, which I built.
ROC curve and AUC for binary classification using matplotlib
# Dependencies for the binary-classification ROC example, one import per line.
import matplotlib.pyplot as plt
from sklearn import datasets, metrics, svm
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
Load Breast Cancer Dataset
# Load the dataset, then pull out the feature matrix and the target vector.
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
Split the Dataset
# Hold out 33% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=44,
)
Model
# Fit an L2-penalized logistic regression (C=0.1) and predict labels for the test set.
# fit() returns the estimator itself, so construction and fitting can be chained.
clf = LogisticRegression(penalty='l2', C=0.1).fit(X_train, y_train)
y_pred = clf.predict(X_test)
Accuracy
# Report the accuracy of the predicted labels on the held-out test set.
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy", accuracy)
ROC Curve and AUC
# Score the test set with column 1 of predict_proba, then build the ROC curve
# and compute the area under it from those scores.
y_pred_proba = clf.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

# Plot TPR against FPR with the AUC shown in the legend (loc=4 is the lower right).
curve_label = "data 1, auc=" + str(auc)
plt.plot(fpr, tpr, label=curve_label)
plt.legend(loc=4)
plt.show()