How to graph grid scores from GridSearchCV?
The code shown by @sascha is correct. However, the grid_scores_
attribute will be soon deprecated. It is better to use the cv_results
attribute.
It can be implemente in a similar fashion to that of @sascha method:
def plot_grid_search(cv_results, grid_param_1, grid_param_2, name_param_1, name_param_2): # Get Test Scores Mean and std for each grid search scores_mean = cv_results['mean_test_score'] scores_mean = np.array(scores_mean).reshape(len(grid_param_2),len(grid_param_1)) scores_sd = cv_results['std_test_score'] scores_sd = np.array(scores_sd).reshape(len(grid_param_2),len(grid_param_1)) # Plot Grid search scores _, ax = plt.subplots(1,1) # Param1 is the X-axis, Param 2 is represented as a different curve (color line) for idx, val in enumerate(grid_param_2): ax.plot(grid_param_1, scores_mean[idx,:], '-o', label= name_param_2 + ': ' + str(val)) ax.set_title("Grid Search Scores", fontsize=20, fontweight='bold') ax.set_xlabel(name_param_1, fontsize=16) ax.set_ylabel('CV Average Score', fontsize=16) ax.legend(loc="best", fontsize=15) ax.grid('on')# Calling Method plot_grid_search(pipe_grid.cv_results_, n_estimators, max_features, 'N Estimators', 'Max Features')
The above results in the following plot:
from sklearn.svm import SVCfrom sklearn.grid_search import GridSearchCVfrom sklearn import datasetsimport matplotlib.pyplot as pltimport seaborn as snsimport numpy as npdigits = datasets.load_digits()X = digits.datay = digits.targetclf_ = SVC(kernel='rbf')Cs = [1, 10, 100, 1000]Gammas = [1e-3, 1e-4]clf = GridSearchCV(clf_, dict(C=Cs, gamma=Gammas), cv=2, pre_dispatch='1*n_jobs', n_jobs=1)clf.fit(X, y)scores = [x[1] for x in clf.grid_scores_]scores = np.array(scores).reshape(len(Cs), len(Gammas))for ind, i in enumerate(Cs): plt.plot(Gammas, scores[ind], label='C: ' + str(i))plt.legend()plt.xlabel('Gamma')plt.ylabel('Mean score')plt.show()
- Code is based on this.
- Only puzzling part: will sklearn always respect the order of C & Gamma -> official example uses this "ordering"
Output:
For plotting the results when tuning several hyperparameters, what I did was fixed all parameters to their best value except for one and plotted the mean score for the other parameter for each of its values.
def plot_search_results(grid): """ Params: grid: A trained GridSearchCV object. """ ## Results from grid search results = grid.cv_results_ means_test = results['mean_test_score'] stds_test = results['std_test_score'] means_train = results['mean_train_score'] stds_train = results['std_train_score'] ## Getting indexes of values per hyper-parameter masks=[] masks_names= list(grid.best_params_.keys()) for p_k, p_v in grid.best_params_.items(): masks.append(list(results['param_'+p_k].data==p_v)) params=grid.param_grid ## Ploting results fig, ax = plt.subplots(1,len(params),sharex='none', sharey='all',figsize=(20,5)) fig.suptitle('Score per parameter') fig.text(0.04, 0.5, 'MEAN SCORE', va='center', rotation='vertical') pram_preformace_in_best = {} for i, p in enumerate(masks_names): m = np.stack(masks[:i] + masks[i+1:]) pram_preformace_in_best best_parms_mask = m.all(axis=0) best_index = np.where(best_parms_mask)[0] x = np.array(params[p]) y_1 = np.array(means_test[best_index]) e_1 = np.array(stds_test[best_index]) y_2 = np.array(means_train[best_index]) e_2 = np.array(stds_train[best_index]) ax[i].errorbar(x, y_1, e_1, linestyle='--', marker='o', label='test') ax[i].errorbar(x, y_2, e_2, linestyle='-', marker='^',label='train' ) ax[i].set_xlabel(p.upper()) plt.legend() plt.show()