How to optimise learning rates in XGBoost example 2?

This recipe shows how to optimise the learning rate together with the number of estimators in XGBoost using a grid search.
In [2]:
def Snippet_194():
    print()
    print(format('How to optimise multiple parameters in XGBoost','*^82'))

    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from xgboost import XGBClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import StratifiedKFold
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot
    pyplot.style.use('ggplot')
    import numpy

    # load the wine dataset
    dataset = datasets.load_wine()
    X = dataset.data; y = dataset.target
    # hold-out split (note: GridSearchCV below runs its own cross-validation on
    # the full dataset, so this split is not used by the grid search itself)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    # grid search
    model = XGBClassifier()
    n_estimators = [100, 200, 300, 400, 500]
    learning_rate = [0.0001, 0.001, 0.01, 0.1]
    param_grid = dict(learning_rate=learning_rate, n_estimators=n_estimators)
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
    grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
    grid_result = grid_search.fit(X, y)

    # summarize results
    print(); print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']

    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # plot results
    pyplot.subplots(figsize=(12, 12))
    scores = numpy.array(means).reshape(len(learning_rate), len(n_estimators))

    for i, value in enumerate(learning_rate):
        pyplot.plot(n_estimators, scores[i], label='learning_rate: ' + str(value))
    pyplot.legend()
    pyplot.xlabel('n_estimators')
    pyplot.ylabel('Negative log loss')  # scoring is neg_log_loss, so higher is better
    # save before show(); with the Agg backend show() does nothing anyway
    pyplot.savefig('n_estimators_vs_learning_rate.png')
    pyplot.show()

Snippet_194()
******************How to optimise multiple parameters in XGBoost******************

Best: -0.077744 using {'learning_rate': 0.1, 'n_estimators': 200}
-1.086580 (0.000540) with: {'learning_rate': 0.0001, 'n_estimators': 100}
-1.074749 (0.001075) with: {'learning_rate': 0.0001, 'n_estimators': 200}
-1.063108 (0.001606) with: {'learning_rate': 0.0001, 'n_estimators': 300}
-1.051659 (0.002129) with: {'learning_rate': 0.0001, 'n_estimators': 400}
-1.040399 (0.002644) with: {'learning_rate': 0.0001, 'n_estimators': 500}
-0.986720 (0.005130) with: {'learning_rate': 0.001, 'n_estimators': 100}
-0.891290 (0.009532) with: {'learning_rate': 0.001, 'n_estimators': 200}
-0.808672 (0.013497) with: {'learning_rate': 0.001, 'n_estimators': 300}
-0.736644 (0.016322) with: {'learning_rate': 0.001, 'n_estimators': 400}
-0.673494 (0.018456) with: {'learning_rate': 0.001, 'n_estimators': 500}
-0.443082 (0.032684) with: {'learning_rate': 0.01, 'n_estimators': 100}
-0.236992 (0.048798) with: {'learning_rate': 0.01, 'n_estimators': 200}
-0.159902 (0.052830) with: {'learning_rate': 0.01, 'n_estimators': 300}
-0.125207 (0.057096) with: {'learning_rate': 0.01, 'n_estimators': 400}
-0.108330 (0.059207) with: {'learning_rate': 0.01, 'n_estimators': 500}
-0.083225 (0.059937) with: {'learning_rate': 0.1, 'n_estimators': 100}
-0.077744 (0.057482) with: {'learning_rate': 0.1, 'n_estimators': 200}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 300}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 400}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 500}
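
At learning_rate 0.1 the cross-validated log loss plateaus after about 200 trees, so adding more estimators only costs training time. Rather than grid-searching n_estimators, you can let XGBoost pick the tree count itself with early stopping. The cell below is a minimal sketch, not part of the original recipe: it assumes xgboost >= 1.6, where early_stopping_rounds is a constructor argument (older versions take it as a fit() keyword), and it reuses the wine data and the best learning rate found above.

In [ ]:
# Sketch (assumes xgboost >= 1.6): stop adding trees once the hold-out
# log loss stops improving, instead of grid-searching n_estimators.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

dataset = datasets.load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.25, random_state=7)

model = XGBClassifier(
    n_estimators=500,          # upper bound; early stopping picks the real count
    learning_rate=0.1,         # best value found by the grid search above
    eval_metric='mlogloss',    # multiclass log loss, matching the scoring above
    early_stopping_rounds=10)  # stop after 10 rounds without improvement
model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

print('Best iteration:', model.best_iteration)
print('Hold-out accuracy: %.3f' % model.score(X_test, y_test))

This trades the exhaustive n_estimators sweep for a single fit per learning rate, which scales much better on larger datasets.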