Recipe: How to optimise learning rates in XGBoost example 2?

This recipe shows how to tune the learning rate together with the number of trees (n_estimators) of an XGBoost classifier, using grid search with cross-validation.
def Snippet_194():
    print()
    print(format('How to optimise multiple parameters in XGBoost','*^82'))

    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import datasets
    from xgboost import XGBClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import StratifiedKFold
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot
    pyplot.style.use('ggplot')
    import numpy

    # load the wine dataset
    dataset = datasets.load_wine()
    X = dataset.data; y = dataset.target

    # grid search over learning_rate and n_estimators
    model = XGBClassifier()
    n_estimators = [100, 200, 300, 400, 500]
    learning_rate = [0.0001, 0.001, 0.01, 0.1]
    param_grid = dict(learning_rate=learning_rate, n_estimators=n_estimators)
    # 10-fold stratified cross-validation, scored with negative log loss
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
    grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
    grid_result = grid_search.fit(X, y)

    # summarize results
    print(); print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']

    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # plot results: one curve per learning rate, log loss against n_estimators
    pyplot.subplots(figsize=(12, 12))
    scores = numpy.array(means).reshape(len(learning_rate), len(n_estimators))

    for i, value in enumerate(learning_rate):
        pyplot.plot(n_estimators, scores[i], label='learning_rate: ' + str(value))
    pyplot.legend()
    pyplot.xlabel('n_estimators')
    pyplot.ylabel('Log Loss')
    pyplot.savefig('n_estimators_vs_learning_rate.png')
    pyplot.show()

Snippet_194()
******************How to optimise multiple parameters in XGBoost******************

Best: -0.077744 using {'learning_rate': 0.1, 'n_estimators': 200}
-1.086580 (0.000540) with: {'learning_rate': 0.0001, 'n_estimators': 100}
-1.074749 (0.001075) with: {'learning_rate': 0.0001, 'n_estimators': 200}
-1.063108 (0.001606) with: {'learning_rate': 0.0001, 'n_estimators': 300}
-1.051659 (0.002129) with: {'learning_rate': 0.0001, 'n_estimators': 400}
-1.040399 (0.002644) with: {'learning_rate': 0.0001, 'n_estimators': 500}
-0.986720 (0.005130) with: {'learning_rate': 0.001, 'n_estimators': 100}
-0.891290 (0.009532) with: {'learning_rate': 0.001, 'n_estimators': 200}
-0.808672 (0.013497) with: {'learning_rate': 0.001, 'n_estimators': 300}
-0.736644 (0.016322) with: {'learning_rate': 0.001, 'n_estimators': 400}
-0.673494 (0.018456) with: {'learning_rate': 0.001, 'n_estimators': 500}
-0.443082 (0.032684) with: {'learning_rate': 0.01, 'n_estimators': 100}
-0.236992 (0.048798) with: {'learning_rate': 0.01, 'n_estimators': 200}
-0.159902 (0.052830) with: {'learning_rate': 0.01, 'n_estimators': 300}
-0.125207 (0.057096) with: {'learning_rate': 0.01, 'n_estimators': 400}
-0.108330 (0.059207) with: {'learning_rate': 0.01, 'n_estimators': 500}
-0.083225 (0.059937) with: {'learning_rate': 0.1, 'n_estimators': 100}
-0.077744 (0.057482) with: {'learning_rate': 0.1, 'n_estimators': 200}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 300}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 400}
-0.077754 (0.057472) with: {'learning_rate': 0.1, 'n_estimators': 500}
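
The scores show that the cross-validated log loss essentially stops improving once learning_rate reaches 0.1 and n_estimators passes 200, which is why that combination is reported as best. A minimal follow-up sketch (not part of the original recipe) for actually using the tuned settings is shown below: it refits an XGBClassifier with the best parameters on a train split of the same wine dataset and checks accuracy and log loss on a held-out test split. The variable names and the hard-coded parameter values are illustrative; in practice you would read them from grid_result.best_params_.

# refit with the best parameters found above and evaluate on a held-out split
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from xgboost import XGBClassifier

dataset = datasets.load_wine()
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=7)

# values taken from grid_result.best_params_ in the run above
best_model = XGBClassifier(learning_rate=0.1, n_estimators=200)
best_model.fit(X_train, y_train)

print("Test accuracy:", accuracy_score(y_test, best_model.predict(X_test)))
print("Test log loss:", log_loss(y_test, best_model.predict_proba(X_test)))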

