How to optimise the size (depth) of trees in XGBoost?
0

How to optimise the size (depth) of trees in XGBoost?

This recipe helps you optimise the size (depth) of trees in XGBoost.
In [2]:
def Snippet_192():
    """Grid-search XGBoost's ``max_depth`` on the wine dataset and plot the scores.

    Runs a ``GridSearchCV`` over ``max_depth`` values 1, 3, 5, 7 and 9 using
    10-fold stratified cross-validation scored with ``neg_log_loss``, prints
    the best setting and the mean/std score of every candidate, and saves an
    error-bar plot of the scores to ``max_depth.png`` in the current working
    directory.

    Side effects: silences all Python warnings for the process, selects the
    non-interactive ``Agg`` matplotlib backend, writes to stdout, and writes
    ``max_depth.png``.

    Returns:
        None.
    """
    print()
    print(format('How to optimise size (depth) of trees in XGBoost','*^82'))

    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import datasets
    from xgboost import XGBClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import StratifiedKFold
    import matplotlib
    # The backend must be chosen before pyplot is imported.
    matplotlib.use('Agg')
    from matplotlib import pyplot

    # load the wine dataset
    dataset = datasets.load_wine()
    X = dataset.data
    y = dataset.target
    # No separate train/test split: the grid search cross-validates on the
    # full dataset, so a hold-out split would go unused.

    # grid search
    model = XGBClassifier()
    max_depth = range(1, 11, 2)
    print(max_depth)
    param_grid = dict(max_depth=max_depth)
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
    grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold, verbose=1)
    grid_result = grid_search.fit(X, y)

    # summarize results
    print()
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    print()

    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # plot
    # Draw on an explicit figure and close it afterwards so repeated calls do
    # not overlay earlier plots or accumulate open figures.
    fig, ax = pyplot.subplots()
    try:
        ax.errorbar(max_depth, means, yerr=stds)
        # The scores come from the "neg_log_loss" scorer, so the plotted
        # values are negated log loss (higher is better).
        ax.set_title("XGBoost max_depth vs Negative Log Loss")
        ax.set_xlabel('max_depth')
        ax.set_ylabel('Negative Log Loss')
        fig.savefig('max_depth.png')
    finally:
        pyplot.close(fig)

# Run the recipe: prints the grid-search summary and writes max_depth.png.
Snippet_192()
*****************How to optimise size (depth) of trees in XGBoost*****************
range(1, 11, 2)
Fitting 10 folds for each of 5 candidates, totalling 50 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:    2.1s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    2.2s finished
Best: -0.069259 using {'max_depth': 1}

-0.069259 (0.034427) with: {'max_depth': 1}
-0.083225 (0.059937) with: {'max_depth': 3}
-0.086606 (0.061344) with: {'max_depth': 5}
-0.086606 (0.061344) with: {'max_depth': 7}
-0.086606 (0.061344) with: {'max_depth': 9}