How to optimize hyper parameters of a Logistic Regression model using Grid Search in Python?
0

How to optimize hyper parameters of a Logistic Regression model using Grid Search in Python?

This recipe helps you optimize hyper parameters of a Logistic Regression model using Grid Search in Python
In [2]:
## How to optimize hyper-parameters of a Logistic Regression model using Grid Search in Python
def Snippet_145():
    print()
    print(format('How to optimize hyper-parameters of a LR model using Grid Search in Python','*^82'))

    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    import numpy as np
    from sklearn import linear_model, decomposition, datasets
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import GridSearchCV, cross_val_score
    from sklearn.preprocessing import StandardScaler

    # Load the iris flower data
    dataset = datasets.load_iris()
    X = dataset.data
    y = dataset.target

    # Create an scaler object
    sc = StandardScaler()

    # Create a pca object
    pca = decomposition.PCA()

    # Create a logistic regression object with an L2 penalty
    logistic = linear_model.LogisticRegression()

    # Create a pipeline of three steps. First, standardize the data.
    # Second, tranform the data with PCA.
    # Third, train a logistic regression on the data.
    pipe = Pipeline(steps=[('sc', sc),
                           ('pca', pca),
                           ('logistic', logistic)])

    # Create Parameter Space
    # Create a list of a sequence of integers from 1 to 30 (the number of features in X + 1)
    n_components = list(range(1,X.shape[1]+1,1))
    # Create a list of values of the regularization parameter
    C = np.logspace(-4, 4, 50)
    # Create a list of options for the regularization penalty
    penalty = ['l1', 'l2']
    # Create a dictionary of all the parameter options 
    # Note has you can access the parameters of steps of a pipeline by using '__’
    parameters = dict(pca__n_components=n_components,
                      logistic__C=C,
                      logistic__penalty=penalty)

    # Conduct Parameter Optmization With Pipeline
    # Create a grid search object
    clf = GridSearchCV(pipe, parameters)

    # Fit the grid search
    clf.fit(X, y)
    # View The Best Parameters
    print('Best Penalty:', clf.best_estimator_.get_params()['logistic__penalty'])
    print('Best C:', clf.best_estimator_.get_params()['logistic__C'])
    print('Best Number Of Components:', clf.best_estimator_.get_params()['pca__n_components'])
    print(); print(clf.best_estimator_.get_params()['logistic'])

    # Use Cross Validation To Evaluate Model
    CV_Result = cross_val_score(clf, X, y, cv=4, n_jobs=-1)
    print(); print(CV_Result)
    print(); print(CV_Result.mean())
    print(); print(CV_Result.std())

Snippet_145()
****How to optimize hyper-parameters of a LR model using Grid Search in Python****
Best Penalty: l1
Best C: 7.9060432109076855
Best Number Of Components: 4

LogisticRegression(C=7.9060432109076855, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=None, penalty='l1', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

[0.97435897 0.97435897 0.91666667 1.        ]

0.9663461538461539

0.03053294692829753