How to create and optimize a baseline Ridge Regression model?

This recipe helps you create and optimize a baseline Ridge Regression model.
In [2]:
## How to create and optimize a baseline Ridge Regression model
def Snippet_148():
    print()
    print(format('How to create and optimize a baseline Ridge regression model','*^82'))

    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import decomposition, datasets
    from sklearn import linear_model
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import GridSearchCV, cross_val_score
    from sklearn.preprocessing import StandardScaler

    # Load the Boston housing data (load_boston is available in scikit-learn < 1.2)
    dataset = datasets.load_boston()
    X = dataset.data
    y = dataset.target

    # Create a standard scaler object
    sc = StandardScaler()

    # Create a pca object
    pca = decomposition.PCA()

    # Create a Ridge regression object (linear regression with an L2 penalty)
    ridge = linear_model.Ridge()

    # Create a pipeline of three steps. First, standardize the data.
    # Second, transform the data with PCA.
    # Third, train a Ridge regression model on the data.
    pipe = Pipeline(steps=[('sc', sc),
                           ('pca', pca),
                           ('ridge', ridge)])

    # Create Parameter Space
    # Create a list of integers from 1 to the number of features in X (13 for the Boston data)
    n_components = list(range(1,X.shape[1]+1,1))

    # Create lists of parameter for Ridge Regression
    normalize = [True, False]
    solver = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']

    # Create a dictionary of all the parameter options 
    # Note that you can access the parameters of a pipeline step by using '__'
    parameters = dict(pca__n_components=n_components,
                      ridge__normalize=normalize,
                      ridge__solver=solver)

    # Conduct Parameter Optimization With Pipeline
    # Create a grid search object
    clf = GridSearchCV(pipe, parameters)

    # Fit the grid search
    clf.fit(X, y)

    # View The Best Parameters
    print('Best Number Of Components:', clf.best_estimator_.get_params()['pca__n_components'])
    print(); print(clf.best_estimator_.get_params()['ridge'])

    # Use Cross Validation To Evaluate Model
    CV_Result = cross_val_score(clf, X, y, cv=10, n_jobs=-1, scoring='r2')
    print(); print(CV_Result)
    print(); print(CV_Result.mean())
    print(); print(CV_Result.std())

Snippet_148()
***********How to create and optimize a baseline Ridge regression model***********
Best Number Of Components: 4

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='sparse_cg', tol=0.001)

[ 0.7366215   0.74783865 -0.1684087   0.57370647  0.62934032  0.66905734
  0.29067879  0.10846035 -0.21149751  0.21975957]

0.35955567831516977

0.34634691622793723
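
Note: the snippet above reproduces the original recipe, which relies on APIs that were removed in scikit-learn 1.2 (datasets.load_boston and the Ridge normalize parameter). Below is a minimal sketch of the same pipeline-plus-grid-search workflow on a current scikit-learn release; the California housing dataset and the alpha grid are stand-in choices, not part of the original recipe.

from sklearn import decomposition, datasets, linear_model
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler

# Load a regression dataset available in current scikit-learn
# (stand-in for the removed Boston housing data)
X, y = datasets.fetch_california_housing(return_X_y=True)

# Same three-step pipeline: standardize, transform with PCA, train Ridge
pipe = Pipeline(steps=[('sc', StandardScaler()),
                       ('pca', decomposition.PCA()),
                       ('ridge', linear_model.Ridge())])

# Tune the number of PCA components, the regularization strength and the solver
# (the alpha values below are illustrative, not from the original recipe)
parameters = dict(pca__n_components=list(range(1, X.shape[1] + 1)),
                  ridge__alpha=[0.1, 1.0, 10.0],
                  ridge__solver=['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg'])

# Grid search over the pipeline, then report the best settings
clf = GridSearchCV(pipe, parameters, n_jobs=-1)
clf.fit(X, y)
print(clf.best_params_)

# Evaluate the tuned pipeline with 10-fold cross-validation on the R^2 score
print(cross_val_score(clf, X, y, cv=10, n_jobs=-1, scoring='r2').mean())

fetch_california_housing downloads the data on first use and caches it locally, so the exact scores will differ from the Boston-based output shown above.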