## How to create and optimize a baseline Lasso Regression model
def Snippet_149():
print()
print(format('How to create and optimize a baseline Lasso regression model','*^82'))
import warnings
warnings.filterwarnings("ignore")
# load libraries
from sklearn import decomposition, datasets
from sklearn import linear_model
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
# Load the iris flower data
dataset = datasets.load_boston()
X = dataset.data
y = dataset.target
# Create an scaler object
sc = StandardScaler()
# Create a pca object
pca = decomposition.PCA()
# Create a logistic regression object with an L2 penalty
lasso = linear_model.Lasso()
# Create a pipeline of three steps. First, standardize the data.
# Second, tranform the data with PCA.
# Third, train a Decision Tree Classifier on the data.
pipe = Pipeline(steps=[('sc', sc),
('pca', pca),
('lasso', lasso)])
# Create Parameter Space
# Create a list of a sequence of integers from 1 to 30 (the number of features in X + 1)
n_components = list(range(1,X.shape[1]+1,1))
# Create lists of parameter for Lasso Regression
normalize = [True, False]
selection = ['cyclic', 'random']
# Create a dictionary of all the parameter options
# Note has you can access the parameters of steps of a pipeline by using '__’
parameters = dict(pca__n_components=n_components,
lasso__normalize=normalize,
lasso__selection=selection)
# Conduct Parameter Optmization With Pipeline
# Create a grid search object
clf = GridSearchCV(pipe, parameters)
# Fit the grid search
clf.fit(X, y)
# View The Best Parameters
print('Best Number Of Components:', clf.best_estimator_.get_params()['pca__n_components'])
print(); print(clf.best_estimator_.get_params()['lasso'])
# Use Cross Validation To Evaluate Model
CV_Result = cross_val_score(clf, X, y, cv=10, n_jobs=-1, scoring='r2')
print(); print(CV_Result)
print(); print(CV_Result.mean())
print(); print(CV_Result.std())
Snippet_149()