How to compare sklearn classification algorithms in Python?
0

How to compare sklearn classification algorithms in Python?

This recipe shows how to compare several sklearn classification algorithms in Python using cross-validated accuracy.
In [2]:
## How to compare sklearn classification algorithms in Python
## DataSet: sklearn.datasets.load_breast_cancer()
def Snippet_183():
    """Compare six scikit-learn classifiers on the breast-cancer dataset.

    Loads ``sklearn.datasets.load_breast_cancer()``, keeps 75% of the rows as
    a training split, and scores every classifier on that split with 10-fold
    cross-validated accuracy.  For each model one line of the form
    ``"<name>: <mean> (<std>)"`` is printed, and finally a boxplot of the
    per-fold scores is displayed.

    The same ``seed`` drives both the train/test split and the fold shuffling
    so repeated runs report the same numbers.

    Returns:
        None.  The results are the printed report and the matplotlib figure.
    """
    print()
    print(format('How to compare sklearn classification algorithms in Python','*^82'))
    import warnings
    # NOTE: this silences warnings process-wide, not just inside this function.
    warnings.filterwarnings("ignore")
    # load libraries
    import matplotlib.pyplot as plt
    from sklearn import model_selection
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.naive_bayes import GaussianNB
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
    from sklearn import datasets

    plt.style.use('ggplot')
    # load datasets
    seed = 42
    dataset = datasets.load_breast_cancer()
    X = dataset.data
    y = dataset.target
    # Only the training portion is cross-validated; the held-out 25% is not
    # used by this recipe.  Seeding the split keeps the report reproducible.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.25, random_state=seed
    )
    # random_state only has an effect when shuffle=True; recent scikit-learn
    # versions raise ValueError if it is given without shuffling.  One splitter
    # is shared by all models so they are scored on identical folds.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    # prepare models
    models = [
        ('LR', LogisticRegression()),
        ('LDA', LinearDiscriminantAnalysis()),
        ('KNN', KNeighborsClassifier()),
        ('CART', DecisionTreeClassifier()),
        ('NB', GaussianNB()),
        ('SVM', SVC()),
    ]
    # evaluate each model
    results = []
    names = []
    scoring = 'accuracy'
    for name, model in models:
        cv_results = model_selection.cross_val_score(
            model, X_train, y_train, cv=kfold, scoring=scoring
        )
        results.append(cv_results)
        names.append(name)
        print(f"{name}: {cv_results.mean():f} ({cv_results.std():f})")
    # boxplot algorithm comparison
    fig = plt.figure(figsize=(10,10))
    fig.suptitle('How to compare sklearn classification algorithms')
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xticklabels(names)
    plt.show()
# Run the recipe: prints the per-model scores and displays the boxplot.
Snippet_183()
************How to compare sklearn classification algorithms in Python************
LR: 0.960244 (0.031257)
LDA: 0.953212 (0.033168)
KNN: 0.938926 (0.033551)
CART: 0.913511 (0.042661)
NB: 0.941584 (0.031492)
SVM: 0.617220 (0.058031)