In [2]:
## How to determine Spearman's correlation in Python
def Snippet_121(seed=None):
    """Demonstrate two equivalent ways of computing Spearman's rank correlation.

    Builds a 75-row DataFrame whose ``x`` and ``y`` columns are each drawn
    without replacement from the integers 1..99, prints the first rows,
    computes Spearman's rho both by hand (Pearson's r on the ranked data) and
    with ``scipy.stats.spearmanr``, prints both values, and draws a seaborn
    regression plot of ``y`` against ``x``.

    Parameters
    ----------
    seed : int or None, optional
        When given, the sample data is drawn from a private
        ``random.Random(seed)`` so the run is reproducible. When ``None``
        (the default) the module-level ``random`` generator is used, exactly
        as before this parameter existed.

    Returns
    -------
    None
        All results are printed / plotted.
    """
    print(format('How to determine Spearman\'s correlation in Python','*^82'))

    # load libraries
    import matplotlib.pyplot as plt
    import scipy.stats
    import pandas as pd
    import random
    import seaborn as sns

    # Pick the random source: the shared module-level generator by default,
    # or an isolated, seeded one when the caller asks for reproducibility.
    rng = random if seed is None else random.Random(seed)

    # Build the dataframe in one step ('x' is sampled before 'y').
    df = pd.DataFrame({
        'x': rng.sample(range(1, 100), 75),
        'y': rng.sample(range(1, 100), 75),
    })

    # View first few rows of data
    print(); print(df.head())

    # Calculate Spearman's rank correlation coefficient by hand
    def spearmans_rank_correlation(xs, ys):
        """Return ``scipy.stats.pearsonr`` applied to the ranks of xs and ys.

        Spearman's rho is Pearson's r computed on the rank-transformed data,
        so element ``[0]`` of the returned result is the Spearman coefficient.
        """
        # Calculate the rank of the x's
        xranks = pd.Series(xs).rank()
        # Calculate the rank of the y's
        yranks = pd.Series(ys).rank()
        # Pearson's correlation coefficient on the ranked versions of the data
        return scipy.stats.pearsonr(xranks, yranks)

    # Show the hand-computed Spearman's correlation coefficient
    result = spearmans_rank_correlation(df.x, df.y)[0]
    print("spearmans_rank_correlation is: ", result)

    # Calculate Spearman's correlation using SciPy (should match the above)
    print("Scipy spearmans_rank_correlation is: ", scipy.stats.spearmanr(df.x, df.y)[0])

    # reg plot -- x/y must be passed by keyword: seaborn >= 0.12 accepts only
    # `data` positionally, so lmplot('x', 'y', data=df) raises TypeError there.
    sns.lmplot(x='x', y='y', data=df, fit_reg=True)
    # Render the figure when not running under an inline notebook backend.
    plt.show()

    x   y
0  94  78
1  14  72
2  72  45
3  13  97
4  49  49

spearmans_rank_correlation is:  0.0745945945945946
Scipy spearmans_rank_correlation is:  0.0745945945945946

