How to determine Pearson's correlation in Python?
0

How to determine Pearson's correlation in Python?

This recipe helps you determine Pearson's correlation in Python.
In [2]:
def Snippet_120():
    """Demonstrate Pearson's correlation coefficient on random data.

    Builds a 75-row DataFrame whose ``x`` and ``y`` columns are each drawn
    without replacement from the integers 1..99, prints the first rows,
    computes Pearson's r between the two columns with a hand-written
    implementation, prints the result, and shows a seaborn scatter plot
    with a fitted regression line.

    Returns:
        None. All output goes to stdout and to the matplotlib window.
    """
    print()
    print(format('How to determine Pearson\'s correlation in Python','*^82'))

    import warnings
    # NOTE: this installs a process-wide "ignore" filter, so warnings stay
    # suppressed after this function returns.
    warnings.filterwarnings("ignore")

    # load libraries
    import matplotlib.pyplot as plt
    import statistics as stats
    import pandas as pd
    import random
    import seaborn as sns

    # Build the sample data: two independent columns of 75 distinct integers
    df = pd.DataFrame({
        'x': random.sample(range(1, 100), 75),
        'y': random.sample(range(1, 100), 75),
    })

    # View first few rows of data
    print(); print(df.head())

    # Calculate Pearson's Correlation Coefficient
    def pearson(x, y):
        """Return the sample Pearson correlation coefficient of x and y.

        The coefficient is the sum of the products of the paired standard
        scores divided by ``n - 1``, using the sample standard deviation.

        Args:
            x: Iterable of numeric observations.
            y: Iterable of numeric observations, paired with ``x``.

        Returns:
            The correlation coefficient as a float.

        Raises:
            ValueError: If ``x`` and ``y`` differ in length, or if either
                has zero variance (the coefficient is then undefined).
            statistics.StatisticsError: If fewer than two observations
                are supplied.
        """
        # Work on plain Python floats so the statistics module does not have
        # to coerce its results back to the scalar type of the input (e.g.
        # NumPy integers from a pandas column), which can lose the
        # fractional part of the mean and variance.
        xs = [float(value) for value in x]
        ys = [float(value) for value in y]

        n = len(xs)
        # zip() below would silently drop unpaired observations otherwise
        if n != len(ys):
            raise ValueError("x and y must have the same number of observations")

        mean_x = stats.mean(xs)
        mean_y = stats.mean(ys)
        # Passing the mean avoids recomputing it inside stdev()
        standard_deviation_x = stats.stdev(xs, mean_x)
        standard_deviation_y = stats.stdev(ys, mean_y)
        if standard_deviation_x == 0 or standard_deviation_y == 0:
            raise ValueError(
                "Pearson's correlation is undefined when an input has zero variance"
            )

        # Standard score of every observation
        standard_score_x = [(value - mean_x) / standard_deviation_x for value in xs]
        standard_score_y = [(value - mean_y) / standard_deviation_y for value in ys]

        # Multiply the standard scores together, sum them, then divide by n-1
        products = (i * j for i, j in zip(standard_score_x, standard_score_y))
        return sum(products) / (n - 1)

    # Show Pearson's Correlation Coefficient
    result = pearson(df.x, df.y)
    print()
    print("Pearson\'s correlation coefficient is: ", result)
    # x and y must be passed by keyword: recent seaborn releases reject them
    # as positional arguments.
    sns.lmplot(x='x', y='y', data=df, fit_reg=True)
    plt.show()

# Run the recipe: prints the banner, the first rows of the random data and
# the correlation coefficient, then displays the regression plot.
Snippet_120()
*****************How to determine Pearson's correlation in Python*****************

    x   y
0  69  99
1  56  30
2  64  62
3  58   8
4  14  64

Pearson's correlation coefficient is:  0.3810462941506265
In [ ]: