DATA MUNGING

In [2]:

```
def pearson(x, y):
    """Return the sample Pearson correlation coefficient of ``x`` and ``y``.

    The coefficient is computed from standard scores: every observation is
    centred on its sample mean and divided by its sample standard deviation,
    the paired scores are multiplied together, and the sum of those products
    is divided by ``n - 1``.

    Args:
        x: Sized iterable of numbers (for example a list or a pandas Series).
        y: Sized iterable of numbers with the same length as ``x``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        statistics.StatisticsError: If the inputs differ in length, hold
            fewer than two observations, or either input is constant (a
            zero standard deviation leaves the coefficient undefined).
    """
    # Imported locally, like every other import in this recipe, so the
    # function does not rely on a file-level import block.
    import statistics as stats

    # n is the number of paired observations
    n = len(x)
    if len(y) != n:
        # zip() below would silently drop the unpaired tail while the result
        # is still divided by len(x) - 1, giving a wrong coefficient.
        raise stats.StatisticsError(
            "pearson requires x and y to have the same length"
        )

    mean_x = stats.mean(x)
    mean_y = stats.mean(y)
    # stdev() raises StatisticsError for fewer than two observations; the
    # already-computed mean is passed in so it is not recalculated.
    standard_deviation_x = stats.stdev(x, mean_x)
    standard_deviation_y = stats.stdev(y, mean_y)
    if standard_deviation_x == 0 or standard_deviation_y == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise stats.StatisticsError(
            "pearson is undefined when x or y is constant"
        )

    # Standard score of every observation
    standard_score_x = [
        (observation - mean_x) / standard_deviation_x for observation in x
    ]
    standard_score_y = [
        (observation - mean_y) / standard_deviation_y for observation in y
    ]

    # Multiply the paired standard scores, sum them, then divide by n - 1
    products = (i * j for i, j in zip(standard_score_x, standard_score_y))
    return sum(products) / (n - 1)


def Snippet_120():
    """Demonstrate Pearson's correlation on two random integer columns.

    Builds a 75-row DataFrame whose ``x`` and ``y`` columns are each drawn
    without replacement from 1..99, prints the first rows and the
    coefficient computed by ``pearson``, then shows a seaborn regression
    plot of ``y`` against ``x``. Returns nothing.
    """
    print()
    print(format('How to determine Pearson\'s correlation in Python','*^82'))

    import warnings
    # NOTE: this installs a process-wide filter that stays in effect after
    # the function returns; every later warning is silenced as well.
    warnings.filterwarnings("ignore")

    # load libraries
    import matplotlib.pyplot as plt
    import pandas as pd
    import random
    import seaborn as sns

    # Create empty dataframe
    df = pd.DataFrame()

    # Add columns
    df['x'] = random.sample(range(1, 100), 75)
    df['y'] = random.sample(range(1, 100), 75)

    # View first few rows of data
    print()
    print(df.head())

    # Show Pearson's Correlation Coefficient
    result = pearson(df['x'], df['y'])
    print()
    print("Pearson\'s correlation coefficient is: ", result)

    # x and y must be passed by keyword: seaborn 0.12+ made them keyword-only,
    # so the positional form raises TypeError there.
    sns.lmplot(x='x', y='y', data=df, fit_reg=True)
    plt.show()
# Run the recipe: prints the sample rows and the coefficient, then shows the plot.
Snippet_120()
```

In [ ]:

```
```

Stuck at work?

Can't find the recipe you are looking for? Let us know and we will find an expert to create the recipe for you.
Click here

Companies using this Recipe

1
developer from
Vodafone

1
developer from
ANAC

1
developer from
HvH

1
developer from
ICU Medical