How to get descriptive statistics of a Pandas DataFrame?

How to get descriptive statistics of a Pandas DataFrame?

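This recipe shows how to compute descriptive statistics for a pandas DataFrame: sum, mean, cumulative sum, count, minimum, maximum, median, variance, standard deviation, skewness and kurtosis of a column, plus the correlation and covariance matrices of the numeric columns.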
In [1]:
## How to get descriptive statistics of a Pandas DataFrame
def Kickstarter_Example_85():
    """Print a tour of descriptive statistics for a small example DataFrame.

    Builds a five-row frame with one text column (``name``) and three integer
    columns (``age``, ``preTestScore``, ``postTestScore``) and prints, each
    preceded by a blank line: the frame, ``df.info()``, the sum of ``age``,
    and for ``preTestScore`` the mean, cumulative sum, ``describe()`` summary,
    count, min, max, median, variance, standard deviation, skewness and
    kurtosis; finally the correlation and covariance matrices of the numeric
    columns.

    Returns:
        None. All results are written to standard output.
    """
    print()
    print(format('How to get descriptive statistics of a Pandas DataFrame','*^82'))

    import warnings
    # NOTE: this installs a process-wide "ignore" filter that stays in effect
    # after the function returns.
    warnings.filterwarnings("ignore")

    # load libraries
    import pandas as pd

    def show(value):
        # Every section is a blank separator line followed by the value.
        print()
        print(value)

    # Create dataframe
    data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
            'age': [42, 52, 36, 24, 73],
            'preTestScore': [4, 24, 31, 2, 3],
            'postTestScore': [25, 94, 57, 62, 70]}
    df = pd.DataFrame(data, columns=['name', 'age', 'preTestScore', 'postTestScore'])
    show(df)

    # info() writes its report to stdout itself and returns None, so the outer
    # print() emits a trailing "None" line. Kept as-is so the printed output
    # is unchanged.
    print()
    print(df.info())

    # The sum of all the ages
    show(df['age'].sum())

    pre = df['preTestScore']

    # Mean preTestScore
    show(pre.mean())

    # Cumulative sum of preTestScores, moving from the top row down
    show(pre.cumsum())

    # Summary statistics on preTestScore
    show(pre.describe())

    # Count the number of non-NA values
    show(pre.count())

    # Minimum value of preTestScore
    show(pre.min())

    # Maximum value of preTestScore
    show(pre.max())

    # Median value of preTestScore
    show(pre.median())

    # Sample variance of preTestScore values
    show(pre.var())

    # Sample standard deviation of preTestScore values
    show(pre.std())

    # Skewness of preTestScore values
    show(pre.skew())

    # Kurtosis of preTestScore values
    show(pre.kurt())

    # corr()/cov() on the full frame raise ValueError in pandas >= 2.0 because
    # the text column 'name' cannot be converted to float (older versions
    # silently dropped it). Selecting the numeric columns explicitly yields
    # the same matrices on every pandas version.
    numeric = df.select_dtypes(include='number')

    # Correlation Matrix Of Values
    show(numeric.corr())

    # Covariance Matrix Of Values
    show(numeric.cov())

# Run the example only when this file is executed directly (as a script or
# in a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    Kickstarter_Example_85()
*************How to get descriptive statistics of a Pandas DataFrame**************

    name  age  preTestScore  postTestScore
0  Jason   42             4             25
1  Molly   52            24             94
2   Tina   36            31             57
3   Jake   24             2             62
4    Amy   73             3             70

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
name             5 non-null object
age              5 non-null int64
preTestScore     5 non-null int64
postTestScore    5 non-null int64
dtypes: int64(3), object(1)
memory usage: 240.0+ bytes
None

227

12.8

0     4
1    28
2    59
3    61
4    64
Name: preTestScore, dtype: int64

count     5.000000
mean     12.800000
std      13.663821
min       2.000000
25%       3.000000
50%       4.000000
75%      24.000000
max      31.000000
Name: preTestScore, dtype: float64

5

2

31

4.0

186.7

13.663820841916802

0.7433452457326751

-2.4673543738411547

                    age  preTestScore  postTestScore
age            1.000000     -0.105651       0.328852
preTestScore  -0.105651      1.000000       0.378039
postTestScore  0.328852      0.378039       1.000000

                  age  preTestScore  postTestScore
age            340.80        -26.65         151.20
preTestScore   -26.65        186.70         128.65
postTestScore  151.20        128.65         620.30