How to do Data Analysis in a Pandas DataFrame?
0

How to do Data Analysis in a Pandas DataFrame?

This recipe helps you do Data Analysis in a Pandas DataFrame
In [1]:
## How to do Data Analysis in a Pandas DataFrame
def Kickstarter_Example_70():
    print()
    print(format('How to Data Analysis in a Pandas DataFrame','*^82'))
    import warnings
    warnings.filterwarnings("ignore")

    # load libraries
    import pandas as pd

    # Create dataframe
    raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'],
                'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'],
                'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'],
                'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],
                'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}
    df = pd.DataFrame(raw_data, columns = ['regiment', 'company', 'name', 'preTestScore', 'postTestScore']); print(df)

    # Create a groupby variable that groups preTestScores by regiment
    groupby_regiment = df['preTestScore'].groupby(df['regiment'])

    # Descriptive statistics by group
    print(); print(df['preTestScore'].groupby(df['regiment']).describe())

    # Mean of each regiment’s preTestScore
    print(); print(groupby_regiment.mean())

    # Mean preTestScores grouped by regiment and company
    print(); print(df['preTestScore'].groupby([df['regiment'], df['company']]).mean())

    # Mean preTestScores grouped by regiment and company without heirarchical indexing
    print(); print(df['preTestScore'].groupby([df['regiment'], df['company']]).mean().unstack())

    # Group the entire dataframe by regiment and company
    print(); print(df.groupby(['regiment', 'company']).mean())

    # Number of observations in each regiment and company
    print(); print(df.groupby(['regiment', 'company']).size())

    # Iterate an operations over groups # Group the dataframe by regiment, and for each regiment,
    for name, group in df.groupby('regiment'):
        # print the name of the regiment
        print(); print(name)
        # print the data of that regiment
        print(); print(group)

    # Group by columns
    print(); print(list(df.groupby(df.dtypes, axis=1)))
    print(); print(df.groupby('regiment').mean().add_prefix('mean_'))

    # Create a function to get the stats of a group
    def get_stats(group):
        return {'min': group.min(), 'max': group.max(), 'count': group.count(), 'mean': group.mean()}

    #Create bins and bin up postTestScore by those pins
    bins = [0, 25, 50, 75, 100]
    group_names = ['Low', 'Okay', 'Good', 'Great']
    df['categories'] = pd.cut(df['postTestScore'], bins, labels=group_names)

    # Apply the get_stats() function to each postTestScore bin
    print(); print(df['postTestScore'].groupby(df['categories']).apply(get_stats).unstack())

Kickstarter_Example_70()
********************How to Data Analysis in a Pandas DataFrame********************
      regiment company      name  preTestScore  postTestScore
0   Nighthawks     1st    Miller             4             25
1   Nighthawks     1st  Jacobson            24             94
2   Nighthawks     2nd       Ali            31             57
3   Nighthawks     2nd    Milner             2             62
4     Dragoons     1st     Cooze             3             70
5     Dragoons     1st     Jacon             4             25
6     Dragoons     2nd    Ryaner            24             94
7     Dragoons     2nd      Sone            31             57
8       Scouts     1st     Sloan             2             62
9       Scouts     1st     Piger             3             70
10      Scouts     2nd     Riani             2             62
11      Scouts     2nd       Ali             3             70

            count   mean        std  min   25%   50%    75%   max
regiment
Dragoons      4.0  15.50  14.153916  3.0  3.75  14.0  25.75  31.0
Nighthawks    4.0  15.25  14.453950  2.0  3.50  14.0  25.75  31.0
Scouts        4.0   2.50   0.577350  2.0  2.00   2.5   3.00   3.0

regiment
Dragoons      15.50
Nighthawks    15.25
Scouts         2.50
Name: preTestScore, dtype: float64

regiment    company
Dragoons    1st         3.5
            2nd        27.5
Nighthawks  1st        14.0
            2nd        16.5
Scouts      1st         2.5
            2nd         2.5
Name: preTestScore, dtype: float64

company      1st   2nd
regiment
Dragoons     3.5  27.5
Nighthawks  14.0  16.5
Scouts       2.5   2.5

                    preTestScore  postTestScore
regiment   company
Dragoons   1st               3.5           47.5
           2nd              27.5           75.5
Nighthawks 1st              14.0           59.5
           2nd              16.5           59.5
Scouts     1st               2.5           66.0
           2nd               2.5           66.0

regiment    company
Dragoons    1st        2
            2nd        2
Nighthawks  1st        2
            2nd        2
Scouts      1st        2
            2nd        2
dtype: int64

Dragoons

   regiment company    name  preTestScore  postTestScore
4  Dragoons     1st   Cooze             3             70
5  Dragoons     1st   Jacon             4             25
6  Dragoons     2nd  Ryaner            24             94
7  Dragoons     2nd    Sone            31             57

Nighthawks

     regiment company      name  preTestScore  postTestScore
0  Nighthawks     1st    Miller             4             25
1  Nighthawks     1st  Jacobson            24             94
2  Nighthawks     2nd       Ali            31             57
3  Nighthawks     2nd    Milner             2             62

Scouts

   regiment company   name  preTestScore  postTestScore
8    Scouts     1st  Sloan             2             62
9    Scouts     1st  Piger             3             70
10   Scouts     2nd  Riani             2             62
11   Scouts     2nd    Ali             3             70

[(dtype('int64'),     preTestScore  postTestScore
0              4             25
1             24             94
2             31             57
3              2             62
4              3             70
5              4             25
6             24             94
7             31             57
8              2             62
9              3             70
10             2             62
11             3             70), (dtype('O'),       regiment company      name
0   Nighthawks     1st    Miller
1   Nighthawks     1st  Jacobson
2   Nighthawks     2nd       Ali
3   Nighthawks     2nd    Milner
4     Dragoons     1st     Cooze
5     Dragoons     1st     Jacon
6     Dragoons     2nd    Ryaner
7     Dragoons     2nd      Sone
8       Scouts     1st     Sloan
9       Scouts     1st     Piger
10      Scouts     2nd     Riani
11      Scouts     2nd       Ali)]

            mean_preTestScore  mean_postTestScore
regiment
Dragoons                15.50                61.5
Nighthawks              15.25                59.5
Scouts                   2.50                66.0

            count   max   mean   min
categories
Low           2.0  25.0  25.00  25.0
Okay          0.0   NaN    NaN   NaN
Good          8.0  70.0  63.75  57.0
Great         2.0  94.0  94.00  94.0