How to do string munging in Pandas?
0

How to do string munging in Pandas?

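This recipe shows how to do string munging in Pandas: checking a column for a substring, finding regular-expression matches, and testing strings against a pattern.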
In [1]:
## How to do string munging in Pandas
def Snippet_110():
    """Demonstrate basic string munging on a pandas DataFrame column.

    Builds a small DataFrame whose ``email`` column contains one missing
    value, then prints, in order:

    1. the DataFrame itself,
    2. ``str.contains('gmail')`` for every email,
    3. ``str.findall`` with an email-splitting regex (case-insensitive),
    4. ``str.match`` with the same regex.

    Takes no arguments and returns ``None``; every result is written to
    stdout.
    """
    print()
    print(format('How to do string munging in Pandas', '*^82'))

    import warnings
    # NOTE: this changes the process-wide warning filters and is not undone
    # when the function returns.
    warnings.filterwarnings("ignore")

    # load libraries
    import pandas as pd
    import numpy as np
    import re

    # Create dataframe; np.nan marks the missing email (the np.NAN alias
    # was removed in NumPy 2.0, so the lowercase spelling is required).
    raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
                'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
                'email': ['jas203@gmail.com', 'momomolly@gmail.com', np.nan,
                          'battler@milner.com', 'Ames1234@yahoo.com'],
                'preTestScore': [4, 24, 31, 2, 3],
                'postTestScore': [25, 94, 57, 62, 70]}

    df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'email',
                                         'preTestScore', 'postTestScore'])
    print(); print(df)

    # Which strings in the email column contain 'gmail'?
    print(); print(df['email'].str.contains('gmail'))

    # Regular expression that breaks an email apart into three groups:
    # (local part) @ (domain name) . (top-level domain).
    # Raw string so the backslash reaches the regex engine unchanged.
    pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'

    # Find every occurrence of the pattern in each email; each hit is a
    # tuple of the three captured groups.
    print(); print(df['email'].str.findall(pattern, flags=re.IGNORECASE))

    # Test whether each email matches the pattern from its first character.
    # str.match returns a per-row boolean (missing values stay NaN), not the
    # captured pieces.
    matches = df['email'].str.match(pattern, flags=re.IGNORECASE)
    print(); print(matches)

# Run the demonstration so its results are printed.
Snippet_110()
************************How to do string munging in Pandas************************

  first_name last_name                email  preTestScore  postTestScore
0      Jason    Miller     jas203@gmail.com             4             25
1      Molly  Jacobson  momomolly@gmail.com            24             94
2       Tina       Ali                  NaN            31             57
3       Jake    Milner   battler@milner.com             2             62
4        Amy     Cooze   Ames1234@yahoo.com             3             70

0     True
1     True
2      NaN
3    False
4    False
Name: email, dtype: object

0       [(jas203, gmail, com)]
1    [(momomolly, gmail, com)]
2                          NaN
3     [(battler, milner, com)]
4     [(Ames1234, yahoo, com)]
Name: email, dtype: object

0    True
1    True
2     NaN
3    True
4    True
Name: email, dtype: object