How to deal with imbalanced classes with upsampling in Python?
0

How to deal with imbalanced classes with upsampling in Python?

This recipe helps you deal with imbalanced classes by upsampling the minority class in Python.
In [1]:
## How to deal with imbalance classes with upsampling in Python 
def Kickstarter_Example_33(X=None, y=None, seed=None):
    """Upsample class 0 of an imbalanced binary target and print each step.

    Called with no arguments, the recipe builds its own imbalanced data set
    from iris (see ``_load_imbalanced_iris``) and prints exactly what it
    always printed. Passing ``X`` and ``y`` runs the same steps on your own
    data instead.

    Class 0 is treated as the minority class: its row indices are drawn with
    replacement until there are as many of them as class-1 rows. Features and
    labels are resampled with the same indices so they stay aligned.

    Parameters
    ----------
    X : array-like, optional
        Feature matrix with one row per observation. Must be given together
        with ``y``.
    y : array-like, optional
        One-dimensional target of 0/1 labels, one per row of ``X``.
    seed : int, optional
        Seed for a private ``RandomState`` so a run can be reproduced. When
        omitted, NumPy's global random state is used, as before.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(X_upsampled, y_upsampled)``: the upsampled class-0 rows followed
        by all class-1 rows.

    Raises
    ------
    ValueError
        If only one of ``X``/``y`` is given, if their lengths differ, or if
        there are class-1 rows but no class-0 rows to sample from.
    """
    import warnings

    import numpy as np

    if (X is None) != (y is None):
        raise ValueError("X and y must be given together or both omitted")

    print()
    print(format('How to deal with imbalance classes with upsampling in Python', '*^82'))

    # Silence warnings only while the recipe runs; the previous global
    # filterwarnings("ignore") kept hiding warnings from unrelated code.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if X is None:
            X, y = _load_imbalanced_iris()
        else:
            X = np.asarray(X)
            y = np.asarray(y)
            if X.shape[:1] != y.shape[:1]:
                raise ValueError("X and y must have the same number of rows")

        # Look at the imbalanced target vector
        print(); print("Look at the imbalanced target vector:\n", y)

        # Upsample the minority class (0) to match the majority class (1)
        # Indices of each class' observations
        i_class0 = np.where(y == 0)[0]
        i_class1 = np.where(y == 1)[0]

        # Number of observations in each class
        n_class0 = len(i_class0); print(); print("n_class0: ", n_class0)
        n_class1 = len(i_class1); print(); print("n_class1: ", n_class1)

        if n_class1 and not n_class0:
            raise ValueError("no class 0 observations to upsample")

        # Use the global state unless a seed was requested, so unseeded
        # callers that rely on np.random.seed() keep working.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # For every observation of class 1, randomly sample from class 0
        # with replacement
        i_class0_upsampled = rng.choice(i_class0, size=n_class1, replace=True)

        # Upsampled class-0 rows first, then every class-1 row
        i_resampled = np.concatenate((i_class0_upsampled, i_class1))
        X_upsampled = X[i_resampled]
        y_upsampled = y[i_resampled]

        # Show the balanced target vector
        print(); print(y_upsampled)

    return X_upsampled, y_upsampled


def _load_imbalanced_iris():
    """Return iris features and a 0/1 target made imbalanced on purpose."""
    import numpy as np
    from sklearn.datasets import load_iris

    iris = load_iris()

    # Removing the first 40 observations leaves 10 class-0 rows against
    # 100 rows of the other classes.
    X = iris.data[40:, :]
    y = iris.target[40:]

    # Binary target: 0 stays 0, every other class becomes 1
    return X, np.where((y == 0), 0, 1)

# Run the recipe: prints the imbalanced target vector, the per-class counts,
# and the target vector after class 0 has been upsampled.
Kickstarter_Example_33()
***********How to deal with imbalance classes with upsampling in Python***********

Look at the imbalanced target vector:
 [0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

n_class0:  10

n_class1:  100

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]