from sklearn.utils import resample df_maj = df[df.SEVERITYCODE == 1] df_min = df[df.SEVERITYCODE == 2] df_majdown = resample(df_maj, replace = False, n_samples = 57