I want to resample my dataset. It consists of categorically transformed data with labels from 3 classes. The number of samples per class is:
I will generate each point in a 2-D array and then reshape the result back into a 3-D array. I have provided my script below. If anything is unclear, please leave a comment; any reply is appreciated.
# Split each dataset into two parallel tuples: inputs and labels.
# zip(*dataset) transposes the items, and unpacking into two names requires
# every item to have exactly two fields — presumably (input, label); confirm
# against where train_dataset / test_dataset are built.
x_train, y_train = zip(*train_dataset)
x_test, y_test = zip(*test_dataset)
# --- Oversample the TRAIN split with SMOTE ---------------------------------
# The training inputs are expected to stack into a 3-D array
# (dim_1, dim_2, dim_3). They are flattened to 2-D rows of length dim_3,
# resampled, and then folded back into 3-D groups of dim_2 rows each.
x_train_array = np.array(x_train)  # convert once; reused for shape and reshape
dim_1, dim_2, dim_3 = x_train_array.shape
new_dim = dim_1 * dim_2
new_x_train = x_train_array.reshape(new_dim, dim_3)

# Every flattened row inherits the label of the sample it came from, so each
# label is repeated dim_2 times, in the same order as the rows above.
new_y_train = np.repeat(np.asarray(y_train), dim_2, axis=0)

# transform the dataset
# fit_resample is the current imbalanced-learn entry point; the older
# fit_sample alias is not available in recent releases.
oversample = SMOTE()
X_Train, Y_Train = oversample.fit_resample(new_x_train, new_y_train)

# summarize the new class distribution
counter = Counter(Y_Train)
print('The number of samples in TRAIN: ', counter)

# Fold the resampled rows back into groups of dim_2 consecutive rows.
n_train_groups = X_Train.shape[0] // dim_2
x_train_SMOTE = X_Train.reshape(n_train_groups, dim_2, dim_3)

# Reshape the labels once (not once per iteration) so that row i holds the
# dim_2 labels belonging to group i.
y_train_groups = Y_Train.reshape(n_train_groups, dim_2)

# NOTE(review): this assumes each run of dim_2 consecutive resampled rows
# carries a single label. The check below reports any group where that does
# not hold; in that case more than one label is appended for the group and
# y_train_SMOTE no longer lines up with x_train_SMOTE.
y_train_SMOTE = []
for group_labels in y_train_groups:
    unique_labels = set(group_labels)
    y_train_SMOTE.extend(unique_labels)
    ## Check: if there is any different value in a list
    if len(unique_labels) != 1:
        print('\n\n********* STOP: THERE IS SOMETHING WRONG IN TRAIN ******\n\n')
# --- Oversample the TEST split with SMOTE ----------------------------------
# NOTE(review): resampling the test split means evaluation runs partly on
# synthetic samples. Oversampling is usually applied to training data only —
# confirm this is intended.
#
# The test inputs are expected to stack into a 3-D array
# (dim_1, dim_2, dim_3). They are flattened to 2-D rows of length dim_3,
# resampled, and then folded back into 3-D groups of dim_2 rows each.
# dim_1 / dim_2 / dim_3 / new_dim are rebound here to the TEST shape.
x_test_array = np.array(x_test)  # convert once; reused for shape and reshape
dim_1, dim_2, dim_3 = x_test_array.shape
new_dim = dim_1 * dim_2
new_x_test = x_test_array.reshape(new_dim, dim_3)

# Every flattened row inherits the label of the sample it came from, so each
# label is repeated dim_2 times, in the same order as the rows above.
new_y_test = np.repeat(np.asarray(y_test), dim_2, axis=0)

# transform the dataset
# fit_resample is the current imbalanced-learn entry point; the older
# fit_sample alias is not available in recent releases.
oversample = SMOTE()
X_Test, Y_Test = oversample.fit_resample(new_x_test, new_y_test)

# summarize the new class distribution
counter = Counter(Y_Test)
print('The number of samples in TEST: ', counter)

# Fold the resampled rows back into groups of dim_2 consecutive rows.
n_test_groups = X_Test.shape[0] // dim_2
x_test_SMOTE = X_Test.reshape(n_test_groups, dim_2, dim_3)

# Reshape the labels once (not once per iteration) so that row i holds the
# dim_2 labels belonging to group i.
y_test_groups = Y_Test.reshape(n_test_groups, dim_2)

# NOTE(review): this assumes each run of dim_2 consecutive resampled rows
# carries a single label. The check below reports any group where that does
# not hold; in that case more than one label is appended for the group and
# y_test_SMOTE no longer lines up with x_test_SMOTE.
y_test_SMOTE = []
for group_labels in y_test_groups:
    unique_labels = set(group_labels)
    y_test_SMOTE.extend(unique_labels)
    ## Check: if there is any different value in a list
    if len(unique_labels) != 1:
        print('\n\n********* STOP: THERE IS SOMETHING WRONG IN TEST ******\n\n')