I get an error implementing a DNNClassifier in Tensorflow 1.3.0 with Python 2.7. I got the sample code from the Tensorflow tf.estimator Quickstart
Tutorial and I want to run it with my own dataset: 3D coordinates and 10 different classes (int labels). Here is my implementation:
#!/usr/bin/env python # -*- coding: utf-8 -*- def ReadLabels(file): #load the labels from test file here labelFile = open(file, "r") Label = labelFile.readlines(); returnL = [[Label[i][j+1] for j in range(len(Label[0])-3)] for i in range(len(Label))] returnLint = list(); for i in range(len(returnL)): tmp = '' for j in range(len(returnL[0])): tmp += str(returnL[i][j]) returnLint.append(int(tmp)) return returnL, returnLint def NumpyReadBin(file,numcols,type): #load the data from binary file here import numpy as np trainData = np.fromfile(file,dtype=type) numrows = len(trainData)/numcols #print trainData[0:100] result = [[trainData[i+j*numcols] for i in range(numcols)] for j in range(numrows)] return result def TensorflowDNN(): #load sample dataset trainData = NumpyReadBin('data/TrainingData.dat',3,'float32') valData = NumpyReadBin('data/ValidationData.dat',3,'float32') testData = NumpyReadBin('data/TestingData.dat',3,'float32') #load sample labels trainL, trainLint = ReadLabels('data/TrainingLabels.txt') validateL, validateLint = ReadLabels('data/ValidationLabels.txt') testL, testLint = ReadLabels('data/TestingLabels.txt') import tensorflow as tf import numpy as np #get unique labels uniqueTrain = set() for l in trainLint: uniqueTrain.add(l) uniqueTrain = list(uniqueTrain) numClasses = len(uniqueTrain) numDims = len(trainData[0]) #All features have real-value data feature_columns = [tf.feature_column.numeric_column("x", shape=[3])] # Build 3 layer DNN with 10, 20, 10 units respectively. classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=numClasses, model_dir="../Classification/tmp") # Define training inputs train_input_fn = tf.estimator.inputs.numpy_input_fn( x={"x": np.array(trainData)},y=np.array(trainLint), num_epochs = None, shuffle = True) #Train the model classifier.train(input_fn = train_input_fn, steps = 2000) #Define Validation inputs val_input_fn = tf.estimator.inputs.numpy_input_fn( x={"x": np.array(valData)},y=np.array(validateLint), num_epochs = 1, shuffle = False) # Evaluate accuracy. accuracy_score = classifier.evaluate(input_fn=val_input_fn)["accuracy"] print("\nTest Accuracy: {0:f}\n".format(accuracy_score)) if __name__ == '__main__': TensorflowDNN()
The Functions RedLabels(...)
and NumpyReadBin(...)
are loading my saved dataset in tensors. Since the labels are integer numbers that I read from a text file the function is a bit weird, but what I get in the end is an array with integers from tese labels: [11, 12, 21, 22, 23, 31, 32, 33, 41, 42].
However I am not able to classify anything, because upon calling classifier.train(input_fn = train_input_fn, steps = 2000)
, I get the following error:
...Traceback and stuff like that... InvalidArgumentError (see above for traceback): assertion failed: [Label IDs must < n_classes] [Condition x < y did not hold element-wise:x (dnn/head/labels:0) = ] [[21][32][42]...] [y (dnn/head/assert_range/Const:0) = ] [10] [[Node: dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert = Assert[T=[DT_STRING, DT_STRING, DT_INT64, DT_STRING, DT_INT64], summarize=3, _device="/job:localhost/replica:0/task:0/cpu:0"](dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/Switch/_117, dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/data_0, dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/data_1, dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/Switch_1/_119, dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/data_3, dnn/head/assert_range/assert_less/Assert/AssertGuard/Assert/Switch_2/_121)]]
Did anyone come across this error before or has an idea how to solve it? I guess it is somehow complaining about the number of classes/format of labels in my dataset, but I know that trainLint contains 10 different classlabels, and that is the value of numClasses
. Could it be the format of my trainLint
array?