问题
I am trying to train a TensorFlow convolutional neural network, and I always get a cryptic error regardless of the environment in which I run the program.
In Jupyter Notebook, the kernel simply dies.
In Terminal, I get "Illegal Instruction: 4" with no Traceback.
In Pycharm, I get: "Process finished with exit code 132 (interrupted by signal 4: SIGILL)".
I have looked all over the Internet and I have not found any instance in which this particular error was thrown in this situation. I would appreciate it if someone could help shed some light on this error.
I am using macOS High Sierra with Python 3.6.2.
My code can be found below and, as I said earlier, there is no traceback.
import tensorflow as tf
import numpy as np
import pandas as pd
# OS to load files and save checkpoints
import os
# ---------------------------------------------------------------------------
# Data preparation.
#
# Reads column 1 (features) and column 2 (labels) from the training and
# testing CSV files and reshapes everything to 4-D arrays for the network.
# ---------------------------------------------------------------------------
model_name = "tensorflowCNN"

# Geometry used for the reshapes at the end of this section: 60x1 for the
# training arrays, 15x1 for the evaluation arrays.
image_height, image_width = 60, 1
image1_height, image2_width = 15, 1

train_csv = "/home/student/Desktop/TrainingInput.csv"
eval_csv = "/home/student/Desktop/TestingInput.csv"

# --- Training features: first 60 rows of column 1 ---------------------------
feature_rows = np.asarray(pd.read_csv(train_csv, usecols=[1])).tolist()[0:60]
# Each cell is text (presumably a printed array -- confirm against the CSV).
# Trim '[', ']', newline and ',' characters from both ends of every cell.
trimmed_cells = [row[0].strip('[]\n,') for row in feature_rows]
# Tokenise on whitespace.
token_lists = [cell.split() for cell in trimmed_cells]
# Drop the "...," elision tokens and convert the remaining tokens to float.
numeric_rows = [
    [float(token) for token in tokens if token != "...,"]
    for tokens in token_lists
]
# Collapse every row to a single value: its mean.
train_data = np.asarray([np.mean(values) for values in numeric_rows])

# --- Training labels: first 60 rows of column 2 -----------------------------
label_rows = np.asarray(pd.read_csv(train_csv, usecols=[2])).tolist()[0:60]
# NOTE(review): the codes start at 1.0, not 0.0; anything that is neither
# "GravelTraining" nor "WaterTraining" is mapped to 3.0.
label_codes = {"GravelTraining": 1.0, "WaterTraining": 2.0}
encoded_labels = [label_codes.get(row[0], 3.0) for row in label_rows]
print(encoded_labels)
train_labels = np.asarray(encoded_labels)

# --- Evaluation set: first 15 rows, kept exactly as read from the CSV -------
eval_feature_rows = np.asarray(pd.read_csv(eval_csv, usecols=[1])).tolist()
eval_data = np.asarray(eval_feature_rows[0:15])
eval_label_rows = np.asarray(pd.read_csv(eval_csv, usecols=[2])).tolist()
eval_labels = np.asarray(eval_label_rows[0:15])

category_names = [str(index) for index in range(3)]

# --- Reshape to (batch, height, width, channels) ----------------------------
# NOTE(review): the label arrays are reshaped with the same 4-D shape as the
# feature arrays.
train_shape = (-1, image_height, image_width, 1)
eval_shape = (-1, image1_height, image2_width, 1)
train_data = train_data.reshape(train_shape)
train_labels = train_labels.reshape(train_shape)
eval_labels = eval_labels.reshape(eval_shape)
eval_data = eval_data.reshape(eval_shape)
# TODO: The neural network
class ConvNet:
    """Three-stage convolutional classifier built with the TF1 graph API.

    The graph is conv -> max-pool, three times over, followed by a 60-unit
    dense layer, dropout and a final dense layer that produces the logits.

    Attributes:
        input_layer: float32 placeholder of shape
            ``[None, image_height, Image_width, chan]`` for the input batch.
        is_training: boolean scalar placeholder controlling dropout. It
            defaults to ``True``; feed ``False`` to disable dropout when
            evaluating.
        labels: float32 placeholder holding the class index of each example.
        choice: index of the largest logit for each example.
        probabilities: softmax over the logits.
        accuracy, accuracy_op: value and update op of a streaming accuracy
            metric comparing ``labels`` with ``choice``.
        loss: softmax cross-entropy between one-hot ``labels`` and the logits.
        train_operation: one gradient-descent step on ``loss``.
    """

    def __init__(self, image_height, Image_width, num_classes, chan):
        """Build the network in the current default graph.

        Args:
            image_height: height of each input example.
            Image_width: width of each input example.
            num_classes: number of output classes (logits per example).
            chan: number of input channels.
        """
        # The batch dimension is left open (None) so the same graph accepts
        # any batch size instead of exactly one example per step.
        self.input_layer = tf.placeholder(
            dtype=tf.float32,
            shape=[None, image_height, Image_width, chan],
            name="inputs",
        )
        # Dropout switch; the default keeps dropout active, as before.
        self.is_training = tf.placeholder_with_default(
            True, shape=[], name="is_training"
        )

        conv_layer_1 = tf.layers.conv2d(
            self.input_layer, filters=32, kernel_size=[5, 5], padding="same",
            activation=tf.nn.relu,
        )
        pooling_layer_1 = tf.layers.max_pooling2d(
            conv_layer_1, pool_size=[2, 1], strides=1
        )

        conv_layer_2 = tf.layers.conv2d(
            pooling_layer_1, filters=64, kernel_size=[5, 5], padding="same",
            activation=tf.nn.relu,
        )
        pooling_layer_2 = tf.layers.max_pooling2d(
            conv_layer_2, pool_size=[2, 1], strides=2
        )

        conv_layer_3 = tf.layers.conv2d(
            pooling_layer_2, filters=128, kernel_size=[5, 5], padding="same",
            activation=tf.nn.relu,
        )
        pooling_layer_3 = tf.layers.max_pooling2d(
            conv_layer_3, pool_size=[2, 1], strides=2
        )

        # Feed the LAST pooling stage into the dense head. Flattening
        # pooling_layer_1 here would leave the second and third conv/pool
        # stages disconnected from the output.
        flattened_pooling = tf.layers.flatten(pooling_layer_3)
        dense_layer = tf.layers.dense(
            flattened_pooling, 60, activation=tf.nn.relu
        )
        dropout = tf.layers.dropout(
            dense_layer, rate=0.4, training=self.is_training
        )
        output_dense_layer = tf.layers.dense(dropout, num_classes)

        self.choice = tf.argmax(output_dense_layer, axis=1)
        self.probabilities = tf.nn.softmax(output_dense_layer)

        self.labels = tf.placeholder(dtype=tf.float32, name="labels")
        self.accuracy, self.accuracy_op = tf.metrics.accuracy(
            self.labels, self.choice
        )

        # Labels are cast to int32 and used directly as one-hot indices, so
        # they are expected to lie in [0, num_classes).
        one_hot_labels = tf.one_hot(
            indices=tf.cast(self.labels, dtype=tf.int32), depth=num_classes
        )
        self.loss = tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels, logits=output_dense_layer
        )

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
        # get_global_step() yields None when the graph has no global-step
        # variable; minimize() then simply does not increment a counter.
        self.train_operation = optimizer.minimize(
            loss=self.loss, global_step=tf.train.get_global_step()
        )
# Training process:variables
# ---------------------------------------------------------------------------
# Training driver: builds the input pipeline and the network, then runs the
# optimisation loop, reporting accuracy and writing checkpoints as it goes.
# ---------------------------------------------------------------------------
training_steps = 20000
batch_size = 60
path = f"./{model_name}-cnn/"
load_checkpoint = False  # only referenced by the disabled restore code below

tf.reset_default_graph()

# Shuffled, batched, endlessly repeating stream of (inputs, labels) pairs.
dataset = (
    tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    .shuffle(buffer_size=train_labels.shape[0])
    .batch(batch_size)
    .repeat()
)
dataset_iterator = dataset.make_initializable_iterator()
next_element = dataset_iterator.get_next()

cnn = ConvNet(image_height, image_width, 1, 1)
print("milestone1")

saver = tf.train.Saver(max_to_keep=2)
print("milestone2")

if not os.path.exists(path):
    os.makedirs(path)
print("milestone3")

checkpoint_prefix = path + model_name
report_every = 1  # report accuracy and checkpoint every N steps (skips step 0)

with tf.Session() as sess:
    # Restoring from a checkpoint is currently disabled:
    # if load_checkpoint:
    #     print(path)
    #     checkpoint = tf.train.get_checkpoint_state(path)
    #     print(checkpoint)
    #     saver.restore(sess, checkpoint.model_checkpoint_path)
    # else:
    sess.run(tf.global_variables_initializer())
    print("milestone4")
    # Local variables back the streaming accuracy metric.
    sess.run(tf.local_variables_initializer())
    sess.run(dataset_iterator.initializer)

    train_fetches = (cnn.train_operation, cnn.accuracy_op)
    for step in range(training_steps):
        batch_inputs, batch_labels = sess.run(next_element)
        sess.run(
            train_fetches,
            feed_dict={cnn.input_layer: batch_inputs, cnn.labels: batch_labels},
        )

        if step > 0 and step % report_every == 0:
            current_acc = sess.run(cnn.accuracy)
            print(f"Accuracy at step {step!s}:{current_acc!s}")
            saver.save(sess, checkpoint_prefix, global_step=step)

    print("Saving final checkpoint for training session.")
    saver.save(sess, checkpoint_prefix, global_step=step)
Thanks in advance.
回答1:
OK. If you have the 2.66 GHz version, that seems to me to be the Arrandale architecture, released in 2010, in which case there is absolutely no chance it is going to work, since that CPU does not have the AVX instructions required by the latest TensorFlow binaries.
Unless your CPU is Sandy Bridge or newer (and therefore supports AVX instructions),
your options are:
1) get a newer CPU
2) install an older version of TensorFlow
3) compile TensorFlow from source
For how to downgrade to an older version, see:
Illegal instruction(core dumped) tensorflow
Illegal instruction when import tensorflow in Python
EDIT
It seems that although all Core (i3, i5, i7) and Xeon CPUs from Sandy Bridge onwards support AVX, this is not true of Celeron or Pentium CPUs, even as of 2018. Check what the machine contains if you are buying hardware.
来源:https://stackoverflow.com/questions/51599488/illegal-instruction-4-error-when-running-any-tensorflow-program