Question
I need to build an RNN (without using nn.RNN) with the following specifications:

- It is a character RNN.
- It should have 1 hidden layer.
- It should have this set of weights:
  - Wxh (from the input layer to the hidden layer)
  - Whh (for the recurrent connection in the hidden layer)
  - Who (from the hidden layer to the output layer)
- I need to use tanh for the hidden layer and softmax for the output layer (sketched below).
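In other words, the target architecture looks roughly like this minimal sketch (VanillaCharRNN and the attribute names are illustrative placeholders, not code from the question):

```python
import torch

class VanillaCharRNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.Wxh = torch.nn.Linear(input_size, hidden_size, bias=False)  # input -> hidden
        self.Whh = torch.nn.Linear(hidden_size, hidden_size)             # recurrent hidden -> hidden
        self.Who = torch.nn.Linear(hidden_size, output_size)             # hidden -> output

    def forward(self, x, h):
        # tanh non-linearity on the hidden layer, softmax on the output layer
        h = torch.tanh(self.Wxh(x) + self.Whh(h))
        return torch.softmax(self.Who(h), dim=1), h
```

The caller carries h across timesteps, feeding each step's new hidden state into the next.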
I have implemented the code and am using CrossEntropyLoss() as the loss function, which gives me the following error:
```
RuntimeError                              Traceback (most recent call last)
<ipython-input-33-94b42540bc4f> in <module>()
     25             print("target ",target_tensor[timestep])
     26
---> 27             loss += criterion(output,target_tensor[timestep].view(1,n_vocab))
     28
     29     loss.backward()

/opt/anaconda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    323         for hook in self._forward_pre_hooks.values():
    324             hook(self, input)
--> 325         result = self.forward(*input, **kwargs)
    326         for hook in self._forward_hooks.values():
    327             hook_result = hook(self, input, result)

/opt/anaconda/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    145         _assert_no_grad(target)
    146         return F.nll_loss(input, target, self.weight, self.size_average,
--> 147                           self.ignore_index, self.reduce)
    148
    149

/opt/anaconda/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1047         weight = Variable(weight)
   1048     if dim == 2:
-> 1049         return torch._C._nn.nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1050     elif dim == 4:
   1051         return torch._C._nn.nll_loss2d(input, target, weight, size_average, ignore_index, reduce)

RuntimeError: multi-target not supported at /opt/conda/conda-bld/pytorch_1513368888240/work/torch/lib/THNN/generic/ClassNLLCriterion.c:22
```
Here is my code for the model:
```python
class CharRNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(CharRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = 1
        # Gate layers: input, forget, output gates and the candidate cell state
        self.x2h_i = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_f = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_o = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_q = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.h2o = torch.nn.Linear(hidden_size, output_size)
        self.sigmoid = torch.nn.Sigmoid()
        self.softmax = torch.nn.Softmax()
        self.tanh = torch.nn.Tanh()

    def forward(self, input, h_t, c_t):
        combined_input = torch.cat((input, h_t), 1)
        i_t = self.sigmoid(self.x2h_i(combined_input))
        f_t = self.sigmoid(self.x2h_f(combined_input))
        o_t = self.sigmoid(self.x2h_o(combined_input))
        q_t = self.tanh(self.x2h_q(combined_input))
        c_t_next = f_t * c_t + i_t * q_t
        h_t_next = o_t * self.tanh(c_t_next)
        output = self.softmax(h_t_next)
        return output, h_t, c_t

    def initHidden(self):
        return torch.autograd.Variable(torch.zeros(1, self.hidden_size))

    def weights_init(self, model):
        classname = model.__class__.__name__
        if classname.find('Linear') != -1:
            model.weight.data.normal_(0.0, 0.02)
            model.bias.data.fill_(0)
```
And this is the code for training the model:

```python
input_tensor = torch.autograd.Variable(torch.zeros(seq_length, n_vocab))
target_tensor = torch.autograd.Variable(torch.zeros(seq_length, n_vocab))

model = CharRNN(input_size=n_vocab, hidden_size=hidden_size, output_size=output_size)
model.apply(model.weights_init)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for i in range(n_epochs):
    print("Iteration", i)

    # Sample a random training window from the raw text
    start_idx = np.random.randint(0, n_chars - seq_length - 1)
    train_data = raw_text[start_idx:start_idx + seq_length + 1]

    input_tensor = torch.autograd.Variable(seq2tensor(train_data[:-1], n_vocab), requires_grad=True)
    target_tensor = torch.autograd.Variable(seq2tensor(train_data[1:], n_vocab), requires_grad=False).long()

    loss = 0
    h_t = torch.autograd.Variable(torch.zeros(1, hidden_size))
    c_t = torch.autograd.Variable(torch.zeros(1, hidden_size))

    for timestep in range(seq_length):
        output, h_t, c_t = model(input_tensor[timestep].view(1, n_vocab), h_t, c_t)
        loss += criterion(output, target_tensor[timestep].view(1, n_vocab))

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Sample 100 characters from the model to inspect its progress
    x_t = input_tensor[0].view(1, n_vocab)
    h_t = torch.autograd.Variable(torch.zeros(1, hidden_size))
    c_t = torch.autograd.Variable(torch.zeros(1, hidden_size))
    gen_seq = []
    for timestep in range(100):
        output, h_t, c_t = model(x_t, h_t, c_t)
        ix = np.random.choice(range(n_vocab), p=output.data.numpy().ravel())
        x_t = torch.autograd.Variable(torch.zeros(1, n_vocab))
        x_t[0, ix] = 1
        gen_seq.append(idx2char[ix])

    txt = ''.join(gen_seq)
    print('----------------------')
    print(txt)
    print('----------------------')
```
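(seq2tensor, idx2char, and the other globals are not shown in the question; presumably seq2tensor one-hot encodes a character sequence, roughly along these lines, assuming a char2idx lookup exists as the counterpart of idx2char. This is a guess, not the asker's actual code:)

```python
import torch

def seq2tensor(seq, n_vocab):
    # One-hot encode a string: one row per character, one column per vocab entry
    tensor = torch.zeros(len(seq), n_vocab)
    for t, ch in enumerate(seq):
        tensor[t, char2idx[ch]] = 1.0
    return tensor
```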
Can you please help me? Thanks in advance.
Answer 1:
The problem is with your target tensor. It has shape (1, n_classes), i.e. it is a 2D tensor, but CrossEntropyLoss expects a 1D tensor of class indices. Put another way: you are providing a one-hot encoded target tensor, while the loss function expects a class number from 0 to n_classes - 1. Change your loss calculation to:
```python
one_hot_target = target_tensor[timestep].view(1, n_vocab)
_, class_target = torch.max(one_hot_target, dim=1)
loss += criterion(output, class_target)
```
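torch.max returns both the maximum values and their indices along the given dimension, so class_target holds the position of the 1 in each one-hot row, which is exactly the class index CrossEntropyLoss wants. A quick standalone check (written against a current PyTorch; the tensors here are made up for illustration):

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

output = torch.randn(1, 5)                             # scores for a batch of 1, 5 classes
one_hot_target = torch.tensor([[0., 0., 1., 0., 0.]])  # one-hot encoding of class 2

# criterion(output, one_hot_target)    # RuntimeError: multi-target not supported
_, class_target = torch.max(one_hot_target, dim=1)
print(class_target)                    # tensor([2])
print(criterion(output, class_target)) # a scalar loss, as expected
```

On newer PyTorch versions, torch.argmax(one_hot_target, dim=1) performs the same conversion.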
Source: https://stackoverflow.com/questions/49987673/how-can-i-build-an-rnn-without-using-nn-rnn