Question
I am getting an error when trying to run a BERT model for an NER task: "CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 3.82 GiB total capacity; 2.58 GiB already allocated; 25.38 MiB free; 6.33 MiB cached)". I have also tried reducing the batch size to 1.
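For reference, a minimal sketch of how GPU memory usage could be inspected around the training step, using PyTorch's torch.cuda memory utilities; the report_gpu_memory helper and the places where it is called are illustrative assumptions, not part of the original code.

import torch

def report_gpu_memory(tag=""):
    # Illustrative helper: prints current and peak allocated GPU memory in MiB
    # so the point in the loop where memory grows can be located.
    mib = 1024 ** 2
    print("[{}] allocated: {:.1f} MiB, peak: {:.1f} MiB".format(
        tag,
        torch.cuda.memory_allocated() / mib,
        torch.cuda.max_memory_allocated() / mib))

# Hypothetical placement inside the loop below:
# report_gpu_memory("before forward")
# loss = model(...)
# report_gpu_memory("after backward")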
The training and validation loop:
import numpy as np
import torch
from tqdm import trange
# model, optimizer, device, train_dataloader, valid_dataloader, tags_vals,
# idx2word, flat_accuracy and f1_score are defined earlier in the code.

epochs = 10
max_grad_norm = 1.0

for _ in trange(epochs, desc="Epoch"):
    # TRAIN loop
    model.train()
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
        # add batch to gpu
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        # forward pass
        loss = model(b_input_ids, token_type_ids=None,
                     attention_mask=b_input_mask, labels=b_labels)
        # backward pass
        loss.backward()
        # track train loss
        tr_loss += loss.item()
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        # update parameters
        optimizer.step()
        model.zero_grad()
    # print train loss per epoch
    # print("Train loss: {}".format(tr_loss / nb_tr_steps))

    # VALIDATION on validation set
    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions, true_labels, true_inputs = [], [], []
    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        with torch.no_grad():
            tmp_eval_loss = model(b_input_ids, token_type_ids=None,
                                  attention_mask=b_input_mask, labels=b_labels)
            logits = model(b_input_ids, token_type_ids=None,
                           attention_mask=b_input_mask)
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        inputs = b_input_ids.to('cpu').numpy()
        true_inputs.append(inputs)
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.append(label_ids)
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)
        eval_loss += tmp_eval_loss.mean().item()
        eval_accuracy += tmp_eval_accuracy
        nb_eval_examples += b_input_ids.size(0)
        nb_eval_steps += 1
    eval_loss = eval_loss / nb_eval_steps
    pred_tags = [tags_vals[p_i] for p in predictions for p_i in p]
    valid_tags = [tags_vals[l_ii] for l in true_labels for l_i in l for l_ii in l_i]
    valid_inputs = [[idx2word[l_ii] for l_ii in l_i] for l in true_inputs for l_i in l]
    print("F1-Score: {}".format(f1_score(pred_tags, valid_tags)))
    print("Validation loss: {}".format(eval_loss))
    print("Validation Accuracy: {}".format(eval_accuracy / nb_eval_steps))
Attached is the output of nvidia-smi:
[nvidia-smi screenshot]
Source: https://stackoverflow.com/questions/60926878/cuda-out-of-memory