in LearningMachine.py [0:0]
    def evaluate(self, data, length, target, input_types, evaluator, loss_fn,
                 pad_ids=None, cur_best_result=None, model_save_path=None,
                 phase="", epoch=None, origin_data_path=None, predict_output_path=None):
"""
Args:
qp_net:
epoch:
data:
{
'string1': {
'word1': [...],
'postage_feature1': [..]
}
'string2': {
'word1': [...],
'postage_feature1': [..]
}
lengths:
{
'string1': [...],
'string2': [...]
}
target: [...]
input_types: {
"word": {
"cols": [
"word1",
"word2"
],
"dim": 300
},
"postag": {
"cols": ["postag_feature1", "postag_feature2"],
"dim": 20
}
origin_data_path:
predict_output_path: if predict_output_path exists, output the prediction result.
Returns:
"""
        assert not (predict_output_path and not origin_data_path), \
            'predict_output_path requires origin_data_path'
        to_predict = bool(predict_output_path)
logging.info("Starting %s ..." % phase)
self.model.eval()
with torch.no_grad():
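            # Batch without permutation so batch order matches the original file order (required for prediction output).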
data_batches, length_batches, target_batches = \
get_batches(self.problem, data, length, target, self.conf.batch_size_total, input_types, pad_ids, permutate=False, transform_tensor=True)
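            # Which fields to accumulate for metric computation depends on the problem type.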
if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
streaming_recoder = StreamingRecorder(['prediction', 'pred_scores', 'pred_scores_all', 'target'])
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
streaming_recoder = StreamingRecorder(['prediction', 'pred_scores', 'target'])
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
streaming_recoder = StreamingRecorder(['prediction', 'target'])
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
streaming_recoder = StreamingRecorder(['prediction', 'answer_text'])
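            # Prediction mode: read the original file in lockstep and write each line back with the predicted fields appended.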
if to_predict:
predict_stream_recoder = StreamingRecorder(self.conf.predict_fields)
fin = open(origin_data_path, 'r', encoding='utf-8')
if predict_output_path.startswith('/hdfs/'):
direct_hdfs_path = convert_to_hdfspath(predict_output_path)
local_tmp_path = convert_to_tmppath(predict_output_path)
fout = open(local_tmp_path, 'w', encoding='utf-8')
else:
direct_hdfs_path = None
fout = open(predict_output_path, 'w', encoding='utf-8')
if self.conf.file_with_col_header:
title_line = fin.readline()
fout.write(title_line)
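            # Any non-target column is assumed to carry the per-sample lengths; pick one at random for decoding.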
temp_key_list = list(length_batches[0].keys())
if 'target' in temp_key_list:
temp_key_list.remove('target')
key_random = random.choice(temp_key_list)
loss_recoder = StreamingRecorder(['loss'])
if self.conf.mode == 'normal':
progress = tqdm(range(len(target_batches)))
elif self.conf.mode == 'philly':
progress = range(len(target_batches))
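            # Show a tqdm progress bar in normal mode; fall back to a plain range on Philly.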
for i in progress:
param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
logits = self.model(inputs_desc, length_desc, *param_list)
logits_softmax = {}
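                # Softmax-normalize each Linear output layer's logits unless the layer already applied softmax itself.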
if isinstance(self.model, nn.DataParallel):
for tmp_output_layer_id in self.model.module.output_layer_id:
if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
(not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
logits[tmp_output_layer_id], dim=-1)
else:
logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
else:
for tmp_output_layer_id in self.model.output_layer_id:
if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
(not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
logits[tmp_output_layer_id], dim=-1)
else:
logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
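                # Classification has a single output layer: unwrap the one-entry dicts and keep per-class scores for AUC-style metrics.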
if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
logits = list(logits.values())[0]
logits_softmax = list(logits_softmax.values())[0]
# for auc metric
prediction_pos_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
if self.evaluator.has_auc_type_specific:
prediction_scores_all = logits_softmax.cpu().data.numpy()
else:
prediction_scores_all = None
else:
prediction_pos_scores = None
prediction_scores_all = None
logits_flat = {}
if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
logits = list(logits.values())[0]
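                    # tmp_output_layer_id still holds the (single) output layer id left over from the softmax loop above.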
if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
prediction_indices = tag_seq.cpu().numpy()
streaming_recoder.record_one_row(
[self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
prediction_pos_scores,
self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
keep_dim=False)
else:
logits_softmax = list(logits_softmax.values())[0]
# Transform output shapes for metric evaluation
# for seq_tag_f1 metric
prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
                        # PyTorch's CrossEntropyLoss only supports this flattened shape
logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2)) # [batch_size * seq_len, # of tags]
streaming_recoder.record_one_row(
[self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
prediction_pos_scores,
self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
keep_dim=False)
target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][
self.conf.answer_column_name[0]].reshape(-1) # [batch_size * seq_len]
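                    # Decode predicted tag indices back to tag strings, trimmed to each sample's true sequence length.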
if to_predict:
prediction_batch = self.problem.decode(prediction_indices, length_batches[i][key_random].numpy())
for prediction_sample in prediction_batch:
predict_stream_recoder.record('prediction', " ".join(prediction_sample))
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
prediction_indices = logits_softmax.data.max(1)[1].cpu().numpy()
                    # Do not decode to label strings here; the evaluator consumes label indices directly.
streaming_recoder.record_one_row([prediction_indices, prediction_pos_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
logits_flat[self.conf.output_layer_id[0]] = logits
if to_predict:
for field in self.conf.predict_fields:
if field == 'prediction':
predict_stream_recoder.record(field, self.problem.decode(prediction_indices, length_batches[i][key_random].numpy()))
elif field == 'confidence':
prediction_scores = logits_softmax.cpu().data.numpy()
for prediction_score, prediction_idx in zip(prediction_scores, prediction_indices):
predict_stream_recoder.record(field, prediction_score[prediction_idx])
elif field.startswith('confidence') and field.find('@') != -1:
label_specified = field.split('@')[1]
label_specified_idx = self.problem.output_dict.id(label_specified)
confidence_specified = torch.index_select(logits_softmax.cpu(), 1, torch.tensor([label_specified_idx], dtype=torch.long)).squeeze(1)
predict_stream_recoder.record(field, confidence_specified.data.numpy())
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
logits = list(logits.values())[0]
                    # logits_softmax is not used for the regression task
logits_softmax = list(logits_softmax.values())[0]
temp_logits_flat = logits.squeeze(1)
prediction_scores = temp_logits_flat.detach().cpu().numpy()
streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
logits_flat[self.conf.output_layer_id[0]] = temp_logits_flat
if to_predict:
predict_stream_recoder.record_one_row([prediction_scores])
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
for key, value in logits.items():
logits[key] = value.squeeze()
for key, value in logits_softmax.items():
logits_softmax[key] = value.squeeze()
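                    # Locate the passage input by naming convention: its column name contains 'p'.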
passage_identify = None
for type_key in data_batches[i].keys():
if 'p' in type_key.lower():
passage_identify = type_key
break
if not passage_identify:
                        raise Exception('The MRC task requires passage information.')
prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
batch_data=data_batches[i][passage_identify])
logits_flat = logits
mrc_answer_target = None
for single_target in target_batches[i]:
if isinstance(target_batches[i][single_target][0], str):
mrc_answer_target = target_batches[i][single_target]
streaming_recoder.record_one_row([prediction, mrc_answer_target])
if to_predict:
predict_stream_recoder.record_one_row([prediction])
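                # Write one output line per sample: the original input line plus the recorded prediction fields.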
if to_predict:
if ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
logits_len = len(list(logits.values())[0])
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging and isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                        # for the sequence_tagging task with a CRF output layer, logits is a tuple whose index 3 is tag_seq [batch_size * seq_len]
logits_len = logits[3].size(0)
else:
logits_len = len(logits)
for sample_idx in range(logits_len):
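                        # Advance through the original file until a line matches the expected column count.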
while True:
sample = fin.readline().rstrip()
                            line_split = list(filter(lambda x: len(x) > 0, sample.split('\t')))
if self.problem.file_column_num is None or len(line_split) == self.problem.file_column_num:
break
fout.write("%s\t%s\n" % (sample,
"\t".join([str(predict_stream_recoder.get(field)[sample_idx]) for field in self.conf.predict_fields])))
predict_stream_recoder.clear_records()
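                # Compute this batch's loss: CRF loss consumes the raw CRF outputs, other losses use the flattened logits.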
if self.use_gpu:
for single_target in self.conf.answer_column_name:
if isinstance(target_batches[i][single_target], torch.Tensor):
target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
if isinstance(loss_fn.loss_fn[0], CRFLoss):
loss = loss_fn.loss_fn[0](forward_score, scores, masks, list(target_batches[i].values())[0], transitions, layer_conf)
else:
loss = loss_fn(logits_flat, target_batches[i])
loss_recoder.record('loss', loss.item())
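                # Release per-batch tensors promptly so memory use stays flat across the evaluation loop.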
del loss, logits, logits_softmax, logits_flat
del prediction_pos_scores
if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
del prediction_indices
del data_batches, length_batches, target_batches
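        # All batches processed: compute the phase metrics over everything recorded during the loop.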
if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
result = self.evaluator.evaluate(streaming_recoder.get('target'), streaming_recoder.get('prediction'),
y_pred_pos_score=streaming_recoder.get('pred_scores'),
y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
result = self.evaluator.evaluate(streaming_recoder.get('target'), streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'), formatting=True)
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
result = self.evaluator.evaluate(streaming_recoder.get('target'), streaming_recoder.get('prediction'), y_pred_pos_score=None, formatting=True)
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
        if epoch is not None:
logging.info("Epoch %d, %s %s loss: %f" % (epoch, phase, result, loss_recoder.get('loss', 'mean')))
else:
logging.info("%s %s loss: %f" % (phase, result, loss_recoder.get('loss', 'mean')))
if phase == 'valid':
cur_result = evaluator.get_first_metric_result()
if self.evaluator.compare(cur_result, cur_best_result) == 1:
                logging.info(
                    'Current result %f is better than previous best %s; saving the new best model...' % (cur_result, "%f" % cur_best_result if cur_best_result else "None"))
if model_save_path is not None:
if self.conf.mode == 'philly' and model_save_path.startswith('/hdfs/'):
with HDFSDirectTransferer(model_save_path, with_hdfs_command=True) as transferer:
if isinstance(self.model, nn.DataParallel):
transferer.torch_save(self.model.module)
else:
transferer.torch_save(self.model)
else:
if not os.path.exists(os.path.dirname(model_save_path)):
os.makedirs(os.path.dirname(model_save_path))
if isinstance(self.model, nn.DataParallel):
torch.save(self.model.module, model_save_path, pickle_protocol=pkl.HIGHEST_PROTOCOL)
else:
torch.save(self.model, model_save_path, pickle_protocol=pkl.HIGHEST_PROTOCOL)
logging.info("Best model saved to %s" % model_save_path)
cur_best_result = cur_result
else:
                logging.info('Current result %f is no better than previous best result %f' % (cur_result, cur_best_result))
if to_predict:
fin.close()
fout.close()
if direct_hdfs_path:
move_from_local_to_hdfs(local_tmp_path, direct_hdfs_path)
return cur_best_result