in nmt/train.py [0:0]
def run_internal_and_external_eval(model_dir,
                                   infer_model,
                                   infer_sess,
                                   eval_model,
                                   eval_sess,
                                   hparams,
                                   summary_writer,
                                   avg_ckpts=False,
                                   dev_eval_iterator_feed_dict=None,
                                   test_eval_iterator_feed_dict=None,
                                   dev_infer_iterator_feed_dict=None,
                                   test_infer_iterator_feed_dict=None):
"""Compute internal evaluation (perplexity) for both dev / test.
Computes development and testing perplexities for given model.
Args:
model_dir: Directory from which to load models from.
infer_model: Inference model for which to compute perplexities.
infer_sess: Inference TensorFlow session.
eval_model: Evaluation model for which to compute perplexities.
eval_sess: Evaluation TensorFlow session.
hparams: Model hyper-parameters.
summary_writer: Summary writer for logging metrics to TensorBoard.
avg_ckpts: Whether to compute average external evaluation scores.
dev_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session.
Can be used to pass in additional inputs necessary for running the
internal development evaluation.
test_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session.
Can be used to pass in additional inputs necessary for running the
internal testing evaluation.
dev_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session.
Can be used to pass in additional inputs necessary for running the
external development evaluation.
test_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session.
Can be used to pass in additional inputs necessary for running the
external testing evaluation.
Returns:
Triple containing results summary, global step Tensorflow Variable and
metrics in this order.
"""
  dev_ppl, test_ppl = run_internal_eval(
      eval_model,
      eval_sess,
      model_dir,
      hparams,
      summary_writer,
      dev_eval_iterator_feed_dict=dev_eval_iterator_feed_dict,
      test_eval_iterator_feed_dict=test_eval_iterator_feed_dict)
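  # External evaluation: decode and score with hparams.metrics (e.g. BLEU).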
  dev_scores, test_scores, global_step = run_external_eval(
      infer_model,
      infer_sess,
      model_dir,
      hparams,
      summary_writer,
      dev_infer_iterator_feed_dict=dev_infer_iterator_feed_dict,
      test_infer_iterator_feed_dict=test_infer_iterator_feed_dict)
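  # Collect all results in a single dict for the caller.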
  metrics = {
      "dev_ppl": dev_ppl,
      "test_ppl": test_ppl,
      "dev_scores": dev_scores,
      "test_scores": test_scores,
  }
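  # Optionally score the averaged checkpoint as well.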
  avg_dev_scores, avg_test_scores = None, None
  if avg_ckpts:
    avg_dev_scores, avg_test_scores = run_avg_external_eval(
        infer_model, infer_sess, model_dir, hparams, summary_writer,
        global_step)
    metrics["avg_dev_scores"] = avg_dev_scores
    metrics["avg_test_scores"] = avg_test_scores
  result_summary = _format_results("dev", dev_ppl, dev_scores, hparams.metrics)
  if avg_dev_scores:
    result_summary += ", " + _format_results("avg_dev", None, avg_dev_scores,
                                             hparams.metrics)
  if hparams.test_prefix:
    result_summary += ", " + _format_results("test", test_ppl, test_scores,
                                             hparams.metrics)
  if avg_test_scores:
    result_summary += ", " + _format_results("avg_test", None,
                                             avg_test_scores, hparams.metrics)

  return result_summary, global_step, metrics
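
# Illustrative usage sketch (not part of the original file): how a caller in
# train.py might invoke this helper once the inference/eval graphs, sessions,
# hparams and summary_writer have been built. The variable names and hparams
# fields below are assumptions for illustration, not verbatim from the repo.
#
#   result_summary, global_step, metrics = run_internal_and_external_eval(
#       hparams.out_dir, infer_model, infer_sess, eval_model, eval_sess,
#       hparams, summary_writer, avg_ckpts=hparams.avg_ckpts)
#   utils.print_out("# Eval, step %d: %s" % (global_step, result_summary))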