def evaluate()

in tensorflow_examples/lite/model_maker/core/task/model_spec/text_spec.py


  def evaluate(self, model, tflite_filepath, dataset, num_steps, eval_examples,
               eval_features, predict_file, version_2_with_negative,
               max_answer_length, null_score_diff_threshold, verbose_logging,
               output_dir):
    """Evaluate QA model.

    Args:
      model: The keras model to be evaluated.
      tflite_filepath: File path to the TFLite model.
      dataset: tf.data.Dataset used for evaluation.
      num_steps: Number of steps to evaluate the model.
      eval_examples: List of `squad_lib.SquadExample` for evaluation data.
      eval_features: List of `squad_lib.InputFeatures` for evaluation data.
      predict_file: Path to the input predict file, a SQuAD-format JSON file
        used as the ground truth for scoring.
      version_2_with_negative: Whether the input predict file is in SQuAD 2.0
        format.
      max_answer_length: The maximum length of an answer that can be generated.
        This is needed because the start and end predictions are not conditioned
        on one another.
      null_score_diff_threshold: If null_score - best_non_null is greater than
        the threshold, predict null. This is only used for SQuAD v2.
      verbose_logging: If True, all of the warnings related to data processing
        will be printed. A number of warnings are expected for a normal SQuAD
        evaluation.
      output_dir: The output directory to which the JSON output files
        (predictions.json, nbest_predictions.json, null_odds.json) are saved.
        If None, skips saving to JSON files.

    Returns:
      A dict containing two metrics: the exact match rate and the F1 score.
    """
    if model is not None and tflite_filepath is not None:
      raise ValueError('Exactly one of the parameters `model` and '
                       '`tflite_filepath` should be set, but both are set.')
    elif model is None and tflite_filepath is None:
      raise ValueError('Exactly one of the parameters `model` and '
                       '`tflite_filepath` should be set, but both are None.')

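    # Run inference with the TFLite interpreter when a .tflite file is given;
    # otherwise run the in-memory Keras model for `num_steps` steps.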
    if tflite_filepath is not None:
      all_results = self.predict_tflite(tflite_filepath, dataset)
    else:
      all_results = self.predict(model, dataset, num_steps)

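    # Convert the raw start/end logits into text predictions, keeping the top
    # 20 candidate spans per example before choosing the best answer.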
    all_predictions, all_nbest_json, scores_diff_json = (
        squad_lib.postprocess_output(
            eval_examples,
            eval_features,
            all_results,
            n_best_size=20,
            max_answer_length=max_answer_length,
            do_lower_case=self.do_lower_case,
            version_2_with_negative=version_2_with_negative,
            null_score_diff_threshold=null_score_diff_threshold,
            verbose=verbose_logging))

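    # Optionally write the predictions, n-best candidates, and null-odds
    # scores to JSON files under `output_dir`.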
    if output_dir is not None:
      dump_to_files(all_predictions, all_nbest_json, scores_diff_json,
                    version_2_with_negative, output_dir)

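    # Load the ground-truth SQuAD data used for scoring.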
    dataset_json = file_util.load_json_file(predict_file)
    pred_dataset = dataset_json['data']

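    # SQuAD v2.0 scoring additionally consumes the null-vs-best-answer score
    # differences; v1.1 scoring only needs the predictions.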
    if version_2_with_negative:
      eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions,
                                                  scores_diff_json)
    else:
      eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions)
    return eval_metrics
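
A minimal usage sketch for the TFLite path. The spec instance and the
pre-built evaluation data (`spec`, `eval_dataset`, `eval_examples`,
`eval_features`) and the file paths below are placeholder assumptions for
illustration, not names defined in this module:

  metrics = spec.evaluate(
      model=None,                        # evaluating the exported TFLite model,
      tflite_filepath='model.tflite',    # so exactly one of the two is set
      dataset=eval_dataset,              # tf.data.Dataset of eval features
      num_steps=None,                    # only used on the Keras-model path
      eval_examples=eval_examples,       # list of squad_lib.SquadExample
      eval_features=eval_features,       # list of squad_lib.InputFeatures
      predict_file='dev-v1.1.json',      # SQuAD-format ground truth
      version_2_with_negative=False,     # SQuAD v1.1 format
      max_answer_length=30,
      null_score_diff_threshold=0.0,     # only used for SQuAD v2.0
      verbose_logging=False,
      output_dir=None)                   # skip writing prediction JSON files
  # `metrics` is a dict with the exact match rate and F1 score.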