def select_models()

in scripts/recordify.py [0:0]


def select_models(all_records, predictive_metric, verbose=True):
  """
  Select top-K algorithm seeds corresponding to the best predictive metrics.

  The selection is computed from `all_records['evaluation']`, restricted to
  rows of the last epoch whose split is 'eval', and then applied as a filter
  to every record list in `all_records`.

  Args:
    all_records: Mapping from record name to a list of record dicts (or
      None, in which case that entry is skipped). Must contain an
      'evaluation' entry whose records provide 'epoch' and 'split' fields.
    predictive_metric: Metric forwarded to `select_topk_algorithm_seeds`.
    verbose: If True, report the selected models through `utils.message`.

  Returns:
    A plain dict mapping record name to the list of records whose
    ('algorithm.name', 'algorithm.arch', 'algorithm.seed') values match one
    of the selected models. Record names without any match are omitted.

  Note:
    Matching records whose 'algorithm.name' contains 'Bagging' have their
    'epoch' overwritten IN PLACE (the dicts held by `all_records` are
    mutated) when the evaluation records carry an 'ensemble.name' column.
  """
  evaluation_records = all_records['evaluation']
  # Selection based on evaluation records
  df = pd.DataFrame.from_records(evaluation_records)
  # HACK(Ishmael): Train epoch is awfully slow. So ensembles are "trained" for
  # only one epoch. We set the epoch counter for ensemble models to that of
  # non-ensemble models.
  # --- Hack starts
  # Stays None when there is no 'ensemble.name' column, so the per-record
  # rewrite below is skipped instead of raising NameError.
  max_epoch = None
  if 'ensemble.name' in df.columns:
    ensembles_idx = df['ensemble.name'].isin(['Bagging'])
    max_epoch = df.loc[~ensembles_idx, 'epoch'].max()
    # Single .loc assignment: the chained form `df.epoch[mask] = value` may
    # write to a temporary copy and is a no-op under pandas copy-on-write.
    df.loc[ensembles_idx, 'epoch'] = max_epoch
    assert max_epoch == df.epoch.max(), (
        'ensemble epochs were not aligned with non-ensemble epochs')
  # --- Hack ends.
  # Selecting best seeds
  cond = (df.epoch == df.epoch.max()) & (df.split == 'eval')
  df = df[cond]
  best_df = select_topk_algorithm_seeds(df, predictive_metric, k=1)
  if verbose:
    utils.message(f'Selected models based on in-domain {predictive_metric}')
    utils.message(best_df.to_string())

  model_keys = ['algorithm.name', 'algorithm.arch', 'algorithm.seed']
  best_models = best_df[model_keys].to_dict('records')
  # Selecting algorithm seeds
  filtered_records = collections.defaultdict(list)
  for record_name, records in all_records.items():
    if records is None:
      continue

    for record in records:
      # Project the record onto the identifying keys; a record lacking any of
      # them can never equal a selected model and is therefore dropped.
      model = {k: v for k, v in record.items() if k in model_keys}
      if model not in best_models:
        continue
      ## HACK(Ishmael): Fuggly hack... don't live in a house with broken windows.
      ## --- Hack starts.
      if max_epoch is not None and 'Bagging' in record['algorithm.name']:
        record['epoch'] = max_epoch
      ## -- Hack ends.
      filtered_records[record_name].append(record)

  return dict(filtered_records)