in scripts/recordify.py [0:0]
def select_models(all_records, predictive_metric, verbose=True):
    """
    Keep only the records that belong to the best algorithm seeds.

    The best seeds are chosen by ``select_topk_algorithm_seeds`` (called with
    ``k=1``) from the rows of ``all_records['evaluation']`` that are on the
    ``'eval'`` split and at the last epoch. Every record collection in
    ``all_records`` is then filtered down to the records whose
    ``algorithm.name`` / ``algorithm.arch`` / ``algorithm.seed`` values match
    one of the selected models.

    Args:
        all_records: Mapping from record name to an iterable of record dicts.
            Entries whose value is ``None`` are skipped. An ``'evaluation'``
            entry is required.
        predictive_metric: Metric forwarded to
            ``select_topk_algorithm_seeds``.
        verbose: When true, the selection is reported via ``utils.message``.

    Returns:
        A plain ``dict`` mapping record names to the list of retained
        records. Record names for which nothing was retained are absent.

    Note:
        When the ensemble epoch hack below is active, retained records whose
        ``algorithm.name`` contains ``'Bagging'`` are modified in place
        (their ``'epoch'`` is overwritten), so the caller's records change.
    """
    evaluation_records = all_records['evaluation']
    # Selection is based on the evaluation records.
    df = pd.DataFrame.from_records(evaluation_records)

    # HACK(Ishmael): Train epoch is awfully slow, so ensembles are "trained"
    # for only one epoch. We set the epoch counter of ensemble models to that
    # of the non-ensemble models.
    # --- Hack starts
    # Stays None when there is no ensemble column, so the per-record override
    # further down is skipped instead of hitting an unbound local.
    max_epoch = None
    if 'ensemble.name' in df.columns:
        ensembles_idx = df['ensemble.name'].isin(['Bagging'])
        max_epoch = df.loc[~ensembles_idx, 'epoch'].max()
        # Single .loc assignment: chained `df.epoch[mask] = ...` may write to
        # a temporary copy and is a no-op under pandas copy-on-write.
        df.loc[ensembles_idx, 'epoch'] = max_epoch
        assert max_epoch == df.epoch.max()
    # --- Hack ends.

    # Select the best seeds among last-epoch, eval-split rows.
    cond = (df.epoch == df.epoch.max()) & (df.split == 'eval')
    df = df[cond]
    best_df = select_topk_algorithm_seeds(df, predictive_metric, k=1)
    if verbose:
        utils.message(f'Selected models based on in-domain {predictive_metric}')
        utils.message(best_df.to_string())

    model_keys = ['algorithm.name', 'algorithm.arch', 'algorithm.seed']
    best_models = best_df[model_keys].to_dict('records')

    # Keep only the records produced by one of the selected models.
    filtered_records = collections.defaultdict(list)
    for record_name, records in all_records.items():
        if records is None:
            continue
        for record in records:
            model = {k: v for k, v in record.items() if k in model_keys}
            if model not in best_models:
                continue
            # HACK(Ishmael): mirror the epoch override applied to the
            # DataFrame above on the raw ensemble records.
            # --- Hack starts.
            if max_epoch is not None and 'Bagging' in record['algorithm.name']:
                record['epoch'] = max_epoch
            # --- Hack ends.
            filtered_records[record_name].append(record)
    return dict(filtered_records)