in baselines/how_find_matches.py [0:0]
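# Relies on names defined elsewhere in this file/module (not shown in this excerpt):
# np (numpy), HOW_ROOT, MAX_RESULTS, load_list_file, load_results, store_predictions,
# and io_helpers (presumably the HOW toolkit's how.utils.io_helpers).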
def find_matches(args):
"""Given results files, create csv predictions file"""
databases = [args.database]
if args.normalization_set:
databases.append(args.normalization_set)
# Load list files
queries = load_list_file(args.query_list)
references = load_list_file(args.db_list)
# Load scenario parameters
params = io_helpers.load_params(args.scenario)
exp_name = args.scenario.rsplit("/", 1)[1][:-len(args.scenario.rsplit(".", 1)[1])-1]
eval_folder = HOW_ROOT / params['demo_eval']['exp_folder'] / exp_name / "eval"
# Load results for both databases
results = {}
for database in databases:
results[database] = load_results(eval_folder.glob(f"{database}.results*.pkl"))
if args.normalization_set:
assert (results[args.normalization_set]['query_ids'] == results[args.database]['query_ids']).all()
# Normalize references scores by train scores
ranks = results[args.database]['ranks']
scores = results[args.database]['scores']
if args.normalization_set:
norm_reduction, norm_rank, norm_factor = "min", 9, 2
if norm_reduction == "min":
norms = results[args.normalization_set]['scores'][:,norm_rank]
elif norm_reduction == "mean":
norms = results[args.normalization_set]['scores'][:,:norm_rank+1].mean(axis=1)
scores -= norm_factor * norms[:,None]
# Take top predictions
top_idxs = np.argsort(-scores.flatten())[:MAX_RESULTS]
idx_query, idx_rank = np.unravel_index(top_idxs, scores.shape)
idx_db, score = ranks[idx_query,idx_rank], scores[idx_query,idx_rank]
predictions = [(queries[q], references[db], s) for q, db, s in zip(idx_query, idx_db, score)]
if args.preds_filepath:
store_predictions(predictions, args.preds_filepath)
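
# Minimal, self-contained illustration (not from the original file) of the top-k
# selection used above: argsort over the negated, flattened score matrix yields
# flat indices in descending-score order, and unravel_index maps each flat index
# back to its (query row, rank column) coordinates. The 2x3 matrix and the cap of
# 4 results are made up for this example.
import numpy as np

scores = np.array([[0.9, 0.4, 0.1],
                   [0.7, 0.6, 0.2]])
max_results = 4
top_idxs = np.argsort(-scores.flatten())[:max_results]
idx_query, idx_rank = np.unravel_index(top_idxs, scores.shape)
for q, r in zip(idx_query, idx_rank):
    print(q, r, scores[q, r])
# prints the four highest-scoring cells, best first:
# 0 0 0.9 / 1 0 0.7 / 1 1 0.6 / 0 1 0.4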
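
# Hypothetical sketch of the store_predictions helper used above; it is not shown in
# this excerpt, so this only illustrates one plausible implementation, assuming the
# predictions CSV has a query_id,reference_id,score header row.
import csv

def store_predictions(predictions, filepath):
    """Write (query_id, reference_id, score) triples to a CSV file."""
    with open(filepath, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["query_id", "reference_id", "score"])
        for query_id, reference_id, score in predictions:
            writer.writerow([query_id, reference_id, score])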