in evaluation_pipeline/retrieval.py [0:0]
def main(model_name, k, threshold, history_file_path, golden_path=None, row_limit=100):
    """Run the retrieval evaluation for one embedding model and save the results.

    The function loads the history into a vector DB, runs retrieval with the
    given model, reshapes the output into a DataFrame and writes it to
    ``results/<normalized model name>_results.csv``.

    Args:
        model_name: Name of the embedding model. Passed to ``FeatureExtractor``
            and ``run_retrieval``; a normalized form (``/``, ``-`` and ``.``
            replaced by ``_``) is used for the log and CSV file names.
        k: Forwarded to ``run_retrieval`` and ``convert_dict_to_df``
            (presumably the number of results to retrieve -- confirm against
            those helpers).
        threshold: Forwarded to ``run_retrieval``.
        history_file_path: Forwarded to ``run_history_in_vector_db``.
        golden_path: Forwarded to ``run_history_in_vector_db`` as
            ``golden_set_file_path``. Defaults to ``None``.
        row_limit: First positional argument of ``run_history_in_vector_db``.
            Defaults to 100.

    Returns:
        A tuple ``(db, retrieval_results, df)``: the object returned by
        ``run_history_in_vector_db``, the raw output of ``run_retrieval`` and
        the DataFrame that was written to CSV.
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    # Make the model name safe for use in file names in a single pass.
    model_name_normalized = model_name.translate(str.maketrans("/-.", "___"))

    # Configure logging.
    # NOTE: logging.basicConfig does nothing if the root logger already has
    # handlers, so only the first call in a process picks the log file.
    logging.basicConfig(
        filename=f"performance_{model_name_normalized}.log",
        level=logging.INFO,
        format="%(asctime)s - %(message)s",
    )

    # Create the output directory up front so a missing "results/" folder
    # fails fast instead of after the expensive retrieval work is done.
    os.makedirs("results", exist_ok=True)

    query_ids, db, ground_truth, ground_truth_urls = run_history_in_vector_db(
        row_limit,
        history_file_path=history_file_path,
        golden_set_file_path=golden_path,
    )
    fe = FeatureExtractor(EMBEDDING_MODELS_DICT, model_name=model_name)
    retrieval_results, query_lookup = run_retrieval(
        fe, query_ids, db, model_name, threshold, k
    )

    # Reshape the results into a DataFrame and save it as CSV.
    df = convert_dict_to_df(
        retrieval_dict=retrieval_results,
        query_lookup=query_lookup,
        ground_truth=ground_truth,
        ground_truth_urls=ground_truth_urls,
        model_name=model_name,
        k=k,
    )
    df.to_csv(f"results/{model_name_normalized}_results.csv", index=False)
    return db, retrieval_results, df