def main()

in evaluation_pipeline/retrieval.py [0:0]


def main(model_name, k, threshold, history_file_path, golden_path=None, row_limit=100):
    """Run the retrieval evaluation for one embedding model and save the results.

    The history (and optional golden set) is loaded via
    ``run_history_in_vector_db``, retrieval is run with a ``FeatureExtractor``
    built for ``model_name``, and the results are reshaped into a DataFrame
    that is written to ``results/<normalized model name>_results.csv``.

    Args:
        model_name: Name of the embedding model. Passed to ``FeatureExtractor``,
            ``run_retrieval`` and ``convert_dict_to_df``; a normalized form
            (``/``, ``-`` and ``.`` replaced by ``_``) is used in the log and
            CSV file names.
        k: Forwarded to ``run_retrieval`` and ``convert_dict_to_df``.
        threshold: Forwarded to ``run_retrieval``.
        history_file_path: Forwarded to ``run_history_in_vector_db`` as
            ``history_file_path``.
        golden_path: Forwarded to ``run_history_in_vector_db`` as
            ``golden_set_file_path``. Defaults to ``None``.
        row_limit: Forwarded as the first positional argument of
            ``run_history_in_vector_db``. Defaults to 100.

    Returns:
        A ``(db, retrieval_results, df)`` tuple: the ``db`` object returned by
        ``run_history_in_vector_db``, the raw retrieval results returned by
        ``run_retrieval``, and the DataFrame that was written to CSV.
    """
    # Local import so this function does not depend on a module-level `os` import.
    import os

    # Model names may contain "/", "-" or "."; replace them so the name can be
    # embedded in the log and CSV file names.
    model_name_normalized = (
        model_name.replace("/", "_").replace("-", "_").replace(".", "_")
    )

    # Configure logging to a per-model log file.
    # NOTE(review): logging.basicConfig does nothing if the root logger already
    # has handlers, so only the first call in a process takes effect.
    logging.basicConfig(
        filename=f"performance_{model_name_normalized}.log",
        level=logging.INFO,
        format="%(asctime)s - %(message)s",
    )

    query_ids, db, ground_truth, ground_truth_urls = run_history_in_vector_db(
        row_limit,
        history_file_path=history_file_path,
        golden_set_file_path=golden_path,
    )
    fe = FeatureExtractor(EMBEDDING_MODELS_DICT, model_name=model_name)
    retrieval_results, query_lookup = run_retrieval(
        fe, query_ids, db, model_name, threshold, k
    )

    # Reshape the retrieval output into a DataFrame and save it as CSV.
    df = convert_dict_to_df(
        retrieval_dict=retrieval_results,
        query_lookup=query_lookup,
        ground_truth=ground_truth,
        ground_truth_urls=ground_truth_urls,
        model_name=model_name,
        k=k,
    )

    # to_csv does not create missing parent directories, so make sure the
    # output directory exists before writing.
    os.makedirs("results", exist_ok=True)
    df.to_csv(f"results/{model_name_normalized}_results.csv", index=False)
    return db, retrieval_results, df