def sweep_params()

in src/jobs/util/grouping_pipeline.py [0:0]


def sweep_params():
    """Run the grouping pipeline over a grid of configurations.

    For every dataset id in ``user_test_list`` the labeled dataset is loaded
    and ``run_pipeline`` is called once per combination of embedding model,
    clustering method, cluster-count method, DBSCAN eps, dimensionality
    reduction option and tf-idf scale. Each run starts from a fresh
    ``get_default_config()`` with those fields overridden.

    Returns:
        A list with one dict per run, in sweep order. Each dict holds the
        config used for the run plus ``"dataset"`` (the dataset id) and the
        two scores returned by ``run_pipeline`` under ``"rand"`` and
        ``"adj_rand"``.
    """
    # Local import so the block does not depend on the file's import header.
    import itertools

    all_results = []
    for dataset_id in user_test_list:
        # The second return value (labeled topics) is not needed here.
        datasets, _ = get_labeled_dataset(dataset_id)
        # One provider per dataset, shared by every run on that dataset.
        model_provider = ModelProvider()

        for embedding_model, clustering_method in itertools.product(
            EMBEDDING_MODEL_LIST, CLUSTER_METHODS
        ):
            # Only k-means sweeps the cluster-count strategies; every other
            # method uses the single "knee" strategy.
            if clustering_method == "kmeans":
                num_cluster_methods = NUM_CLUSTER_METHODS
            else:
                num_cluster_methods = ["knee"]

            # Same single value for every clustering method. To sweep more
            # eps values, extend this list only when
            # clustering_method == "dbscan" so that other methods do not
            # multiply their runs (presumably only DBSCAN reads eps -- TODO
            # confirm against run_pipeline).
            dbscan_eps_params = [0.4]
            tf_idf_scales = [0.0]

            # product() iterates with the rightmost factor varying fastest,
            # which matches the order of the equivalent nested loops.
            grid = itertools.product(
                num_cluster_methods,
                dbscan_eps_params,
                DIM_REDUCE_OPTIONS,
                tf_idf_scales,
            )
            for num_cluster_method, dbscan_eps, remap, tf_idf_scale in grid:
                config = get_default_config()
                config["embedding_model"] = embedding_model
                config["remap"] = remap
                config["dbscan_eps"] = dbscan_eps
                config["tf_idf_scale"] = tf_idf_scale
                config["clustering_method"] = clustering_method
                config["num_cluster_method"] = num_cluster_method

                # Only the first element of `datasets` is evaluated; the
                # first value returned by the pipeline is not recorded.
                _, score, adj_rscore = run_pipeline(
                    config, datasets[0], model_provider=model_provider
                )
                all_results.append(
                    {
                        **config,
                        "dataset": dataset_id,
                        "rand": score,
                        "adj_rand": adj_rscore,
                    }
                )
                print("got result")
    return all_results