def boosts_to_dataframe()

in similarity_search_experiments/similarity_search.py [0:0]

23 lines of code
10 McCabe index (conditional complexity)


def boosts_to_dataframe(boosts: List[Boost], class_label: str) -> pd.DataFrame:
    """Summarise a collection of boosts as a dataframe, one row per boost.

    Args:
        boosts: Objects to summarise. Each one is read for the attributes
            ``similarity_changes``, ``boosts``, ``decreases``,
            ``percent_change``, ``boost_percent_change``,
            ``proportion_boosted`` and ``transform_name``.
        class_label: Value written to every row of the ``class_label`` column.

    Returns:
        A dataframe whose columns are, in order: ``avg_``/``std_`` pairs for
        ``similarity_change``, ``boost`` and ``decrease``;
        ``avg_percent_similarity_change``; ``avg_percent_boost``;
        ``proportion_boosted``; ``num_boosted``; ``total_num_pairs``;
        ``transform_name``; ``class_label``.
    """
    # (column stem, attribute holding the values to aggregate)
    stat_sources = (
        ("similarity_change", "similarity_changes"),
        ("boost", "boosts"),
        ("decrease", "decreases"),
    )

    columns = {}
    for stem, attr in stat_sources:
        per_item_values = [getattr(item, attr) for item in boosts]
        columns["avg_" + stem] = [np.mean(values) for values in per_item_values]
        columns["std_" + stem] = [np.std(values) for values in per_item_values]

    columns.update(
        {
            # mean percentage changes
            "avg_percent_similarity_change": [
                np.mean(item.percent_change) for item in boosts
            ],
            "avg_percent_boost": [
                np.mean(item.boost_percent_change) for item in boosts
            ],
            # proportion and the raw counts taken from the array sizes
            "proportion_boosted": [item.proportion_boosted for item in boosts],
            "num_boosted": [item.boosts.size for item in boosts],
            "total_num_pairs": [item.similarity_changes.size for item in boosts],
        }
    )

    summary = pd.DataFrame(columns)
    # Identification columns are attached last so they trail the statistics.
    summary["transform_name"] = [item.transform_name for item in boosts]
    summary["class_label"] = class_label
    return summary