in similarity_search_experiments/similarity_search.py [0:0]
def boosts_to_dataframe(boosts: List[Boost], class_label: str) -> pd.DataFrame:
    """Aggregate boosts into a pandas dataframe, one row per boost.

    Args:
        boosts: boost results to summarise. Any iterable is accepted; it is
            materialised once so that generators work too. Each item must
            expose ``similarity_changes``, ``boosts``, ``decreases``,
            ``percent_change``, ``boost_percent_change``,
            ``proportion_boosted`` and ``transform_name``.
        class_label: value written to the ``class_label`` column of every row.

    Returns:
        A dataframe with, in this order, the columns
        ``avg_/std_similarity_change``, ``avg_/std_boost``,
        ``avg_/std_decrease``, ``avg_percent_similarity_change``,
        ``avg_percent_boost``, ``proportion_boosted``, ``num_boosted``,
        ``total_num_pairs``, ``transform_name`` and ``class_label``.
        A statistic computed over an empty array is NaN.
    """

    def _stat(reduce, values):
        # np.mean / np.std on an empty array return NaN but also emit
        # RuntimeWarnings (hard errors under warnings-as-errors). A transform
        # with no boosted (or no decreased) pairs is a legitimate outcome, so
        # yield the same NaN quietly instead.
        return reduce(values) if np.size(values) else np.nan

    # Iterated once per column below; a one-shot iterator would otherwise be
    # exhausted after the first column.
    boosts = list(boosts)

    # Explicit attribute -> column-stem mapping, instead of deriving the stem
    # by chopping the trailing "s" off the attribute name.
    stat_fields = (
        ("similarity_changes", "similarity_change"),
        ("boosts", "boost"),
        ("decreases", "decrease"),
    )

    results = {}
    for attr, stem in stat_fields:
        results[f"avg_{stem}"] = [
            _stat(np.mean, getattr(boost, attr)) for boost in boosts
        ]
        results[f"std_{stem}"] = [
            _stat(np.std, getattr(boost, attr)) for boost in boosts
        ]

    # add percentages
    results["avg_percent_similarity_change"] = [
        _stat(np.mean, boost.percent_change) for boost in boosts
    ]
    results["avg_percent_boost"] = [
        _stat(np.mean, boost.boost_percent_change) for boost in boosts
    ]

    # add proportion and raw counts
    results["proportion_boosted"] = [boost.proportion_boosted for boost in boosts]
    # np.size() also handles plain sequences, not only ndarrays.
    results["num_boosted"] = [np.size(boost.boosts) for boost in boosts]
    results["total_num_pairs"] = [
        np.size(boost.similarity_changes) for boost in boosts
    ]

    df = pd.DataFrame(results)
    df["transform_name"] = [boost.transform_name for boost in boosts]
    # Scalar assignment broadcasts the label to every row.
    df["class_label"] = class_label
    return df