def compute_aligned_topics()

in src/tab_grouping_streamlit.py [0:0]


def compute_aligned_topics(frame: pd.DataFrame, user_label_key: str, ai_label_key: str):
    """Greedily pair each user topic with the AI topic it overlaps most.

    Every (user topic, AI topic) combination is scored as::

        rows carrying both labels / (rows with the AI label + rows with the user label)

    Combinations are visited from the highest to the lowest score and a pair
    is accepted whenever neither of its topics has been used yet.  Topics that
    end up without a partner are appended afterwards, paired with ``None``.

    Args:
        frame: Table containing both label columns.
        user_label_key: Name of the column holding the user-assigned topic.
        ai_label_key: Name of the column holding the AI-assigned topic.

    Returns:
        A list of ``[user_topic, ai_topic]`` lists: matched pairs first (best
        score first, ties kept in first-seen order), followed by unmatched
        topics as ``[None, ai_topic]`` or ``[user_topic, None]``.  A frame
        without rows yields ``[]``.

    Raises:
        KeyError: If either column name is missing from ``frame``.
    """
    user_column = frame[user_label_key]
    ai_column = frame[ai_label_key]

    # Build each AI topic's row mask and row count once instead of
    # recomputing them for every user topic.
    ai_stats = []
    for ai_topic in ai_column.unique().tolist():
        ai_mask = ai_column == ai_topic
        ai_stats.append((ai_topic, ai_mask, int(ai_mask.sum())))

    combos = []
    for user_topic in user_column.unique().tolist():
        user_mask = user_column == user_topic
        num_user = int(user_mask.sum())
        for ai_topic, ai_mask, num_ai in ai_stats:
            total_match = int((user_mask & ai_mask).sum())
            denominator = num_ai + num_user
            # A missing label (NaN/None) never equals itself, so both counts
            # can be zero; score that combination 0 instead of dividing by 0.
            percent = total_match / denominator if denominator else 0.0
            combos.append((percent, user_topic, ai_topic))

    # list.sort is stable even with reverse=True, so equally scored
    # combinations keep their first-seen order and the result is
    # deterministic.  Sorting plain tuples (rather than iterating DataFrame
    # rows) also keeps the topic values in their original Python types and
    # works when there are no combinations at all.
    combos.sort(key=lambda combo: combo[0], reverse=True)

    results = []
    picked_user = set()
    picked_ai = set()

    # Pass 1: greedy one-to-one matching, best score first.
    for _, user_topic, ai_topic in combos:
        if ai_topic not in picked_ai and user_topic not in picked_user:
            results.append([user_topic, ai_topic])
            picked_ai.add(ai_topic)
            picked_user.add(user_topic)

    # Pass 2: report every topic that found no partner, in score order.
    for _, user_topic, ai_topic in combos:
        if ai_topic not in picked_ai:
            results.append([None, ai_topic])
            picked_ai.add(ai_topic)
        if user_topic not in picked_user:
            results.append([user_topic, None])
            picked_user.add(user_topic)
    return results