in src/tab_grouping_streamlit.py [0:0]
def compute_aligned_topics(frame: pd.DataFrame, user_label_key: str, ai_label_key: str) -> list:
    """Greedily pair user-assigned topics with AI-assigned topics by row overlap.

    Every (user topic, AI topic) combination is scored as::

        rows carrying both labels / (rows with the user label + rows with the AI label)

    Combinations are then accepted from the highest score down, each topic
    being used in at most one pair.  Topics left without a partner are
    appended afterwards with ``None`` in the missing slot.

    Args:
        frame: Table with one row per labelled item.
        user_label_key: Column of ``frame`` holding the user-assigned topic.
        ai_label_key: Column of ``frame`` holding the AI-assigned topic.

    Returns:
        A list of two-element lists ``[user_topic, ai_topic]``.  Matched pairs
        come first, best score first; unmatched topics follow as
        ``[None, ai_topic]`` or ``[user_topic, None]``.  The list is empty
        when ``frame`` has no rows.
    """
    user_topics = frame[user_label_key].unique().tolist()
    ai_topics = frame[ai_label_key].unique().tolist()
    if not user_topics or not ai_topics:
        # A frame without rows has nothing to align; bail out before any
        # scoring instead of failing on an empty score table.
        return []

    # Count everything once up front rather than re-filtering the frame for
    # every topic combination.  Both value_counts() and groupby() skip missing
    # labels, so a missing label simply gets a count of 0 below.
    user_sizes = frame[user_label_key].value_counts().to_dict()
    ai_sizes = frame[ai_label_key].value_counts().to_dict()
    overlap_sizes = (
        frame.groupby([user_label_key, ai_label_key], sort=False).size().to_dict()
    )

    scored = []
    for user_topic in user_topics:
        user_size = user_sizes.get(user_topic, 0)
        for ai_topic in ai_topics:
            denominator = ai_sizes.get(ai_topic, 0) + user_size
            overlap = overlap_sizes.get((user_topic, ai_topic), 0)
            # The denominator is only 0 when both labels are missing values;
            # score that combination 0 instead of dividing by zero.
            score = overlap / denominator if denominator else 0.0
            scored.append((score, user_topic, ai_topic))

    # list.sort is stable, also with reverse=True, so combinations with equal
    # scores keep their first-appearance order and the result is deterministic.
    scored.sort(key=lambda entry: entry[0], reverse=True)

    results = []
    picked_ai = set()
    picked_user = set()

    # Pass 1: accept the best-scoring combination whose topics are both free.
    for _, user_topic, ai_topic in scored:
        if ai_topic not in picked_ai and user_topic not in picked_user:
            results.append([user_topic, ai_topic])
            picked_ai.add(ai_topic)
            picked_user.add(user_topic)

    # Pass 2: emit every topic that found no partner, in score order.
    for _, user_topic, ai_topic in scored:
        if ai_topic not in picked_ai:
            results.append([None, ai_topic])
            picked_ai.add(ai_topic)
        if user_topic not in picked_user:
            results.append([user_topic, None])
            picked_user.add(user_topic)

    return results