def report_elo_analysis_results()

in fastchat/serve/monitor/elo_analysis.py [0:0]


def report_elo_analysis_results(battles_json):
    """Compute Elo ratings and the accompanying plots for a set of battles.

    The records are loaded into a DataFrame, ordered by ``tstamp`` and
    restricted to rows whose ``anony`` value is truthy. Ratings (online and
    bootstrap) are computed from that filtered frame; the heatmaps and the
    win-rate bar chart additionally drop rows whose ``winner`` contains
    ``"tie"``.

    Args:
        battles_json: Battle records accepted by ``pd.DataFrame``. The code
            reads the ``tstamp``, ``anony`` and ``winner`` columns.
            NOTE(review): ``tstamp`` is presumably a POSIX timestamp in
            seconds, since it is passed to ``datetime.fromtimestamp`` --
            confirm against the log format.

    Returns:
        A dict with the keys ``elo_rating_online``, ``elo_rating_median``,
        ``leaderboard_table``, ``win_fraction_heatmap``,
        ``battle_count_heatmap``, ``average_win_rate_bar``,
        ``bootstrap_elo_rating``, ``last_updated_datetime`` (string rendered
        in the US/Pacific zone) and ``last_updated_tstamp`` (maximum
        ``tstamp`` among the anonymous battles).
    """
    top_n = 25  # limit show number to make plots smaller

    votes = pd.DataFrame(battles_json).sort_values(by=["tstamp"], ascending=True)

    # Only use anonymous votes
    anony_mask = votes["anony"]
    votes = votes[anony_mask].reset_index(drop=True)

    # Ties are kept for the ratings but excluded from the pairwise plots.
    tie_mask = votes["winner"].str.contains("tie")
    decisive_votes = votes[~tie_mask]

    # Online update
    online_ratings = compute_elo(votes)

    # Bootstrap
    bootstrap_df = get_bootstrap_result(votes, compute_elo)
    median_ratings = get_median_elo_from_bootstrap(bootstrap_df)

    # Highest median rating first, truncated to the models that get plotted.
    ranked_models = sorted(
        median_ratings.keys(), key=lambda name: -median_ratings[name]
    )[:top_n]

    # Plots
    leaderboard = visualize_leaderboard_table(median_ratings)
    win_fraction = visualize_pairwise_win_fraction(decisive_votes, ranked_models)
    battle_count = visualize_battle_count(decisive_votes, ranked_models)
    win_rate_bar = visualize_average_win_rate(decisive_votes, top_n)
    bootstrap_plot = visualize_bootstrap_elo_rating(bootstrap_df, top_n)

    # Freshness metadata: the newest vote, also rendered as Pacific time.
    newest_tstamp = votes["tstamp"].max()
    pacific = timezone("US/Pacific")
    newest_moment = datetime.datetime.fromtimestamp(newest_tstamp, tz=pacific)
    newest_label = newest_moment.strftime("%Y-%m-%d %H:%M:%S %Z")

    return dict(
        elo_rating_online=online_ratings,
        elo_rating_median=median_ratings,
        leaderboard_table=leaderboard,
        win_fraction_heatmap=win_fraction,
        battle_count_heatmap=battle_count,
        average_win_rate_bar=win_rate_bar,
        bootstrap_elo_rating=bootstrap_plot,
        last_updated_datetime=newest_label,
        last_updated_tstamp=newest_tstamp,
    )