def calculate_counter_stat()

in perfrunbook/utilities/measure_aggregated_pmu_stats.py
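
This excerpt relies on module-level imports and output-path constants that are defined elsewhere in measure_aggregated_pmu_stats.py. A minimal sketch of what the function assumes is shown below; the exact constants and paths in the repository may differ.

# Assumed module-level context for this excerpt (illustrative only; the
# real constants in the repository may point at different files).
import json
import math

import numpy as np
import pandas as pd
from scipy import stats

RESULTS_CSV = "results.csv"    # pipe-separated samples captured from perf
RESULTS_JSON = "results.json"  # aggregate stats written by calculate_counter_stat()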


def calculate_counter_stat(platforms):
    """
    Process out csv file from perf out to a set of aggregate statistics
    """
    df = pd.read_csv(
        RESULTS_CSV,
        sep="|",
        header=None,
        names=["time", "CPU", "count", "rsrvd1", "event", "rsrvd2", "frac", "rsrvd3", "rsrvd4"],
        dtype={
            "time": np.float64,
            "CPU": str,
            "count": np.float64,
            "rsrvd1": str,
            "event": str,
            "rsrvd2": str,
            "frac": np.float64,
            "rsrvd3": str,
            "rsrvd4": str,
        },
        na_values=["<not counted>", "<not supported>"],
    )

    # Split each counter event name into its group id and the
    # human-readable counter definition.
    def split_counter_group(row):
        # A counter name may itself contain dashes, so only split on the
        # first one (the group id prefix).
        group, counter = row.event.split("-", 1)
        row["group"] = group
        row["counter"] = counter
        return row

    # Normalize our time value to serve as an index along with CPU
    # for easier processing.
    time_offset = 0
    cur_time = 0
    cur_group_id = ""

    def normalize_time(row):
        # Each time the group changes we have started a new set of
        # measurements and perf's timestamps restart from zero, so fold
        # the elapsed time into a running offset.
        nonlocal time_offset
        nonlocal cur_time
        nonlocal cur_group_id
        # Initial group_id assignment
        if not cur_group_id:
            cur_group_id = row["group"]
        if cur_group_id != row["group"]:
            time_offset = int(math.ceil(time_offset + row["time"]))
            cur_group_id = row["group"]
        cur_time = int(math.ceil(row["time"] + time_offset))  # round up to whole seconds
        row["normalized_time"] = cur_time
        return row

    df = df.apply(split_counter_group, axis=1)
    df = df.apply(normalize_time, axis=1)
    df = df.set_index(["normalized_time", "CPU"])
    data = {}

    for platform in platforms:
        counter_list = platform.get_counters()
        for counter in counter_list:
            stat_name = counter.get_name()
            series_res = counter.create_stat(df)

            try:
                series_res.replace([np.inf, -np.inf], np.nan, inplace=True)
                series_res.dropna(inplace=True)

                # Calculate some meaningful aggregate stats for comparisons
                geomean = stats.gmean(series_res)
                p10 = stats.scoreatpercentile(series_res, 10)
                p50 = stats.scoreatpercentile(series_res, 50)
                p90 = stats.scoreatpercentile(series_res, 90)
                p95 = stats.scoreatpercentile(series_res, 95)
                p99 = stats.scoreatpercentile(series_res, 99)
                p999 = stats.scoreatpercentile(series_res, 99.9)
                p100 = stats.scoreatpercentile(series_res, 100)

                data[stat_name] = {
                    "geomean": geomean,
                    "p10": p10,
                    "p50": p50,
                    "p90": p90,
                    "p95": p95,
                    "p99": p99,
                    "p99.9": p999,
                    "p100": p100,
                }
            except Exception:
                # Fall back to zeros if the stat could not be computed
                # (e.g. an empty or all-NaN series) so the output always
                # contains every key.
                data[stat_name] = {
                    "geomean": 0,
                    "p10": 0,
                    "p50": 0,
                    "p90": 0,
                    "p95": 0,
                    "p99": 0,
                    "p99.9": 0,
                    "p100": 0,
                }
    with open(RESULTS_JSON, "w") as f:
        json.dump(data, f)
    return data
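
For context, a hypothetical caller might look like the sketch below. ExamplePlatform and ExampleCounter are illustrative stand-ins for the interface the function expects (get_counters(), get_name(), create_stat()); the repository's real platform and counter classes are defined elsewhere and will differ.

# Hypothetical usage sketch -- not the repository's actual classes.
class ExampleCounter:
    def get_name(self):
        return "instructions"

    def create_stat(self, df):
        # A real counter would typically combine one or more raw PMU events
        # here; this stand-in just returns the raw sample counts for the
        # event whose human-readable name is "instructions".
        return df[df["counter"] == "instructions"]["count"]


class ExamplePlatform:
    def get_counters(self):
        return [ExampleCounter()]


if __name__ == "__main__":
    aggregates = calculate_counter_stat([ExamplePlatform()])
    print(json.dumps(aggregates, indent=2))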