in perfrunbook/utilities/measure_aggregated_pmu_stats.py [0:0]
def calculate_counter_stat(platforms):
    """
    Process the CSV output from perf into a set of aggregate statistics.
    """
    df = pd.read_csv(
        RESULTS_CSV,
        sep="|",
        header=None,
        names=["time", "CPU", "count", "rsrvd1", "event", "rsrvd2", "frac", "rsrvd3", "rsrvd4"],
        dtype={
            "time": np.float64,
            "CPU": str,
            "count": np.float64,
            "rsrvd1": str,
            "event": str,
            "rsrvd2": str,
            "frac": np.float64,
            "rsrvd3": str,
            "rsrvd4": str,
        },
        na_values=["<not counted>", "<not supported>"],
    )
    # Split each counter event name of the form "<group>-<counter>" into its
    # group id and the human readable counter definition.
    def split_counter_group(row):
        group, counter = row.event.split("-")
        row["group"] = group
        row["counter"] = counter
        return row
    # Normalize our time value to serve as an index along with CPU
    # for easier processing.
    time_offset = 0
    cur_time = 0
    cur_group_id = ""

    def normalize_time(row):
        # Each time the group changes, update our time offset as we have started
        # a new set of measurements, but time resets.
        nonlocal time_offset
        nonlocal cur_time
        nonlocal cur_group_id

        # Initial group_id assignment
        if not cur_group_id:
            cur_group_id = row["group"]

        if cur_group_id != row["group"]:
            time_offset = int(math.ceil(time_offset + row["time"]))
            cur_group_id = row["group"]

        cur_time = int(math.ceil(row["time"] + time_offset))  # round up to whole seconds
        row["normalized_time"] = cur_time
        return row
    df = df.apply(split_counter_group, axis=1)
    df = df.apply(normalize_time, axis=1)
    df = df.set_index(["normalized_time", "CPU"])
    data = {}
    for platform in platforms:
        counter_list = platform.get_counters()
        for counter in counter_list:
            stat_name = counter.get_name()
            series_res = counter.create_stat(df)
            try:
                series_res.replace([np.inf, -np.inf], np.nan, inplace=True)
                series_res.dropna(inplace=True)

                # Calculate some meaningful aggregate stats for comparisons
                geomean = stats.gmean(series_res)
                p10 = stats.scoreatpercentile(series_res, 10)
                p50 = stats.scoreatpercentile(series_res, 50)
                p90 = stats.scoreatpercentile(series_res, 90)
                p95 = stats.scoreatpercentile(series_res, 95)
                p99 = stats.scoreatpercentile(series_res, 99)
                p999 = stats.scoreatpercentile(series_res, 99.9)
                p100 = stats.scoreatpercentile(series_res, 100)
                data[stat_name] = {
                    "geomean": geomean,
                    "p10": p10,
                    "p50": p50,
                    "p90": p90,
                    "p95": p95,
                    "p99": p99,
                    "p99.9": p999,
                    "p100": p100,
                }
            except Exception:
                # The stat could not be computed (e.g. the counters it needs were
                # not collected in this run), so record zeros for every aggregate.
                data[stat_name] = {
                    "geomean": 0,
                    "p10": 0,
                    "p50": 0,
                    "p90": 0,
                    "p95": 0,
                    "p99": 0,
                    "p99.9": 0,
                    "p100": 0,
                }

    with open(RESULTS_JSON, "w") as f:
        json.dump(data, f)

    return data
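

# Usage sketch (illustrative only): one way calculate_counter_stat() might be
# driven. It assumes the module-level pieces referenced above exist (RESULTS_CSV,
# RESULTS_JSON, and imports such as pandas as pd, numpy as np, scipy.stats as
# stats, math, json), and that each platform object exposes get_counters()
# returning objects with get_name() and create_stat(df). The platform class name
# below is hypothetical.
#
#     platforms = [SomeGravitonPlatform()]
#     aggregates = calculate_counter_stat(platforms)
#     for stat_name, percentiles in aggregates.items():
#         print(f"{stat_name}: p50={percentiles['p50']:.3f} p99={percentiles['p99']:.3f}")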