perfrunbook/utilities/measure_and_plot_basic_pmu_counters.py (195 lines of code) (raw):

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Measure the ratio of two PMU counters with ``perf stat`` and plot it.

The script samples a numerator and a denominator event at a fixed interval,
computes ``numerator / denominator * scale`` for every interval, optionally
saves the time series as CSV and optionally plots it in the terminal.
"""

import argparse
import io
import re
import subprocess
import sys

import numpy as np
import pandas as pd
from scipy import stats

# When calculating aggregate stats, if some are zero, may
# get a benign divide-by-zero warning from numpy, make it silent.
np.seterr(divide='ignore')

# Column layout used to parse the '|'-separated output of `perf stat -x|`,
# mapped to the dtype each column is read as.
PERF_CSV_COLUMNS = {
    'time': np.float64,
    'count': np.float64,
    'rsrvd1': str,
    'event': str,
    'rsrvd2': str,
    'frac': np.float64,
    'rsrvd3': str,
    'rsrvd4': str,
}

# Placeholders perf prints in the count column when an event could not be
# measured. They are parsed as NaN so the affected samples are dropped
# instead of aborting the float conversion of the whole column.
PERF_MISSING_VALUES = ['<not counted>', '<not supported>']

GRAVITON_MAPPING = {
    "0xd0c": "Graviton2",
    "0xd40": "Graviton3",
    "0xd4f": "Graviton4"
}

AMD_MAPPING = {
    "7R13": "Milan",
    "9R14": "Genoa"
}


def perfstat(time, period, cpus, counter_numerator, counter_denominator, __unused__):
    """
    Measure performance counters using perf-stat in a subprocess.

    Args:
        time: How long to measure for; passed to ``sleep``.
        period: Interval between samples; passed to perf's ``-I`` option.
        cpus: List of CPU ids (as strings) to measure. When empty or None,
            the CPU list is discovered with ``lscpu -p=CPU``.
        counter_numerator: perf event specification of the numerator.
        counter_denominator: perf event specification of the denominator.
        __unused__: Ignored. Lets callers unpack a full
            ``[numerator, denominator, scale]`` entry into this call.

    Returns:
        An ``io.StringIO`` holding the '|'-separated values measured, or
        None if a command failed or could not be started.
    """
    try:
        if not cpus:
            res = subprocess.run(["lscpu", "-p=CPU"],
                                 check=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
            # Keep only the lines consisting solely of a CPU number; this
            # skips the comment header lscpu prints.
            cpus = []
            for line in res.stdout.decode('utf-8').splitlines():
                match = re.search(r'''^(\d+)$''', line)
                if match is not None:
                    cpus.append(match.group(1))

        # stderr is merged into stdout so that everything perf prints ends
        # up in the returned buffer.
        res = subprocess.run(["perf", "stat", f"-C{','.join(cpus)}", f"-I{period}", "-x|", "-a",
                              "-e", f"{counter_numerator}",
                              "-e", f"{counter_denominator}",
                              "--", "sleep", f"{time}"],
                             check=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        return io.StringIO(res.stdout.decode('utf-8'))
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError is what subprocess raises when the executable is
        # not in PATH, which is exactly the case the message describes.
        print("Failed to measure performance counters.")
        print("Please check that perf is installed using install_perfrunbook_dependencies.sh and in your PATH")
        return None


def plot_terminal(data, title, xtitle):
    """
    Plot data to the terminal using plotext.

    Args:
        data: DataFrame containing a column named ``title``.
        title: Name of the column to plot; also used as the plot title.
        xtitle: Label for the x axis.
    """
    # Imported lazily so the module can be used without plotext when no
    # plot is requested.
    import plotext as plt

    x = data.index.tolist()
    y = data[title].tolist()

    plt.scatter(x, y)
    plt.title(title)
    plt.xlabel(xtitle)
    plt.plot_size(100, 30)
    plt.show()


def plot_counter_stat(csv, logfile, plot, stat_name, counter_numerator,
                      counter_denominator, scale):
    """
    Process the returned csv file into a time-series statistic to plot and
    also calculate some useful aggregate stats.

    Args:
        csv: Path or file-like object with the '|'-separated perf output.
        logfile: Path or buffer to save the processed data to as CSV;
            nothing is saved when falsy.
        plot: Whether to plot the statistic to the terminal.
        stat_name: Name of the computed statistic (column name, plot title).
        counter_numerator: Event name of the numerator as it appears in the
            perf output.
        counter_denominator: Event name of the denominator as it appears in
            the perf output.
        scale: Factor the numerator/denominator ratio is multiplied by.
    """
    df = pd.read_csv(csv, sep='|',
                     names=list(PERF_CSV_COLUMNS),
                     dtype=PERF_CSV_COLUMNS,
                     na_values=PERF_MISSING_VALUES)

    # Samples of each event in measurement order, re-indexed from zero so
    # that the n-th numerator sample lines up with the n-th denominator one.
    numerator = df.loc[df['event'] == counter_numerator, 'count'].reset_index(drop=True)
    denominator = df.loc[df['event'] == counter_denominator, 'count'].reset_index(drop=True)

    df_processed = pd.DataFrame()
    df_processed[stat_name] = numerator / denominator * scale
    df_processed[counter_numerator] = numerator
    df_processed[counter_denominator] = denominator
    df_processed.dropna(inplace=True)

    if logfile:
        df_processed.to_csv(logfile)

    if df_processed.empty:
        print("No valid counter samples were collected; nothing to plot.")
        return

    if not plot:
        return

    # Calculate some meaningful aggregate stats for comparing time-series plots
    geomean = stats.gmean(df_processed[stat_name])
    p50 = stats.scoreatpercentile(df_processed[stat_name], 50)
    p90 = stats.scoreatpercentile(df_processed[stat_name], 90)
    p99 = stats.scoreatpercentile(df_processed[stat_name], 99)
    xtitle = f"gmean:{geomean:>6.2f} p50:{p50:>6.2f} p90:{p90:>6.2f} p99:{p99:>6.2f}"

    plot_terminal(df_processed, stat_name, xtitle)


def parse_cpu_type(lines):
    """
    Determine the processor type from the lines of a cpuinfo listing.

    The first "model name" or "CPU part" line decides the result:

    * "model name" containing "AMD EPYC": the third space-separated token
      is looked up in AMD_MAPPING.
    * any other "model name": the model name itself.
    * "CPU part": the value is looked up in GRAVITON_MAPPING.

    When a lookup fails, the raw model name / CPU part is returned instead
    of raising, so that callers can report it as unsupported.

    Args:
        lines: Iterable of text lines in /proc/cpuinfo format.

    Returns:
        The processor type as a string, or None if no line matched.
    """
    for line in lines:
        if "model name" in line:
            ln = line.split(":")[-1].strip()
            if "AMD EPYC" in ln:
                # Return the model number of the AMD CPU, its the 3rd entry in format
                # AMD EPYC <model>
                fields = ln.split(" ")
                model = fields[2] if len(fields) > 2 else None
                return AMD_MAPPING.get(model, ln)
            return ln
        elif "CPU part" in line:
            cpu = line.split(":")[-1].strip()
            return GRAVITON_MAPPING.get(cpu, cpu)
    return None


def get_cpu_type():
    """
    Determine the processor type of this machine from /proc/cpuinfo.

    Returns:
        The processor type as produced by parse_cpu_type(), or None if
        /proc/cpuinfo could not be read.
    """
    try:
        with open("/proc/cpuinfo", "r") as f:
            return parse_cpu_type(f)
    except OSError:
        return None


def parse_custom_counter(spec):
    """
    Parse a custom counter specification 'name|ctr1|ctr2|scale'.

    Args:
        spec: The specification string.

    Returns:
        A ``(name, [ctr1, ctr2, scale])`` tuple with scale as a float.

    Raises:
        ValueError: If spec does not have exactly four '|'-separated fields
            or the scale is not a number.
    """
    fields = spec.split("|")
    if len(fields) != 4:
        raise ValueError(
            f"custom counter must have the form 'name|ctr1|ctr2|scale', got '{spec}'")
    name, numerator, denominator, scale = fields
    try:
        scale = float(scale)
    except ValueError:
        raise ValueError(f"custom counter scale must be a number, got '{scale}'") from None
    return name, [numerator, denominator, scale]


# Every counter table below maps a statistic name to
# [numerator event, denominator event, scale]; the statistic is computed as
# numerator / denominator * scale.
UNIVERSAL_GRAVITON_CTRS = {
    "ipc": ["armv8_pmuv3_0/event=0x8/", "armv8_pmuv3_0/event=0x11/", 1],
    "branch-mpki": ["armv8_pmuv3_0/event=0x10/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-l1-mpki": ["armv8_pmuv3_0/event=0x3/", "armv8_pmuv3_0/event=0x8/", 1000],
    "inst-l1-mpki": ["armv8_pmuv3_0/event=0x1/", "armv8_pmuv3_0/event=0x8/", 1000],
    "l2-mpki": ["armv8_pmuv3_0/event=0x17/", "armv8_pmuv3_0/event=0x8/", 1000],
    "l3-mpki": ["armv8_pmuv3_0/event=0x37/", "armv8_pmuv3_0/event=0x8/", 1000],
    "stall_frontend_pkc": ["armv8_pmuv3_0/event=0x23/", "armv8_pmuv3_0/event=0x11/", 1000],
    "stall_backend_pkc": ["armv8_pmuv3_0/event=0x24/", "armv8_pmuv3_0/event=0x11/", 1000],
    "inst-tlb-mpki": ["armv8_pmuv3_0/event=0x2/", "armv8_pmuv3_0/event=0x8/", 1000],
    "inst-tlb-tw-pki": ["armv8_pmuv3_0/event=0x35/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-tlb-mpki": ["armv8_pmuv3_0/event=0x5/", "armv8_pmuv3_0/event=0x8/", 1000],
    "data-tlb-tw-pki": ["armv8_pmuv3_0/event=0x34/", "armv8_pmuv3_0/event=0x8/", 1000],
    "code-sparsity": ["armv8_pmuv3_0/event=0x11c/", "armv8_pmuv3_0/event=0x8/", 1000],
}

GRAVITON3_CTRS = {
    "stall_backend_mem_pkc": ["armv8_pmuv3_0/event=0x4005/", "armv8_pmuv3_0/event=0x11/", 1000],
}

UNIVERSAL_INTEL_CTRS = {
    "ipc": ["cpu/event=0xc0,umask=0x0/", "cpu/event=0x3c,umask=0x0/", 1],
    "branch-mpki": ["cpu/event=0xC5,umask=0x0/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-l1-mpki": ["cpu/event=0x51,umask=0x1/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-l1-mpki": ["cpu/event=0x24,umask=0xe4/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l2-mpki": ["cpu/event=0xf1,umask=0x1f/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l3-mpki": ["cpu/event=0x2e,umask=0x41/", "cpu/event=0xc0,umask=0x0/", 1000],
    "stall_frontend_pkc": ["cpu/event=0x9C,umask=0x1,cmask=0x4/", "cpu/event=0x3c,umask=0x0/", 1000],
    "stall_backend_pkc": ["cpu/event=0xA2,umask=0x1/", "cpu/event=0x3c,umask=0x0/", 1000],
    "inst-tlb-mpki": ["cpu/event=0x85,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-tlb-tw-pki": ["cpu/event=0x85,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-mpki": ["cpu/event=0x08,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-mpki": ["cpu/event=0x49,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-tw-pki": ["cpu/event=0x08,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-tw-pki": ["cpu/event=0x49,umask=0x01/", "cpu/event=0xc0,umask=0x0/", 1000],
}

ICX_CTRS = {
    "stall_frontend_pkc": ["cpu/event=0x9C,umask=0x1,cmask=0x5/", "cpu/event=0x3c,umask=0x0/", 1000],
    "stall_backend_pkc": ["cpu/event=0xa4,umask=0x2/", "cpu/event=0xa4,umask=0x01/", 1000],
}

SPR_CTRS = {
    "l2-mpki": ["cpu/event=0x25,umask=0x1f/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-tlb-mpki": ["cpu/event=0x11,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-tlb-tw-pki": ["cpu/event=0x11,umask=0x0e/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-rd-tlb-mpki": ["cpu/event=0x12,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-mpki": ["cpu/event=0x13,umask=0x20/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-rd-tlb-tw-pki": ["cpu/event=0x12,umask=0x0e/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-st-tlb-tw-pki": ["cpu/event=0x13,umask=0x0e/", "cpu/event=0xc0,umask=0x0/", 1000],
    "stall_frontend_pkc": ["cpu/event=0x9c,umask=0x1,cmask=0x6/", "cpu/event=0x3c,umask=0x0/", 1000],
    "stall_backend_pkc": ["cpu/event=0xa4,umask=0x2/", "cpu/event=0xa4,umask=0x01/", 1000],
}

UNIVERSAL_AMD_CTRS = {
    "ipc": ["cpu/event=0xc0,umask=0x0/", "cpu/event=0x76,umask=0x0/", 1],
    "branch-mpki": ["cpu/event=0xc3,umask=0x0/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-l1-mpki": ["cpu/event=0x44,umask=0xff/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-l1-mpki": ["cpu/event=0x60,umask=0x10/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l2-mpki": ["cpu/event=0x64,umask=0x9/", "cpu/event=0xc0,umask=0x0/", 1000],
    "l3-mpki": ["cpu/event=0x44,umask=0x8/", "cpu/event=0xc0,umask=0x0/", 1000],
    "stall_frontend_pkc": ["cpu/event=0xa9,umask=0x0/", "cpu/event=0x76,umask=0x0/", 1000],
    "inst-tlb-mpki": ["cpu/event=0x84,umask=0x0/", "cpu/event=0xc0,umask=0x0/", 1000],
    "inst-tlb-tw-pki": ["cpu/event=0x85,umask=0x0f/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-mpki": ["cpu/event=0x45,umask=0xff/", "cpu/event=0xc0,umask=0x0/", 1000],
    "data-tlb-tw-pki": ["cpu/event=0x45,umask=0xf0/", "cpu/event=0xc0,umask=0x0/", 1000],
}

MILAN_CTRS = {
    "stall_backend_pkc1": ["cpu/event=0xae,umask=0xf7/", "cpu/event=0x76,umask=0x0/", 1000],
    "stall_backend_pkc2": ["cpu/event=0xaf,umask=0x27/", "cpu/event=0x76,umask=0x0/", 1000],
}

GENOA_CTRS = {
    "stall_backend_pkc": ["cpu/event=0x1a0,umask=0x1e/", "cpu/event=0x76,umask=0x0/", 1000 * (1.0 / 6.0)]
}

# Counter table to use for each processor type returned by get_cpu_type().
# Processor-specific tables override entries of the universal ones.
filter_proc = {
    "Graviton2": UNIVERSAL_GRAVITON_CTRS,
    "Graviton3": {**UNIVERSAL_GRAVITON_CTRS, **GRAVITON3_CTRS},
    "Graviton4": {**UNIVERSAL_GRAVITON_CTRS, **GRAVITON3_CTRS},
    "Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8175M CPU @ 2.50GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz": UNIVERSAL_INTEL_CTRS,
    "Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz": {**UNIVERSAL_INTEL_CTRS, **ICX_CTRS},
    "Intel(R) Xeon(R) Platinum 8488C": {**UNIVERSAL_INTEL_CTRS, **SPR_CTRS},
    "Milan": {**UNIVERSAL_AMD_CTRS, **MILAN_CTRS},
    "Genoa": {**UNIVERSAL_AMD_CTRS, **GENOA_CTRS},
}


def main():
    """Parse the command line, measure the selected counters and report them."""
    processor_version = get_cpu_type()
    try:
        stat_choices = list(filter_proc[processor_version])
    except KeyError:
        print(f"{processor_version} is not supported")
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument("--stat", default="ipc", type=str, choices=stat_choices)
    parser.add_argument("--period", default=1000, type=int)
    parser.add_argument("--cpu-list", action="store", type=str)
    parser.add_argument("--no-plot", action="store_true", help="Do not plot to terminal")
    parser.add_argument("--log-file", help="Save counter data as CSV to specified file")
    parser.add_argument("--time", default=60, type=int, help="How long to measure for in seconds")
    parser.add_argument("--custom_ctr", type=str,
                        help="Specify a custom counter ratio and scaling factor as 'name|ctr1|ctr2|scale'"
                             ", calculated as ctr1/ctr2 * scale")
    parser.add_argument("--no-root", action="store_true", help="Allow running without root privileges")

    args = parser.parse_args()

    if not args.no_root:
        res = subprocess.run(["id", "-u"], check=True, stdout=subprocess.PIPE)
        if int(res.stdout) > 0:
            print("Must be run with root privileges (or with --no-root)")
            sys.exit(1)

    if args.custom_ctr:
        try:
            # Override the name of the stat to a user defined name
            stat_name, counter_info = parse_custom_counter(args.custom_ctr)
        except ValueError as err:
            parser.error(str(err))
    else:
        counter_info = filter_proc[processor_version][args.stat]
        stat_name = args.stat

    cpus = None
    if args.cpu_list and args.cpu_list != "all":
        cpus = args.cpu_list.split(",")

    csv = perfstat(args.time, args.period, cpus, *counter_info)
    if csv is None:
        # perfstat() already explained the failure; there is nothing to
        # process.
        sys.exit(1)
    plot_counter_stat(csv, args.log_file, (not args.no_plot), stat_name, *counter_info)


if __name__ == "__main__":
    main()