#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# AUTHOR = 'Darryl Gardner <darryleg@fb.com>'
# fio_json_parser.py - script to parse fio workload results
# produced by fio in JSON format. By default the following values are written
# to a .csv file: Read_IOPS, Read_BW, Write_IOPS, Write_BW, Mean_Read_Latency,
# P50_Read_Latency, P70_Read_Latency, P99_Read_Latency, P99.9_Read_Latency,
# P99.99_Read_Latency, Mean_Write_Latency, P50_Write_Latency,
# P70_Write_Latency, P99_Write_Latency, P99.9_Write_latency,
# P99.99_Write_Latency
#
# input arguments (at least 1 argument is required):
# -j
# single fio JSON file to be parsed (default = )
# -k
# single fio JSON file path (default = .)
# -n
# file path for multiple fio JSON files (default = )
# -l
# csv file path (default = .)
# -f
# csv file name (default = fio_fb_results.csv)
# -s
# denotes server mode: y for server mode, n for local mode (default = n)
# -c
# path to directory of csvs from server/client mode that you want to
# combine
#
# example: parse a single json file in the results directory:
# fio_json_parser.py -k results -j 4K_random_read.json -f results.csv
#
# example: parse all json files in the 'results' directory:
# fio_json_parser.py -n results -f results.csv
#
# example: create a combined csv from the csvs in a directory:
# fio_json_parser.py -c /some_path/csv_directory
#
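# Programmatic use (a minimal sketch; assumes this module is importable as
# fiosynth_lib.fio_json_parser and that a results file already exists):
#
#   from fiosynth_lib import fio_json_parser
#   data = fio_json_parser.read_json("results/4K_random_read.json")
#   print(data["fio version"], len(data["jobs"]))
#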
import argparse
import csv
import glob
import json
import os
import sys
from collections import OrderedDict
from distutils.version import StrictVersion
TOOL_NAME = "fio-parse-json-flash"
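# Optional mapping of an fio client port to a hostname, expected to be filled
# in by the caller when clients are reached through tunneled connections; when
# left empty, the hostname reported in the fio results is used
# (see get_hostname_to_data_dict).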
tunnel2host = {}
def set_attributes():
#
# Attribute Table Definition
#
parser = argparse.ArgumentParser(description="fio JSON File Parser for Flash")
parser.add_argument(
"-j",
action="store",
dest="json_file",
type=str,
help="single fio JSON file to be parsed (default = )",
default="",
)
parser.add_argument(
"-k",
action="store",
dest="json_path",
type=str,
help="single fio JSON file path (default = .)",
default=".",
)
parser.add_argument(
"-n",
action="store",
dest="all_json",
type=str,
help="file path for multiple fio JSON files (default = )",
default="",
)
parser.add_argument(
"-l",
action="store",
dest="csv_path",
type=str,
help="csv file path (default = .)",
default=".",
)
parser.add_argument(
"-f",
action="store",
dest="csv_file",
type=str,
help="csv file name (default = fio_fb_results.csv)",
default="fio_fb_results.csv",
)
parser.add_argument(
"-s",
action="store",
dest="serverMode",
type=str,
help="denotes server mode: y for server mode, n for local mode",
default="n",
)
parser.add_argument(
"-c",
action="store",
dest="combine_csv_path",
type=str,
        help=(
            "path to directory holding multiple csvs from different hosts "
            "that will be combined"
        ),
default="",
)
args = parser.parse_args()
return args
def check_if_mounted(fn):
mounted = False
with open(fn) as f:
for line in f.readlines():
if "appears mounted, and 'allow_mounted_write' isn't set." in line:
print(line)
mounted = True
break
if mounted:
print("To run, please unmount the device and try again")
sys.exit(1)
def read_json(fn, serverMode=False):
data = ""
if not os.path.isfile(fn):
print("%s does not exist" % fn)
sys.exit(1)
check_if_mounted(fn)
f = open(fn)
if serverMode:
jsonstr = f.read()
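        # In server/client mode the capture may contain text before (or more
        # than one copy of) the JSON body; keep only the text from the last
        # '"fio version"' key onward and restore the opening brace that the
        # slice drops.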
jsonstr = "{" + jsonstr[jsonstr.rfind('"fio version" : ') :]
try:
data = json.loads(jsonstr)
except ValueError:
print("JSON decoding failed on %s, is file corrupt?" % fn)
f.close()
sys.exit(1)
else:
try:
data = json.load(f)
except ValueError:
print("JSON decoding failed on %s. Is file corrupt?" % fn)
f.close()
sys.exit(1)
f.close()
return data
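# Downstream code expects the parsed dictionary to carry the top-level keys
# "fio version", "global options", and either "jobs" (local mode) or
# "client_stats" (server mode). A trimmed-down sketch of a local-mode file:
#
#   {"fio version": "fio-3.19",
#    "global options": {...},
#    "jobs": [{"jobname": "...",
#              "job options": {...},
#              "read": {"iops": ..., "bw": ..., "clat_ns": {...}},
#              "write": {...},
#              "trim": {...}}]}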
def new_csv(f, notStdPercentile1, notStdPercentile2):
if notStdPercentile1 or notStdPercentile2:
col_names = [
"Jobname",
"Read_IOPS",
"Read_BW",
"Write_IOPS",
"Write_BW",
"Trim_IOPS",
"Trim_BW",
"Mean_Read_Latency",
"Max_Read_Latency",
"P25_Read_Latency",
"P50_Read_Latency",
"P70_Read_Latency",
"P75_Read_Latency",
"P90_Read_Latency",
"P99_Read_Latency",
"P99.9_Read_Latency",
"P99.99_Read_Latency",
"P99.999_Read_Latency",
"P99.9999_Read_Latency",
"Mean_Write_Latency",
"Max_Write_Latency",
"P25_Write_Latency",
"P50_Write_Latency",
"P70_Write_Latency",
"P75_Write_Latency",
"P90_Write_Latency",
"P99_Write_Latency",
"P99.9_Write_Latency",
"P99.99_Write_Latency",
"P99.999_Write_Latency",
"P99.9999_Write_Latency",
"Mean_Trim_Latency",
"Max_Trim_Latency",
"P25_Trim_Latency",
"P50_Trim_Latency",
"P70_Trim_Latency",
"P75_Trim_Latency",
"P90_Trim_Latency",
"P99_Trim_Latency",
"P99.9_Trim_Latency",
"P99.99_Trim_Latency",
"P99.999_Trim_Latency",
"P99.9999_Trim_Latency",
]
else:
col_names = [
"Jobname",
"Read_IOPS",
"Read_BW",
"Write_IOPS",
"Write_BW",
"Trim_IOPS",
"Trim_BW",
"Mean_Read_Latency",
"Max_Read_Latency",
"P50_Read_Latency",
"P70_Read_Latency",
"P90_Read_Latency",
"P99_Read_Latency",
"P99.9_Read_Latency",
"P99.99_Read_Latency",
"P99.9999_Read_Latency",
"Mean_Write_Latency",
"Max_Write_Latency",
"P50_Write_Latency",
"P70_Write_Latency",
"P90_Write_Latency",
"P99_Write_Latency",
"P99.9_Write_Latency",
"P99.99_Write_Latency",
"P99.9999_Write_Latency",
"Mean_Trim_Latency",
"Max_Trim_Latency",
"P50_Trim_Latency",
"P70_Trim_Latency",
"P90_Trim_Latency",
"P99_Trim_Latency",
"P99.9_Trim_Latency",
"P99.99_Trim_Latency",
"P99.9999_Trim_Latency",
]
try:
writer = csv.writer(f)
writer.writerow(col_names)
except IOError:
print("cannot write to ", f)
f.close()
sys.exit(1)
def get_csv_line(jobname, json, index, data, version_str, serverMode):
clat = "clat"
con = 1
# clat -> clat_ns in version 3.0
verstr = version_str[version_str.rfind("-") + 1 :]
fio_version = StrictVersion(verstr)
v3_version = StrictVersion("3.0")
if fio_version >= v3_version:
clat = "clat_ns"
# convert nanoseconds to microseconds
con = 1000
if serverMode:
# Support for older and newer fio json formats
options1 = "percentile_list" in json["job options"]
options2 = "percentile_list" in json["global options"]
else:
options1 = "percentile_list" in json["jobs"][0]["job options"]
options2 = "percentile_list" in json["global options"]
iotype = ["read", "write", "trim"]
if options1 or options2:
percent = [
"25.000000",
"50.000000",
"70.000000",
"75.000000",
"90.000000",
"99.000000",
"99.900000",
"99.990000",
"99.999000",
"99.999900",
]
else:
percent = [
"50.000000",
"70.000000",
"90.000000",
"99.000000",
"99.900000",
"99.990000",
"99.999900",
]
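    # The field order built below must stay in sync with the column names
    # written by new_csv() for the matching percentile mode.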
line = [
jobname,
data["read"]["iops"],
data["read"]["bw"],
data["write"]["iops"],
data["write"]["bw"],
data["trim"]["iops"],
data["trim"]["bw"],
]
for io in iotype:
line.append(str(data[io][clat]["mean"] / con))
line.append(str(data[io][clat]["max"] / con))
        if data[io]["iops"] > 0 and "percentile" in data[io][clat]:
            for p in percent:
                line.append(str(data[io][clat]["percentile"][p] / con))
        else:
            # Pad with zeros so the column count stays fixed when an I/O type
            # was not exercised or percentile data is missing.
            for _p in percent:
                line.append(0)
return line
def print_csv_line(f, jobname, json, ver="", serverMode=False):
index = 0
lines = 1
if not serverMode:
lines = len(json["jobs"])
ver = json["fio version"]
while index != lines:
data = json
if not serverMode:
data = json["jobs"][index]
try:
line = get_csv_line(jobname, json, index, data, ver, serverMode)
wrtr = csv.writer(f)
wrtr.writerow(line)
except IOError:
print("cannot write to ", f)
f.close()
sys.exit(1)
index += 1
def parseServerResults(json_path, csv_dir):
if not os.path.isdir(csv_dir):
os.mkdir(csv_dir)
write_server_csv_files(csv_dir, json_path)
def write_server_csv_files(csv_dir, json_path):
"""Writes fio server mode json results into CSV files.
One CSV file is written per hostname.
"""
data = read_json(json_path, serverMode=True)
version_str = data["fio version"]
jobname = os.path.splitext(os.path.basename(json_path))[0]
hostname_data_dict = get_hostname_to_data_dict(data)
for hostname in hostname_data_dict:
host_csv_path = os.path.join(csv_dir, "%s.csv" % hostname)
is_new_file = not os.path.isfile(host_csv_path)
with open(host_csv_path, "a") as csv_out:
jb_data = hostname_data_dict[hostname]
jb = jb_data[0]
jb["global options"] = data["global options"]
if is_new_file:
new_csv(
csv_out,
("percentile_list" in jb["job options"]),
("percentile_list" in data["global options"]),
)
print_csv_line(csv_out, jobname, jb, version_str, serverMode=True)
for jb in jb_data[1:]:
jb["global options"] = data["global options"]
print_csv_line(csv_out, jobname, jb, version_str, serverMode=True)
def get_hostname_to_data_dict(fio_data):
"""Create dictionary mapping hostname to its fio data.
Returns:
Dict[str, List[dict]] - hostname to its fio data
"""
hostname_data_dict = {}
for jb in fio_data["client_stats"]:
if jb["jobname"] == "All clients":
continue
if len(tunnel2host) == 0:
hostname = jb["hostname"]
else:
hostname = tunnel2host[jb["port"]]
if hostname not in hostname_data_dict:
hostname_data_dict[hostname] = [jb]
else:
hostname_data_dict[hostname].append(jb)
return hostname_data_dict
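# A hypothetical example of the returned mapping, with two client hosts:
#   {"hostA": [<job stats dict>, <job stats dict>],
#    "hostB": [<job stats dict>]}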
def get_combined_stats(stats):
combined_stats = OrderedDict()
for job in stats.keys():
combined_stats[job] = OrderedDict()
for stat in stats[job].keys():
currStat = [float(val) for val in stats[job][stat]]
if "_IOPS" in stat or "_BW" in stat:
combined_stats[job][stat + "_TOTAL"] = sum(currStat)
combined_stats[job][stat + "_MIN"] = min(currStat)
combined_stats[job][stat + "_AVG"] = sum(currStat) / len(currStat)
combined_stats[job][stat + "_MAX"] = max(currStat)
return combined_stats
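# Every column gets _MIN, _AVG and _MAX aggregates across the per-host CSVs;
# IOPS and bandwidth columns additionally get a _TOTAL sum, e.g. "Read_IOPS"
# yields Read_IOPS_TOTAL, Read_IOPS_MIN, Read_IOPS_AVG and Read_IOPS_MAX.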
def combineCsv(csvFolder, fname, dut_list):
csvName = "Combined_Results-%s.csv" % fname
csvPath = os.path.join(csvFolder, csvName)
stats = OrderedDict() # Using OrderedDict to preserve job and stat ordering
try:
os.remove(csvPath) # Remove it if it already exists
except OSError:
pass
csvList = glob.glob(os.path.join(csvFolder, "*.csv"))
    with open(csvList[0]) as first_fd:
        reader = csv.reader(first_fd)
        col_names = next(reader)
        for row in reader:
            stats[row[0]] = OrderedDict()
            for c in col_names[1:]:  # skip the "Jobname" column
                stats[row[0]][c] = []
for c in csvList:
with open(c) as fd:
creader = csv.reader(fd)
next(creader)
for row in creader:
for i in range(1, len(row)):
stats[row[0]][col_names[i]].append(row[i])
combined_stats = get_combined_stats(stats)
with open(csvPath, "a") as csv_out:
writer = csv.writer(csv_out)
server_list = ";".join([dut.serverName for dut in dut_list])
writer.writerow([fname] + [server_list])
stats_headers = combined_stats[list(combined_stats.keys())[0]].keys()
writer.writerow(["Jobname"] + list(stats_headers))
for job in combined_stats.keys():
row = [job]
for stat in combined_stats[job].keys():
row.append(combined_stats[job][stat])
writer.writerow(row)
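# Layout of the resulting Combined_Results-<fname>.csv: a title row with the
# run name and a ";"-separated server list, a header row ("Jobname" plus the
# aggregated stat names), then one row of aggregates per job.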
def get_json_files(dir_path):
"""Returns list of files under `dir_path` with a `.json` extension."""
json_files = []
for f in sorted(os.listdir(dir_path)):
if f.endswith(".json"):
json_files.append(os.path.join(dir_path, f))
return json_files
def write_csv_file(csv_filepath, fio_json_files):
"""Converts and writes each fio json file into a single CSV file."""
is_new_file = not os.path.isfile(csv_filepath)
with open(csv_filepath, "a") as csv_out:
first_file = fio_json_files[0]
fio_jobname = os.path.splitext(os.path.basename(first_file))[0]
fio_data = read_json(first_file)
if is_new_file:
new_csv(
csv_out,
("percentile_list" in fio_data["jobs"][0]["job options"]),
"percentile_list" in fio_data["global options"],
)
print_csv_line(csv_out, fio_jobname, fio_data)
for f in fio_json_files[1:]: # Continue from second element, if any
fio_jobname = os.path.splitext(os.path.basename(f))[0]
fio_data = read_json(f)
print_csv_line(csv_out, fio_jobname, fio_data)
def main(args):
    if args.combine_csv_path != "":
        # combineCsv() also needs a base name and a DUT list; the CLI carries
        # no DUT information, so derive the name from -f and pass an empty
        # list.
        combineCsv(args.combine_csv_path, os.path.splitext(args.csv_file)[0], [])
        return
    if args.all_json:
        json_files = get_json_files(args.all_json)
    elif args.json_file:
        json_files = [args.json_file]
    else:
        json_files = []
    if json_files:
        csv_filepath = os.path.join(args.csv_path, args.csv_file)
        write_csv_file(csv_filepath, json_files)
def cli_main():
args = set_attributes()
main(args)
if __name__ == "__main__":
cli_main()