#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
#
# AUTHOR = 'Darryl Gardner <darryleg@fb.com>'
# fio_json_parser.py - script to parse fio workload results
# produced by fio in JSON format.  By default the following values are written
# to a .csv file for each of read, write, and trim: IOPS, bandwidth, mean
# latency, max latency, and the P50, P70, P90, P99, P99.9, P99.99, and
# P99.9999 completion-latency percentiles.  Jobs run with a custom
# percentile_list also get P25, P75, and P99.999 columns.
#
# input arguments (at least 1 argument is required):
#  -j
#       single fio JSON file to be parsed (default = )
#  -k
#       single fio JSON file path (default = .)
#  -n
#       file path for multiple fio JSON files (default = )
#  -l
#       csv file path (default = .)
#  -f
#       csv file name (default = fio_fb_results.csv)
#  -s
#       denotes server mode: y for server mode, n for local mode (default = n)
#  -c
#       path to directory of CSVs from server/client mode that you want to
#       combine
#
# example: parse a single json file in the results directory:
#  fio_json_parser.py -k results -j 4K_random_read.json -f results.csv
#
# example: parse all json files in the 'results' directory:
#  fio_json_parser.py -n results -f results.csv
#
# example: create a combined csv from the csvs in a directory:
#  fio_json_parser.py -c /some_path/csv_directory
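#
# example: parse a server/client mode json file into one csv per client host:
#  fio_json_parser.py -s y -k results -j 4K_random_read.json -l csv_out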
#
import argparse
import csv
import glob
import json
import os
import sys
from collections import OrderedDict

TOOL_NAME = "fio-parse-json-flash"
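# Optional map of ssh-tunnel port -> client hostname for server/client runs
# where clients are reached through tunnels and all report the same hostname.
# Nothing in this file populates it; an importing harness is expected to fill
# it in before parsing server-mode results (see get_hostname_to_data_dict).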
tunnel2host = {}


def set_attributes():
    #
    # Attribute Table Definition
    #

    parser = argparse.ArgumentParser(description="fio JSON File Parser for Flash")
    parser.add_argument(
        "-j",
        action="store",
        dest="json_file",
        type=str,
        help="single fio JSON file to be parsed (default = )",
        default="",
    )
    parser.add_argument(
        "-k",
        action="store",
        dest="json_path",
        type=str,
        help="single fio JSON file path (default = .)",
        default=".",
    )
    parser.add_argument(
        "-n",
        action="store",
        dest="all_json",
        type=str,
        help="file path for multiple fio JSON files (default = )",
        default="",
    )
    parser.add_argument(
        "-l",
        action="store",
        dest="csv_path",
        type=str,
        help="csv file path (default = .)",
        default=".",
    )
    parser.add_argument(
        "-f",
        action="store",
        dest="csv_file",
        type=str,
        help="csv file name (default = fio_fb_results.csv)",
        default="fio_fb_results.csv",
    )
    parser.add_argument(
        "-s",
        action="store",
        dest="serverMode",
        type=str,
        help="denotes server mode: y for server mode, n for local mode",
        default="n",
    )
    parser.add_argument(
        "-c",
        action="store",
        dest="combine_csv_path",
        type=str,
        help=(
            "path to directory holding multiple csvs from server/client "
            "mode that will be combined"
        ),
        default="",
    )
    args = parser.parse_args()
    return args


def check_if_mounted(fn):
    """Abort if the fio output reports the target device appeared mounted."""
    mounted = False
    with open(fn) as f:
        for line in f:
            if "appears mounted, and 'allow_mounted_write' isn't set." in line:
                print(line)
                mounted = True
                break
    if mounted:
        print("To run, please unmount the device and try again")
        sys.exit(1)


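# The functions below assume fio's native JSON layout (fio
# --output-format=json).  A trimmed, illustrative skeleton of the fields
# this script actually reads:
#
#   {
#     "fio version": "fio-3.16",
#     "global options": {...},
#     "jobs": [
#       {"jobname": "...",
#        "job options": {...},
#        "read":  {"iops": ..., "bw": ...,
#                  "clat_ns": {"mean": ..., "max": ...,
#                              "percentile": {"50.000000": ..., ...}}},
#        "write": {...},
#        "trim":  {...}}
#     ]
#   }
#
# Server/client runs instead carry a "client_stats" list, one entry per
# client, each with "jobname", "hostname", "port", and the same
# read/write/trim blocks.  Before fio 3.0 the latency key is "clat"
# (in microseconds) rather than "clat_ns".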
def read_json(fn, serverMode=False):
    """Load one fio JSON result file.

    In server mode the output may carry non-JSON text ahead of the results;
    only the object starting at the last "fio version" key is decoded.
    """
    if not os.path.isfile(fn):
        print("%s does not exist" % fn)
        sys.exit(1)
    check_if_mounted(fn)
    with open(fn) as f:
        if serverMode:
            jsonstr = f.read()
            jsonstr = "{" + jsonstr[jsonstr.rfind('"fio version" : ') :]
            try:
                data = json.loads(jsonstr)
            except ValueError:
                print("JSON decoding failed on %s, is file corrupt?" % fn)
                sys.exit(1)
        else:
            try:
                data = json.load(f)
            except ValueError:
                print("JSON decoding failed on %s. Is file corrupt?" % fn)
                sys.exit(1)
    return data


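# The header row written by new_csv() must stay column-for-column in sync
# with the data rows built in get_csv_line(): both switch to the extended
# percentile set (adding P25, P75, and P99.999) when a custom
# percentile_list is found in the job or global fio options.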
def new_csv(f, notStdPercentile1, notStdPercentile2):
    if notStdPercentile1 or notStdPercentile2:
        col_names = [
            "Jobname",
            "Read_IOPS",
            "Read_BW",
            "Write_IOPS",
            "Write_BW",
            "Trim_IOPS",
            "Trim_BW",
            "Mean_Read_Latency",
            "Max_Read_Latency",
            "P25_Read_Latency",
            "P50_Read_Latency",
            "P70_Read_Latency",
            "P75_Read_Latency",
            "P90_Read_Latency",
            "P99_Read_Latency",
            "P99.9_Read_Latency",
            "P99.99_Read_Latency",
            "P99.999_Read_Latency",
            "P99.9999_Read_Latency",
            "Mean_Write_Latency",
            "Max_Write_Latency",
            "P25_Write_Latency",
            "P50_Write_Latency",
            "P70_Write_Latency",
            "P75_Write_Latency",
            "P90_Write_Latency",
            "P99_Write_Latency",
            "P99.9_Write_Latency",
            "P99.99_Write_Latency",
            "P99.999_Write_Latency",
            "P99.9999_Write_Latency",
            "Mean_Trim_Latency",
            "Max_Trim_Latency",
            "P25_Trim_Latency",
            "P50_Trim_Latency",
            "P70_Trim_Latency",
            "P75_Trim_Latency",
            "P90_Trim_Latency",
            "P99_Trim_Latency",
            "P99.9_Trim_Latency",
            "P99.99_Trim_Latency",
            "P99.999_Trim_Latency",
            "P99.9999_Trim_Latency",
        ]
    else:
        col_names = [
            "Jobname",
            "Read_IOPS",
            "Read_BW",
            "Write_IOPS",
            "Write_BW",
            "Trim_IOPS",
            "Trim_BW",
            "Mean_Read_Latency",
            "Max_Read_Latency",
            "P50_Read_Latency",
            "P70_Read_Latency",
            "P90_Read_Latency",
            "P99_Read_Latency",
            "P99.9_Read_Latency",
            "P99.99_Read_Latency",
            "P99.9999_Read_Latency",
            "Mean_Write_Latency",
            "Max_Write_Latency",
            "P50_Write_Latency",
            "P70_Write_Latency",
            "P90_Write_Latency",
            "P99_Write_Latency",
            "P99.9_Write_Latency",
            "P99.99_Write_Latency",
            "P99.9999_Write_Latency",
            "Mean_Trim_Latency",
            "Max_Trim_Latency",
            "P50_Trim_Latency",
            "P70_Trim_Latency",
            "P90_Trim_Latency",
            "P99_Trim_Latency",
            "P99.9_Trim_Latency",
            "P99.99_Trim_Latency",
            "P99.9999_Trim_Latency",
        ]
    try:
        writer = csv.writer(f)
        writer.writerow(col_names)
    except IOError:
        print("cannot write to ", f)
        f.close()
        sys.exit(1)


def get_csv_line(jobname, fio_json, index, data, version_str, serverMode):
    clat = "clat"
    con = 1
    # fio 3.0 renamed "clat" to "clat_ns" and switched the latency unit from
    # microseconds to nanoseconds; divide by con to report microseconds
    verstr = version_str[version_str.rfind("-") + 1 :]  # "fio-3.16" -> "3.16"
    if int(verstr.split(".")[0]) >= 3:
        clat = "clat_ns"
        # convert nanoseconds to microseconds
        con = 1000
    if serverMode:
        # Support for older and newer fio json formats
        options1 = "percentile_list" in fio_json["job options"]
        options2 = "percentile_list" in fio_json["global options"]
    else:
        options1 = "percentile_list" in fio_json["jobs"][0]["job options"]
        options2 = "percentile_list" in fio_json["global options"]
    iotype = ["read", "write", "trim"]
    if options1 or options2:
        percent = [
            "25.000000",
            "50.000000",
            "70.000000",
            "75.000000",
            "90.000000",
            "99.000000",
            "99.900000",
            "99.990000",
            "99.999000",
            "99.999900",
        ]
    else:
        percent = [
            "50.000000",
            "70.000000",
            "90.000000",
            "99.000000",
            "99.900000",
            "99.990000",
            "99.999900",
        ]
    line = [
        jobname,
        data["read"]["iops"],
        data["read"]["bw"],
        data["write"]["iops"],
        data["write"]["bw"],
        data["trim"]["iops"],
        data["trim"]["bw"],
    ]
    for io in iotype:
        line.append(str(data[io][clat]["mean"] / con))
        line.append(str(data[io][clat]["max"] / con))
        if data[io]["iops"] > 0 and "percentile" in data[io][clat]:
            for p in percent:
                line.append(str(data[io][clat]["percentile"][p] / con))
        else:
            # pad with zeros so every row keeps the same number of columns
            for _p in percent:
                line.append(0)
    return line


def print_csv_line(f, jobname, fio_json, ver="", serverMode=False):
    lines = 1
    if not serverMode:
        lines = len(fio_json["jobs"])
        ver = fio_json["fio version"]
    for index in range(lines):
        data = fio_json
        if not serverMode:
            data = fio_json["jobs"][index]
        try:
            line = get_csv_line(jobname, fio_json, index, data, ver, serverMode)
            wrtr = csv.writer(f)
            wrtr.writerow(line)
        except IOError:
            print("cannot write to ", f)
            f.close()
            sys.exit(1)

def parseServerResults(json_path, csv_dir):
    if not os.path.isdir(csv_dir):
        os.mkdir(csv_dir)
    write_server_csv_files(csv_dir, json_path)


def write_server_csv_files(csv_dir, json_path):
    """Writes fio server mode json results into CSV files.

    One CSV file is written per hostname.
    """
    data = read_json(json_path, serverMode=True)
    version_str = data["fio version"]
    jobname = os.path.splitext(os.path.basename(json_path))[0]
    hostname_data_dict = get_hostname_to_data_dict(data)
    for hostname in hostname_data_dict:
        host_csv_path = os.path.join(csv_dir, "%s.csv" % hostname)
        is_new_file = not os.path.isfile(host_csv_path)
        with open(host_csv_path, "a") as csv_out:
            for i, jb in enumerate(hostname_data_dict[hostname]):
                # get_csv_line() checks the global options for percentile_list
                jb["global options"] = data["global options"]
                if is_new_file and i == 0:
                    new_csv(
                        csv_out,
                        ("percentile_list" in jb["job options"]),
                        ("percentile_list" in data["global options"]),
                    )
                print_csv_line(csv_out, jobname, jb, version_str, serverMode=True)


def get_hostname_to_data_dict(fio_data):
    """Create dictionary mapping hostname to its fio data.

    Returns:
        Dict[str, List[dict]] - hostname to its fio data
    """
    hostname_data_dict = {}
    for jb in fio_data["client_stats"]:
        if jb["jobname"] == "All clients":
            continue
        if len(tunnel2host) == 0:
            hostname = jb["hostname"]
        else:
            hostname = tunnel2host[jb["port"]]

        hostname_data_dict.setdefault(hostname, []).append(jb)
    return hostname_data_dict


def get_combined_stats(stats):
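    """Aggregate each per-host list of stat values into combined columns.

    Every stat gets _AVG and _MAX columns; IOPS and bandwidth stats also get
    _TOTAL and _MIN, since sums across hosts are meaningful for throughput
    but not for latency percentiles.
    """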
    combined_stats = OrderedDict()
    for job in stats.keys():
        combined_stats[job] = OrderedDict()
        for stat in stats[job].keys():
            currStat = [float(val) for val in stats[job][stat]]
            if "_IOPS" in stat or "_BW" in stat:
                combined_stats[job][stat + "_TOTAL"] = sum(currStat)
                combined_stats[job][stat + "_MIN"] = min(currStat)
            combined_stats[job][stat + "_AVG"] = sum(currStat) / len(currStat)
            combined_stats[job][stat + "_MAX"] = max(currStat)
    return combined_stats


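# combineCsv() merges the per-host csvs produced in server/client mode: it
# collects every stat of every job across all *.csv files in csvFolder and
# writes the aggregates to Combined_Results-<fname>.csv in the same folder.
# dut_list is optional; each entry only needs a serverName attribute, used
# to record which hosts contributed to the combined results.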
def combineCsv(csvFolder, fname="combined", dut_list=None):
    csvName = "Combined_Results-%s.csv" % fname
    csvPath = os.path.join(csvFolder, csvName)
    stats = OrderedDict()  # Using OrderedDict to preserve job and stat ordering

    try:
        os.remove(csvPath)  # Remove it if it already exists
    except OSError:
        pass
    csvList = glob.glob(os.path.join(csvFolder, "*.csv"))
    if not csvList:
        print("no csv files found in %s" % csvFolder)
        sys.exit(1)
    # All per-host csvs share one header; take the column names from the first
    with open(csvList[0]) as fd:
        reader = csv.reader(fd)
        col_names = next(reader)

        for row in reader:
            stats[row[0]] = OrderedDict()
            for c in col_names[1:]:  # skip the "Jobname" column
                stats[row[0]][c] = []

    for c in csvList:
        with open(c) as fd:
            creader = csv.reader(fd)
            next(creader)
            for row in creader:
                for i in range(1, len(row)):
                    stats[row[0]][col_names[i]].append(row[i])
    combined_stats = get_combined_stats(stats)
    with open(csvPath, "a") as csv_out:
        writer = csv.writer(csv_out)
        if dut_list:
            server_list = ";".join([dut.serverName for dut in dut_list])
            writer.writerow([fname] + [server_list])
        stats_headers = next(iter(combined_stats.values())).keys()
        writer.writerow(["Jobname"] + list(stats_headers))

        for job in combined_stats.keys():
            row = [job]
            for stat in combined_stats[job].keys():
                row.append(combined_stats[job][stat])
            writer.writerow(row)


def get_json_files(dir_path):
    """Returns list of files under `dir_path` with a `.json` extension."""
    json_files = []
    for f in sorted(os.listdir(dir_path)):
        if f.endswith(".json"):
            json_files.append(os.path.join(dir_path, f))
    return json_files


def write_csv_file(csv_filepath, fio_json_files):
    """Converts and writes each fio json file into a single CSV file."""
    is_new_file = not os.path.isfile(csv_filepath)
    with open(csv_filepath, "a") as csv_out:
        for i, f in enumerate(fio_json_files):
            fio_jobname = os.path.splitext(os.path.basename(f))[0]
            fio_data = read_json(f)
            if is_new_file and i == 0:
                new_csv(
                    csv_out,
                    ("percentile_list" in fio_data["jobs"][0]["job options"]),
                    "percentile_list" in fio_data["global options"],
                )
            print_csv_line(csv_out, fio_jobname, fio_data)


def main(args):
    if args.combine_csv_path != "":
        # Name the combined results after the csv file name flag
        combineCsv(args.combine_csv_path, os.path.splitext(args.csv_file)[0])
        return

    if args.serverMode == "y":
        # Server/client mode results are split into one csv per client host
        parseServerResults(
            os.path.join(args.json_path, args.json_file), args.csv_path
        )
        return

    if args.all_json:
        json_files = get_json_files(args.all_json)
    elif args.json_file:
        json_files = [os.path.join(args.json_path, args.json_file)]
    else:
        json_files = []

    if json_files:
        csv_filepath = os.path.join(args.csv_path, args.csv_file)
        write_csv_file(csv_filepath, json_files)


def cli_main():
    args = set_attributes()
    main(args)


if __name__ == "__main__":
    cli_main()
