def generate_report()

in benchmarks/horovod-resnet/execute_horovod_training.py [0:0]


# Number of "-"-separated fields a benchmark job name must contain.
_JOB_NAME_FIELD_COUNT = 12


def _parse_job_name(job_name):
    """Return the benchmark configuration encoded in ``job_name``.

    The name is split on "-" and must yield exactly 12 fields; fields 1-4 are
    the instance type, instance count, device and Python version. The
    remaining fields are not used by the report.

    Raises:
        ValueError: if ``job_name`` does not split into exactly 12 fields.
    """
    fields = job_name.split("-")
    if len(fields) != _JOB_NAME_FIELD_COUNT:
        raise ValueError(
            "Cannot parse job name {!r}: expected {} '-'-separated fields, got {}".format(
                job_name, _JOB_NAME_FIELD_COUNT, len(fields)
            )
        )

    instance_type, instance_count, device, py_version = fields[1:5]
    return {
        "instance_type": instance_type,
        "instance_count": instance_count,
        "device": device,
        "py_version": py_version,
    }


def _run_parameter_value(parameter):
    """Return the value stored under the first key of ``parameter`` that is not "name".

    Raises:
        IndexError: if ``parameter`` has no key other than "name".
    """
    value_key = [key for key in parameter if key != "name"][0]
    return parameter[value_key]


def _summarize_benchmark_log(data):
    """Flatten the parsed ``benchmark_run.log`` JSON document into a single-level dict.

    Keys are inserted in a fixed order: dataset and machine information first,
    then one entry per item of ``data["run_parameters"]`` (keyed by the item's
    "name"), then model, run date and TensorFlow version. A later entry with
    the same key overwrites an earlier one.

    Raises:
        KeyError: if ``data`` lacks one of the fields read here.
    """
    cpu_info = data["machine_config"]["cpu_info"]
    gpu_info = data["machine_config"]["gpu_info"]

    summary = {
        "dataset": data["dataset"]["name"],
        "num_cores": cpu_info["num_cores"],
        "cpu_info": cpu_info["cpu_info"],
        "mhz_per_cpu": cpu_info["mhz_per_cpu"],
        "gpu_count": gpu_info["count"],
        "gpu_model": gpu_info["model"],
    }

    for parameter in data["run_parameters"]:
        summary[parameter["name"]] = _run_parameter_value(parameter)

    tensorflow_version = data["tensorflow_version"]
    summary["model_name"] = data["model_name"]
    summary["run_date"] = data["run_date"]
    summary["tensorflow_version"] = tensorflow_version["version"]
    summary["tensorflow_version_git_hash"] = tensorflow_version["git_hash"]
    return summary


def generate_report():
    """Download the benchmark results from S3 and collect them into a DataFrame.

    The contents of ``benchmark_results_dir`` are copied with the AWS CLI into
    a fresh ``results`` directory under ``dir_path``. Every entry of that
    directory is treated as one job: its name is parsed for the instance
    configuration, ``output/model.tar.gz`` is unpacked in the job directory,
    and, if a ``benchmark_run.log`` file is then present, its fields are added
    to the job's record.

    Returns:
        A ``pandas.DataFrame`` built from a dict mapping each job name to its
        record, i.e. one column per job and one row per reported field.

    Raises:
        ValueError: if an entry of the results directory is not a valid job name.
    """
    results_dir = os.path.join(dir_path, "results")

    # Remove anything left over from a previous run before downloading.
    if os.path.exists(results_dir):
        shutil.rmtree(results_dir)

    # The exit status is intentionally not checked (best effort); if nothing
    # was downloaded, os.listdir below fails on the missing directory.
    subprocess.call(["aws", "s3", "cp", "--recursive", benchmark_results_dir, results_dir])

    jobs = {}

    for job_name in os.listdir(results_dir):
        job = _parse_job_name(job_name)
        jobs[job_name] = job

        current_dir = os.path.join(results_dir, job_name)

        # Unpack the job's archive in place. A failed extraction is tolerated:
        # the job is then reported with the name-derived fields only.
        model_archive = os.path.join(current_dir, "output", "model.tar.gz")
        subprocess.call(["tar", "-xvzf", model_archive], cwd=current_dir)

        # NOTE(review): the log is presumably one of the files unpacked from
        # model.tar.gz -- confirm against the training script.
        benchmark_log = os.path.join(current_dir, "benchmark_run.log")
        if not os.path.exists(benchmark_log):
            continue

        # Keep the file open only for as long as it takes to parse it.
        with open(benchmark_log) as f:
            data = json.load(f)

        job.update(_summarize_benchmark_log(data))

    return pd.DataFrame(jobs)