benchmarking/data_converters/json_converter/json_converter.py (99 lines of code) (raw):
#!/usr/bin/env python
##############################################################################
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
##############################################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import collections
import json
from data_converters.data_converter_base import DataConverterBase
from utils.custom_logger import getLogger
class JsonConverter(DataConverterBase):
    """Converter for benchmark output emitted as one JSON value per row.

    ``collect`` parses the raw rows into a list of JSON objects, and
    ``convert`` aggregates those objects into per-metric details keyed by
    ``"<type> <metric>"``.
    """

    def __init__(self):
        super(JsonConverter, self).__init__()

    def getName(self):
        """Return the name of this converter."""
        return "json_converter"

    def collect(self, data, args=None):
        """Parse raw output into JSON entries and locate the NET entries.

        Args:
            data: raw output; it is handed to ``self._prepareData`` (defined
                outside this class) to obtain the rows to parse.
            args: not used by this implementation.

        Returns:
            A ``(results, valid_run_idxs)`` tuple. ``results`` holds the
            parsed JSON objects starting from the first NET entry (or every
            parsed object when there is no NET entry). ``valid_run_idxs``
            holds the position of each NET entry in the parsed list *before*
            the leading entries were stripped.
        """
        rows = self._prepareData(data)
        results = []
        valid_run_idxs = []
        for row in rows:
            try:
                result = json.loads(row)
                if not isinstance(result, dict):
                    # Strings, arrays and scalars are valid JSON but are not
                    # metric entries; letting them through would make
                    # convert() fail later on, so skip them like any other
                    # unparsable row.
                    raise TypeError(
                        "expected a JSON object, got %s" % type(result).__name__
                    )
            except Exception as e:
                # bypass one line
                getLogger().info("Skip one row %s \n Exception: %s" % (row, str(e)))
                continue

            is_new_style_net = result.get("type") == "NET" and "value" in result
            # a bare "NET" key is accepted for backward compatibility
            if is_new_style_net or "NET" in result:
                valid_run_idxs.append(len(results))
            results.append(result)

        if valid_run_idxs:
            # strip data not yet in a valid range
            # here it is assumed the NET metric appears earlier than
            # other metrics
            # NOTE(review): the indices are not re-based after stripping;
            # confirm that callers expect pre-strip positions.
            results = results[valid_run_idxs[0] :]
        return results, valid_run_idxs

    def convert(self, data):
        """Aggregate parsed entries into per-metric details.

        Two entry layouts are understood:

        * new format: a flat dict carrying ``type``, ``metric`` and ``unit``,
          plus optional ``value``, ``info_string``, ``num_runs`` and
          ``summary``;
        * legacy format: ``{type: {metric: {"value"/"info_string", "unit"}}}``.

        Args:
            data: iterable of parsed entries (dicts).

        Returns:
            A nested ``defaultdict`` mapping ``"<type> <metric>"`` to the
            collected fields; every ``value`` is appended to the ``values``
            list as a float.

        Raises:
            AssertionError: when ``type``/``metric``/``unit`` differ between
                new-format entries sharing a key, or when legacy entries
                sharing a key carry different ``info_string`` values.
        """
        details = collections.defaultdict(lambda: collections.defaultdict(list))
        for d in data:
            if "type" in d and "metric" in d and "unit" in d:
                # new format
                key = d["type"] + " " + d["metric"]
                detail = details[key]
                if "info_string" in d:
                    if "info_string" in detail:
                        old_string = detail["info_string"]
                        new_string = d["info_string"]
                        if old_string != new_string:
                            # a mismatch is only reported; the first
                            # info_string seen is kept
                            getLogger().warning(
                                "info_string values for {} do not match.\n"
                                "Current info_string: {}\n "
                                "does not match new info_string: {}".format(
                                    key, old_string, new_string
                                )
                            )
                    else:
                        detail["info_string"] = d["info_string"]
                if "value" in d:
                    detail["values"].append(float(d["value"]))
                if "num_runs" in d:
                    detail["num_runs"] = d["num_runs"]
                if "summary" in d:
                    detail["summary"] = d["summary"]
                for field in ("type", "metric", "unit"):
                    self._updateOneEntry(detail, d, field)
            else:
                # for backward compatibility purpose
                # will remove after some time
                for k, v in d.items():
                    if not isinstance(v, dict):
                        # prevent some data corruption
                        continue
                    for kk, vv in v.items():
                        if not isinstance(vv, dict):
                            # same protection one level down: a non-dict
                            # payload cannot carry value/unit fields
                            continue
                        key = k + " " + kk
                        detail = details[key]
                        if "info_string" in vv:
                            if "info_string" in detail:
                                assert detail["info_string"] == vv["info_string"], (
                                    "info_string values for {} do not match.\n"
                                    "Current info_string:\n{}\n "
                                    "does not match new info_string:\n{}".format(
                                        key, detail["info_string"], vv["info_string"]
                                    )
                                )
                            else:
                                detail["info_string"] = vv["info_string"]
                        else:
                            detail["values"].append(float(vv["value"]))
                        # although it is declared as list
                        detail["type"] = k
                        detail["metric"] = kk
                        detail["unit"] = str(vv["unit"])
        return details

    def _updateOneEntry(self, detail, d, k):
        """Copy field ``k`` from ``d`` into ``detail``.

        If ``detail`` already has ``k``, assert that both values agree
        instead of overwriting it.
        """
        if k in detail:
            assert (
                detail[k] == d[k]
            ), "Field {} does not match in different entries".format(k)
        else:
            detail[k] = d[k]