variance-analysis/mach_perftest_notebook_dev/perftestnotebook/transformer.py (97 lines of code) (raw):

import json import importlib.util import inspect import os import pathlib from perftestnotebook.logger import NotebookLogger logger = NotebookLogger() top_level = pathlib.Path(os.path.dirname(__file__)).resolve().parent class Transformer(object): """ Abstract class for data transformers. """ def __init__(self, files=None): """ Initialize the transformer with files. :param list files: A list of files containing data to transform. """ self._files = files @property def files(self): return self._files @files.setter def files(self, val): if not type(val) == list: logger.warning("`files` must be a list, got %s" % type(val)) return self._files = val def transform(self, data): """ Transform the data into the standardized data format. The `data` entry can be of any type and the subclass is responsible for knowing what they expect. :param data: Data to transform. :return: Data standardized in the perftest-notebook format. """ raise NotImplementedError def merge(self, standardized_data_entries): """ Merge multiple standardized entries into a timeseries. :param list standardized_data_entries: List of standardized data entries. :return: Merged standardized data entries. """ raise NotImplementedError def open_data(self, file): """ Opens a file of data. If it's not a JSON file, then the data will be opened as a text file. :param str file: Path to the data file. :return: Data contained in the file. """ data = None if file.endswith(".json"): with open(file, "r") as f: data = json.load(f) else: with open(file, "r") as f: data = f.readlines() return data def process(self, name): """ Process all the known data into a merged, and standardized data format. :param str name: Name of the merged data. :return dict: Merged data. """ trfmdata = [] for file in self.files: data = {} # Open data try: data = self.open_data(file) except Exception as e: logger.warning("Failed to open file %s, skipping" % file) logger.warning("%s %s" % (e.__class__.__name__, e)) # Transform data try: data = self.transform(data) if type(data) != list: data = [data] trfmdata.extend(data) except Exception as e: logger.warning("Failed to transform file %s, skipping" % file) logger.warning("%s %s" % (e.__class__.__name__, e)) merged = self.merge(trfmdata) if type(merged) == dict: merged["name"] = name else: for e in merged: e["name"] = name return merged class SimplePerfherderTransformer(Transformer): """ Transforms perfherder data into the standardized data format. """ entry_number = 0 def transform(self, data): self.entry_number += 1 return { "data": [{"value": data["suites"][0]["value"], "xaxis": self.entry_number}] } def merge(self, sde): merged = {"data": []} for entry in sde: if type(entry["data"]) == list: merged["data"].extend(entry["data"]) else: merged["data"].append(entry["data"]) self.entry_number = 0 return merged def get_transformers( filepath=os.path.join(str(top_level), "perftestnotebook/customtransforms") ): """ This function returns a dict of transformers under the given path. If more than one transformers have the same class name, an exception will be raised. :param str filepath: file path. :return dict: {"transformer name": Transformer class}. """ ret = {} tfm_path = pathlib.Path(filepath).resolve() if not tfm_path.is_dir(): raise Exception(f"{tfm_path} is not a directory or it does not exist.") tfm_files = list(tfm_path.glob("*.py")) for file in tfm_files: # Importing a source file directly spec = importlib.util.spec_from_file_location( name=file.name, location=file.resolve().as_posix() ) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) members = inspect.getmembers( module, lambda c: inspect.isclass(c) and issubclass(c, Transformer) ) for (name, tfm_class) in members: if name in ret and name != "Transformer": raise Exception( f"""Duplicated transformer {name} is found in the folder {filepath}. Please define each transformer class with a unique class name.""" ) ret.update({name: tfm_class}) return ret