tools/side-by-side/side_by_side.py (313 lines of code) (raw):

#!/usr/bin/env python # Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # -*- coding: utf-8 -*- """Runs a side-by-side comparison of two PerfKitBenchmarker revisions. Given a pair of revisions (e.g., 'dev', 'master') and command-line arguments, this tool runs 'pkb.py' with for each and creates a report showing the differences in the results between the two runs. """ import argparse import collections import contextlib import difflib import itertools import json import logging import os import pprint import shlex import shutil import subprocess import tempfile import jinja2 DEFAULT_FLAGS = ( '--cloud=GCP', '--machine_type=n1-standard-4', '--benchmarks=netperf', ) # Keys in the sample JSON we expect to vary between runs. # These will be removed prior to diffing samples. VARYING_KEYS = 'run_uri', 'sample_uri', 'timestamp', 'value' # Template name, in same directory as this file. TEMPLATE = 'side_by_side.html.j2' # Thresholds for highlighting results SMALL_CHANGE_THRESHOLD = 5 MEDIUM_CHANGE_THRESHOLD = 10 LARGE_CHANGE_THRESHOLD = 25 PerfKitBenchmarkerResult = collections.namedtuple( 'PerfKitBenchmarkerResult', ['name', 'description', 'sha1', 'samples', 'flags'], ) @contextlib.contextmanager def TempDir(delete=True, **kwargs): """Directory equivalent of tempfile.NamedTemporaryFile. When used as a context manager, yields a temporary directory which by default is removed when the context manager goes our of scope. Example usage: >>> with TempDir(prefix='perfkit') as td: ... shutil.copy('test.txt', td) Args: delete: Delete the directory on exit? **kwargs: Passed to tempfile.mkdtemp. Yields: String. Path to the temporary directory. """ td = tempfile.mkdtemp(**kwargs) logging.info('Created %s', td) try: yield td finally: if delete: logging.info('Removing %s', td) shutil.rmtree(td) def _GitCommandPrefix(): """Prefix for all git commands. Returns: list of strings; 'git' with an appropriate '--git-dir' flag. """ git_dir = os.path.join(os.path.dirname(__file__), '..', '..', '.git') return ['git', '--git-dir', git_dir] def _GitRevParse(revision): """Returns the output of 'git rev-parse' for 'revision'.""" output = subprocess.check_output( _GitCommandPrefix() + ['rev-parse', revision] ) return output.rstrip() def _GitDescribe(revision): """Returns the output of 'git describe' for 'revision'.""" output = subprocess.check_output( _GitCommandPrefix() + ['describe', '--always', revision] ) return output.rstrip() @contextlib.contextmanager def PerfKitBenchmarkerCheckout(revision): """Yields a directory with PerfKitBenchmarker checked out to 'revision'.""" archive_cmd = _GitCommandPrefix() + ['archive', revision] logging.info('Running: %s', archive_cmd) p_archive = subprocess.Popen(archive_cmd, stdout=subprocess.PIPE) with TempDir(prefix='pkb-test-') as td: tar_cmd = ['tar', 'xf', '-'] logging.info('Running %s in %s', tar_cmd, td) p_tar = subprocess.Popen(tar_cmd, stdin=p_archive.stdout, cwd=td) archive_status = p_archive.wait() tar_status = p_tar.wait() if archive_status: raise subprocess.CalledProcessError(archive_cmd, archive_status) if tar_status: raise subprocess.CalledProcessError(tar_status, tar_cmd) yield td def RunPerfKitBenchmarker(revision, flags): """Runs perfkitbenchmarker, returning the results as parsed JSON. Args: revision: string. git commit identifier. Version of PerfKitBenchmarker to run. flags: list of strings. Default arguments to pass to `pkb.py.` Returns: List of dicts. Deserialized JSON output of running PerfKitBenchmarker with `--json_path`. """ sha1 = _GitRevParse(revision) description = _GitDescribe(revision) with PerfKitBenchmarkerCheckout(revision) as td: with tempfile.NamedTemporaryFile(suffix='.json') as tf: flags = flags + ['--json_path=' + tf.name] cmd = ['./pkb.py'] + flags logging.info('Running %s in %s', cmd, td) subprocess.check_call(cmd, cwd=td) samples = [json.loads(line) for line in tf] return PerfKitBenchmarkerResult( name=revision, sha1=sha1, flags=flags, samples=samples, description=description, ) def _SplitLabels(labels): """Parse the 'labels' key from a PerfKitBenchmarker record. Labels are recorded in '|key:value|,|key:value|' form. This function transforms them to a dict. Args: labels: string. labels to parse. Returns: dict. Parsed 'labels'. """ result = {} for item in labels.strip('|').split('|,|'): k, v = item.split(':', 1) result[k] = v return result def _CompareSamples(a, b, context=True, numlines=1): """Generate an HTML table showing differences between 'a' and 'b'. Args: a: dict, as output by PerfKitBenchmarker. b: dict, as output by PerfKitBenchmarker. context: boolean. Show context in diff? If False, all lines are output, even those which are equal. numlines: int. Passed to difflib.Htmldiff.make_table. Returns: string or None. An HTML table, or None if there are no differences. """ a = a.copy() b = b.copy() a['metadata'] = _SplitLabels(a.pop('labels', '')) b['metadata'] = _SplitLabels(b.pop('labels', '')) # Prune the keys in VARYING_KEYS prior to comparison to make the diff more # informative. for d in (a, b): for key in VARYING_KEYS: d.pop(key, None) astr = pprint.pformat(a).splitlines() bstr = pprint.pformat(b).splitlines() if astr == bstr and context: return None differ = difflib.HtmlDiff() return differ.make_table(astr, bstr, context=context, numlines=numlines) def _MatchSamples(base_samples, head_samples): """Match items from base_samples with items from head_samples. Rows are matched using 'test', 'metric', and 'unit' fields. Args: base_samples: List of dicts. head_samples: List of dicts. Returns: List of pairs, each item of the pair containing either a dict or None. """ def ExtractKeys(samples): return [(i['test'], i['metric'], i['unit']) for i in samples] base_keys = ExtractKeys(base_samples) head_keys = ExtractKeys(head_samples) sm = difflib.SequenceMatcher('', base_keys, head_keys) result = [] for opcode, base_begin, base_end, head_begin, head_end in sm.get_opcodes(): if opcode == 'equal': result.extend( zip( base_samples[base_begin:base_end], head_samples[head_begin:head_end], ) ) elif opcode == 'replace': result.extend( zip( base_samples[base_begin:base_end], [None] * (base_end - base_begin), ) ) result.extend( zip( [None] * (head_end - head_begin), head_samples[head_begin:head_end], ) ) elif opcode == 'delete': result.extend( zip( base_samples[base_begin:base_end], [None] * (base_end - base_begin), ) ) elif opcode == 'insert': result.extend( zip( [None] * (head_end - head_begin), head_samples[head_begin:head_end], ) ) else: raise AssertionError('Unknown op: ' + opcode) return result def RenderResults(base_result, head_result, template_name=TEMPLATE, **kwargs): """Render the results of a comparison as an HTML page. Args: base_result: PerfKitBenchmarkerResult. Result of running against base revision. head_result: PerfKitBenchmarkerResult. Result of running against head revision. template_name: string. The filename of the template. kwargs: Additional arguments to Template.render. Returns: String. The HTML template. """ def _ClassForPercentDifference(percent_diff): """Crude highlighting of differences between runs. Samples varying by >25% are colored red. Samples varying by 5-25% are colored orange. Other samples are colored green. Args: percent_diff: float. percent difference between values. """ if percent_diff < 0: direction = 'decrease' else: direction = 'increase' percent_diff = abs(percent_diff) if percent_diff > LARGE_CHANGE_THRESHOLD: size = 'large' elif percent_diff > MEDIUM_CHANGE_THRESHOLD: size = 'medium' elif percent_diff > SMALL_CHANGE_THRESHOLD: size = 'small' else: return '' return 'value-{}-{}'.format(direction, size) env = jinja2.Environment( loader=jinja2.FileSystemLoader(os.path.dirname(__file__)), undefined=jinja2.StrictUndefined, ) env.globals['class_for_percent_diff'] = _ClassForPercentDifference env.globals['izip_longest'] = itertools.izip_longest template = env.get_template('side_by_side.html.j2') matched = _MatchSamples(base_result.samples, head_result.samples) # Generate sample diffs sample_context_diffs = [] sample_diffs = [] for base_sample, head_sample in matched: if not base_sample or not head_sample: # Sample inserted or deleted. continue sample_context_diffs.append(_CompareSamples(base_sample, head_sample)) sample_diffs.append( _CompareSamples(base_sample, head_sample, context=False) ) # Generate flag diffs flag_diffs = difflib.HtmlDiff().make_table( base_result.flags, head_result.flags, context=False ) # Used for generating a chart with differences. matched_json = ( json.dumps(matched) .replace('<', '\\u003c') .replace('>', '\\u003e') .replace('&', '\\u0026') .replace("'", '\\u0027') ) return template.render( base=base_result, head=head_result, matched_samples=matched, matched_samples_json=matched_json, sample_diffs=sample_diffs, sample_context_diffs=sample_context_diffs, flag_diffs=flag_diffs, infinity=float('inf'), **kwargs ) def main(): p = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=__doc__, ) p.add_argument( '-t', '--title', default='PerfKitBenchmarker Comparison', help="""HTML report title""", ) p.add_argument('--base', default='master', help="""Base revision.""") p.add_argument('--head', default='dev', help="""Head revision.""") p.add_argument( '--base-flags', default=None, help="""Flags for run against '--base' revision. Will be combined with --flags.""", type=shlex.split, ) p.add_argument( '--head-flags', default=None, help="""Flags for run against '--head' revision. Will be combined with --flags.""", type=shlex.split, ) p.add_argument( '-f', '--flags', type=shlex.split, help="""Command line flags (Default: {})""".format( ' '.join(DEFAULT_FLAGS) ), ) p.add_argument( '-p', '--parallel', default=False, action='store_true', help="""Run concurrently""", ) p.add_argument( '--rerender', help="""Re-render the HTML report from a JSON file [for developers].""", action='store_true', ) p.add_argument('json_output', help="""JSON output path.""") p.add_argument('html_output', help="""HTML output path.""") a = p.parse_args() if a.base_flags or a.head_flags: if not (a.base_flags and a.head_flags): p.error( '--base-flags and --head-flags must be specified together.\n' '\tbase flags={}\n\thead flags={}'.format( a.base_flags, a.head_flags ) ) a.base_flags = a.base_flags + (a.flags or []) a.head_flags = a.head_flags + (a.flags or []) else: # Just --flags assert not a.base_flags, a.base_flags assert not a.head_flags, a.head_flags a.base_flags = a.flags or list(DEFAULT_FLAGS) a.head_flags = a.flags or list(DEFAULT_FLAGS) if not a.rerender: if a.parallel: from concurrent import futures with futures.ThreadPoolExecutor(max_workers=2) as executor: base_res_fut = executor.submit( RunPerfKitBenchmarker, a.base, a.base_flags ) head_res_fut = executor.submit( RunPerfKitBenchmarker, a.head, a.head_flags ) base_res = base_res_fut.result() head_res = head_res_fut.result() else: base_res = RunPerfKitBenchmarker(a.base, a.base_flags) head_res = RunPerfKitBenchmarker(a.head, a.head_flags) logging.info('Base result: %s', base_res) logging.info('Head result: %s', head_res) with argparse.FileType('w')(a.json_output) as json_fp: logging.info('Writing JSON to %s', a.json_output) json.dump( {'head': head_res._asdict(), 'base': base_res._asdict()}, json_fp, indent=2, ) json_fp.write('\n') else: logging.info('Loading results from %s', a.json_output) with argparse.FileType('r')(a.json_output) as json_fp: d = json.load(json_fp) base_res = PerfKitBenchmarkerResult(**d['base']) head_res = PerfKitBenchmarkerResult(**d['head']) with argparse.FileType('w')(a.html_output) as html_fp: logging.info('Writing HTML to %s', a.html_output) html_fp.write( RenderResults( base_result=base_res, head_result=head_res, varying_keys=VARYING_KEYS, title=a.title, ) ) if __name__ == '__main__': logging.basicConfig(level=logging.INFO) main()