lib/generate.py (133 lines of code) (raw):
#!/usr/bin/env python3
import json
import os
import sys
import time
import numpy as np
import django
from django.apps import apps
import lib.parser as parser
from django.conf import settings
from lib.telemetry import TelemetryClient
from lib.analysis import DataAnalyzer
from lib.report import ReportGenerator
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into plain Python equivalents.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps`` when the
    payload may contain NumPy scalars or arrays.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        NumPy integers, floats and booleans become ``int``, ``float`` and
        ``bool``; arrays become (nested) lists. Any other type is delegated
        to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # np.bool_ is neither np.integer nor np.floating, so it needs its
        # own branch or the base encoder would reject it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
def setupDjango():
    """Configure and initialise Django for standalone template use.

    Does nothing when the Django app registry is already ready. Otherwise
    configures settings with a single DjangoTemplates backend whose search
    directories are ``templates/sql`` and ``templates/html`` next to this
    module, then calls ``django.setup()``.
    """
    if apps.ready:
        return

    module_dir = os.path.dirname(__file__)
    template_dirs = [
        os.path.join(module_dir, 'templates', kind) for kind in ('sql', 'html')
    ]
    settings.configure(
        TEMPLATES=[
            {
                "BACKEND": "django.template.backends.django.DjangoTemplates",
                "DIRS": template_dirs,
            }
        ]
    )
    django.setup()
def setupDirs(slug, dataDir, reportDir, generate_report):
    """Create the local output directories if they do not exist yet.

    Args:
        slug: Experiment slug; a sub-directory of this name is created
            inside ``dataDir``.
        dataDir: Root directory for cached data.
        reportDir: Directory for generated reports.
        generate_report: When truthy, ``reportDir`` is created as well.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
    """
    # makedirs also creates missing parents and tolerates an existing
    # directory, so no separate (race-prone) isdir() check is needed.
    os.makedirs(os.path.join(dataDir, slug), exist_ok=True)
    if generate_report:
        os.makedirs(reportDir, exist_ok=True)
def getResultsForExperiment(slug, dataDir, config, skipCache):
    """Fetch telemetry data, analyse it and attach the queries that were run.

    Args:
        slug: Experiment slug, used to name the queries cache file.
        dataDir: Directory where ``{slug}-queries.json`` is written/read.
        config: Experiment configuration. NOTE: ``config['branches']`` is
            replaced in place by a list of branch names.
        skipCache: Forwarded to ``TelemetryClient``.

    Returns:
        The analysis results with an extra ``'queries'`` entry. That entry
        is ``None`` when the telemetry data carries no queries and no
        cached queries file exists.
    """
    sqlClient = TelemetryClient(dataDir, config, skipCache)
    telemetryData = sqlClient.getResults()

    # Change the branches to a list for easier use during analysis.
    config['branches'] = [branch['name'] for branch in config['branches']]

    analyzer = DataAnalyzer(config)
    results = analyzer.processTelemetryData(telemetryData)

    # Save the queries into the results and cache them.
    queriesFile = os.path.join(dataDir, f"{slug}-queries.json")
    queries = telemetryData['queries'] if 'queries' in telemetryData else None
    if queries:
        with open(queriesFile, 'w') as f:
            json.dump(queries, f, indent=2, cls=NpEncoder)
    else:
        # No fresh queries were returned: fall back to the ones saved by an
        # earlier run, if there are any.
        cached = checkForLocalResults(queriesFile)
        if cached is not None:
            queries = cached
            telemetryData['queries'] = cached

    # Use the local value so a missing 'queries' key cannot raise KeyError.
    results['queries'] = queries
    return results
def checkForLocalResults(resultsFile):
    """Load previously saved JSON from *resultsFile*.

    Returns the decoded JSON content, or ``None`` when *resultsFile* is not
    an existing regular file.
    """
    if not os.path.isfile(resultsFile):
        return None

    with open(resultsFile, 'r') as cached:
        return json.load(cached)
def _flag_enabled(value):
    """Return whether a config flag is on: boolean true or the string "true"."""
    if isinstance(value, str):
        return value.lower() == "true"
    # Non-string values (e.g. real booleans) are compared directly, so a
    # plain ``False`` no longer triggers an AttributeError on ``.lower()``.
    return value == True  # noqa: E712


def _move_control_first(branches, control):
    """Swap the branch named *control* into position 0 of *branches*, in place."""
    for idx, branch in enumerate(branches):
        if branch["name"] == control:
            branches[0], branches[idx] = branches[idx], branches[0]
            return


def generate_report(args):
    """Produce the results JSON (and optionally an HTML report) for one config.

    Reads ``args.config``, ``args.dataDir``, ``args.reportDir``,
    ``args.html_report`` and ``args.skip_cache``.

    Steps:
      1. Initialise Django and parse the config file.
      2. Create the local data/report directories.
      3. Reuse ``{slug}-results.json`` from the data directory unless
         ``args.skip_cache`` is set; otherwise build the results and write
         that file.
      4. When ``args.html_report`` is set, write ``{slug}.html`` into the
         report directory.
    """
    startTime = time.time()
    setupDjango()

    # Parse config file.
    print("Loading config file: ", args.config)
    config = parser.parseConfigFile(args.config)
    slug = config['slug']

    # Setup local dirs
    print("Setting up local directories.")
    setupDirs(slug, args.dataDir, args.reportDir, args.html_report)
    dataDir = os.path.join(args.dataDir, slug)
    reportDir = args.reportDir
    skipCache = args.skip_cache

    # Check for local results first.
    resultsFile = os.path.join(dataDir, f"{slug}-results.json")
    results = None if skipCache else checkForLocalResults(resultsFile)

    # If results not found, generate them.
    if results is None:
        # Annotate metrics
        parser.annotateMetrics(config)

        if config["is_experiment"] == True:  # noqa: E712
            # Parse Nimbus API.
            api = parser.parseNimbusAPI(dataDir, slug, skipCache)
            config = config | api

            # If the experiment is a rollout, then use the non-enrolled
            # branch as the control.
            if config['isRollout'] == True:  # noqa: E712
                config['include_non_enrolled_branch'] = True

        # If non-enrolled branch was included, add an extra branch.
        if 'include_non_enrolled_branch' in config:
            if _flag_enabled(config['include_non_enrolled_branch']):
                config['include_non_enrolled_branch'] = True
                # This function only adds 'isRollout' via the Nimbus API
                # merge above, so look it up defensively here.
                if config.get('isRollout') == True:  # noqa: E712
                    config["branches"].insert(0, {'name': 'default'})
                else:
                    config["branches"].append({'name': 'default'})
            else:
                config['include_non_enrolled_branch'] = False

        # Make control the first element if not already.
        if "control" in config:
            control = config.pop("control")
            _move_control_first(config["branches"], control)

        print("Using Config:")
        configStr = json.dumps(config, indent=2)
        print(configStr)

        # Get statistical results
        # Shallow copy: getResultsForExperiment rebinds config['branches'],
        # so the copy keeps the original branch dictionaries.
        origConfig = config.copy()
        results = getResultsForExperiment(slug, dataDir, config, skipCache)
        results = results | config
        results['input'] = origConfig

        # Save results to disk.
        print("---------------------------------")
        print(f"Writing results to {resultsFile}")
        with open(resultsFile, 'w') as f:
            json.dump(results, f, indent=2, cls=NpEncoder)
    else:
        print("---------------------------------")
        print(f"Found local results in {resultsFile}")

    if args.html_report:
        reportFile = os.path.join(reportDir, f"{slug}.html")
        print(f"Generating html report in {reportFile}")

        gen = ReportGenerator(results)
        report = gen.createHTMLReport()
        with open(reportFile, "w") as f:
            f.write(report)

    executionTime = time.time() - startTime
    print(f"Execution time: {executionTime:.1f} seconds")