in benchmarking/pipemode_benchmark/benchmark.py [0:0]
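# Module-level imports assumed by this excerpt. The standard-library and boto3
# imports are unambiguous; the local helpers (bucket_helper, region_helper,
# dataset, script, get_role_arn, benchmark, all_benchmarks) are presumed to be
# defined or imported elsewhere in this package.
import argparse
import concurrent.futures
import datetime
import time

import boto3
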
def main(args=None):
"""Run benchmarking."""
parser = argparse.ArgumentParser(description='Benchmark SageMaker TensorFlow PipeMode')
parser.add_argument('--parallelism', type=int, default=8, help='How many training jobs to run concurrently')
parser.add_argument('sdist_path',
help='The path of a sagemaker_tensorflow tar.gz source distribution to benchmark')
parser.add_argument('--role_name', default='SageMakerRoleTest',
help='The name of an IAM role to pass to SageMaker for running benchmarking training jobs')
args = parser.parse_args()
role_arn = get_role_arn(role_name=args.role_name)
bucket = bucket_helper.bucket()
output_path = "s3://{}/pipemode/output/".format(bucket)
executor = concurrent.futures.ProcessPoolExecutor(max_workers=args.parallelism)
futures = []
print "Benchmarking starting"
    for benchmark_script in script.all_scripts.values():
        benchmark_script.build(sdist_path=args.sdist_path)
    print("Built scripts")
    for benchmark_dataset in dataset.all_datasets.values():
        benchmark_dataset.build()
    print("Built datasets")
    for dataset_name, script_name, instance_type in all_benchmarks:
        print("Submitting benchmark: {} {} {}".format(dataset_name, script_name, instance_type))
        future = executor.submit(benchmark,
                                 region_helper.region,
                                 role_arn,
                                 dataset.all_datasets[dataset_name],
                                 output_path,
                                 instance_type,
                                 script.all_scripts[script_name])
        futures.append(future)
        time.sleep(2)
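    # As each job finishes, publish its timing and throughput measurements to CloudWatch.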
    cwclient = boto3.client('cloudwatch', region_name=region_helper.region)
    for future in concurrent.futures.as_completed(futures):
        benchmark_result = future.result()
        print(benchmark_result)

        def make_metric_data(name, unit, value, benchmark_result):
            return {
                'MetricName': "{}.{}.{}".format(name, benchmark_result.dataset, benchmark_result.script),
                'Dimensions': [
                    {
                        'Name': 'Dataset',
                        'Value': benchmark_result.dataset
                    },
                    {
                        'Name': 'Script',
                        'Value': benchmark_result.script
                    }],
                'Timestamp': datetime.datetime.utcnow(),  # CloudWatch expects UTC timestamps
                'Value': value,
                'Unit': unit
            }
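        # Four metrics per result: end-to-end iteration time, time spent in the
        # iterator itself, total bytes read, and read throughput.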
        cwclient.put_metric_data(
            Namespace='tf-pipemode-benchmark',
            MetricData=[
                make_metric_data('TotalIterationTime', 'Seconds',
                                 benchmark_result.total_iteration_time.total_seconds(), benchmark_result),
                make_metric_data('IteratorIterationTime', 'Seconds',
                                 benchmark_result.iterator_time.total_seconds(), benchmark_result),
                make_metric_data('ReadBytes', 'Bytes',
                                 benchmark_result.read_bytes, benchmark_result),
                make_metric_data('ReadGigabytesPerSecond', 'Gigabytes/Second',
                                 benchmark_result.read_GB_sec, benchmark_result),
            ]
        )