benchmarks/benchmark/tools/model-load-benchmark/plot.py (60 lines of code) (raw):
import matplotlib.pyplot as plt
import yaml
import glob
import os
import re
def load_yaml_files(directory="results"):
"""Loads all YAML files in the given directory."""
results = []
for filepath in glob.glob(os.path.join(directory, 'case_*.yaml')):
with open(filepath, 'r') as f:
try:
data = yaml.safe_load(f)
# Extract case number from filename using regex
match = re.search(r"case_(\d+)\.yaml", filepath)
if match:
data['case_no'] = int(match.group(1))
results.append(data)
except yaml.YAMLError as e:
print(f"Error parsing YAML file {filepath}: {e}")
return results
def extract_data(results):
"""Extracts relevant data for plotting."""
data = {
'cpu_request': [],
'memory_request': [],
'ephemeral_storage_request': [],
'parallel_downloads_per_file': [],
'max_parallel_downloads': [],
'download_chunk_size_mb': [],
'elapsed_time': [],
'case_no': [] # Add case_no to the data dictionary
}
for result in results:
data['cpu_request'].append(result['config']['sideCarResources']['cpu-request']['base'])
data['memory_request'].append(result['config']['sideCarResources']['memory-request']['base'])
data['ephemeral_storage_request'].append(result['config']['sideCarResources']['ephemeral-storage-request']['base'])
data['parallel_downloads_per_file'].append(result['config']['volumeAttributes']['mountOptions']['file-cache']['parallel-downloads-per-file']['base'])
data['max_parallel_downloads'].append(result['config']['volumeAttributes']['mountOptions']['file-cache']['max-parallel-downloads']['base'])
data['download_chunk_size_mb'].append(result['config']['volumeAttributes']['mountOptions']['file-cache']['download-chunk-size-mb']['base'])
minutes, seconds = map(float, result['elapsedTime'][:-1].split('m'))
data['elapsed_time'].append(minutes * 60 + seconds)
data['case_no'].append(result['case_no']) # Add case_no to the list
return data
def create_scatter_plots(data, directory="results"):
"""Creates scatter plots for each property and saves them to files."""
properties = [
'cpu_request', 'memory_request', 'ephemeral_storage_request',
'parallel_downloads_per_file', 'max_parallel_downloads', 'download_chunk_size_mb'
]
for prop in properties:
plt.figure()
plt.scatter(data[prop], data['elapsed_time'])
plt.xlabel(prop)
plt.ylabel('Elapsed Time (seconds)')
plt.title(f'Elapsed Time vs. {prop}')
# Label the points with case numbers
for i, txt in enumerate(data['case_no']):
plt.annotate(txt, (data[prop][i], data['elapsed_time'][i]))
filepath = os.path.join(directory, f'elapsed_time_vs_{prop}.png')
plt.savefig(filepath)
if __name__ == '__main__':
results_dir = 'results'
results = load_yaml_files(results_dir)
data = extract_data(results)
create_scatter_plots(data, results_dir)