# scripts/throughput.py
# Copyright (c) Facebook, Inc. and its affiliates.
# SPDX-License-Identifier: Apache-2.0
import argparse
import ast
import fnmatch
import operator
import os
import os.path
import re
import sys
from collections import Counter, OrderedDict

import matplotlib.pyplot as plt
import numpy as np
# Positions of the run parameters within the sequence of integers embedded
# in a raw log file's name (see parse()).
SHARDS = 0
LOAD = 1
IN_FLIGHTS = 2
COMMITTEE = 3


def parse(log_file, parsed_log_file, x_axis=SHARDS, z_axis=IN_FLIGHTS):
    """Parse a raw log and write the following dictionary to disk, where
    '<Z_VALUE>' is the curve parameter of the graph, <X_VALUE> is the value
    of the x-axis, and <Y_VALUE> is the throughput:
    {
        'transfer': {
            '<Z_VALUE>': [
                [(<X_VALUE>, <Y_VALUE>)]
            ],
        },
        'confirmation': {
            '<Z_VALUE>': [
                [(<X_VALUE>, <Y_VALUE>)]
            ],
        }
    }

    :param log_file: path to the raw log; its name must embed the run
        parameters as integers (e.g. '4.txt.1000.16.10').
    :param parsed_log_file: destination file for the parsed dictionary.
    :param x_axis: index of the x-axis parameter among the filename integers.
    :param z_axis: index of the curve parameter among the filename integers.
    """
    fname = os.path.abspath(log_file)
    # Use a context manager so the file handle is not leaked.
    with open(fname) as f:
        data = f.read()
    # The run parameters are the integers embedded in the log file's name
    # (extracted from the path exactly as it was passed in).
    parameters = re.findall(r'\d+', log_file)
    x_value = parameters[x_axis]
    z_value = parameters[z_axis]
    orders = {}
    for orders_type in ('transfer', 'confirmation'):
        matches = re.findall(
            r'Estimated server throughput: [0-9]* %s orders per sec' % orders_type,
            data)
        tps = re.findall(r'\d+', ''.join(matches))
        # Each raw log must report exactly one throughput per order type.
        assert len(tps) == 1
        orders[orders_type] = {z_value: [(x_value, tps[0])]}
    with open(parsed_log_file, 'w') as f:
        f.write(str(orders))
"""
Aggregates parsed logs and writes to disk the following dictionary, where '<Z_VALUE>' is the parameter of the graph,
<X_VALUE> is the value of the x-axis, and <Y_VALUE> is the throughput:
{
'transfer': {
'<Z_VALUE>': [
[(<X_VALUE>, <Y_VALUE>), (<X_VALUE>, <Y_VALUE>), ...],
...
],
'<Z_VALUE>': [
[(<X_VALUE>, <Y_VALUE>), (<X_VALUE>, <Y_VALUE>), ...],
...
],
...
},
'confirmation': {
'<Z_VALUE>': [
[(<X_VALUE>, <Y_VALUE>), (<X_VALUE>, <Y_VALUE>), ...],
...
],
'<Z_VALUE>': [
[(<X_VALUE>, <Y_VALUE>), (<X_VALUE>, <Y_VALUE>), ...],
...
],
...
}
}
"""
def aggregate(parsed_log_files, aggregated_parsed_log_file):
    """Merge per-run parsed logs (as produced by 'parse') into a single file.

    Entries sharing a z-value are concatenated, sorted by x-value, and
    reshaped into one sub-list of (x, y) measurements per distinct x-value,
    so repeated runs can later be averaged by 'plot'.

    :param parsed_log_files: at least two files written by 'parse'.
    :param aggregated_parsed_log_file: destination for the merged dictionary.
    """
    assert len(parsed_log_files) > 1
    # Parsed files contain only Python literals, so the safe ast.literal_eval
    # replaces eval (which would execute arbitrary code from disk).
    with open(parsed_log_files[0], 'r') as f:
        aggregate_orders = ast.literal_eval(f.read())
    for parsed_log_file in parsed_log_files[1:]:
        with open(parsed_log_file, 'r') as f:
            data = ast.literal_eval(f.read())
        for orders_type, orders in data.items():
            # Each parsed file holds exactly one z-value with one (x, y) pair.
            assert len(orders) == 1
            (z_value, items), = orders.items()
            assert len(items) == 1
            if z_value in aggregate_orders[orders_type]:
                aggregate_orders[orders_type][z_value] += items
            else:
                aggregate_orders[orders_type][z_value] = items
    for orders_type, orders in aggregate_orders.items():
        for z_value, items in orders.items():
            items.sort(key=lambda tup: int(tup[0]))
            # Every distinct x-value must appear the same number of times
            # (one entry per run).
            counter = Counter(item[0] for item in items)
            shards = len(counter)
            runs = next(iter(counter.values()))
            assert runs * shards == len(items)
            # Group the flat [(x, y), ...] list into one sub-list per x-value.
            grouped = np.array(items).reshape((shards, runs, 2)).tolist()
            aggregate_orders[orders_type][z_value] = grouped
    print(aggregate_orders)
    with open(aggregated_parsed_log_file, 'w') as f:
        f.write(str(aggregate_orders))
"""
Loads parsed logs (as produced by 'parse') and saves the following figures as PDF:
- the throughput of transfer orders VS the number of processes, for multiple max in-flight values
- the throughput of confirmation orders VS the number of processes, for multiple max in-flight values
"""
def plot(parsed_log_file, x_label='Number of shards', z_label='tx in-flight', legend_position='lower right', style='plot', y_limit=180000):
    """Load a parsed log (as produced by 'parse' or 'aggregate') and save one
    PDF figure per order type ('transfer.pdf' and 'confirmation.pdf'),
    plotting throughput against the x-axis value with one curve (or bar
    series) per z-value.

    :param parsed_log_file: file containing the parsed dictionary.
    :param x_label: label of the x-axis.
    :param z_label: suffix appended to each z-value in the legend.
    :param legend_position: matplotlib legend location.
    :param style: 'bar' for grouped bars, anything else for error-bar lines.
    :param y_limit: upper y-axis bound used by the line style (default keeps
        the previous hard-coded 180000).
    """
    with open(parsed_log_file, 'r') as f:
        # The file contains only Python literals; literal_eval is safe,
        # unlike eval.
        orders = ast.literal_eval(f.read())
    for orders_type, order in orders.items():
        plt.figure()
        width = 2
        offset = -3  # horizontal shift so consecutive bar series don't overlap
        for z_value, items in sorted(order.items(), reverse=True):
            x_values = []
            y_values = []
            y_err = []
            for item in items:
                x, y = list(zip(*item))
                x = int(x[0])
                # np.int was removed from NumPy (1.24+); the builtin int is
                # the documented replacement dtype.
                y = np.array(y).astype(int)
                x_values.append(x)
                y_values.append(np.mean(y))
                y_err.append(np.std(y))
            if style == 'bar':
                plt.bar(np.array(x_values) + offset, y_values, width, yerr=y_err,
                        label='%s %s' % (z_value, z_label))
                offset = offset + width
                plt.xticks(x_values, x_values)
            else:
                plt.ylim(0, y_limit)
                plt.errorbar(x_values, y_values, yerr=y_err, uplims=True, lolims=True,
                             label='%s %s' % (z_value, z_label), marker='.', alpha=1, dashes=None)
        plt.legend(loc=legend_position)
        plt.xlabel(x_label)
        plt.ylabel('Observed throughput (tx / sec)')
        plt.savefig('%s.pdf' % orders_type)
        print('created figure "%s.pdf".' % orders_type)
"""
Utility to find files
"""
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches the
    given fnmatch *pattern*."""
    matches = []
    for dirpath, _dirnames, filenames in os.walk(path):
        matches.extend(
            os.path.join(dirpath, fname)
            for fname in filenames
            if fnmatch.fnmatch(fname, pattern)
        )
    return matches
if __name__ == '__main__':
    aggregated_log = 'aggregated_tps_log.txt'
    commands = ['parse', 'aggregate', 'plot', 'all']

    # Re-enable the command-line interface that was previously commented out;
    # defaulting to 'plot' preserves the old hard-coded no-argument behavior.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', action='store', dest='command', default='plot',
                        choices=commands,
                        help='Command to execute (parse, aggregate, plot, all).')
    command = vars(parser.parse_args())['command']

    execute_all = command == commands[3]
    if command == commands[0] or execute_all:
        # Pattern matches raw log names like '4.txt.1000.16.10'
        # — TODO confirm against the log producer.
        raw_logs = find('*.txt.*.*.*.*', '.')
        # Plain loop (not a comprehension) since parse() is called only for
        # its side effect of writing the '<name>_parsed' file.
        for raw_log in raw_logs:
            parse(raw_log, '%s_parsed' % raw_log, x_axis=SHARDS, z_axis=IN_FLIGHTS)
    if command == commands[1] or execute_all:
        parsed_logs = find('*_parsed', '.')
        aggregate(parsed_logs, aggregated_log)
    if command == commands[2] or execute_all:
        plot(aggregated_log, x_label='Committee size', z_label='tx shards')