in benchmark/text-generation/performance/gen_barcharts.py [0:0]
def main():
    """Generate comparison bar charts from benchmark result files.

    Command line: zero or more paths to benchmark result files (.json). When
    no path is given, every ``*.json`` file in the current directory is used.

    Each file must contain a top-level ``"results"`` list whose entries carry
    the keys ``"input_length"``, ``"encoding_time"``, ``"latency"`` and
    ``"throughput"``. The file stem (name without directory and extension) is
    used as the series name, so two inputs sharing a stem overwrite each other.

    Three charts are written to the current directory through
    ``save_bar_chart``: ``ttft.png``, ``latency.png`` and ``throughput.png``.

    Raises:
        ValueError: if a file does not list the same input lengths, in the
            same order, as the first file that was processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("inputs", type=str, nargs="*", help="A list of benchmark results files (.json).")
    args = parser.parse_args()

    # Fall back to every JSON file in the working directory.
    input_paths = args.inputs or glob.glob("*.json")

    benchmarks = {}
    for input_path in input_paths:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(input_path, encoding="utf-8") as f:
            benchmarks[Path(input_path).stem] = json.load(f)

    # Collect one series per model for each metric. All models must share the
    # same x-axis (input lengths) for a grouped bar chart to make sense.
    # None (rather than an empty list) marks "no model seen yet", so that a
    # first file with an empty result list is still used as the reference.
    input_length = None
    ttft_s = {}
    latency_ms = {}
    throughput_t_per_s = {}
    for name, benchmark in benchmarks.items():
        results = benchmark["results"]
        cur_input_length = [result["input_length"] for result in results]
        if input_length is None:
            input_length = cur_input_length
        elif cur_input_length != input_length:
            # Raised explicitly instead of using assert, which is stripped
            # when Python runs with -O.
            raise ValueError(
                f"{name} does not have the same input lengths as the other benchmarks: "
                f"{cur_input_length} != {input_length}"
            )
        ttft_s[name] = [round(result["encoding_time"], 1) for result in results]
        latency_ms[name] = [round(result["latency"], 0) for result in results]
        throughput_t_per_s[name] = [round(result["throughput"], 0) for result in results]

    # With no input files at all there are no labels; keep passing a list.
    labels = [] if input_length is None else input_length

    save_bar_chart(
        title="Time to generate the first token in seconds",
        labels=labels,
        series=ttft_s,
        xlabel="Input tokens",
        ylabel="Time to first token (s)",
        save_path="ttft.png",
    )
    save_bar_chart(
        title="Inter-token latency in milliseconds",
        labels=labels,
        series=latency_ms,
        xlabel="Input tokens",
        ylabel="Latency (ms)",
        save_path="latency.png",
    )
    save_bar_chart(
        title="Generated tokens per second (end-to-end)",
        labels=labels,
        series=throughput_t_per_s,
        xlabel="Input tokens",
        ylabel="Throughput (tokens/s)",
        save_path="throughput.png",
    )