in inference-units/tps.py [0:0]
def run_load(user_count, spawn_rate):
    """Run a 30-second local Locust load test and report throughput stats.

    Args:
        user_count: Total number of simulated users to spawn.
        spawn_rate: Users spawned per second during ramp-up.

    Returns:
        Tuple ``(max_tps, p95_latency, failure_tps)`` taken from the last
        entry recorded in ``env.stats.history``:
        - max_tps: successful requests/sec (current_rps minus failures/sec)
        - p95_latency: 95th percentile response time of the last sample
        - failure_tps: failed requests/sec of the last sample

    Raises:
        RuntimeError: If no stats history was recorded during the run.
    """
    # Set up the Environment and a local (single-process) runner.
    # create_local_runner() stores the runner on env.runner, which is
    # what the rest of this function uses — no separate binding needed.
    env = Environment(user_classes=[WebserviceUser])
    env.create_local_runner()
    # Greenlet that periodically prints current stats to stdout.
    gevent.spawn(stats_printer(env.stats))
    # Greenlet that appends periodic snapshots to env.stats.history.
    gevent.spawn(stats_history, env.runner)
    # Start the test.
    env.runner.start(user_count, spawn_rate=spawn_rate)
    # Stop the runner after 30 seconds (bound method — no lambda needed).
    gevent.spawn_later(30, env.runner.quit)
    # Block until all runner greenlets have finished.
    env.runner.greenlet.join()
    # Give the stats_history greenlet time to record the final sample.
    time.sleep(5)
    if not env.stats.history:
        raise RuntimeError('No stats were recorded; the load test may not have run.')
    # NOTE: Max TPS is calculated from the last recorded sample of the run.
    last_stats = env.stats.history[-1]
    failure_tps = last_stats['current_fail_per_sec']
    max_tps = last_stats['current_rps'] - failure_tps
    p95_latency = last_stats['response_time_percentile_95']
    print(f'Max supported TPS: {max_tps}')
    print(f'95th percentile response time: {p95_latency}')
    return max_tps, p95_latency, failure_tps