def run_load()

in inference-units/tps.py [0:0]


def run_load(user_count, spawn_rate):
    """Run a 30-second Locust load test with WebserviceUser and report throughput.

    Args:
        user_count: Total number of simulated users to spawn.
        spawn_rate: Users spawned per second until user_count is reached.

    Returns:
        Tuple ``(max_tps, p95_latency, failure_tps)`` taken from the last
        recorded stats-history sample:
            max_tps: successful requests/sec (total RPS minus failures/sec).
            p95_latency: 95th-percentile response time of the last sample.
            failure_tps: failed requests/sec.

    Raises:
        RuntimeError: If no stats-history sample was recorded before the
            test stopped (previously surfaced as an opaque IndexError).
    """
    # Set up the Environment and a local in-process runner.
    # create_local_runner() also assigns the runner to env.runner.
    env = Environment(user_classes=[WebserviceUser])
    runner = env.create_local_runner()

    # Greenlet that periodically prints current stats to stdout.
    # NOTE: stats_printer(...) returns the greenlet target; it is not
    # called here by mistake.
    gevent.spawn(stats_printer(env.stats))

    # Greenlet that periodically snapshots stats into env.stats.history.
    gevent.spawn(stats_history, runner)

    # Start spawning users.
    runner.start(user_count, spawn_rate=spawn_rate)

    # Stop the runner after 30 seconds (the original comment claimed 60s;
    # the actual delay has always been 30s).
    gevent.spawn_later(30, runner.quit)

    # Block until the runner's greenlet finishes, i.e. the test has stopped.
    runner.greenlet.join()

    # Give the stats_history greenlet time to record the final sample.
    time.sleep(5)

    # NOTE: Max TPS is derived from the last history sample of this run only.
    if not env.stats.history:
        raise RuntimeError('No stats history recorded; cannot compute TPS.')
    last_stats = env.stats.history[-1]
    failure_tps = last_stats['current_fail_per_sec']
    max_tps = last_stats['current_rps'] - failure_tps
    p95_latency = last_stats['response_time_percentile_95']
    print(f'Max supported TPS: {max_tps}')
    print(f'95th percentile response time: {p95_latency}')
    return max_tps, p95_latency, failure_tps