in notebooks/community/vertex_endpoints/optimized_tensorflow_runtime/benchmark.py [0:0]
def benchmark_qps(send_request, requests, qps):
logging.info("Running benchmark at {} qps".format(qps))
# List appends are thread safe
num_requests = len(requests)
success = []
error = []
latency = []
def _make_call(i):
"""Send a request to using specified method and measure observed latency."""
start_time = time.time()
try:
_ = send_request(requests[i])
success.append(1)
except Exception as e:
print(e)
error.append(1)
latency.append(time.time() - start_time)
if len(latency) % (qps * 10) == 0:
logging.info("received {} responses.".format(len(latency)))
thread_lst = []
miss_rate_percent = []
start_time = time.time()
previous_worker_start = start_time
for i in range(num_requests):
thread = threading.Thread(target=_make_call, args=(i,))
thread_lst.append(thread)
thread.start()
if i % (qps * 10) == 0 and i != 0:
logging.info("sent {} requests.".format(i))
# send requests at a constant rate and adjust for the time it took to send previous request
pause = 1.0 / qps - (time.time() - previous_worker_start)
if pause > 0:
time.sleep(pause)
else:
missed_delay = (
100 * ((time.time() - previous_worker_start) - 1.0 / qps) / (1.0 / qps)
)
miss_rate_percent.append(missed_delay)
previous_worker_start = time.time()
for thread in thread_lst:
thread.join()
acc_time = time.time() - start_time
avg_miss_rate_percent = 0
if len(miss_rate_percent) > 0:
avg_miss_rate_percent = np.average(miss_rate_percent)
logging.warning(
"couldn't keep up at current QPS rate, average miss rate:{:.2f}%".format(
avg_miss_rate_percent
)
)
logging.info(
"num_qps:{} requests/second: {:.2f} #success:{} #error:{} "
"latencies: [avg:{:.2f}ms p50:{:.2f}ms p90:{:.2f}ms p99:{:.2f}ms]".format(
qps,
num_requests / acc_time,
sum(success),
sum(error),
np.average(latency) * 1000,
np.percentile(latency, 50) * 1000,
np.percentile(latency, 90) * 1000,
np.percentile(latency, 99) * 1000,
)
)
return {
"reqested_qps": qps,
"actual_qps": num_requests / acc_time,
"success": sum(success),
"error": sum(error),
"time": acc_time,
"avg_latency": np.average(latency) * 1000,
"p50": np.percentile(latency, 50) * 1000,
"p90": np.percentile(latency, 90) * 1000,
"p99": np.percentile(latency, 99) * 1000,
"avg_miss_rate_percent": avg_miss_rate_percent,
}