in tensorflow-serving-load-testing-sample/python/submit_mc_tf_k8s_requests.py [0:0]
def runParallelRequests(processes=10, queued_requests_per_process=10, url=""):
    """Send the same request many times in parallel and print a summary.

    Builds a queue holding ``processes * queued_requests_per_process`` copies
    of ``url``, distributes it over a ``Pool`` of ``processes`` workers that
    each call ``getUrl`` (defined elsewhere in this file), and prints the
    number of successful and dropped requests plus the average latency.

    Args:
        processes: Number of worker processes in the pool.
        queued_requests_per_process: Requests queued per worker; the total
            number of requests is this value times ``processes``.
        url: Value handed to ``getUrl`` for every request.

    Returns:
        None. Results are reported on stdout only.
    """
    queue_of_urls = [url] * (processes * queued_requests_per_process)
    # Every queue entry is `url`, so print it directly; indexing the queue
    # would raise IndexError when no requests are queued.
    print("Total processes: {}\nNumber of Requests: {}\ncontent of queue_of_urls: {}".format(
        processes,
        len(queue_of_urls),
        url))
    pool = Pool(processes)
    try:
        success = 0
        fail = 0
        total_time = 0
        for res in tqdm(pool.imap_unordered(getUrl, queue_of_urls), total=len(queue_of_urls)):
            # NOTE(review): `res` looks like a requests.Response (status_code,
            # elapsed, json()) or None on failure -- confirm against getUrl.
            if res is None or res.status_code != 200:
                fail += 1
                continue
            try:
                # Validate that the body really carries a prediction; the
                # value itself is not needed for the statistics.
                res.json()['predictions'][0]['classes']
            except (ValueError, KeyError, IndexError, TypeError):
                # A 200 with an unusable payload is counted as dropped
                # instead of aborting the whole load test.
                fail += 1
                continue
            total_time += res.elapsed.total_seconds()
            success += 1
        print('Number of Sucessful Requests ', success)
        print('Number of Dropped Requests ', fail)
        # Only successful responses contribute to total_time, so average over
        # those; report 0.0 when nothing succeeded to avoid dividing by zero.
        avg_latency_ms = (total_time * 1000) / success if success else 0.0
        print('Avg latency: {} ms'.format(avg_latency_ms))
    finally:
        # Always stop accepting work and wait for the workers to exit.
        pool.close()
        pool.join()