def runParallelRequests()

in tensorflow-serving-load-testing-sample/python/submit_mc_tf_k8s_requests.py [0:0]


def runParallelRequests(processes=10, queued_requests_per_process=10, url=""):
    """Fire ``processes * queued_requests_per_process`` GET requests at *url*
    in parallel and print a small summary.

    Uses a multiprocessing ``Pool`` of *processes* workers; each worker runs
    the module-level ``getUrl`` helper. A request counts as successful when it
    returns a non-None response with HTTP status 200 and a JSON body shaped
    like ``{"predictions": [{"classes": ...}]}`` (TF Serving classify output
    -- assumed from the parsing below, confirm against the server).

    Args:
        processes: number of worker processes in the pool.
        queued_requests_per_process: requests queued per worker process.
        url: target URL; every request hits the same endpoint.

    Side effects: prints progress (via tqdm) and summary lines to stdout.
    Returns: None.
    """
    total_requests = processes * queued_requests_per_process
    queue_of_urls = [url] * total_requests
    print("Total processes: {}\nNumber of Requests: {}\ncontent of queue_of_urls: {}".format(
        processes,
        len(queue_of_urls),
        # guard: original indexed [0] unconditionally and raised IndexError
        # when total_requests == 0
        queue_of_urls[0] if queue_of_urls else ""))
    pool = Pool(processes)
    try:
        success = 0
        fail = 0
        total_time = 0.0
        for res in tqdm(pool.imap_unordered(getUrl, queue_of_urls), total=total_requests):
            if res is not None and res.status_code == 200:
                total_time += res.elapsed.total_seconds()
                # Parse the body to confirm it is a well-formed prediction
                # payload; the value itself is not used (original assigned it
                # to an unused local).
                res.json()['predictions'][0]['classes']
                success += 1
            else:
                fail += 1
        print('Number of Successful Requests ', success)
        print('Number of Dropped Requests ', fail)
        # total_time only accumulates SUCCESSFUL requests, so average over
        # `success`, not total_requests (the original divided by the total,
        # understating latency, and crashed with ZeroDivisionError when the
        # queue was empty).
        if success:
            print('Avg latency: {} ms'.format((total_time * 1000) / success))
        else:
            print('Avg latency: n/a (no successful requests)')
    finally:
        pool.close()
        pool.join()