in machine_learning/ml_infrastructure/inference-server-performance/server/scripts/tensorrt-optimization.py [0:0]
import tensorflow.contrib.tensorrt as trt


def convert_fp32_or_fp16(
        input_model_dir, output_model_dir, batch_size, precision_mode):
    """Optimize an input SavedModel with TF-TRT at FP32 or FP16 precision."""
    # Rewrite the SavedModel: TensorRT-compatible subgraphs are replaced
    # with TRTEngineOp nodes running at the requested precision.
    trt.create_inference_graph(
        input_graph_def=None,  # unused; the graph comes from input_saved_model_dir
        outputs=None,          # unused; outputs come from the SavedModel signature
        max_batch_size=batch_size,
        input_saved_model_dir=input_model_dir,
        output_saved_model_dir=output_model_dir,
        precision_mode=precision_mode)
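
# A minimal usage sketch, assuming a TF 1.x SavedModel on disk. The paths,
# batch size, and precision string below are hypothetical examples, not
# values taken from the original script.
if __name__ == '__main__':
    convert_fp32_or_fp16(
        input_model_dir='/models/resnet/1',
        output_model_dir='/models/resnet_fp16/1',
        batch_size=8,
        precision_mode='FP16')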