in scripts/inf2_env.py [0:0]
def parse_cmdline_and_set_env(argv: List[str] = None) -> argparse.Namespace:
    """Parse the intercepted tgi command-line options and mirror them into the environment.

    Each option defaults to an environment variable (the first non-empty one wins):

    * ``--batch-size``: ``HF_BATCH_SIZE``, then ``BATCH_SIZE``, else ``0``
    * ``--sequence-length``: ``HF_OPTIMUM_SEQUENCE_LENGTH``, then ``SEQUENCE_LENGTH``, else ``0``
    * ``--model-id``: ``HF_MODEL_ID``, then ``HF_MODEL_DIR``
    * ``--revision``: ``REVISION``

    Environment variables that are set but empty are treated as unset, so an
    empty value never reaches argparse's ``int`` conversion and never masks the
    next fallback variable.

    Side effects: ``MODEL_ID`` is always written to ``os.environ``;
    ``HF_BATCH_SIZE``, ``HF_OPTIMUM_SEQUENCE_LENGTH`` and ``REVISION`` are written
    only when the corresponding value is positive / non-empty.

    Args:
        argv: Argument list to parse. When ``None`` or empty, ``sys.argv`` is used.
            Unknown arguments (including a leading program name) are ignored.

    Returns:
        The parsed namespace with ``batch_size``, ``sequence_length``,
        ``model_id`` and ``revision`` attributes.

    Raises:
        ValueError: If no model id is given on the command line or through
            ``HF_MODEL_ID`` / ``HF_MODEL_DIR``.
    """
    parser = argparse.ArgumentParser()
    if not argv:
        argv = sys.argv
    # All these are params passed to tgi and intercepted here.
    # `or` chains (instead of nested getenv defaults) skip variables that are
    # set to an empty string, which would otherwise break the int conversion.
    parser.add_argument(
        "--batch-size",
        type=int,
        default=os.getenv("HF_BATCH_SIZE") or os.getenv("BATCH_SIZE") or 0,
    )
    parser.add_argument(
        "--sequence-length",
        type=int,
        default=os.getenv("HF_OPTIMUM_SEQUENCE_LENGTH") or os.getenv("SEQUENCE_LENGTH") or 0,
    )
    parser.add_argument(
        "--model-id",
        type=str,
        default=os.getenv("HF_MODEL_ID") or os.getenv("HF_MODEL_DIR"),
    )
    parser.add_argument("--revision", type=str, default=os.getenv("REVISION"))
    # parse_known_args: everything not declared above is left for tgi itself.
    args = parser.parse_known_args(argv)[0]
    if not args.model_id:
        raise ValueError(
            "No model id provided ! Either specify it using --model-id cmdline "
            "or HF_MODEL_ID / HF_MODEL_DIR env var"
        )
    # Override env with cmdline params
    os.environ["MODEL_ID"] = args.model_id
    # Set all tgi router and tgi server values to consistent values as early as possible
    # from the order of the parser defaults, the tgi router value can override the tgi server ones
    if args.batch_size > 0:
        os.environ["HF_BATCH_SIZE"] = str(args.batch_size)
    if args.sequence_length > 0:
        os.environ["HF_OPTIMUM_SEQUENCE_LENGTH"] = str(args.sequence_length)
    if args.revision:
        os.environ["REVISION"] = args.revision
    return args