in phi3/olive/phi3.py [0:0]
import json
import tempfile
from pathlib import Path

from olive.workflows import run as olive_run

# Helpers such as get_args, generate_config, save_output_model, and
# genai_run are defined elsewhere in this file.


def main(raw_args=None):
    args = get_args(raw_args)
    if args.target in ("mobile", "web") and args.precision != "int4":
        raise ValueError("mobile or web only supports int4 (default)")
    elif args.target == "cpu" and args.precision == "fp16":
        raise ValueError("Choose from fp32 or int4 (default) for cpu target")
    if args.inference and args.target == "web":
        raise ValueError("Web model inference is not supported in this script")
    # Generate Olive configuration file for the specific target
    print("\nGenerating Olive configuration file...")
    config_file = generate_config(args)
    print("Olive configuration file is generated...\n")
    # Generate optimized model for the specific target
    print("Generating optimized model for", args.target, "...\n")
    output_path = Path(args.output_dir)
    with tempfile.TemporaryDirectory() as tempdir:
        with open(config_file) as f:
            run_config = json.load(f)
        # QuaRot writes its output directly to the requested directory;
        # every other flow runs in a temporary directory and the final
        # model is copied out afterwards.
        if args.quarot:
            run_config["output_dir"] = args.output_dir
        else:
            run_config["output_dir"] = tempdir
        olive_run(run_config)
        if args.quarot:
            return
        save_output_model(run_config, output_path)
    if args.inference:
        if not args.chat_template:
            # Default chat templates: a finetuned model uses the
            # question/answer format it was trained on, otherwise use the
            # standard Phi-3 chat format.
            args.chat_template = (
                "### Question: {input} \n### Answer: "
                if args.finetune_method
                else "<|user|>\n{input}<|end|>\n<|assistant|>"
            )
        prompts = "Write a joke" if not args.prompt else "".join(args.prompt)
        prompts = args.chat_template.format(input=prompts)
        max_length = 200 if not args.max_length else args.max_length
        genai_run(prompts, str(output_path / "model"), max_length)
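
# A minimal sketch of the usual entry-point wiring; the flag names in the
# example invocation are assumptions inferred from the attributes read off
# args above (get_args defines the real CLI):
#
#   python phi3.py --target cpu --precision int4 --inference --prompt "Write a joke"
if __name__ == "__main__":
    main()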