in phi3/olive/phi3.py [0:0]
def generate_config(args):
    """Build an Olive run-config JSON for phi-3 from the template and CLI args.

    Loads ``phi3_template.json``, enables the passes implied by *args*
    (finetune, AWQ, builder, etc.), patches the accelerator/precision/cache
    fields, writes the result to a new ``phi3_run_*.json`` file, and returns
    that filename.

    Args:
        args: Parsed CLI namespace; reads ``quarot``, ``source``,
            ``model_path``, ``finetune_method``, ``awq``, ``precision``,
            ``target`` and ``cache_dir``. NOTE: ``args.precision`` is
            mutated to ``"int4"`` when AWQ is requested with another value.

    Returns:
        The filename of the generated run-config JSON.
    """
    template_path = "phi3_template.json"
    with open(template_path) as fin:
        config = json.load(fin)

    prefix = "phi3_run_"

    # QuaRot is a standalone CUDA-only flow: it ignores the model source,
    # target and precision options and returns its own config immediately.
    if args.quarot:
        config = use_passes(config, "quarot")
        config["systems"]["local_system"]["accelerators"] = [
            {"device": "GPU", "execution_providers": ["CUDAExecutionProvider"]}
        ]
        out_file = f"{prefix}quarot.json"
        with open(out_file, "w") as fout:
            json.dump(config, fout, indent=4)
        return out_file

    # Model source: AzureML-registered asset vs. a user-supplied path.
    if args.source == "AzureML":
        config["input_model"]["model_path"] = AML_MODEL_Path
    else:
        config["input_model"]["model_path"] = args.model_path

    selected = []
    if args.finetune_method:
        # adapters will be fine-tuned and merged into the model
        selected += [args.finetune_method, "merge_adapter_weights"]
    if args.awq:
        selected.append("awq")
        if args.precision != "int4":
            print("AWQ only supports int4 precision. Changing precision to int4")
            args.precision = "int4"
    # The ONNX model builder pass always runs.
    selected.append("builder")

    target = str(args.target)
    if target == "web":
        # web doesn't have fp16 io
        selected.append("fp32_logits")

    # use the relevant passes
    config = use_passes(config, *selected)

    # GPU for cuda/web targets, CPU otherwise; the execution provider comes
    # from the module-level TARGET_TO_EP lookup table.
    on_gpu = target in ("cuda", "web")
    config["systems"]["local_system"]["accelerators"] = [
        {
            "device": "GPU" if on_gpu else "CPU",
            "execution_providers": [TARGET_TO_EP[target.lower()]],
        }
    ]

    # Builder precision, with a reduced int4 accuracy level on mobile.
    config["passes"]["builder"]["precision"] = args.precision
    if target == "mobile":
        config["passes"]["builder"]["int4_accuracy_level"] = 4

    config["cache_dir"] = args.cache_dir

    out_file = f"{prefix}{target.lower()}_{args.precision}.json"
    with open(out_file, "w") as fout:
        json.dump(config, fout, indent=4)
    return out_file