# Source: assets/models/system/microsoft-phi-2/spec.yaml (61 lines of code, raw)

# Azure ML model asset spec for microsoft-phi-2.
#
# NOTE(review): this document was received collapsed onto two physical lines,
# which does not load as block YAML. The nesting below is reconstructed from
# key order. Please confirm that `benchmark` belongs under `tags`, that
# `max_seq_length` belongs under `model_specific_defaults`, and that `version`
# is a top-level key.
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json

name: microsoft-phi-2
path: ./

properties:
  SharedComputeCapacityEnabled: true
  # Quoted so the hash is always loaded as a string.
  SHA: "b10c3eba545ad279e7208ee3a5d644566f001670"
  datasets: StackOverflow, Stackv1.2, CodeContests, gpt-3.5-turbo-0301
  # NOTE(review): the *-min-sku-spec values are pipe-delimited; the meaning of
  # each field is not defined in this file — confirm against the consumer.
  inference-min-sku-spec: "4|1|28|64"
  # Folded scalars (>-) join the lines with single spaces, so each value below
  # is still one comma-separated string.
  inference-recommended-sku: >-
    Standard_NC4as_T4_v3,
    Standard_NC6s_v3,
    Standard_NC8as_T4_v3,
    Standard_NC12s_v3,
    Standard_NC16as_T4_v3,
    Standard_NC24s_v3,
    Standard_NC64as_T4_v3,
    Standard_NC24ads_A100_v4,
    Standard_NC48ads_A100_v4,
    Standard_NC96ads_A100_v4,
    Standard_ND96asr_v4,
    Standard_ND96amsr_A100_v4,
    Standard_ND40rs_v2
  languages: en
  evaluation-min-sku-spec: "6|0|56|112"
  evaluation-recommended-sku: >-
    Standard_DS5_v2,
    Standard_NC6s_v3,
    Standard_NC12s_v3,
    Standard_NC24s_v3,
    Standard_NC24rs_v3,
    Standard_ND40rs_v2,
    Standard_ND96asr_v4,
    Standard_ND96amsr_A100_v4
  finetuning-tasks: text-generation
  finetune-min-sku-spec: "24|1|220|64"
  finetune-recommended-sku: >-
    Standard_ND40rs_v2,
    Standard_NC24ads_A100_v4,
    Standard_NC48ads_A100_v4,
    Standard_NC96ads_A100_v4,
    Standard_ND96asr_v4,
    Standard_ND96amsr_A100_v4

tags:
  # Empty-string tags are kept as explicit "" (a bare `key:` would load as null).
  SharedComputeCapacityEnabled: ""
  license: mit
  author: Microsoft
  task: text-generation
  hiddenlayerscanned: ""
  evaluation_compute_allow_list:
    - Standard_DS5_v2
    - Standard_NC6s_v3
    - Standard_NC12s_v3
    - Standard_NC24s_v3
    - Standard_NC24rs_v3
    - Standard_ND40rs_v2
    - Standard_ND96asr_v4
    - Standard_ND96amsr_A100_v4
  finetune_compute_allow_list:
    - Standard_ND40rs_v2
    - Standard_NC24ads_A100_v4
    - Standard_NC48ads_A100_v4
    - Standard_NC96ads_A100_v4
    - Standard_ND96asr_v4
    - Standard_ND96amsr_A100_v4
  inference_compute_allow_list:
    - Standard_NC4as_T4_v3
    - Standard_NC6s_v3
    - Standard_NC8as_T4_v3
    - Standard_NC12s_v3
    - Standard_NC16as_T4_v3
    - Standard_NC24s_v3
    - Standard_NC64as_T4_v3
    - Standard_NC24ads_A100_v4
    - Standard_NC48ads_A100_v4
    - Standard_NC96ads_A100_v4
    - Standard_ND96asr_v4
    - Standard_ND96amsr_A100_v4
    - Standard_ND40rs_v2
  inference_supported_envs:
    - vllm
  model_specific_defaults:
    # The boolean-looking flags are deliberately quoted strings, as in the
    # original; the numeric settings remain integers.
    apply_deepspeed: "true"
    deepspeed_stage: 2
    apply_lora: "true"
    apply_ort: "false"
    precision: 16
    max_seq_length: 2048
  benchmark: "quality"

version: 19