in custom-targets/vertex-ai/model-deployer/render.go [112:175]
// renderDeployModelRequest produces the YAML-encoded DeployModelRequest for the
// model and endpoint described by r.params.
//
// It applies the deploy parameters to the configuration file at
// r.params.configPath, parses that file into a DeployedModel, fetches the model
// referenced by r.params.model, validates the combination, fills in defaults
// for the dedicated resources, and attaches a traffic split derived from
// r.req.Percentage.
//
// It returns the marshaled request, or an error wrapping the underlying cause
// if any step fails.
func (r *renderer) renderDeployModelRequest() ([]byte, error) {
	// Machine type used when the configuration file does not provide one; the
	// deploy model request requires the machine spec to be non-nil.
	const defaultMachineType = "n1-standard-2"

	if err := applyDeployParams(r.params.configPath); err != nil {
		return nil, fmt.Errorf("cannot apply deploy parameters to configuration file: %w", err)
	}

	configuration, err := loadConfigurationFile(r.params.configPath)
	if err != nil {
		return nil, fmt.Errorf("unable to obtain configuration data: %w", err)
	}

	// Start from a blank deployed model and overlay the configuration on it.
	deployedModel := &aiplatform.GoogleCloudAiplatformV1DeployedModel{}
	if err = yaml.Unmarshal(configuration, deployedModel); err != nil {
		return nil, fmt.Errorf("unable to parse configuration data into DeployModel object: %w", err)
	}

	model, err := fetchModel(r.aiPlatformService, r.params.model)
	if err != nil {
		return nil, fmt.Errorf("unable to fetch model: %w", err)
	}
	modelNameWithVersionID := resolveModelWithVersion(model)

	// Validate before mutating deployedModel so the check sees the manifest
	// exactly as it was written in the configuration file.
	if err := validateRequest(modelNameWithVersionID, r.params.endpoint, r.params.minReplicaCount, deployedModel); err != nil {
		return nil, fmt.Errorf("manifest validation failed: %w", err)
	}
	deployedModel.Model = modelNameWithVersionID

	// Fill in any dedicated-resource settings the configuration left unset.
	if deployedModel.DedicatedResources == nil {
		deployedModel.DedicatedResources = &aiplatform.GoogleCloudAiplatformV1DedicatedResources{}
	}
	resources := deployedModel.DedicatedResources
	if resources.MinReplicaCount == 0 {
		resources.MinReplicaCount = r.params.minReplicaCount
	}
	if resources.MachineSpec == nil {
		resources.MachineSpec = &aiplatform.GoogleCloudAiplatformV1MachineSpec{}
	}
	if resources.MachineSpec.MachineType == "" {
		resources.MachineSpec.MachineType = defaultMachineType
	}

	percentage := int64(r.req.Percentage)
	// "0" is a stand-in to refer to the current model being deployed.
	trafficSplit := map[string]int64{"0": percentage}
	if percentage != 100 {
		// The remaining traffic is assigned to the "previous-model" key.
		// NOTE(review): presumably a placeholder that is resolved to a real
		// deployed model ID elsewhere — confirm against the deploy step.
		trafficSplit["previous-model"] = 100 - percentage
	}

	request := &aiplatform.GoogleCloudAiplatformV1DeployModelRequest{
		DeployedModel: deployedModel,
		TrafficSplit:  trafficSplit,
	}
	return yaml.Marshal(request)
}