renderDeployModelRequest()

in custom-targets/vertex-ai/model-deployer/render.go [112:175]


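// renderDeployModelRequest builds the Vertex AI DeployModelRequest for this deployment
// from the provided configuration and deploy parameters, and returns it marshaled as YAML.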
func (r *renderer) renderDeployModelRequest() ([]byte, error) {

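	// Apply the deploy parameters to the configuration file before it is loaded.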
	if err := applyDeployParams(r.params.configPath); err != nil {
		return nil, fmt.Errorf("cannot apply deploy parameters to configuration file: %v", err)
	}

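	// Load the parameterized configuration file describing the model to deploy.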
	configuration, err := loadConfigurationFile(r.params.configPath)
	if err != nil {
		return nil, fmt.Errorf("unable to obtain configuration data: %v", err)
	}

	// blank deployed model template
	deployedModel := &aiplatform.GoogleCloudAiplatformV1DeployedModel{}

	if err = yaml.Unmarshal(configuration, deployedModel); err != nil {
		return nil, fmt.Errorf("unable to parse configuration data into DeployModel object: %v", err)
	}

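	// Fetch the model from Vertex AI so its resource name and version ID can be resolved.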
	model, err := fetchModel(r.aiPlatformService, r.params.model)
	if err != nil {
		return nil, fmt.Errorf("unable to fetch model: %v", err)
	}

	modelNameWithVersionId, err := resolveModelWithVersion(model)
	if err != nil {
		return nil, fmt.Errorf("unable to resolve model version: %v", err)
	}

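	// Validate the resolved model, target endpoint, and replica count before populating the request.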
	if err := validateRequest(modelNameWithVersionId, r.params.endpoint, r.params.minReplicaCount, deployedModel); err != nil {
		return nil, fmt.Errorf("manifest validation failed: %v", err)
	}
	deployedModel.Model = modelNameWithVersionId

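	// Fall back to the renderer's minReplicaCount when the configuration does not set
	// DedicatedResources or leaves MinReplicaCount unset.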
	if deployedModel.DedicatedResources == nil {
		deployedModel.DedicatedResources = &aiplatform.GoogleCloudAiplatformV1DedicatedResources{MinReplicaCount: r.params.minReplicaCount}
	}

	if deployedModel.DedicatedResources.MinReplicaCount == 0 {
		deployedModel.DedicatedResources.MinReplicaCount = r.params.minReplicaCount
	}

	// The DeployModel request requires MachineSpec to be non-nil. Default the machine type
	// to "n1-standard-2" if it's not already set.
	if deployedModel.DedicatedResources.MachineSpec == nil {
		deployedModel.DedicatedResources.MachineSpec = &aiplatform.GoogleCloudAiplatformV1MachineSpec{MachineType: "n1-standard-2"}
	}

	if deployedModel.DedicatedResources.MachineSpec.MachineType == "" {
		deployedModel.DedicatedResources.MachineSpec.MachineType = "n1-standard-2"
	}

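	// Build the endpoint traffic split from the requested percentage.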
	percentage := int64(r.req.Percentage)
	trafficSplit := map[string]int64{}
	// "0" is a stand-in to refer to the current model being deployed
	trafficSplit["0"] = percentage

	if percentage != 100 {
		trafficSplit["previous-model"] = 100 - percentage
	}
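	// For example, a Percentage of 25 renders {"0": 25, "previous-model": 75}, while a
	// Percentage of 100 routes all traffic to the newly deployed model.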

	request := &aiplatform.GoogleCloudAiplatformV1DeployModelRequest{DeployedModel: deployedModel, TrafficSplit: trafficSplit}

	return yaml.Marshal(request)
}
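
For reference, the request marshaled here renders roughly as the sketch below, assuming a JSON-tag-aware YAML encoder such as sigs.k8s.io/yaml; the model name, replica count, and percentage are illustrative, not taken from the source.

    deployedModel:
      dedicatedResources:
        machineSpec:
          machineType: n1-standard-2
        minReplicaCount: 1
      model: projects/PROJECT/locations/REGION/models/MODEL@VERSION
    trafficSplit:
      "0": 100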