in aiplatform/src/main/java/aiplatform/DeployModelSample.java [56:142]
/**
 * Deploys a model to an existing endpoint in the {@code us-central1} region, assigns 100% of the
 * endpoint's traffic to the newly deployed model, waits for the long-running operation to finish
 * and prints the resulting deployed model to standard output.
 *
 * @param project project used to build the endpoint and model resource names
 * @param deployedModelDisplayName display name set on the deployed model
 * @param endpointId ID of the endpoint the model is deployed to
 * @param modelId ID of the model to deploy
 * @param timeout total timeout, in seconds, applied to polling of the deploy operation; values
 *     above 20 minutes also extend how long this method waits for the result
 * @throws IOException declared by the settings/client creation calls
 * @throws InterruptedException if the calling thread is interrupted while waiting
 * @throws ExecutionException if the deploy operation completes with a failure
 * @throws TimeoutException if the result is not available within the wait bound
 */
static void deployModelSample(
    String project,
    String deployedModelDisplayName,
    String endpointId,
    String modelId,
    int timeout)
    throws IOException, InterruptedException, ExecutionException, TimeoutException {
  // Single source of truth for the region: both the service endpoint and the resource names
  // below are derived from it, so they cannot drift apart.
  final String location = "us-central1";
  final String apiEndpoint = location + "-aiplatform.googleapis.com:443";

  // Set long-running operations (LROs) timeout
  final OperationTimedPollAlgorithm operationTimedPollAlgorithm =
      OperationTimedPollAlgorithm.create(
          RetrySettings.newBuilder()
              .setInitialRetryDelay(Duration.ofMillis(5000L))
              .setRetryDelayMultiplier(1.5)
              .setMaxRetryDelay(Duration.ofMillis(45000L))
              .setInitialRpcTimeout(Duration.ZERO)
              .setRpcTimeoutMultiplier(1.0)
              .setMaxRpcTimeout(Duration.ZERO)
              .setTotalTimeout(Duration.ofSeconds(timeout))
              .build());

  // Only the deployModel operation gets the custom polling algorithm.
  EndpointServiceStubSettings.Builder endpointServiceStubSettingsBuilder =
      EndpointServiceStubSettings.newBuilder();
  endpointServiceStubSettingsBuilder
      .deployModelOperationSettings()
      .setPollingAlgorithm(operationTimedPollAlgorithm);
  EndpointServiceStubSettings endpointStubSettings = endpointServiceStubSettingsBuilder.build();
  EndpointServiceSettings endpointServiceSettings =
      EndpointServiceSettings.create(endpointStubSettings).toBuilder()
          .setEndpoint(apiEndpoint)
          .build();

  // Wait for at least the previously fixed 20 minutes, or for `timeout` seconds when that is
  // longer, so a caller-supplied timeout above 20 minutes is no longer cut short by the wait.
  final long waitSeconds = Math.max((long) timeout, TimeUnit.MINUTES.toSeconds(20));

  // Initialize client that will be used to send requests. This client only needs to be created
  // once, and can be reused for multiple requests. After completing all of your requests, call
  // the "close" method on the client to safely clean up any remaining background resources.
  try (EndpointServiceClient endpointServiceClient =
      EndpointServiceClient.create(endpointServiceSettings)) {
    EndpointName endpointName = EndpointName.of(project, location, endpointId);
    ModelName modelName = ModelName.of(project, location, modelId);

    // key '0' assigns traffic for the newly deployed model
    // Traffic percentage values must add up to 100
    // Leave dictionary empty if endpoint should not accept any traffic
    Map<String, Integer> trafficSplit = new HashMap<>();
    trafficSplit.put("0", 100);

    // The model is deployed on automatic resources with exactly one replica.
    AutomaticResources automaticResourcesInput =
        AutomaticResources.newBuilder().setMinReplicaCount(1).setMaxReplicaCount(1).build();
    DeployedModel deployedModelInput =
        DeployedModel.newBuilder()
            .setModel(modelName.toString())
            .setDisplayName(deployedModelDisplayName)
            .setAutomaticResources(automaticResourcesInput)
            .build();

    OperationFuture<DeployModelResponse, DeployModelOperationMetadata> deployModelResponseFuture =
        endpointServiceClient.deployModelAsync(endpointName, deployedModelInput, trafficSplit);
    System.out.format(
        "Operation name: %s\n", deployModelResponseFuture.getInitialFuture().get().getName());
    System.out.println("Waiting for operation to finish...");
    DeployModelResponse deployModelResponse =
        deployModelResponseFuture.get(waitSeconds, TimeUnit.SECONDS);

    System.out.println("Deploy Model Response");
    DeployedModel deployedModel = deployModelResponse.getDeployedModel();
    System.out.println("\tDeployed Model");
    System.out.format("\t\tid: %s\n", deployedModel.getId());
    System.out.format("\t\tmodel: %s\n", deployedModel.getModel());
    System.out.format("\t\tDisplay Name: %s\n", deployedModel.getDisplayName());
    System.out.format("\t\tCreate Time: %s\n", deployedModel.getCreateTime());

    DedicatedResources dedicatedResources = deployedModel.getDedicatedResources();
    System.out.println("\t\tDedicated Resources");
    System.out.format("\t\t\tMin Replica Count: %s\n", dedicatedResources.getMinReplicaCount());

    MachineSpec machineSpec = dedicatedResources.getMachineSpec();
    System.out.println("\t\t\tMachine Spec");
    System.out.format("\t\t\t\tMachine Type: %s\n", machineSpec.getMachineType());
    System.out.format("\t\t\t\tAccelerator Type: %s\n", machineSpec.getAcceleratorType());
    System.out.format("\t\t\t\tAccelerator Count: %s\n", machineSpec.getAcceleratorCount());

    AutomaticResources automaticResources = deployedModel.getAutomaticResources();
    System.out.println("\t\tAutomatic Resources");
    System.out.format("\t\t\tMin Replica Count: %s\n", automaticResources.getMinReplicaCount());
    System.out.format("\t\t\tMax Replica Count: %s\n", automaticResources.getMaxReplicaCount());
  }
}