in frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java [216:310]
/**
 * Requests a worker-count update for a model and, in synchronous mode, waits for the result.
 *
 * <p>The request is rejected up front when {@code maxWorkers < minWorkers} or when
 * {@code modelName} is not a key of {@code ModelManager.getDefaultModels()}. Otherwise the
 * update is submitted through {@code ModelManager.updateModel}. In asynchronous mode the
 * method returns immediately with an HTTP 202 response; in synchronous mode it blocks on the
 * returned future and translates its integer result into a {@link StatusResponse}:
 *
 * <ul>
 *   <li>result is HTTP 200 and {@code scaleRequestStatus} is true: success message, code 200;
 *   <li>result is HTTP 200 and {@code scaleRequestStatus} is false: "in progress", code 206;
 *   <li>any other result: failure message, that code, and an {@code InternalServerException}
 *       attached to the response;
 *   <li>exceptional completion: the throwable's message, code 500, and the throwable attached.
 * </ul>
 *
 * @param modelName name of the model whose workers are updated
 * @param modelVersion model version; may be {@code null}, in which case it is left out of the
 *     non-init success message
 * @param minWorkers requested minimum number of workers
 * @param maxWorkers requested maximum number of workers
 * @param synchronous whether to wait for the update to finish before returning
 * @param isInit selects the "registered with N initial workers" wording for the success message
 * @param onError optional callback, invoked with {@code null} when the update reports a
 *     non-200 code or completes exceptionally; may be {@code null}
 * @return the status of the update request
 * @throws BadRequestException if {@code maxWorkers} is less than {@code minWorkers}
 * @throws ModelNotFoundException if {@code modelName} is not among the default models
 * @throws ModelVersionNotFoundException declared by this method; presumably raised by
 *     {@code ModelManager.updateModel} (not visible here, confirm against that method)
 * @throws ExecutionException if waiting on the completion future fails
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static StatusResponse updateModelWorkers(
        String modelName,
        String modelVersion,
        int minWorkers,
        int maxWorkers,
        boolean synchronous,
        boolean isInit,
        final Function<Void, Void> onError)
        throws ModelVersionNotFoundException, ModelNotFoundException, ExecutionException,
                InterruptedException {
    ModelManager modelManager = ModelManager.getInstance();
    if (minWorkers > maxWorkers) {
        throw new BadRequestException("max_worker cannot be less than min_worker.");
    }
    if (!modelManager.getDefaultModels().containsKey(modelName)) {
        throw new ModelNotFoundException("Model not found: " + modelName);
    }

    CompletableFuture<Integer> scaleResult =
            modelManager.updateModel(modelName, modelVersion, minWorkers, maxWorkers);

    // Single response object filled in by whichever completion callback runs below.
    // NOTE(review): it is only consumed on the synchronous path.
    StatusResponse response = new StatusResponse();
    if (!synchronous) {
        return new StatusResponse(
                "Processing worker updates...", HttpURLConnection.HTTP_ACCEPTED);
    }

    CompletableFuture<StatusResponse> outcome =
            scaleResult
                    .thenApply(
                            httpCode -> {
                                // Queried before the result code is inspected.
                                boolean scaled =
                                        modelManager.scaleRequestStatus(
                                                modelName, modelVersion);

                                // Update reported a non-OK code: surface it as a failure.
                                if (HttpURLConnection.HTTP_OK != httpCode) {
                                    response.setHttpResponseCode(httpCode);
                                    String failureMsg =
                                            "Failed to start workers for model "
                                                    + modelName
                                                    + " version: "
                                                    + modelVersion;
                                    response.setStatus(failureMsg);
                                    response.setE(new InternalServerException(failureMsg));
                                    if (onError != null) {
                                        onError.apply(null);
                                    }
                                    return response;
                                }

                                // Update accepted, but scaling has not completed yet.
                                if (!scaled) {
                                    response.setStatus("Workers scaling in progress...");
                                    response.setHttpResponseCode(
                                            HttpURLConnection.HTTP_PARTIAL);
                                    return response;
                                }

                                String successMsg;
                                if (isInit) {
                                    successMsg =
                                            "Model \""
                                                    + modelName
                                                    + "\" Version: "
                                                    + modelVersion
                                                    + " registered with "
                                                    + minWorkers
                                                    + " initial workers";
                                } else if (modelVersion != null) {
                                    successMsg =
                                            "Workers scaled to "
                                                    + minWorkers
                                                    + " for model: "
                                                    + modelName
                                                    + ", version: "
                                                    + modelVersion;
                                } else {
                                    successMsg =
                                            "Workers scaled to "
                                                    + minWorkers
                                                    + " for model: "
                                                    + modelName;
                                }
                                response.setStatus(successMsg);
                                response.setHttpResponseCode(httpCode);
                                return response;
                            })
                    .exceptionally(
                            failure -> {
                                // Notify the caller first, then record the failure details.
                                if (onError != null) {
                                    onError.apply(null);
                                }
                                response.setStatus(failure.getMessage());
                                response.setHttpResponseCode(
                                        HttpURLConnection.HTTP_INTERNAL_ERROR);
                                response.setE(failure);
                                return response;
                            });

    return outcome.get();
}