in common/sagemaker_rl/orchestrator/workflow/manager/experiment_manager.py [0:0]
def _update_model_in_endpoint(self, soft_deploy, model_id, wait=True):
    """Update the model hosted in an existing endpoint.

    The experiment record is always updated first: ``model_id`` is stored
    as the next model to host and the hosting state is set to
    ``HostingState.PENDING``. With soft deployment nothing else is done
    here. Otherwise a new ``sagemaker.model.Model`` is built from the
    model record and deployed to the endpoint named after the experiment.

    Deployment errors are logged and NOT re-raised; this method does not
    change the hosting state again after setting it to PENDING.

    Args:
        soft_deploy (bool): Whether to update the model hosted by the
            endpoint with soft deployment support
        model_id (str): A unique string representing the new model
            to deploy/update
        wait (bool): Forwarded unchanged as ``wait`` to
            ``sagemaker_model.deploy`` (presumably whether to block until
            the deployment finishes -- confirm against the SageMaker SDK).
    """
    # update 'next_model_to_host_id' and 'hosting_state'
    self.exp_db_client.update_experiment_next_model_to_host_id(self.experiment_id, model_id)
    self.exp_db_client.update_experiment_hosting_state(self.experiment_id, HostingState.PENDING)

    # soft deployment will happen once the 'next_model_host_id' is persisted into ExperimentDB
    if soft_deploy:
        return

    environ_vars = self._get_hosting_environ_vars(model_id)
    if not self.local_mode:
        # do SageMaker blue-green deployment
        stream_name = self.experiment_id
        self.resource_manager.create_firehose_stream_if_not_exists(
            stream_name, self.experiment_id
        )
        environ_vars["FIREHOSE_STREAM"] = stream_name
    else:
        # close the current container and re-deploy
        self.sagemaker_session.delete_endpoint_config(self.experiment_id)
        self.sagemaker_session.delete_endpoint(self.experiment_id)
        present, closed = self._close_existing_containers()
        if present and closed:
            logger.info(
                "Closed docker container[s] that was already running (maybe from previous job)"
            )
        elif present:
            # No exception is active here, so log with error() rather than
            # exception(), which would append a meaningless empty traceback.
            logger.error(
                "Failed to close a docker container that was already running (maybe from "
                "previous job). Please close it manually and retry."
            )

    model_record = self.model_db_client.get_model_record(self.experiment_id, model_id)
    sagemaker_model = sagemaker.model.Model(
        image_uri=self.image,
        role=self.resource_manager.iam_role_arn,
        name=model_id,
        model_data=model_record["s3_model_output_path"],
        sagemaker_session=self.sagemaker_session,
        env=environ_vars,
    )

    fleet_config = self.resource_manager.hosting_fleet_config
    hosting_instance_count = fleet_config.get("instance_count", 1)
    hosting_instance_type = fleet_config.get("instance_type", "local")

    try:
        sagemaker_model.deploy(
            initial_instance_count=hosting_instance_count,
            instance_type=hosting_instance_type,
            endpoint_name=self.experiment_id,
            wait=wait,
        )
    except Exception as e:
        # Best-effort: a failed deployment is reported but does not propagate
        # to the caller (behavior kept from the original implementation).
        logger.error(e)