in ezsmdeploy/__init__.py [0:0]
def autoscale_endpoint(
    self,
    *,
    min_capacity=1,
    max_capacity=10,
    scale_out_cooldown=600,
    scale_in_cooldown=600,
):
    """Attach a target-tracking autoscaling policy to the endpoint.

    Looks up the endpoint named ``self.endpoint_name``, registers its first
    production variant as an Application Auto Scaling scalable target, and
    puts a ``TargetTrackingScaling`` policy on it that tracks the
    ``SageMakerVariantInvocationsPerInstance`` predefined metric against
    ``self.autoscaletarget``. The policy is named
    ``"scaling-policy-<self.name>"``.

    All parameters are keyword-only and default to the values that were
    previously hard-coded, so ``autoscale_endpoint()`` behaves as before.

    Args:
        min_capacity: Lower bound on the variant's instance count.
        max_capacity: Upper bound on the variant's instance count.
        scale_out_cooldown: Seconds to wait after a scale-out activity
            before another scale-out may start.
        scale_in_cooldown: Seconds to wait after a scale-in activity
            before another scale-in may start.

    Returns:
        None. The ``put_scaling_policy`` response is stored on
        ``self.scalingresponse``.

    Raises:
        Whatever the boto3 clients raise (for example when the endpoint
        does not exist or the request is rejected); nothing is caught here.
    """
    endpoint = boto3.client("sagemaker").describe_endpoint(
        EndpointName=self.endpoint_name
    )

    # Only the first production variant is autoscaled; any additional
    # variants on the endpoint are left untouched.
    resource_id = "endpoint/{}/variant/{}".format(
        endpoint["EndpointName"],
        endpoint["ProductionVariants"][0]["VariantName"],
    )
    # Built once so the target registration and the policy are guaranteed
    # to refer to the same resource and dimension.
    scalable_dimension = "sagemaker:variant:DesiredInstanceCount"

    autoscaling = boto3.client("application-autoscaling")

    # The variant has to be a registered scalable target before a policy
    # can be attached to it. The registration response is not used.
    autoscaling.register_scalable_target(
        ServiceNamespace="sagemaker",
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        MinCapacity=min_capacity,
        MaxCapacity=max_capacity,
    )

    self.scalingresponse = autoscaling.put_scaling_policy(
        PolicyName="scaling-policy-{}".format(self.name),
        ServiceNamespace="sagemaker",
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        PolicyType="TargetTrackingScaling",
        TargetTrackingScalingPolicyConfiguration={
            "TargetValue": self.autoscaletarget,
            "PredefinedMetricSpecification": {
                "PredefinedMetricType": "SageMakerVariantInvocationsPerInstance",
            },
            "ScaleOutCooldown": scale_out_cooldown,
            "ScaleInCooldown": scale_in_cooldown,
            "DisableScaleIn": False,
        },
    )