def autoscale_endpoint()

in ezsmdeploy/__init__.py [0:0]


    def autoscale_endpoint(self):
        response = boto3.client("sagemaker").describe_endpoint(
            EndpointName=self.endpoint_name
        )

        in1 = response["EndpointName"]
        in2 = response["ProductionVariants"][0]["VariantName"]

        client = boto3.client("application-autoscaling")
        response = client.register_scalable_target(
            ServiceNamespace="sagemaker",
            ResourceId="endpoint/{}/variant/{}".format(in1, in2),
            ScalableDimension="sagemaker:variant:DesiredInstanceCount",
            MinCapacity=1,
            MaxCapacity=10,
        )

        response = client.put_scaling_policy(
            PolicyName="scaling-policy-{}".format(self.name),
            ServiceNamespace="sagemaker",
            ResourceId="endpoint/{}/variant/{}".format(in1, in2),
            ScalableDimension="sagemaker:variant:DesiredInstanceCount",
            PolicyType="TargetTrackingScaling",
            TargetTrackingScalingPolicyConfiguration={
                "TargetValue": self.autoscaletarget,
                "PredefinedMetricSpecification": {
                    "PredefinedMetricType": "SageMakerVariantInvocationsPerInstance",
                },
                "ScaleOutCooldown": 600,
                "ScaleInCooldown": 600,
                "DisableScaleIn": False,
            },
        )

        self.scalingresponse = response