# def handler()
#
# in autopilot/mlops/timeseries/aws-automl-ts-cdk/lambda/create-transform-job/index.py [0:0]


def handler(event, context):
    """Start a SageMaker Batch Transform job for the best candidate model.

    The job settings are loaded from the JSON object stored at
    ``config/batch_transform_job_config.json`` in the bucket named by the
    ``RESOURCE_BUCKET`` environment variable.

    Args:
        event: Invocation payload; ``event['BestCandidate']['CandidateName']``
            is used both as the model name and as the stem of the job name.
        context: Lambda context object (not used here).

    Returns:
        A dict holding the raw ``create_transform_job`` response together
        with the job name, model name, and the S3 input/output locations.

    Raises:
        KeyError: If ``RESOURCE_BUCKET``, the event fields, or any of the
            expected config keys are missing.
    """
    print(event)

    bucket = os.environ['RESOURCE_BUCKET']
    model_name = event['BestCandidate']['CandidateName']
    job_name = model_name + '-job'

    # Fetch and parse the job configuration stored in S3.
    # NOTE(review): `s3` and `sagemaker` are module-level names defined
    # outside this block — presumably boto3 clients; confirm.
    config_object = s3.get_object(
        Bucket=bucket,
        Key='config/batch_transform_job_config.json',
    )
    raw_config = config_object['Body'].read().decode('utf-8')
    config = json.loads(raw_config)

    # The input is the key of one specific object; the output is a prefix
    # and always gets a trailing slash appended.
    input_uri = 's3://{}/{}'.format(bucket, config['InputLocationKey'])
    output_uri = 's3://{}/{}/'.format(bucket, config['OutputLocationPrefix'])

    transform_input = {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': input_uri,
            },
        },
        'ContentType': 'text/csv',
    }
    transform_output = {
        'S3OutputPath': output_uri,
        'Accept': 'text/csv',
    }
    transform_resources = {
        'InstanceType': config['InstanceType'],
        'InstanceCount': config['InstanceCount'],
    }
    payload_limit_mb = config['MaxPayloadInMB']

    # Submit the Batch Transform job.
    job_response = sagemaker.create_transform_job(
        TransformJobName=job_name,
        ModelName=model_name,
        TransformInput=transform_input,
        TransformOutput=transform_output,
        TransformResources=transform_resources,
        MaxPayloadInMB=payload_limit_mb,
    )

    return {
        'response': job_response,
        'TransformJobName': job_name,
        'ModelName': model_name,
        'InputLocation': input_uri,
        'OutputLocation': output_uri,
    }