def lambda_handler()

in sdlf-utils/ingestion-examples/sqoop/sdlf-stageX/lambda/stage-x-create-emr-cluster/src/lambda_function.py [0:0]


def _get_ssm_parameter_value(name):
    """Return the ``Value`` field of the SSM parameter called *name*."""
    return ssm_client.get_parameter(Name=name)['Parameter']['Value']


def lambda_handler(event, context):
    """Launch an EMR cluster with Hadoop, Hive and Sqoop and return its id.

    Environment variables read: ``KMS_DATA_KEY``, ``EMR_RELEASE``,
    ``EMR_EC2_ROLE``, ``EMR_ROLE`` and ``SUBNET_ID``.
    SSM parameters read: ``/SDLF/S3/ArtifactsBucket`` (location of the
    bootstrap scripts) and ``/SDLF/S3/CloudTrailBucket`` (used for the
    cluster's ``LogUri``).

    Args:
        event: Mapping that must contain the keys ``team``, ``env``,
            ``pipeline`` and ``clusterName``.
        context: Lambda context object; not used.

    Returns:
        The ``JobFlowId`` entry of the ``run_job_flow`` response.

    Raises:
        Exception: Any error raised while reading the configuration or
            starting the cluster is logged and re-raised unchanged.
    """
    try:
        # Read but currently unused: the LogEncryptionKmsKeyId argument of
        # run_job_flow is not passed. The lookup is kept so that a missing
        # variable still fails here, exactly as it did before.
        kms_data_key = os.environ['KMS_DATA_KEY']
        emr_release = os.environ['EMR_RELEASE']
        emr_ec2_role = os.environ['EMR_EC2_ROLE']
        emr_role = os.environ['EMR_ROLE']
        subnet = os.environ['SUBNET_ID']
        logger.info(f'Evento: {event}')

        # All request fields are read straight from the event itself.
        team = event['team']
        env = event['env']
        pipeline = event['pipeline']
        cluster_name = event['clusterName']

        artifacts_bucket = _get_ssm_parameter_value('/SDLF/S3/ArtifactsBucket')
        cloudtrail_bucket = _get_ssm_parameter_value('/SDLF/S3/CloudTrailBucket')

        bootstrap_prefix = f's3://{artifacts_bucket}/emr-sqoop-bootstrap'

        response = emr_client.run_job_flow(
            Name=cluster_name,
            LogUri=f"s3://{cloudtrail_bucket}/{team}/{team}-{pipeline}-emr-sqoop-logs-x/",
            ReleaseLabel=emr_release,
            VisibleToAllUsers=True,
            JobFlowRole=emr_ec2_role,
            ServiceRole=emr_role,
            Instances={
                'InstanceGroups': build_instace_groups(),
                # The cluster stays up after its steps finish and is not
                # termination-protected.
                'KeepJobFlowAliveWhenNoSteps': True,
                'TerminationProtected': False,
                'Ec2SubnetId': subnet,
            },
            BootstrapActions=[
                {
                    'Name': 'Bootstrap scripts',
                    'ScriptBootstrapAction': {
                        'Path': f'{bootstrap_prefix}/install_libs.sh',
                        'Args': [artifacts_bucket, 'emr-sqoop-bootstrap'],
                    },
                },
                {
                    'Name': 'Install Terminate Cluster Action',
                    'ScriptBootstrapAction': {
                        'Path': f'{bootstrap_prefix}/manage_emr_sqoop_shutdown_install.sh',
                    },
                },
            ],
            Applications=[
                {'Name': 'Hadoop'},
                {'Name': 'Hive'},
                {'Name': 'Sqoop'},
            ],
            Configurations=[
                {
                    "Classification": "hive-site",
                    "Properties": {
                        "hive.metastore.client.factory.class":
                            "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                    },
                    "Configurations": [],
                },
                {
                    "Classification": "presto-connector-hive",
                    "Properties": {
                        "hive.metastore.glue.datacatalog.enabled": "true"
                    },
                    "Configurations": [],
                },
            ],
            Tags=[
                {'Key': 'Role', 'Value': 'EMR Data Lake'},
                {'Key': 'Environment', 'Value': env},
            ],
            SecurityConfiguration=f'sdlf-{team}-emr-security-config',
            StepConcurrencyLevel=3,
        )
    except Exception:
        # Top-level boundary: record the traceback, then let the original
        # exception propagate to the caller.
        logger.exception("Fatal error")
        raise
    return response['JobFlowId']