def lambda_handler(event, context)

in lib/lambda-handler/hello_emr.py

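The handler references several module-level names (emr, dest_bucket_name,
job_file_path, get_upload_file_path) that are defined elsewhere in
hello_emr.py and not shown in this excerpt. Below is a minimal sketch of that
setup, assuming the destination bucket and the Spark job JAR location come
from environment variables; the variable names and the S3-trigger helper are
illustrative, not the file's actual code:

import json
import os
import time

import boto3

emr = boto3.client('emr')

# Assumed configuration via environment variables (names are illustrative).
dest_bucket_name = os.environ['DEST_BUCKET_NAME']
job_file_path = os.environ['JOB_FILE_PATH']  # s3:// URI of the Spark job JAR

def get_upload_file_path(event):
    # Assumed helper: reconstruct the s3:// path of the object that
    # triggered the invocation from a standard S3 event notification.
    record = event['Records'][0]['s3']
    return f"s3://{record['bucket']['name']}/{record['object']['key']}"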

def lambda_handler(event, context):
    # Log the incoming event for debugging in CloudWatch.
    print(event)

    # Source object that triggered the invocation, plus a unique,
    # timestamped output prefix in the destination bucket.
    input_file_path = get_upload_file_path(event)
    dest_file_path = f's3://{dest_bucket_name}/{int(time.time())}'

    # Launch a transient EMR cluster; release emr-5.23.0 bundles the
    # requested Hadoop, Hive, and Spark applications.
    response = emr.run_job_flow(
        Name="SampleCluster",
        ReleaseLabel='emr-5.23.0',
        Applications=[
            {'Name': 'Hadoop'},
            {'Name': 'Hive'},
            {'Name': 'Spark'},
        ],
        Instances={
            'InstanceGroups': [
                {
                    # Single on-demand master node.
                    'Name': 'Master nodes',
                    'Market': 'ON_DEMAND',
                    'InstanceRole': 'MASTER',
                    'InstanceType': 'r3.xlarge',
                    'InstanceCount': 1,
                },
                {
                    # Four on-demand core nodes, each with one 100 GB gp2 volume.
                    'Name': 'Core nodes',
                    'Market': 'ON_DEMAND',
                    'InstanceRole': 'CORE',
                    'InstanceType': 'r3.xlarge',
                    'InstanceCount': 4,
                    'EbsConfiguration': {
                        'EbsBlockDeviceConfigs': [
                            {
                                'VolumeSpecification': {
                                    'VolumeType': 'gp2',
                                    'SizeInGB': 100
                                },
                                'VolumesPerInstance': 1
                            },
                        ],
                        'EbsOptimized': False
                    },
                },
            ],
            # Terminate the cluster automatically once all steps complete.
            'KeepJobFlowAliveWhenNoSteps': False,
            'TerminationProtected': False,
            # Uncomment to pin the cluster to a specific subnet and security groups:
            #'Ec2SubnetId': ec2SubnetId,
            #'EmrManagedMasterSecurityGroup': master_sg,
            #'EmrManagedSlaveSecurityGroup': slave_sg,
            #'ServiceAccessSecurityGroup': service_sg,
        },
        Steps=[
            {
                # Submit the Spark job via command-runner.jar; the step gets
                # the input object and the output prefix as arguments.
                'Name': 'Spark application',
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': [
                        'spark-submit',
                        '--class', 'com.aws.sample.SparkSimpleJob',
                        job_file_path,
                        input_file_path,
                        dest_file_path,
                    ],
                },
            },
        ],
        VisibleToAllUsers=True,
        # Default EMR roles; they must already exist in the account.
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole',
    )

    # print(response)

    # Surface the run_job_flow response, including the JobFlowId, to the caller.
    return {
        'statusCode': 200,
        'body': json.dumps(response)
    }