# airflow_dag_for_execution/simple_dag.py
def create_cluster(region_name='us-west-2', subnet_id='subnet-123456'):
    """Launch a single-master EMR cluster and return its cluster (job-flow) id.

    The cluster runs Spark, Livy and JupyterEnterpriseGateway on one
    on-demand m5.xlarge master node, using the default EMR service roles.

    Args:
        region_name: AWS region in which to launch the cluster
            (e.g. ``'us-west-2'``).
        subnet_id: EC2 subnet id the instances are placed in
            (e.g. ``'subnet-123456'``).

    Returns:
        str: the ``JobFlowId`` of the newly created cluster.
    """
    # NOTE(review): assumes `boto3` is imported at module level and AWS
    # credentials are available in the environment — confirm in the full file.
    emr = boto3.client('emr', region_name=region_name)
    cluster = emr.run_job_flow(
        Name='Demo-Cluster',
        ReleaseLabel='emr-6.2.0',
        Applications=[
            {'Name': 'Spark'},
            {'Name': 'Livy'},
            {'Name': 'JupyterEnterpriseGateway'},
        ],
        VisibleToAllUsers=True,
        Instances={
            'InstanceGroups': [
                {
                    'Name': "Master nodes",
                    'Market': 'ON_DEMAND',
                    'InstanceRole': 'MASTER',
                    'InstanceType': 'm5.xlarge',
                    'InstanceCount': 1,
                },
            ],
            # Keep the cluster running after any steps complete so
            # interactive clients (Livy / Jupyter) can attach to it.
            'KeepJobFlowAliveWhenNoSteps': True,
            'TerminationProtected': False,
            'Ec2SubnetId': subnet_id,
        },
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole',
    )
    cluster_id = cluster['JobFlowId']
    print("Created cluster: " + cluster_id)
    return cluster_id