in cdk/lib/emr-infrastructure.ts [14:77]
/**
 * Provisions an EMR cluster together with the security group, IAM role and
 * instance profile it uses, and emits two CloudFormation outputs containing
 * shell commands for working with the cluster.
 *
 * @param scope - Parent construct.
 * @param id - Logical id of this construct within `scope`.
 * @param props - Supplies the VPC (`props.vpc`) and the EC2 key pair name
 *   (`props.keyName`) used for the cluster instances.
 * @throws Error if `props.vpc` exposes no public subnet to launch the cluster into.
 */
constructor(scope: cdk.Construct, id: string, props: EmrProps) {
  super(scope, id);

  // The cluster is placed in the first public subnet of the VPC. Fail with a
  // descriptive message instead of an opaque TypeError when there is none.
  const publicSubnet = props.vpc.publicSubnets[0];
  if (!publicSubnet) {
    throw new Error('The provided VPC has no public subnets; the EMR cluster requires one.');
  }

  const sg = new ec2.SecurityGroup(this, 'SecurityGroup', {
    vpc: props.vpc
  });

  // NOTE(review): this allows inbound SSH (TCP 22) from any IPv4 address.
  // Consider restricting the peer to a known CIDR range.
  sg.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(22));

  // Role assumed by the cluster's EC2 instances, carrying the AWS managed
  // EMR-for-EC2 policy.
  const role = new iam.Role(this, 'ReplayRole', {
    assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
    managedPolicies: [
      iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonElasticMapReduceforEC2Role')
    ]
  });

  // Instance profile wrapping the role above; referenced below as the
  // cluster's jobFlowRole.
  const profile = new iam.CfnInstanceProfile(this, 'InstanceProfile', {
    roles: [
      role.roleName
    ]
  });

  const cluster = new emr.CfnCluster(this, 'EmrCluster', {
    name: 'Beam',
    applications: [
      { name: 'Hadoop' },
      { name: 'Ganglia' },
      { name: 'Flink' },
      { name: 'ZooKeeper' }
    ],
    instances: {
      masterInstanceGroup: {
        instanceCount: 1,
        instanceType: 'c5.xlarge',
        name: 'Master'
      },
      coreInstanceGroup: {
        instanceCount: 2,
        instanceType: 'r5.xlarge',
        name: 'Core'
      },
      ec2KeyName: props.keyName,
      // CloudFormation expects security group IDs here, so pass the group id
      // explicitly rather than the group name.
      additionalMasterSecurityGroups: [
        sg.securityGroupId
      ],
      ec2SubnetId: publicSubnet.subnetId
    },
    // NOTE(review): this role is referenced by name and is not created in this
    // constructor; presumably it must already exist in the account - confirm.
    serviceRole: 'EMR_DefaultRole',
    releaseLabel: 'emr-6.3.0',
    visibleToAllUsers: true,
    jobFlowRole: profile.ref,
    configurations: [
      {
        classification: 'emrfs-site',
        configurationProperties: {
          "fs.s3.maxConnections": "1000"
        }
      }
    ]
  });

  // Ready-to-copy shell commands: an SSH command targeting the master node's
  // public DNS name, and a command for starting a Flink YARN session.
  new cdk.CfnOutput(this, 'SshEmrCluster', { value: `ssh -C -D 8157 hadoop@${cluster.attrMasterPublicDns}` });
  new cdk.CfnOutput(this, 'StartFlinkRuntime', { value: 'flink-yarn-session -n 2 -s 4 -tm 16GB -d' });
}