constructor()

in source/lib/cdk-solution-stack.ts [126:400]


  /**
   * Builds the "Amazon EMR with JuiceFS" solution stack.
   *
   * The constructor declares the CloudFormation parameters (cluster sizing and
   * JuiceFS client settings), then creates a log bucket, a single-AZ VPC with
   * one public and one private subnet, the three EMR security groups, the EC2
   * instance role/profile, and finally the EMR cluster in the private subnet.
   * Bootstrap actions download and run the JuiceFS EMR boot script on every
   * node and copy the benchmark sample archive to the master node.
   *
   * The `BUCKET_NAME`, `SOLUTION_NAME` and `VERSION` environment variables are
   * passed to `assertenv` and then read from `process.env`; they are used in
   * the template description and in the S3 path of the benchmark archive.
   *
   * @param scope - Parent construct.
   * @param id - Construct id of this stack.
   * @param props - Optional stack properties, forwarded to the base class.
   */
  constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // presumably throws when one of these variables is unset — confirm in assertenv
    assertenv('BUCKET_NAME', 'SOLUTION_NAME', 'VERSION');

    const cfnParam = (id_: string, props_?: CfnParameterProps): CfnParameter => new CfnParameter(this, id_, props_);
    const { BUCKET_NAME, SOLUTION_NAME, VERSION } = process.env;

    // ---------------------------------------------------------------------
    // CloudFormation parameters
    // ---------------------------------------------------------------------
    const juiceFSAccessTokenParam = cfnParam('JuiceFSAccessToken', {
      type: 'String',
      description: 'The access token of JuiceFS volume',
      noEcho: true,
      minLength: 30,
    });
    const juiceFSVolumeNameParam = cfnParam('JuiceFSVolumeName', {
      type: 'String',
      description: 'The volume name of JuiceFS',
      minLength: 1,
    });
    const juiceFSCacheDirParam = cfnParam('JuiceFSCacheDir', {
      type: 'String',
      description: 'The cache directory of JuiceFS Java Client, you could specify more than one folder, seperate by colon, or use wildcards *',
      minLength: 3,
      default: '/mnt*/jfs',
    });
    const juiceFSCacheSizeParam = cfnParam('JuiceFSCacheSize', {
      type: 'Number',
      description: 'Cache capacity (unit MB), if multiple diectories are configured, this is total capacity for all cache folders',
      minValue: 0,
      default: '10240',
    });
    const juiceFSCacheFullBlock = cfnParam('JuiceFSCacheFullBlock', {
      type: 'String',
      description: 'Whether to cache sequential read data, set to false when disk space is limited or disk performance is low',
      allowedValues: ['true', 'false'],
      default: 'false',
    });
    const clusterNameParam = cfnParam('EMRClusterName', {
      type: 'String',
      description: 'The cluster name',
      minLength: 5,
      default: 'EMRwithJuiceFS',
    });
    const masterInstanceTypeParam = cfnParam('MasterInstanceType', {
      type: 'String',
      description: 'Instance type to be used for the master instance',
      allowedValues: utils.SupportedInstanceTypes,
      default: 'm5.xlarge',
    });
    const coreInstanceTypeParam = cfnParam('CoreInstanceType', {
      type: 'String',
      description: 'Instance type to be used for the core instances',
      allowedValues: utils.SupportedInstanceTypes,
      default: 'm5.xlarge',
    });
    const numberOfCoreInstancesParam = cfnParam('NumberOfCoreInstances', {
      type: 'Number',
      description: 'Number of core instances',
      // Reject zero and negative counts when the parameter is validated,
      // rather than letting the stack fail later while creating the cluster.
      minValue: 1,
      default: 3,
    });
    // Disabled: optional switch for the EMRFS consistent view.
    // const enableS3ConsistentView = cfnParam('EnableS3ConsistentView', {
    //   type: 'String',
    //   description: 'Whether to enable s3 consistent view',
    //   allowedValues: ['true', 'false'],
    //   default: 'false',
    // });

    // ---------------------------------------------------------------------
    // Template description and console parameter grouping
    // ---------------------------------------------------------------------
    this.templateOptions.description = `(SO8008) - Amazon EMR with JuiceFS version ${VERSION}`;
    this.templateOptions.metadata = {
      'AWS::CloudFormation::Interface': {
        ParameterGroups: [
          paramGroup('Cluster Settings', [
            clusterNameParam,
            masterInstanceTypeParam,
            coreInstanceTypeParam,
            numberOfCoreInstancesParam,
          ]),
          paramGroup('JuiceFS Configurations', [
            juiceFSAccessTokenParam,
            juiceFSVolumeNameParam,
            juiceFSCacheDirParam,
            juiceFSCacheSizeParam,
            juiceFSCacheFullBlock,
          ]),
        ],
      },
    };

    // Name of the bucket the instance role is granted access to below; it is
    // derived from the volume name and is not created by this stack.
    const bucketName = `juicefs-${juiceFSVolumeNameParam.valueAsString}`;
    const logBucket = new s3.Bucket(this, 'LogBucket');

    // ---------------------------------------------------------------------
    // Network: one AZ, one public + one private /24, one NAT gateway and an
    // S3 gateway endpoint.
    // ---------------------------------------------------------------------
    const vpc = new ec2.Vpc(this, 'Vpc', {
      cidr: '10.0.0.0/21',
      natGateways: 1,
      maxAzs: 1,
      subnetConfiguration: [
        {
          subnetType: ec2.SubnetType.PUBLIC,
          name: 'Ingress',
          cidrMask: 24,
        },
        {
          subnetType: ec2.SubnetType.PRIVATE,
          name: 'Application',
          cidrMask: 24,
        },
      ],
      gatewayEndpoints: {
        S3: { service: ec2.GatewayVpcEndpointAwsService.S3 },
      },
    });

    const pubSubnet = vpc.publicSubnets[0];
    const privSubnet = vpc.privateSubnets[0];
    // findChild throws if the public subnet has no 'NATGateway' child.
    const natgw = pubSubnet.node.findChild('NATGateway') as CfnNatGateway;

    // ---------------------------------------------------------------------
    // Bootstrap scripts (passed verbatim to `bash -c` by the run-if action)
    // ---------------------------------------------------------------------
    // Master: run the JuiceFS EMR boot script, install the juicefs binary and
    // export JFS_VOL / AWS_DEFAULT_REGION through /etc/profile.d/jfs.sh.
    const masterBootScript = `#!/bin/bash -xe
      exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
      set -xe
      curl -fsSL https://s.juicefs.com/static/emr-boot.sh -o /tmp/emr-boot.sh && bash /tmp/emr-boot.sh --cache-dir "${juiceFSCacheDirParam.valueAsString}"
      curl -fsSL https://juicefs.com/static/juicefs -o /usr/bin/juicefs && chmod +x /usr/bin/juicefs
      echo "export JFS_VOL=${juiceFSVolumeNameParam.valueAsString}" > /etc/profile.d/jfs.sh
      echo "export AWS_DEFAULT_REGION=${Aws.REGION}" >> /etc/profile.d/jfs.sh
    `;
    // Non-master nodes: only run the JuiceFS EMR boot script.
    const coreBootScript = `#!/bin/bash -xe
      exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
      set -xe
      curl -fsSL https://s.juicefs.com/static/emr-boot.sh -o /tmp/emr-boot.sh && bash /tmp/emr-boot.sh --cache-dir "${juiceFSCacheDirParam.valueAsString}"
    `;

    // Every bootstrap action goes through EMR's regional `run-if` script,
    // which receives a condition followed by the command to execute.
    const runIfScriptPath = `s3://${Aws.REGION}.elasticmapreduce/bootstrap-actions/run-if`;
    const runIf = (name: string, condition: string, ...command: string[]) => ({
      name,
      scriptBootstrapAction: {
        path: runIfScriptPath,
        args: [condition, ...command],
      },
    });

    // ---------------------------------------------------------------------
    // Security groups
    // https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html#emr-sg-elasticmapreduce-master-private
    // ---------------------------------------------------------------------
    const securityGroupOfMaster = new ec2.SecurityGroup(this, 'SG-EMR-Master', { vpc });
    const securityGroupOfSlave = new ec2.SecurityGroup(this, 'SG-EMR-Slave', { vpc });
    // `allowAllOutbound` defaults to true, and CDK then ignores every
    // addEgressRule() call on the group. It is switched off here so that the
    // two port-8443 egress rules added below are actually emitted and are the
    // only outbound traffic this group allows.
    const securityGroupOfServiceAccess = new ec2.SecurityGroup(this, 'SG-EMR-ServiceAccess', {
      vpc,
      allowAllOutbound: false,
    });

    // Master and slave groups get the same ingress set: TCP 8443 from the
    // service-access group, plus all TCP, UDP and ICMP from both cluster groups.
    const allowClusterIngress = (target: ec2.SecurityGroup): void => {
      target.addIngressRule(securityGroupOfServiceAccess, ec2.Port.tcp(8443));
      const intraClusterPorts = [
        ec2.Port.tcpRange(0, 65535),
        ec2.Port.udpRange(0, 65535),
        ec2.Port.allIcmp(),
      ];
      for (const port of intraClusterPorts) {
        target.addIngressRule(securityGroupOfMaster, port);
        target.addIngressRule(securityGroupOfSlave, port);
      }
    };
    allowClusterIngress(securityGroupOfMaster);
    allowClusterIngress(securityGroupOfSlave);

    securityGroupOfServiceAccess.addIngressRule(securityGroupOfMaster, ec2.Port.tcp(9443));
    securityGroupOfServiceAccess.addEgressRule(securityGroupOfMaster, ec2.Port.tcp(8443));
    securityGroupOfServiceAccess.addEgressRule(securityGroupOfSlave, ec2.Port.tcp(8443));

    // ---------------------------------------------------------------------
    // EC2 instance role and profile used by the cluster nodes
    // ---------------------------------------------------------------------
    const instanceRole = new iam.Role(this, 'EMRClusterinstanceProfileRole', {
      assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonElasticMapReduceforEC2Role'),
        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'),
      ],
      // This inline policy may be redundant, because
      // `service-role/AmazonElasticMapReduceforEC2Role` already grants the S3
      // permissions; it is kept as an extension point.
      inlinePolicies: {
        JuiceFSS3Access: new iam.PolicyDocument({
          statements: [
            new iam.PolicyStatement({
              effect: iam.Effect.ALLOW,
              actions: [
                's3:PutObject',
                's3:GetObject',
                's3:DeleteObject',
                's3:ListBucket',
              ],
              resources: [
                `arn:${Aws.PARTITION}:s3:::${bucketName}/*`,
                `arn:${Aws.PARTITION}:s3:::${bucketName}`,
              ],
            }),
          ],
        }),
      },
    });
    const instanceProfile = new iam.CfnInstanceProfile(this, 'EMRClusterinstanceProfile', {
      roles: [instanceRole.roleName],
    });

    // ---------------------------------------------------------------------
    // EMR cluster
    // ---------------------------------------------------------------------
    const cluster = new EMRCluster(this, 'EMRwithJuiceFS', {
      name: clusterNameParam.valueAsString,
      logUri: `s3://${logBucket.bucketName}`,
      visibleToAllUsers: true,
      ec2SubnetId: privSubnet.subnetId,
      masterInstanceGroup: {
        name: 'Master',
        instanceCount: 1,
        instanceType: masterInstanceTypeParam.valueAsString,
        market: 'ON_DEMAND',
      },
      coreInstanceGroup: {
        name: 'Core',
        instanceCount: numberOfCoreInstancesParam.valueAsNumber,
        instanceType: coreInstanceTypeParam.valueAsString,
        market: 'ON_DEMAND',
      },

      emrManagedMasterSecurityGroup: securityGroupOfMaster.securityGroupId,
      emrManagedSlaveSecurityGroup: securityGroupOfSlave.securityGroupId,
      serviceAccessSecurityGroup: securityGroupOfServiceAccess.securityGroupId,

      applications: [{ name: 'Hive' }, { name: 'Spark' }, { name: 'Tez' }],
      bootstrapActions: [
        runIf('BootJuiceFSOnMasterNodes', 'instance.isMaster=true', 'sudo', 'bash', '-c', masterBootScript),
        runIf('BootJuiceFSOnCoreNodes', 'instance.isMaster=false', 'sudo', 'bash', '-c', coreBootScript),
        runIf(
          'InstallTPC-DSBenchmarkAssets',
          'instance.isMaster=true',
          'aws', 's3', 'cp', `s3://${BUCKET_NAME}/${SOLUTION_NAME}/${VERSION}/benchmark-sample.zip`, '/home/hadoop',
        ),
      ],
      configurations: [
        {
          classification: 'core-site',
          configurationProperties: {
            'fs.jfs.impl': 'com.juicefs.JuiceFileSystem',
            'fs.AbstractFileSystem.jfs.impl': 'com.juicefs.JuiceFS',
            'juicefs.free-space': '0.3',
            'juicefs.cache-size': juiceFSCacheSizeParam.valueAsString,
            'juicefs.cache-dir': juiceFSCacheDirParam.valueAsString,
            'juicefs.cache-group': 'yarn',
            'juicefs.cache-full-block': juiceFSCacheFullBlock.valueAsString,
            'juicefs.discover-nodes-url': 'yarn',
            // NOTE(review): the NoEcho access token is copied into the
            // cluster configuration here; confirm whether that exposes it
            // through the EMR console/API or on the nodes.
            'juicefs.token': juiceFSAccessTokenParam.valueAsString,
            'juicefs.access-log': '/tmp/juicefs.access.log',
          },
          configurations: [],
        },
        // Disabled: EMRFS consistent-view settings (see the disabled
        // EnableS3ConsistentView parameter).
        // {
        //   classification: 'emrfs-site',
        //   configurationProperties: {
        //     'fs.s3.consistent': enableS3ConsistentView.valueAsString,
        //     'fs.s3.consistent.metadata.read.capacity': '600',
        //     'fs.s3.consistent.metadata.write.capacity': '300',
        //   },
        // },
      ],
      jobFlowRoleProfile: instanceProfile,
    });

    // Create the cluster only after the NAT gateway exists; presumably so the
    // bootstrap scripts, which curl from the internet out of the private
    // subnet, have outbound connectivity.
    cluster.node.addDependency(natgw);

    new CfnOutput(this, 'ClusterID', { value: cluster.clusterId });
    new CfnOutput(this, 'LogBucketName', { value: logBucket.bucketName });
  }