private getDataExportJob()

in source/cdk-infrastructure/lib/back-end/data-analysis/data-analysis-construct.ts [370:444]


  /**
   * Creates the Glue ETL job named 'amazon-virtual-andon-etl-data-export' along with
   * the IAM role the job runs as.
   *
   * The role is assumable by the Glue service, carries the AWS managed
   * 'service-role/AWSGlueServiceRole' policy and two inline policies:
   *  - 'DDBPolicy': Scan/DescribeTable on the issues and data hierarchy DynamoDB tables.
   *  - 'S3Policy': read access to the job script object, object-level and bucket-level
   *    access to the Glue output bucket, and s3:ListAllMyBuckets on '*'.
   *
   * The role's CloudFormation resource is gated on `glueWorkflowCondition`. This method
   * does not attach a condition to the job itself.
   *
   * @returns The Glue job resource configured to run the export script.
   */
  private getDataExportJob(): GlueJob {
    const scriptFileName = 'etl-data-export.py';
    // The script key is needed twice: once for the read permission, once for the job's script location.
    const scriptObjectKey = `${this.sourceCodeKeyPrefix}/glue-job-scripts/${scriptFileName}`;

    // DynamoDB: the job scans both source tables.
    const readSourceTables = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ['dynamodb:Scan', 'dynamodb:DescribeTable'],
      resources: [this.ddbIssuesTable.tableArn, this.ddbDataHierarchyTable.tableArn]
    });

    // S3: read only the single script object from the source code bucket.
    const readJobScript = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ['s3:GetObject'],
      resources: [this.sourceCodeBucket.arnForObjects(scriptObjectKey)]
    });

    // S3: object-level access to everything in the Glue output bucket.
    const manageOutputObjects = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ['s3:PutObject', 's3:DeleteObject', 's3:GetObject'],
      resources: [this.glueOutputBucket.arnForObjects('*')]
    });

    // S3: bucket-level access to the Glue output bucket.
    const accessOutputBucket = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ['s3:GetBucketLocation', 's3:ListBucket', 's3:GetBucketAcl', 's3:CreateBucket'],
      resources: [this.glueOutputBucket.bucketArn]
    });

    // S3: listing buckets cannot be scoped to a resource, hence '*' (suppressed as W11 below).
    const listAllBuckets = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ['s3:ListAllMyBuckets'],
      resources: ['*']
    });

    const exportJobRole = new Role(this, 'AvaEtlDataExportJobRole', {
      assumedBy: new ServicePrincipal('glue.amazonaws.com'),
      managedPolicies: [ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')],
      inlinePolicies: {
        'DDBPolicy': new PolicyDocument({ statements: [readSourceTables] }),
        'S3Policy': new PolicyDocument({
          statements: [readJobScript, manageOutputObjects, accessOutputBucket, listAllBuckets]
        })
      }
    });

    // Only create the role when the Glue workflow condition holds.
    const roleResource = exportJobRole.node.defaultChild as CfnResource;
    roleResource.cfnOptions.condition = this.glueWorkflowCondition;
    addCfnSuppressRules(exportJobRole, [{ id: 'W11', reason: '* is required for the s3:ListAllMyBuckets permission' }]);

    // Arguments handed to the script at run time; key order is kept as declared.
    const jobArguments = {
      '--job-language': 'python',
      '--job-bookmark-option': 'job-bookmark-enable',
      '--enable-metrics': '',
      '--ddb_issues_table_name': this.ddbIssuesTable.tableName,
      '--ddb_data_hierarchy_table_name': this.ddbDataHierarchyTable.tableName,
      '--glue_issues_table_name': this.glueIssuesTableName,
      '--glue_data_hierarchy_table_name': this.glueDataHierarchyTableName,
      '--glue_db_name': this.glueDatabaseName,
      '--glue_output_bucket': this.glueOutputBucket.bucketName,
      '--region': Aws.REGION,
      '--solution_id': this.solutionId,
      '--solution_version': this.solutionVersion
    };

    return new GlueJob(this, 'AvaEtlDataExportJob', {
      role: exportJobRole.roleArn,
      name: 'amazon-virtual-andon-etl-data-export',
      command: {
        name: 'glueetl',
        pythonVersion: '3',
        scriptLocation: `s3://${this.sourceCodeBucket.bucketName}/${scriptObjectKey}`
      },
      executionProperty: { maxConcurrentRuns: 2 },
      timeout: 60,  // 1 hour
      glueVersion: '2.0',
      numberOfWorkers: 2,
      workerType: 'Standard',
      defaultArguments: jobArguments
    });
  }