constructor()

in source/cdk-infrastructure/lib/back-end/data-analysis/data-analysis-construct.ts [42:309]


  /**
   * Defines the optional data-analysis pipeline: a CloudFormation parameter and
   * condition, the S3 bucket that receives the Glue output, the Glue database and
   * tables describing that output, a crawler over the two DynamoDB tables, and a
   * Glue workflow whose triggers chain cleanup job -> crawler -> export jobs.
   *
   * Each resource created directly in this constructor is attached to
   * `GlueWorkflowCondition`, so it is only provisioned when the
   * `StartGlueWorkflow` parameter is set to `Yes`.
   *
   * @param scope Parent construct.
   * @param id Construct ID.
   * @param props Solution metadata, source code location, logging bucket and the
   * DynamoDB tables to export.
   */
  constructor(scope: Construct, id: string, props: DataAnalysisProps) {
    super(scope, id);

    this.solutionId = props.solutionId;
    this.solutionVersion = props.solutionVersion;
    this.sourceCodeBucket = Bucket.fromBucketName(this, 'sourceCodeBucket', props.sourceCodeBucketName);
    this.sourceCodeKeyPrefix = props.sourceCodeKeyPrefix;
    this.ddbIssuesTable = props.issuesTable;
    this.ddbDataHierarchyTable = props.dataHierarchyTable;

    // Opt-in switch exposed to whoever deploys the stack; defaults to 'No'.
    this.startGlueWorkflow = new CfnParameter(this, 'StartGlueWorkflow', {
      type: 'String',
      description: 'Do you want to perform the Glue Workflow that will extract Amazon Virtual Andon\'s DynamoDB data to S3 for analysis with Athena? If set to \'Yes\', the process will run every Monday at 1am UTC by default',
      allowedValues: ['Yes', 'No'],
      default: 'No'
    });
    this.startGlueWorkflow.overrideLogicalId('StartGlueWorkflow');

    // True only when the parameter above is 'Yes'; applied to every resource below.
    this.glueWorkflowCondition = new CfnCondition(this, 'GlueWorkflowCondition', {
      expression: Fn.conditionEquals(this.startGlueWorkflow.valueAsString, 'Yes')
    });

    // Destination bucket for the exported data. Retained on stack deletion,
    // S3-managed encryption, access logs written to props.logsBucket, and all
    // four public-access blocks enabled.
    this.glueOutputBucket = new Bucket(this, 'AvaGlueOutputBucket', {
      removalPolicy: RemovalPolicy.RETAIN,
      encryption: BucketEncryption.S3_MANAGED,
      serverAccessLogsBucket: props.logsBucket,
      serverAccessLogsPrefix: 'glue-output/',
      blockPublicAccess: {
        blockPublicAcls: true,
        blockPublicPolicy: true,
        ignorePublicAcls: true,
        restrictPublicBuckets: true
      }
    });

    (this.glueOutputBucket.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;
    (this.glueOutputBucket.node.defaultChild as CfnResource).overrideLogicalId('AvaGlueOutputBucket');

    // Deny every S3 action on the bucket and its objects when the request is not
    // made over a secure transport.
    this.glueOutputBucket.addToResourcePolicy(new PolicyStatement({
      effect: Effect.DENY,
      actions: ['s3:*'],
      resources: [this.glueOutputBucket.bucketArn, this.glueOutputBucket.arnForObjects('*')],
      conditions: {
        'Bool': { 'aws:SecureTransport': 'False' }
      },
      principals: [new AnyPrincipal()]
    }));

    // NOTE(review): the non-null assertion relies on addToResourcePolicy() above
    // having created the bucket policy - confirm if the bucket definition changes.
    (this.glueOutputBucket.policy!.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;

    const glueDatabase = new GlueDatabase(this, 'AvaGlueDatabase', { databaseName: 'amazon-virtual-andon-glue-database' });
    (glueDatabase.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;
    (glueDatabase.node.defaultChild as CfnResource).overrideLogicalId('AvaGlueDatabase');
    this.glueDatabaseName = glueDatabase.databaseName;

    // External Glue table over the exported Issues data, located under
    // <output bucket>/<output prefix>/issues and classified as parquet.
    const glueIssuesTable = new GlueTable(this, 'AvaGlueIssuesTable', {
      catalogId: Aws.ACCOUNT_ID,
      databaseName: glueDatabase.databaseName,
      tableInput: {
        storageDescriptor: {
          columns: [
            { name: 'eventid', type: 'string' },
            { name: 'acknowledged', type: 'string' },
            { name: 'created', type: 'string' },
            { name: 'sitename', type: 'string' },
            { name: 'issuesource', type: 'string' },
            { name: 'priority', type: 'string' },
            { name: 'areaname#status#processname#eventdescription#stationname#devicename#created', type: 'string' },
            { name: 'version', type: 'bigint' },
            { name: 'devicename', type: 'string' },
            { name: 'devicename#eventid', type: 'string' },
            { name: 'createdat', type: 'string' },
            { name: 'areaname', type: 'string' },
            { name: 'processname', type: 'string' },
            { name: 'createddateutc', type: 'date' },
            { name: 'eventdescription', type: 'string' },
            { name: 'areaname#status#processname#stationname#devicename#created', type: 'string' },
            { name: 'stationname', type: 'string' },
            { name: 'id', type: 'string' },
            { name: 'acknowledgedtime', type: 'bigint' },
            { name: 'status', type: 'string' },
            { name: 'updatedat', type: 'string' },
            { name: 'closed', type: 'string' },
            { name: 'resolutiontime', type: 'bigint' },
            { name: 'createdby', type: 'string' },
            { name: 'acknowledgedby', type: 'string' },
            { name: 'closedby', type: 'string' },
            { name: 'rejectedby', type: 'string' },
            { name: 'additionaldetails', type: 'string' }
          ],
          location: `s3://${this.glueOutputBucket.bucketName}/${this.glueOutputObjectPrefix}/issues`,
          inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
          outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
          compressed: false,
          numberOfBuckets: -1,
          serdeInfo: {
            serializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe',
            parameters: { 'serialization.format': '1' }
          },
          storedAsSubDirectories: false
        },
        tableType: 'EXTERNAL_TABLE',
        parameters: {
          EXTERNAL: 'TRUE',
          has_encrypted_data: 'false',
          classification: 'parquet'
        }
      }
    });
    this.glueIssuesTableName = glueIssuesTable.ref;
    glueIssuesTable.overrideLogicalId('AvaGlueIssuesTable');
    glueIssuesTable.cfnOptions.condition = this.glueWorkflowCondition;

    // External Glue table over the exported Data Hierarchy data, located under
    // <output bucket>/<output prefix>/data-hierarchy and classified as parquet.
    const glueDataHierarchyTable = new GlueTable(this, 'AvaGlueDataHierarchyTable', {
      catalogId: Aws.ACCOUNT_ID,
      databaseName: glueDatabase.databaseName,
      tableInput: {
        storageDescriptor: {
          columns: [
            { name: 'protocol', type: 'string' },
            { name: 'endpoint', type: 'string' },
            { name: 'filterpolicy', type: 'string' },
            { name: 'id', type: 'string' },
            { name: 'type', type: 'string' },
            { name: 'subscriptionarn', type: 'string' },
            { name: 'stationareaid', type: 'string' },
            { name: 'createdat', type: 'string' },
            { name: 'name', type: 'string' },
            { name: 'description', type: 'string' },
            { name: 'version', type: 'bigint' },
            { name: 'parentid', type: 'string' },
            { name: 'updatedat', type: 'string' },
            { name: 'processareaid', type: 'string' },
            { name: 'eventprocessid', type: 'string' },
            { name: 'eventtype', type: 'string' },
            { name: 'priority', type: 'string' },
            { name: 'rootcauses', type: 'string' },
            { name: 'sms', type: 'string' },
            { name: 'eventimgkey', type: 'string' },
            { name: 'email', type: 'string' },
            { name: 'devicestationid', type: 'string' },
            { name: 'areasiteid', type: 'string' },
            { name: 'alias', type: 'string' }
          ],
          location: `s3://${this.glueOutputBucket.bucketName}/${this.glueOutputObjectPrefix}/data-hierarchy`,
          inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
          outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
          compressed: false,
          numberOfBuckets: -1,
          serdeInfo: {
            serializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe',
            parameters: { 'serialization.format': '1' }
          },
          storedAsSubDirectories: false
        },
        tableType: 'EXTERNAL_TABLE',
        parameters: {
          EXTERNAL: 'TRUE',
          has_encrypted_data: 'false',
          classification: 'parquet'
        }
      }
    });
    this.glueDataHierarchyTableName = glueDataHierarchyTable.ref;
    glueDataHierarchyTable.overrideLogicalId('AvaGlueDataHierarchyTable');
    glueDataHierarchyTable.cfnOptions.condition = this.glueWorkflowCondition;

    // Role assumed by Glue for the crawler: the AWS-managed Glue service role plus
    // Scan/DescribeTable on the two DynamoDB tables. The trailing '*' widens each
    // resource to any ARN that starts with the table ARN.
    const crawlerRole = new Role(this, 'CrawlerRole', {
      assumedBy: new ServicePrincipal('glue.amazonaws.com'),
      managedPolicies: [ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')],
      inlinePolicies: {
        'DDBPolicy': new PolicyDocument({
          statements: [
            new PolicyStatement({
              effect: Effect.ALLOW,
              actions: ['dynamodb:Scan', 'dynamodb:DescribeTable'],
              resources: [`${props.issuesTable.tableArn}*`, `${props.dataHierarchyTable.tableArn}*`]
            })
          ]
        })
      }
    });
    (crawlerRole.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;

    // Crawler targeting both DynamoDB tables, writing into the Glue database above.
    const crawler = new GlueCrawler(this, 'Crawler', {
      name: 'amazon-virtual-andon-crawler',
      role: crawlerRole.roleArn,
      databaseName: glueDatabase.databaseName,
      targets: {
        dynamoDbTargets: [
          { path: props.issuesTable.tableName },
          { path: props.dataHierarchyTable.tableName }
        ]
      }
    });
    crawler.cfnOptions.condition = this.glueWorkflowCondition;

    const cleanupJob = this.getCleanupJob();
    cleanupJob.cfnOptions.condition = this.glueWorkflowCondition;

    const workflow = new GlueWorkflow(this, 'AvaEtlWorkflow', {
      description: `Workflow for ${Aws.STACK_NAME} CloudFormation stack`
    });
    workflow.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 1 of the workflow: scheduled start of the cleanup job.
    // The schedule is Monday 01:00 UTC so that it matches the "every Monday at
    // 1am UTC" promise made in the StartGlueWorkflow parameter description.
    const etlCleanupJobTrigger = new GlueTrigger(this, 'EtlCleanupJobTrigger', {
      name: `${Aws.STACK_NAME}-EtlCleanupJobTrigger`,
      description: 'Starts the first job (cleanup job) in the ETL workflow. This job will clean any data from S3 that resulted from a prior ETL workflow',
      actions: [{ jobName: cleanupJob.ref }],
      type: 'SCHEDULED',
      schedule: Schedule.cron({ weekDay: 'MON', hour: '1', minute: '0' }).expressionString,
      startOnCreation: true,
      workflowName: workflow.ref
    });

    etlCleanupJobTrigger.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 2: run the crawler once the cleanup job has SUCCEEDED.
    new GlueTrigger(this, 'EtlCrawlerTrigger', {  // NOSONAR: typescript:S1848
      name: `${Aws.STACK_NAME}-EtlCrawlerTrigger`,
      description: 'Crawls the DynamoDB table to update the Glue Data Catalog',
      actions: [{ crawlerName: crawler.ref }],
      type: 'CONDITIONAL',
      predicate: {
        conditions: [{
          jobName: cleanupJob.ref,
          logicalOperator: 'EQUALS',
          state: 'SUCCEEDED'
        }]
      },
      startOnCreation: true,
      workflowName: workflow.ref
    }).cfnOptions.condition = this.glueWorkflowCondition;

    // The same export job is started twice in step 3, distinguished only by the
    // '--job_type' argument passed by each trigger.
    const dataExportJob = this.getDataExportJob();
    dataExportJob.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 3a: export the Issues table once the crawler has SUCCEEDED.
    new GlueTrigger(this, 'EtlIssuesDataExportJobTrigger', {  // NOSONAR: typescript:S1848
      name: `${Aws.STACK_NAME}-EtlIssuesDataExportJobTrigger`,
      description: 'Runs ETL for the Issues table to S3',
      actions: [{ jobName: dataExportJob.ref, arguments: { '--job_type': 'issues' } }],
      type: 'CONDITIONAL',
      predicate: {
        conditions: [{
          crawlerName: crawler.ref,
          logicalOperator: 'EQUALS',
          crawlState: 'SUCCEEDED'
        }]
      },
      startOnCreation: true,
      workflowName: workflow.ref
    }).cfnOptions.condition = this.glueWorkflowCondition;

    // Step 3b: export the Data Hierarchy table once the crawler has SUCCEEDED.
    new GlueTrigger(this, 'EtlDataHierarchyDataExportJobTrigger', { // NOSONAR: typescript:S1848
      name: `${Aws.STACK_NAME}-EtlDataHierarchyDataExportJobTrigger`,
      description: 'Runs ETL for the Data Hierarchy table to S3',
      actions: [{ jobName: dataExportJob.ref, arguments: { '--job_type': 'hierarchy' } }],
      type: 'CONDITIONAL',
      predicate: {
        conditions: [{
          crawlerName: crawler.ref,
          logicalOperator: 'EQUALS',
          crawlState: 'SUCCEEDED'
        }]
      },
      startOnCreation: true,
      workflowName: workflow.ref
    }).cfnOptions.condition = this.glueWorkflowCondition;
  }