in source/cdk-infrastructure/lib/back-end/data-analysis/data-analysis-construct.ts [42:309]
/**
 * Creates the optional data-analysis resources: an S3 output bucket, a Glue database with two
 * external tables, a Glue crawler over the DynamoDB tables, and a Glue workflow whose triggers
 * chain the cleanup job, the crawler and the data export job.
 *
 * Every resource created here is attached to `GlueWorkflowCondition`, so it is only provisioned
 * when the `StartGlueWorkflow` CloudFormation parameter is set to 'Yes'.
 *
 * @param scope Parent construct
 * @param id Construct ID
 * @param props Solution metadata, source code location, logging bucket and the DynamoDB tables to export
 */
constructor(scope: Construct, id: string, props: DataAnalysisProps) {
    super(scope, id);

    this.solutionId = props.solutionId;
    this.solutionVersion = props.solutionVersion;
    this.sourceCodeBucket = Bucket.fromBucketName(this, 'sourceCodeBucket', props.sourceCodeBucketName);
    this.sourceCodeKeyPrefix = props.sourceCodeKeyPrefix;
    this.ddbIssuesTable = props.issuesTable;
    this.ddbDataHierarchyTable = props.dataHierarchyTable;

    // Deployment-time switch for the whole data-analysis feature
    this.startGlueWorkflow = new CfnParameter(this, 'StartGlueWorkflow', {
        type: 'String',
        description: 'Do you want to perform the Glue Workflow that will extract Amazon Virtual Andon\'s DynamoDB data to S3 for analysis with Athena? If set to \'Yes\', the process will run every Monday at 1am UTC by default',
        allowedValues: ['Yes', 'No'],
        default: 'No'
    });
    this.startGlueWorkflow.overrideLogicalId('StartGlueWorkflow');

    this.glueWorkflowCondition = new CfnCondition(this, 'GlueWorkflowCondition', {
        expression: Fn.conditionEquals(this.startGlueWorkflow.valueAsString, 'Yes')
    });

    // Bucket that receives the exported data; retained on stack deletion
    this.glueOutputBucket = new Bucket(this, 'AvaGlueOutputBucket', {
        removalPolicy: RemovalPolicy.RETAIN,
        encryption: BucketEncryption.S3_MANAGED,
        serverAccessLogsBucket: props.logsBucket,
        serverAccessLogsPrefix: 'glue-output/',
        blockPublicAccess: {
            blockPublicAcls: true,
            blockPublicPolicy: true,
            ignorePublicAcls: true,
            restrictPublicBuckets: true
        }
    });
    const outputBucketResource = this.glueOutputBucket.node.defaultChild as CfnResource;
    outputBucketResource.cfnOptions.condition = this.glueWorkflowCondition;
    outputBucketResource.overrideLogicalId('AvaGlueOutputBucket');

    // Deny every S3 action on the bucket and its objects when the request is not sent over TLS
    this.glueOutputBucket.addToResourcePolicy(new PolicyStatement({
        effect: Effect.DENY,
        actions: ['s3:*'],
        resources: [this.glueOutputBucket.bucketArn, this.glueOutputBucket.arnForObjects('*')],
        conditions: {
            'Bool': { 'aws:SecureTransport': 'False' }
        },
        principals: [new AnyPrincipal()]
    }));
    // The bucket policy is expected to exist here because a statement was just added to it
    (this.glueOutputBucket.policy!.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;

    const glueDatabase = new GlueDatabase(this, 'AvaGlueDatabase', { databaseName: 'amazon-virtual-andon-glue-database' });
    const glueDatabaseResource = glueDatabase.node.defaultChild as CfnResource;
    glueDatabaseResource.cfnOptions.condition = this.glueWorkflowCondition;
    glueDatabaseResource.overrideLogicalId('AvaGlueDatabase');
    this.glueDatabaseName = glueDatabase.databaseName;

    // Builds the table input shared by both external tables; only the columns and the S3 sub-folder differ.
    // NOTE(review): the Parquet SerDe is paired with text input/output formats here — confirm this is intentional.
    const buildExternalTableInput = (columns: { name: string; type: string }[], outputFolder: string) => ({
        storageDescriptor: {
            columns,
            location: `s3://${this.glueOutputBucket.bucketName}/${this.glueOutputObjectPrefix}/${outputFolder}`,
            inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
            outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
            compressed: false,
            numberOfBuckets: -1,
            serdeInfo: {
                serializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe',
                parameters: { 'serialization.format': '1' }
            },
            storedAsSubDirectories: false
        },
        tableType: 'EXTERNAL_TABLE',
        parameters: {
            EXTERNAL: 'TRUE',
            has_encrypted_data: 'false',
            classification: 'parquet'
        }
    });

    // Glue table describing the exported Issues data
    const glueIssuesTable = new GlueTable(this, 'AvaGlueIssuesTable', {
        catalogId: Aws.ACCOUNT_ID,
        databaseName: glueDatabase.databaseName,
        tableInput: buildExternalTableInput([
            { name: 'eventid', type: 'string' },
            { name: 'acknowledged', type: 'string' },
            { name: 'created', type: 'string' },
            { name: 'sitename', type: 'string' },
            { name: 'issuesource', type: 'string' },
            { name: 'priority', type: 'string' },
            { name: 'areaname#status#processname#eventdescription#stationname#devicename#created', type: 'string' },
            { name: 'version', type: 'bigint' },
            { name: 'devicename', type: 'string' },
            { name: 'devicename#eventid', type: 'string' },
            { name: 'createdat', type: 'string' },
            { name: 'areaname', type: 'string' },
            { name: 'processname', type: 'string' },
            { name: 'createddateutc', type: 'date' },
            { name: 'eventdescription', type: 'string' },
            { name: 'areaname#status#processname#stationname#devicename#created', type: 'string' },
            { name: 'stationname', type: 'string' },
            { name: 'id', type: 'string' },
            { name: 'acknowledgedtime', type: 'bigint' },
            { name: 'status', type: 'string' },
            { name: 'updatedat', type: 'string' },
            { name: 'closed', type: 'string' },
            { name: 'resolutiontime', type: 'bigint' },
            { name: 'createdby', type: 'string' },
            { name: 'acknowledgedby', type: 'string' },
            { name: 'closedby', type: 'string' },
            { name: 'rejectedby', type: 'string' },
            { name: 'additionaldetails', type: 'string' }
        ], 'issues')
    });
    this.glueIssuesTableName = glueIssuesTable.ref;
    glueIssuesTable.overrideLogicalId('AvaGlueIssuesTable');
    glueIssuesTable.cfnOptions.condition = this.glueWorkflowCondition;

    // Glue table describing the exported Data Hierarchy data
    const glueDataHierarchyTable = new GlueTable(this, 'AvaGlueDataHierarchyTable', {
        catalogId: Aws.ACCOUNT_ID,
        databaseName: glueDatabase.databaseName,
        tableInput: buildExternalTableInput([
            { name: 'protocol', type: 'string' },
            { name: 'endpoint', type: 'string' },
            { name: 'filterpolicy', type: 'string' },
            { name: 'id', type: 'string' },
            { name: 'type', type: 'string' },
            { name: 'subscriptionarn', type: 'string' },
            { name: 'stationareaid', type: 'string' },
            { name: 'createdat', type: 'string' },
            { name: 'name', type: 'string' },
            { name: 'description', type: 'string' },
            { name: 'version', type: 'bigint' },
            { name: 'parentid', type: 'string' },
            { name: 'updatedat', type: 'string' },
            { name: 'processareaid', type: 'string' },
            { name: 'eventprocessid', type: 'string' },
            { name: 'eventtype', type: 'string' },
            { name: 'priority', type: 'string' },
            { name: 'rootcauses', type: 'string' },
            { name: 'sms', type: 'string' },
            { name: 'eventimgkey', type: 'string' },
            { name: 'email', type: 'string' },
            { name: 'devicestationid', type: 'string' },
            { name: 'areasiteid', type: 'string' },
            { name: 'alias', type: 'string' }
        ], 'data-hierarchy')
    });
    this.glueDataHierarchyTableName = glueDataHierarchyTable.ref;
    glueDataHierarchyTable.overrideLogicalId('AvaGlueDataHierarchyTable');
    glueDataHierarchyTable.cfnOptions.condition = this.glueWorkflowCondition;

    // Role assumed by Glue for the crawler: the Glue service managed policy plus read access to both DynamoDB tables.
    // The trailing '*' on each ARN also matches ARNs that extend the table ARN.
    const crawlerRole = new Role(this, 'CrawlerRole', {
        assumedBy: new ServicePrincipal('glue.amazonaws.com'),
        managedPolicies: [ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')],
        inlinePolicies: {
            'DDBPolicy': new PolicyDocument({
                statements: [
                    new PolicyStatement({
                        effect: Effect.ALLOW,
                        actions: ['dynamodb:Scan', 'dynamodb:DescribeTable'],
                        resources: [`${props.issuesTable.tableArn}*`, `${props.dataHierarchyTable.tableArn}*`]
                    })
                ]
            })
        }
    });
    (crawlerRole.node.defaultChild as CfnResource).cfnOptions.condition = this.glueWorkflowCondition;

    const crawler = new GlueCrawler(this, 'Crawler', {
        name: 'amazon-virtual-andon-crawler',
        role: crawlerRole.roleArn,
        databaseName: glueDatabase.databaseName,
        targets: {
            dynamoDbTargets: [
                { path: props.issuesTable.tableName },
                { path: props.dataHierarchyTable.tableName }
            ]
        }
    });
    crawler.cfnOptions.condition = this.glueWorkflowCondition;

    const cleanupJob = this.getCleanupJob();
    cleanupJob.cfnOptions.condition = this.glueWorkflowCondition;

    const workflow = new GlueWorkflow(this, 'AvaEtlWorkflow', {
        description: `Workflow for ${Aws.STACK_NAME} CloudFormation stack`
    });
    workflow.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 1: scheduled start of the workflow. The schedule is Monday 01:00 UTC so that it matches
    // the run time promised in the StartGlueWorkflow parameter description.
    const etlCleanupJobTrigger = new GlueTrigger(this, 'EtlCleanupJobTrigger', {
        name: `${Aws.STACK_NAME}-EtlCleanupJobTrigger`,
        description: 'Starts the first job (cleanup job) in the ETL workflow. This job will clean any data from S3 that resulted from a prior ETL workflow',
        actions: [{ jobName: cleanupJob.ref }],
        type: 'SCHEDULED',
        schedule: Schedule.cron({ weekDay: 'MON', hour: '1', minute: '0' }).expressionString,
        startOnCreation: true,
        workflowName: workflow.ref
    });
    etlCleanupJobTrigger.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 2: run the crawler once the cleanup job has succeeded
    const etlCrawlerTrigger = new GlueTrigger(this, 'EtlCrawlerTrigger', {
        name: `${Aws.STACK_NAME}-EtlCrawlerTrigger`,
        description: 'Crawls the DynamoDB table to update the Glue Data Catalog',
        actions: [{ crawlerName: crawler.ref }],
        type: 'CONDITIONAL',
        predicate: {
            conditions: [{
                jobName: cleanupJob.ref,
                logicalOperator: 'EQUALS',
                state: 'SUCCEEDED'
            }]
        },
        startOnCreation: true,
        workflowName: workflow.ref
    });
    etlCrawlerTrigger.cfnOptions.condition = this.glueWorkflowCondition;

    const dataExportJob = this.getDataExportJob();
    dataExportJob.cfnOptions.condition = this.glueWorkflowCondition;

    // Step 3: adds a trigger that runs the data export job with the given '--job_type' after the crawler succeeds
    const addDataExportTrigger = (triggerId: string, description: string, jobType: string): void => {
        const exportTrigger = new GlueTrigger(this, triggerId, {
            name: `${Aws.STACK_NAME}-${triggerId}`,
            description,
            actions: [{ jobName: dataExportJob.ref, arguments: { '--job_type': jobType } }],
            type: 'CONDITIONAL',
            predicate: {
                conditions: [{
                    crawlerName: crawler.ref,
                    logicalOperator: 'EQUALS',
                    crawlState: 'SUCCEEDED'
                }]
            },
            startOnCreation: true,
            workflowName: workflow.ref
        });
        exportTrigger.cfnOptions.condition = this.glueWorkflowCondition;
    };

    addDataExportTrigger('EtlIssuesDataExportJobTrigger', 'Runs ETL for the Issues table to S3', 'issues');
    addDataExportTrigger('EtlDataHierarchyDataExportJobTrigger', 'Runs ETL for the Data Hierarchy table to S3', 'hierarchy');
}