constructor()

in lib/constructs/data-set-enrollment.ts [218:320]


	/**
	 * Wires up the Glue resources for enrolling one data set: a source and a
	 * data lake Glue database, an optional source connection, the IAM role
	 * shared by the crawlers/job, a source crawler, the ETL job, a data lake
	 * crawler, and the workflow that ties those three together.
	 *
	 * @param scope - Parent construct.
	 * @param id - Construct id, forwarded to the base class.
	 * @param props - Enrollment settings. Read here: `dataSetName`,
	 *   `dataLakeBucket`, `DataLakeTargets`, `SourceTargets`,
	 *   `GlueScriptPath`, `GlueScriptArguments` (including the `--DL_BUCKET`
	 *   and `--DL_PREFIX` entries), `MaxDPUs`, `WorkflowCronScheduleExpression`
	 *   and the optional `SourceConnectionInput` / `SourceAccessPolicy`.
	 */
	constructor(scope: Construct, id: string, props: DataSetEnrollmentProps) {
		super(scope, id);

		this.DataLakeTargets = props.DataLakeTargets;
		this.DataLakeBucketName = props.GlueScriptArguments['--DL_BUCKET'];
		this.DataLakePrefix = props.GlueScriptArguments['--DL_PREFIX'];

		this.DataSetName = props.dataSetName;

		this.Dataset_SourceDatabaseName = `${props.dataSetName}_src`;
		this.Dataset_DatalakeDatabaseName = `${props.dataSetName}_dl`;

		// Both catalog databases point at the same data-set prefix in the data lake bucket.
		this.Dataset_Source = new glue.CfnDatabase(this, `${props.dataSetName}_src`, {
			catalogId: Aws.ACCOUNT_ID,
			databaseInput: {
				name: this.Dataset_SourceDatabaseName,
				locationUri: `s3://${props.dataLakeBucket.bucketName}/${props.dataSetName}/`
			}
		});
		this.Dataset_Datalake = new glue.CfnDatabase(this, `${props.dataSetName}_dl`, {
			catalogId: Aws.ACCOUNT_ID,
			databaseInput: {
				name: this.Dataset_DatalakeDatabaseName,
				locationUri: `s3://${props.dataLakeBucket.bucketName}/${props.dataSetName}/`
			}
		});

		// Names of the Glue connections the ETL job should use. Stays empty when
		// no source connection is supplied or the supplied input carries no name.
		const connectionNames: string[] = [];

		if (props.SourceConnectionInput) {
			this.SourceConnection = new glue.CfnConnection(this, `${props.dataSetName}-src-connection`, {
				catalogId: this.Dataset_Source.catalogId,
				connectionInput: props.SourceConnectionInput
			});
			if (props.SourceConnectionInput.name) {
				connectionNames.push(props.SourceConnectionInput.name);
			}
		}

		// Role assumed by the Glue service; it is passed to the ETL job below and
		// granted access to the data lake bucket and the job script asset.
		this.DataSetGlueRole = new iam.Role(this, `${props.dataSetName}-GlueRole`, {
			assumedBy: new iam.ServicePrincipal('glue.amazonaws.com')
		});

		this.DataSetGlueRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'));
		this.DataSetGlueRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'));
		props.dataLakeBucket.grantReadWrite(this.DataSetGlueRole);

		// Optional caller-supplied policy granting the role access to the source system.
		if (typeof props.SourceAccessPolicy !== 'undefined') {
			props.SourceAccessPolicy.attachToRole(this.DataSetGlueRole);
		}

		// NOTE(review): the boolean passed to setupCrawler (true here, false for the
		// data lake crawler) is defined elsewhere in this class - confirm its meaning there.
		const sourceCrawler = this.setupCrawler(this.Dataset_Source, props.SourceTargets, true, this.Dataset_SourceDatabaseName);

		// Upload the ETL script as an S3 asset and let the Glue role read it.
		const glueScript = new s3assets.Asset(this, `${props.dataSetName}-GlueScript`, {
			path: props.GlueScriptPath
		});
		glueScript.grantRead(this.DataSetGlueRole);

		// The conditional spread below makes the `connections` property optional:
		// it is only emitted when at least one connection name was collected, so
		// the job is never given an empty connection list. It's only used for
		// JDBC sources at the moment.
		const jobParams = {
			executionProperty: {
				maxConcurrentRuns: 1
			},
			name: `${props.dataSetName}_src_to_dl_etl`,
			timeout: 2880,
			glueVersion: "2.0",
			maxCapacity: props.MaxDPUs,
			command: {
				scriptLocation: `s3://${glueScript.s3BucketName}/${glueScript.s3ObjectKey}`,
				name: "glueetl",
				pythonVersion: "3"
			},
			role: this.DataSetGlueRole.roleArn,
			maxRetries: 0,
			defaultArguments: props.GlueScriptArguments,
			...(connectionNames.length > 0
				? { connections: { connections: connectionNames } }
				: {})
		};
		const etlJob = new glue.CfnJob(this, `${props.dataSetName}-EtlJob`, jobParams);

		const datalakeCrawler = this.setupCrawler(this.Dataset_Datalake, this.DataLakeTargets, false, this.Dataset_DatalakeDatabaseName);

		// Instantiated for its side effect of registering the workflow in the
		// construct tree; the instance itself is not needed afterwards.
		new DataLakeEnrollmentWorkflow(this, `${props.dataSetName}DataLakeWorkflow`, {
			workfowName: `${props.dataSetName}_DataLakeEnrollmentWorkflow`,
			srcCrawler: sourceCrawler,
			etlJob: etlJob,
			datalakeCrawler: datalakeCrawler,
			WorkflowCronScheduleExpression: props.WorkflowCronScheduleExpression
		});
	}