constructor()

in packages/constructs/L3/ai/gaia-l3-construct/lib/rag-engines/data-import/index.ts [47:287]


  /**
   * Builds the data-import resources: a KMS key shared by the queues, the
   * ingestion queue and its dead-letter queue, the upload and processing
   * buckets, the file-import batch job and workflow, the website-crawling
   * workflow, and the upload-handler Lambda that consumes the ingestion queue.
   *
   * The upload bucket, processing bucket, ingestion queue and both workflow
   * state machines are exposed as instance properties at the end.
   *
   * @param scope - Parent construct.
   * @param id - Construct id within `scope`.
   * @param props - Naming, encryption key, shared resources, tables and config
   *   consumed by the resources created here.
   */
  constructor(scope: Construct, id: string, props: DataImportProps) {
    super(scope, id, props);

    // Both buckets opt out of the replication rules in the same way. A fresh list is
    // built on every call so no suppression objects are shared between resources.
    const suppressBucketReplicationRules = (bucket: MdaaBucket): void => {
      MdaaNagSuppressions.addCodeResourceSuppressions(
        bucket,
        [
          { id: 'NIST.800.53.R5-S3BucketReplicationEnabled', reason: 'MDAA does not enforce bucket replication.' },
          { id: 'HIPAA.Security-S3BucketReplicationEnabled', reason: 'MDAA does not enforce bucket replication.' },
          { id: 'PCI.DSS.321-S3BucketReplicationEnabled', reason: 'MDAA does not enforce bucket replication.' },
        ],
        true,
      );
    };

    // Reason strings reused by the suppressions on the upload handler and its role.
    const wildcardPolicyReason =
      'X-Ray actions only accept wildcard and s3 operations restricted to kms key and s3 buckets managed by stack';
    // NOTE(review): the handler is wired to the SQS ingestion queue below (S3 notifications
    // arrive through that queue) — confirm this reason still describes the setup accurately.
    const lambdaConcurrencyReason =
      'Function is S3 Event handler. S3 service will provide concurrency and anti-hammering protections.';

    // One KMS key encrypts every queue created by this construct.
    const queueKey = new MdaaKmsKey(this, 'DataImportQueuesKey', {
      alias: props.naming.resourceName('DataImportQueuesKey'),
      naming: props.naming,
      createParams: false,
      createOutputs: false,
    });

    const ingestionDeadLetterQueue = new MdaaSqsDeadLetterQueue(this, 'DataImportWorkFlowDLQ', {
      encryptionMasterKey: queueKey,
      naming: props.naming,
      createParams: false,
      createOutputs: false,
      queueName: 'DataImportWorkFlowDLQ',
      visibilityTimeout: cdk.Duration.seconds(900),
    });

    // Visibility timeout (900s) equals the 15-minute timeout of the consuming Lambda below.
    // NOTE(review): AWS guidance for SQS event sources suggests a visibility timeout of at
    // least six times the function timeout — confirm that 900s is intentional.
    const ingestionQueue = new MdaaSqsQueue(this, 'IngestionQueue', {
      encryptionMasterKey: queueKey,
      naming: props.naming,
      createParams: false,
      createOutputs: false,
      queueName: 'VectorDBDataIngestion',
      visibilityTimeout: cdk.Duration.seconds(900),
      deadLetterQueue: {
        queue: ingestionDeadLetterQueue,
        maxReceiveCount: 3,
      },
    });

    const uploadBucket = new MdaaBucket(this, 'UploadBucket', {
      encryptionKey: props.encryptionKey,
      naming: props.naming,
      bucketName: `${props.naming.props.org}-${props.naming.props.domain}-${props.naming.props.env}-rag-upload-bucket`,
      createParams: false,
      createOutputs: false,
      transferAcceleration: true,
    });
    suppressBucketReplicationRules(uploadBucket);

    // NOTE(review): this CORS rule accepts any origin and any header — confirm that is intended.
    uploadBucket.addCorsRule({
      allowedHeaders: ['*'],
      allowedMethods: [s3.HttpMethods.PUT, s3.HttpMethods.POST, s3.HttpMethods.GET, s3.HttpMethods.HEAD],
      allowedOrigins: ['*'],
      exposedHeaders: ['ETag'],
      maxAge: 3000,
    });

    // Object creations and removals in the upload bucket are both routed to the ingestion queue.
    uploadBucket.addObjectCreatedNotification(new s3Notifications.SqsDestination(ingestionQueue));

    uploadBucket.addObjectRemovedNotification(new s3Notifications.SqsDestination(ingestionQueue));

    const processingBucket = new MdaaBucket(this, 'ProcessingBucket', {
      encryptionKey: props.encryptionKey,
      naming: props.naming,
      bucketName: `${props.naming.props.org}-${props.naming.props.domain}-${props.naming.props.env}-rag-processing-bucket`,
      createParams: false,
      createOutputs: false,
    });

    suppressBucketReplicationRules(processingBucket);

    const fileImportBatchJob = new FileImportBatchJob(this, 'FileImportBatchJob', {
      encryptionKey: props.encryptionKey,
      naming: props.naming,
      roleHelper: props.roleHelper,
      shared: props.shared,
      config: props.config,
      uploadBucket,
      processingBucket,
      auroraDatabase: props.auroraDatabase,
      ragDynamoDBTables: props.ragDynamoDBTables,
    });

    const fileImportWorkflow = new FileImportWorkflow(this, 'FileImportWorkflow', {
      encryptionKey: props.encryptionKey,
      naming: props.naming,
      shared: props.shared,
      config: props.config,
      fileImportBatchJob,
      ragDynamoDBTables: props.ragDynamoDBTables,
    });

    const websiteCrawlingWorkflow = new WebsiteCrawlingWorkflow(this, 'WebsiteCrawlingWorkflow', {
      encryptionKey: props.encryptionKey,
      naming: props.naming,
      shared: props.shared,
      config: props.config,
      processingBucket,
      auroraDatabase: props.auroraDatabase,
      ragDynamoDBTables: props.ragDynamoDBTables,
    });

    const uploadHandlerRole = new MdaaRole(this, 'UploadHandlerRole', {
      naming: props.naming,
      roleName: 'VectorDbDataIngestionHandlerRole',
      createParams: false,
      createOutputs: false,
      assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
    });

    // The handler below is attached to a VPC, so its role must be able to manage network interfaces.
    uploadHandlerRole.addToPolicy(
      new iam.PolicyStatement({
        effect: iam.Effect.ALLOW,
        actions: ['ec2:CreateNetworkInterface', 'ec2:DescribeNetworkInterfaces', 'ec2:DeleteNetworkInterface'],
        resources: ['*'],
      }),
    );

    const uploadDlq = new MdaaSqsDeadLetterQueue(this, 'UploadHandlerDLQ', {
      encryptionMasterKey: queueKey,
      naming: props.naming,
      createParams: false,
      createOutputs: false,
      queueName: 'UploadHandlerDLQ',
    });

    // A configured code override wins; otherwise use the handler bundled next to this file.
    const dataImportUploadHandlerCodePath =
      props.config?.codeOverwrites?.dataImportUploadHandlerCodePath ??
      path.join(__dirname, './functions/upload-handler');

    const uploadHandler = new MdaaLambdaFunction(this, 'UploadHandler', {
      functionName: 'VectorDbDataIngestionHandler',
      naming: props.naming,
      role: uploadHandlerRole,
      createParams: false,
      createOutputs: false,
      code: lambda.Code.fromAsset(dataImportUploadHandlerCodePath),
      deadLetterQueue: uploadDlq,
      handler: 'index.lambda_handler',
      runtime: props.shared.pythonRuntime,
      architecture: props.shared.lambdaArchitecture,
      timeout: cdk.Duration.minutes(15),
      memorySize: 512,
      tracing: lambda.Tracing.ACTIVE,
      layers: [props.shared.powerToolsLayer, props.shared.commonLayer, props.shared.pythonSDKLayer],
      vpc: props.shared.vpc,
      vpcSubnets: { subnets: props.shared.appSubnets },
      environment: {
        ...props.shared.defaultEnvironmentVariables,
        CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
        API_KEYS_SECRETS_ARN: props.shared.apiKeysSecret.secretArn,
        PROCESSING_BUCKET_NAME: processingBucket.bucketName,
        UPLOAD_BUCKET_NAME: uploadBucket.bucketName,
        // Both tables and the workflow are dereferenced unconditionally by the grants below,
        // so they are read directly here rather than silently falling back to ''.
        WORKSPACES_TABLE_NAME: props.workspacesTable.tableName,
        WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME: props.workspacesByObjectTypeIndexName ?? '',
        DOCUMENTS_TABLE_NAME: props.documentsTable.tableName,
        DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME: props.documentsByCompountKeyIndexName ?? '',
        SAGEMAKER_RAG_MODELS_ENDPOINT: '',
        FILE_IMPORT_WORKFLOW_ARN: fileImportWorkflow.stateMachine.stateMachineArn,
        // Kendra retrieval is optional; the variable is left empty when it is not provided.
        DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME: props.kendraRetrieval?.kendraS3DataSourceBucket?.bucketName ?? '',
      },
    });

    MdaaNagSuppressions.addCodeResourceSuppressions(
      uploadHandler,
      [
        { id: 'NIST.800.53.R5-LambdaConcurrency', reason: lambdaConcurrencyReason },
        { id: 'HIPAA.Security-LambdaConcurrency', reason: lambdaConcurrencyReason },
        { id: 'PCI.DSS.321-LambdaConcurrency', reason: lambdaConcurrencyReason },
        { id: 'AwsSolutions-IAM5', reason: wildcardPolicyReason },
      ],
      true,
    );

    // Permissions for the handler role: buckets, keys, secret/config, tables, optional Kendra bucket.
    uploadBucket.grantReadWrite(uploadHandlerRole);
    processingBucket.grantReadWrite(uploadHandlerRole);
    queueKey.grantEncryptDecrypt(uploadHandlerRole);
    props.encryptionKey.grantEncryptDecrypt(uploadHandlerRole);
    props.shared.apiKeysSecret.grantRead(uploadHandlerRole);
    props.shared.configParameter.grantRead(uploadHandlerRole);
    props.workspacesTable.grantReadWriteData(uploadHandlerRole);
    props.documentsTable.grantReadWriteData(uploadHandlerRole);
    props.kendraRetrieval?.kendraS3DataSourceBucket?.grantReadWrite(uploadHandlerRole);

    ingestionQueue.grantConsumeMessages(uploadHandlerRole);
    fileImportWorkflow.stateMachine.grantStartExecution(uploadHandlerRole);

    // Only when a Bedrock role is configured may the handler assume it.
    if (props.config.bedrock?.roleArn) {
      uploadHandlerRole.addToPolicy(
        new iam.PolicyStatement({
          actions: ['sts:AssumeRole'],
          resources: [props.config.bedrock.roleArn],
        }),
      );
    }

    // The handler is driven by messages on the ingestion queue.
    uploadHandler.addEventSource(new lambdaEventSources.SqsEventSource(ingestionQueue));

    MdaaNagSuppressions.addCodeResourceSuppressions(
      uploadHandlerRole,
      [
        { id: 'AwsSolutions-IAM5', reason: wildcardPolicyReason },
        { id: 'NIST.800.53.R5-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'HIPAA.Security-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'PCI.DSS.321-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      ],
      true,
    );

    // Expose the resources other constructs need.
    this.uploadBucket = uploadBucket;
    this.processingBucket = processingBucket;
    this.ingestionQueue = ingestionQueue;
    this.fileImportWorkflow = fileImportWorkflow.stateMachine;
    this.websiteCrawlingWorkflow = websiteCrawlingWorkflow.stateMachine;
  }