constructor()

in packages/constructs/L3/ai/gaia-l3-construct/lib/rag-engines/data-import/file-import-batch-job.ts [33:231]

  constructor(scope: Construct, id: string, props: FileImportBatchJobProps) {
    super(scope, id, props);

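    // The launch template enforces IMDSv2 on every Batch-managed EC2 instance
    // in the compute environment below.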
    const launchTemplate = new ec2.LaunchTemplate(this, 'LaunchTemplate', {
      requireImdsv2: true,
    });
    const computeEnvironment = new batch.ManagedEc2EcsComputeEnvironment(this, 'ManagedEc2EcsComputeEnvironment', {
      vpc: props.shared.vpc,
      vpcSubnets: { subnets: props.shared.appSubnets },
      allocationStrategy: batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
      instanceTypes: [new ec2.InstanceType('c5.xlarge')],
      maxvCpus: 4,
      minvCpus: 0,
      replaceComputeEnvironment: true,
      updateTimeout: cdk.Duration.minutes(30),
      updateToLatestImageVersion: true,
      launchTemplate,
    });

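    // A single queue at priority 1, backed only by the compute environment above.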
    const jobQueue = new batch.JobQueue(this, 'JobQueue', {
      computeEnvironments: [
        {
          computeEnvironment,
          order: 1,
        },
      ],
      priority: 1,
    });

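    // Job role assumed by the ECS task; baseline execution permissions come
    // from the AWS-managed ECS task execution policy.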
    const fileImportJobRole = new MdaaRole(this, 'FileImportJobRole', {
      naming: props.naming,
      roleName: 'FileImportJobRole',
      createParams: true,
      createOutputs: false,
      assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
      managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonECSTaskExecutionRolePolicy')],
    });

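    // Build context defaults to the shared Dockerfile; a config overwrite can
    // point the image build at a different directory and file.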
    let dockerFileDirectory = `${__dirname}/../../shared`;
    let dockerFileName = 'file-import-dockerfile';

    if (props.config.codeOverwrites?.fileImportBatchJobDockerFilePath) {
      dockerFileDirectory = path.dirname(props.config.codeOverwrites.fileImportBatchJobDockerFilePath);
      dockerFileName = path.basename(props.config.codeOverwrites.fileImportBatchJobDockerFilePath);
    }

    const imageAsset = new aws_ecr_assets.DockerImageAsset(this, 'file-import-image', {
      directory: dockerFileDirectory,
      file: dockerFileName,
      platform: aws_ecr_assets.Platform.LINUX_AMD64,
    });

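    // A 10 GiB tmpfs provides writable scratch space at /tmp without relaxing
    // the read-only root filesystem set on the container definition below.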
    const linuxParameters = new batch.LinuxParameters(this, 'FileImportLinuxParams', {});
    linuxParameters.addTmpfs({
      size: cdk.Size.gibibytes(10),
      containerPath: '/tmp',
      mountOptions: [batch.TmpfsMountOption.RW],
    });

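    // The container runs as a non-root user, and all runtime configuration is
    // passed through environment variables rather than baked into the image.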
    const fileImportContainer = new batch.EcsEc2ContainerDefinition(this, 'FileImportContainer', {
      cpu: 2,
      memory: cdk.Size.mebibytes(2048),
      image: ecs.ContainerImage.fromDockerImageAsset(imageAsset),
      jobRole: fileImportJobRole,
      // Allow access to /tmp via an in-memory tmpfs mount, as required by this
      // file-import flow, while still maintaining a read-only root filesystem.
      linuxParameters,
      readonlyRootFilesystem: true,
      user: 'worker',
      environment: {
        AWS_DEFAULT_REGION: cdk.Stack.of(this).region,
        CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
        API_KEYS_SECRETS_ARN: props.shared.apiKeysSecret.secretArn,
        // Avoid casting a possibly-undefined ARN to string; fall back to ''.
        AURORA_DB_SECRET_ID: props.auroraDatabase?.secret?.secretArn ?? '',
        PROCESSING_BUCKET_NAME: props.processingBucket.bucketName,
        WORKSPACES_TABLE_NAME: props.ragDynamoDBTables.workspacesTable.tableName,
        WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME: props.ragDynamoDBTables.workspacesByObjectTypeIndexName,
        DOCUMENTS_TABLE_NAME: props.ragDynamoDBTables.documentsTable.tableName ?? '',
        DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME: props.ragDynamoDBTables.documentsByCompountKeyIndexName ?? '',
        SAGEMAKER_RAG_MODELS_ENDPOINT: '',
      },
    });

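    // Batch terminates jobs that run longer than 30 minutes.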
    const fileImportJob = new batch.EcsJobDefinition(this, 'FileImportJob', {
      container: fileImportContainer,
      timeout: cdk.Duration.minutes(30),
    });

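    // Least-privilege grants: the two buckets, the KMS key, the config
    // parameter, the API keys secret, and the RAG DynamoDB tables.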
    props.uploadBucket.grantReadWrite(fileImportJobRole);
    props.processingBucket.grantReadWrite(fileImportJobRole);
    props.encryptionKey.grantEncryptDecrypt(fileImportJobRole);
    props.shared.configParameter.grantRead(fileImportJobRole);
    props.shared.apiKeysSecret.grantRead(fileImportJobRole);
    props.ragDynamoDBTables.workspacesTable.grantReadWriteData(fileImportJobRole);
    props.ragDynamoDBTables.documentsTable.grantReadWriteData(fileImportJobRole);

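    // When Aurora is configured, grant read access to its secret and open the
    // database's default port to the compute environment's security group.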
    if (props.auroraDatabase) {
      props.auroraDatabase.secret?.grantRead(fileImportJobRole);
      props.auroraDatabase.connections.allowDefaultPortFrom(computeEnvironment);
    }

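    // Bedrock invocation is wildcarded on resources but constrained to the
    // configured region; a configured role ARN may additionally be assumed.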
    if (props.config.bedrock?.enabled) {
      fileImportJobRole.addToPolicy(
        new iam.PolicyStatement({
          actions: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream'],
          resources: ['*'],
          conditions: {
            StringEquals: {
              'aws:RequestedRegion': props.config.bedrock.region,
            },
          },
        }),
      );

      if (props.config.bedrock?.roleArn) {
        fileImportJobRole.addToPolicy(
          new iam.PolicyStatement({
            actions: ['sts:AssumeRole'],
            resources: [props.config.bedrock.roleArn],
          }),
        );
      }
    }

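    // cdk-nag suppressions documenting why the managed-policy, wildcard, and
    // inline-policy findings are acceptable for this Batch job.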
    MdaaNagSuppressions.addCodeResourceSuppressions(
      computeEnvironment,
      [
        {
          id: 'AwsSolutions-IAM4',
          reason: 'AmazonEC2ContainerServiceforEC2Role is restrictive enough.',
        },
      ],
      true,
    );

    MdaaNagSuppressions.addCodeResourceSuppressions(
      fileImportJobRole,
      [
        {
          id: 'AwsSolutions-IAM4',
          reason: 'Cluster name is unknown until deployment; it is created during deployment and used strictly for the AWS Batch job.',
        },
        {
          id: 'AwsSolutions-IAM5',
          reason:
            'AmazonEC2ContainerServiceforEC2Role is restrictive enough. Resource actions for ECS only support wildcards; the log group name is not known at deployment time.',
        },
        {
          id: 'NIST.800.53.R5-IAMNoInlinePolicy',
          reason:
            'Inline policy maintained by MDAA framework. Wildcard targets Bedrock, but region-level controls are in place.',
        },
        {
          id: 'HIPAA.Security-IAMNoInlinePolicy',
          reason:
            'Inline policy maintained by MDAA framework. Wildcard targets Bedrock, but region-level controls are in place.',
        },
        {
          id: 'PCI.DSS.321-IAMNoInlinePolicy',
          reason:
            'Inline policy maintained by MDAA framework. Wildcard targets Bedrock, but region-level controls are in place.',
        },
      ],
      true,
    );

    MdaaNagSuppressions.addCodeResourceSuppressions(
      fileImportContainer,
      [
        {
          id: 'AwsSolutions-IAM5',
          reason:
            'Log stream generated at deployment time by AWS batch and ecr get authorization only supports * for resource',
        },
        { id: 'NIST.800.53.R5-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'HIPAA.Security-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'PCI.DSS.321-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      ],
      true,
    );

    MdaaNagSuppressions.addCodeResourceSuppressions(
      fileImportJob,
      [
        {
          id: 'AwsSolutions-IAM5',
          reason: 'Events are handled by the upstream DynamoDB service; the resource is unknown at deployment time.',
        },
        { id: 'NIST.800.53.R5-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'HIPAA.Security-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
        { id: 'PCI.DSS.321-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      ],
      true,
    );

    this.jobQueue = jobQueue;
    this.fileImportJob = fileImportJob;
  }
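
A minimal usage sketch follows (hypothetical wiring: the owning scope and the local variable names are assumptions, while the prop keys and the exposed jobQueue and fileImportJob members come from the construct above):

  // Hypothetical instantiation inside an owning stack; values are placeholders.
  const fileImportBatchJob = new FileImportBatchJob(this, 'FileImportBatchJob', {
    naming: props.naming,
    shared: props.shared,
    config: props.config,
    uploadBucket,
    processingBucket,
    encryptionKey,
    ragDynamoDBTables,
    auroraDatabase, // optional; Aurora grants and networking are added only when present
  });

  // Downstream callers submit work against the exposed queue and job definition.
  const jobQueueArn = fileImportBatchJob.jobQueue.jobQueueArn;
  const jobDefinitionArn = fileImportBatchJob.fileImportJob.jobDefinitionArn;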