in packages/constructs/L3/ai/gaia-l3-construct/lib/rag-engines/data-import/file-import-batch-job.ts [33:231]
/**
 * Provisions the AWS Batch infrastructure for the file-import RAG pipeline:
 * a managed EC2/ECS compute environment, a job queue, an IAM job role with
 * least-privilege grants, and an ECS job definition running a Docker image
 * built from a (possibly overridden) Dockerfile.
 */
constructor(scope: Construct, id: string, props: FileImportBatchJobProps) {
  super(scope, id, props);

  // IMDSv2 is required on the launch template to harden instance metadata access.
  const launchTemplate = new ec2.LaunchTemplate(this, 'LaunchTemplate', {
    requireImdsv2: true,
  });

  // Managed EC2/ECS compute environment scaling from 0 to 4 vCPUs on c5.xlarge.
  const computeEnvironment = new batch.ManagedEc2EcsComputeEnvironment(this, 'ManagedEc2EcsComputeEnvironment', {
    vpc: props.shared.vpc,
    vpcSubnets: { subnets: props.shared.appSubnets },
    allocationStrategy: batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
    instanceTypes: [new ec2.InstanceType('c5.xlarge')],
    maxvCpus: 4,
    minvCpus: 0,
    replaceComputeEnvironment: true,
    updateTimeout: cdk.Duration.minutes(30),
    updateToLatestImageVersion: true,
    launchTemplate,
  });

  const jobQueue = new batch.JobQueue(this, 'JobQueue', {
    computeEnvironments: [
      {
        computeEnvironment,
        order: 1,
      },
    ],
    priority: 1,
  });

  // Role assumed by the Batch job's ECS task; fine-grained grants are added below.
  const fileImportJobRole = new MdaaRole(this, 'FileImportJobRole', {
    naming: props.naming,
    roleName: 'FileImportJobRole',
    createParams: true,
    createOutputs: false,
    assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
    managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonECSTaskExecutionRolePolicy')],
  });

  // Default Dockerfile ships with the construct; config may point at a custom one.
  let dockerFileDirectory = `${__dirname}/../../shared`;
  let dockerFileName = 'file-import-dockerfile';
  if (props.config.codeOverwrites?.fileImportBatchJobDockerFilePath) {
    dockerFileDirectory = path.dirname(props.config.codeOverwrites.fileImportBatchJobDockerFilePath);
    dockerFileName = path.basename(props.config.codeOverwrites.fileImportBatchJobDockerFilePath);
  }

  const imageAsset = new aws_ecr_assets.DockerImageAsset(this, 'file-import-image', {
    directory: dockerFileDirectory,
    file: dockerFileName,
    platform: aws_ecr_assets.Platform.LINUX_AMD64,
  });

  // 10 GiB tmpfs mount so the container can scratch to /tmp while the
  // root filesystem stays read-only.
  const linuxParameters = new batch.LinuxParameters(this, 'FileImportLinuxParams', {});
  linuxParameters.addTmpfs({
    size: cdk.Size.gibibytes(10),
    containerPath: '/tmp',
    mountOptions: [batch.TmpfsMountOption.RW],
  });

  const fileImportContainer = new batch.EcsEc2ContainerDefinition(this, 'FileImportContainer', {
    cpu: 2,
    memory: cdk.Size.mebibytes(2048),
    image: ecs.ContainerImage.fromDockerImageAsset(imageAsset),
    jobRole: fileImportJobRole,
    // allow access to /tmp via in-memory mount
    // as required by this file-import flow
    // while still maintaining readonly file system
    linuxParameters,
    readonlyRootFilesystem: true,
    user: 'worker',
    environment: {
      AWS_DEFAULT_REGION: cdk.Stack.of(this).region,
      CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
      API_KEYS_SECRETS_ARN: props.shared.apiKeysSecret.secretArn,
      // Empty when no Aurora database is configured; avoids an unsafe
      // `as string` cast that could inject `undefined` into the env map.
      AURORA_DB_SECRET_ID: props.auroraDatabase?.secret?.secretArn ?? '',
      PROCESSING_BUCKET_NAME: props.processingBucket.bucketName,
      WORKSPACES_TABLE_NAME: props.ragDynamoDBTables.workspacesTable.tableName,
      WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME: props.ragDynamoDBTables.workspacesByObjectTypeIndexName,
      DOCUMENTS_TABLE_NAME: props.ragDynamoDBTables.documentsTable.tableName ?? '',
      DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME: props.ragDynamoDBTables.documentsByCompountKeyIndexName ?? '',
      SAGEMAKER_RAG_MODELS_ENDPOINT: '',
    },
  });

  const fileImportJob = new batch.EcsJobDefinition(this, 'FileImportJob', {
    container: fileImportContainer,
    timeout: cdk.Duration.minutes(30),
  });

  // Least-privilege grants for the job role.
  props.uploadBucket.grantReadWrite(fileImportJobRole);
  props.processingBucket.grantReadWrite(fileImportJobRole);
  props.encryptionKey.grantEncryptDecrypt(fileImportJobRole);
  props.shared.configParameter.grantRead(fileImportJobRole);
  props.shared.apiKeysSecret.grantRead(fileImportJobRole);
  props.ragDynamoDBTables.workspacesTable.grantReadWriteData(fileImportJobRole);
  props.ragDynamoDBTables.documentsTable.grantReadWriteData(fileImportJobRole);

  if (props.auroraDatabase) {
    props.auroraDatabase.secret?.grantRead(fileImportJobRole);
    props.auroraDatabase.connections.allowDefaultPortFrom(computeEnvironment);
  }

  if (props.config.bedrock?.enabled) {
    // Bedrock model invocation requires a resource wildcard; scope is
    // constrained instead by the region condition below.
    fileImportJobRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream'],
        resources: ['*'],
        conditions: {
          StringEquals: {
            'aws:RequestedRegion': props.config.bedrock.region,
          },
        },
      }),
    );
    if (props.config.bedrock?.roleArn) {
      // Cross-account Bedrock access via an explicitly configured role.
      fileImportJobRole.addToPolicy(
        new iam.PolicyStatement({
          actions: ['sts:AssumeRole'],
          resources: [props.config.bedrock.roleArn],
        }),
      );
    }
  }

  // cdk-nag suppressions for findings that are accepted by design.
  MdaaNagSuppressions.addCodeResourceSuppressions(
    computeEnvironment,
    [
      {
        id: 'AwsSolutions-IAM4',
        reason: 'AmazonEC2ContainerServiceforEC2Role is restrictive enough.',
      },
    ],
    true,
  );
  MdaaNagSuppressions.addCodeResourceSuppressions(
    fileImportJobRole,
    [
      {
        id: 'AwsSolutions-IAM4',
        reason: 'Cluster unknown at runtime. Created during deployment and strictly used for AWS Batch job',
      },
      {
        id: 'AwsSolutions-IAM5',
        reason:
          'AmazonEC2ContainerServiceforEC2Role is restrictive enough. Resources actions for ECS only support wildcard log group name not known at deployment time.',
      },
      {
        id: 'NIST.800.53.R5-IAMNoInlinePolicy',
        reason:
          'Inline policy maintained by MDAA framework. Wildcard is towards bedrock but service enabled on region level controls are in place.',
      },
      {
        id: 'HIPAA.Security-IAMNoInlinePolicy',
        reason:
          'Inline policy maintained by MDAA framework. Wildcard is towards bedrock but service enabled on region level controls are in place.',
      },
      {
        id: 'PCI.DSS.321-IAMNoInlinePolicy',
        reason:
          'Inline policy maintained by MDAA framework. Wildcard is towards bedrock but service enabled on region level controls are in place.',
      },
    ],
    true,
  );
  MdaaNagSuppressions.addCodeResourceSuppressions(
    fileImportContainer,
    [
      {
        id: 'AwsSolutions-IAM5',
        reason:
          'Log stream generated at deployment time by AWS batch and ecr get authorization only supports * for resource',
      },
      { id: 'NIST.800.53.R5-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      { id: 'HIPAA.Security-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      { id: 'PCI.DSS.321-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
    ],
    true,
  );
  MdaaNagSuppressions.addCodeResourceSuppressions(
    fileImportJob,
    [
      {
        id: 'AwsSolutions-IAM5',
        reason: 'Events handled by upstream dynamodb service, resource unknown at deployment time',
      },
      { id: 'NIST.800.53.R5-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      { id: 'HIPAA.Security-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
      { id: 'PCI.DSS.321-IAMNoInlinePolicy', reason: 'Inline policy managed by MDAA framework.' },
    ],
    true,
  );

  this.jobQueue = jobQueue;
  this.fileImportJob = fileImportJob;
}