lib/ide-services-resources.ts (529 lines of code) (raw):

import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';
import * as eks from 'aws-cdk-lib/aws-eks';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as rds from 'aws-cdk-lib/aws-rds';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import * as cr from 'aws-cdk-lib/custom-resources';
import { KubectlV32Layer } from '@aws-cdk/lambda-layer-kubectl-v32';
import {KubernetesManifest} from "aws-cdk-lib/aws-eks/lib/k8s-manifest";

/**
 * Nested stack that provisions the infrastructure used by IDE Services:
 * a VPC, an EKS cluster (optionally with a GPU node group), a PostgreSQL RDS
 * instance, an S3 data bucket, External Secrets wiring for the generated
 * credentials, and an IAM user with Bedrock permissions.
 *
 * Behaviour is tuned through CDK context keys:
 * `eksDefaultCapacity`, `eksInstanceClass`, `eksInstanceSize`, `useMellum`,
 * `gpuNodeGroupMinSize`, `gpuNodeGroupMaxSize`, `gpuNodeGroupInstanceClass`,
 * `gpuNodeGroupInstanceSize`, `dbInstanceClass`, `dbInstanceSize`, `mellumToken`.
 */
export class IdeServicesResources extends cdk.NestedStack {
  /** The EKS cluster hosting IDE Services. */
  cluster: eks.Cluster;
  /** ExternalSecret manifest that syncs the RDS credentials into the cluster. */
  externalSecret: KubernetesManifest;
  /** ExternalSecret manifest for the Mellum RSA keys; only set when `useMellum` is enabled. */
  mellumExternalSecret?: KubernetesManifest;
  /** General-purpose S3 bucket. */
  dataBucket: s3.Bucket;
  /** IAM user that carries the Bedrock policies. */
  bedrockUser: iam.User;
  /** Access key created for {@link bedrockUser}. */
  bedrockAccessKey: iam.CfnAccessKey;
  /** Kubernetes service account (`ide-services-sa`) used by the application. */
  serviceAccount: eks.ServiceAccount;

  constructor(scope: Construct, id: string, props?: cdk.NestedStackProps) {
    super(scope, id, props);

    const kubernetesVersion = eks.KubernetesVersion.V1_32;
    const albControllerVersion = eks.AlbControllerVersion.V2_8_2;

    // Create a VPC for the EKS cluster
    const vpc = new ec2.Vpc(this, 'IdeServicesVpc', {
      maxAzs: 2,
      natGateways: 1,
    });

    // Get instance type configuration from context or use defaults.
    // An explicit 0 is honoured for the default capacity.
    const eksDefaultCapacity = this.contextNumber('eksDefaultCapacity', 2);
    const eksInstanceType = this.instanceTypeFromContext(
      'eksInstanceClass', 'C6G',
      'eksInstanceSize', 'MEDIUM',
    );

    // Create the EKS cluster
    this.cluster = new eks.Cluster(this, 'IdeServicesCluster', {
      version: kubernetesVersion,
      albController: {
        version: albControllerVersion
      },
      vpc: vpc,
      defaultCapacity: eksDefaultCapacity,
      defaultCapacityInstance: eksInstanceType,
      kubectlLayer: new KubectlV32Layer(this, 'kubectl'),
      authenticationMode: eks.AuthenticationMode.API_AND_CONFIG_MAP,
      bootstrapClusterCreatorAdminPermissions: true,
      // Enable CloudWatch logging
      clusterLogging: [
        eks.ClusterLoggingTypes.API,
        eks.ClusterLoggingTypes.AUDIT,
        eks.ClusterLoggingTypes.AUTHENTICATOR,
        eks.ClusterLoggingTypes.CONTROLLER_MANAGER,
        eks.ClusterLoggingTypes.SCHEDULER,
      ],
    });

    // Add a GPU node group if useMellum is enabled.
    // The flag is parsed explicitly so that the string "false" (as delivered by
    // `cdk deploy -c useMellum=false`) does not enable the feature.
    const useMellum = this.contextFlag('useMellum');
    if (useMellum) {
      const gpuNodeGroupMinSize = this.contextNumber('gpuNodeGroupMinSize', 1);
      const gpuNodeGroupMaxSize = this.contextNumber('gpuNodeGroupMaxSize', 2);
      const gpuInstanceType = this.instanceTypeFromContext(
        'gpuNodeGroupInstanceClass', 'G6',
        'gpuNodeGroupInstanceSize', 'XLARGE',
      );

      const gpuNodeGroup = this.cluster.addNodegroupCapacity('IdeServicesGpuNodeGroup', {
        instanceTypes: [gpuInstanceType],
        minSize: gpuNodeGroupMinSize,
        maxSize: gpuNodeGroupMaxSize,
        diskSize: 100,
        labels: {
          'node-type': 'gpu',
          'accelerator': 'gpu'
        },
        // Keep non-GPU workloads off these nodes; pods must tolerate this taint.
        taints: [
          {
            effect: eks.TaintEffect.NO_SCHEDULE,
            key: 'nvidia.com/gpu',
            value: 'true'
          }
        ]
      });

      // DaemonSet running the NVIDIA device plugin on the GPU-labelled nodes
      const nvidiaDevicePlugin = this.cluster.addManifest('NvidiaGpuDevicePlugin', {
        apiVersion: 'apps/v1',
        kind: 'DaemonSet',
        metadata: {name: 'nvidia-device-plugin-daemonset', namespace: 'kube-system'},
        spec: {
          selector: {matchLabels: {name: 'nvidia-device-plugin-ds'}},
          template: {
            metadata: {labels: {name: 'nvidia-device-plugin-ds'}},
            spec: {
              priorityClassName: 'system-node-critical',
              nodeSelector: {'accelerator': 'gpu'},
              tolerations: [
                {key: 'nvidia.com/gpu', operator: 'Equal', value: 'true', effect: 'NoSchedule'}
              ],
              containers: [{
                name: 'nvidia-dp-cntr',
                image: 'nvcr.io/nvidia/k8s-device-plugin:v0.16.2',
                args: ['--fail-on-init-error=false'],
                securityContext: {privileged: true},
                volumeMounts: [{name: 'device-plugin', mountPath: '/var/lib/kubelet/device-plugins'}]
              }],
              volumes: [{name: 'device-plugin', hostPath: {path: '/var/lib/kubelet/device-plugins'}}]
            }
          }
        }
      });
      nvidiaDevicePlugin.node.addDependency(gpuNodeGroup);
    }

    // Only accept HTTP on the cluster security group from CloudFront's
    // origin-facing managed prefix list.
    const prefixListId = this.getCloudFrontPrefixList();
    this.cluster.clusterSecurityGroup.addIngressRule(
      ec2.Peer.prefixList(prefixListId),
      ec2.Port.tcp(80),
      'Allow HTTP from CloudFront'
    );

    // Create the namespace for IDE services
    const namespace = this.cluster.addManifest('IdeServicesNamespace', {
      apiVersion: 'v1',
      kind: 'Namespace',
      metadata: {
        name: 'kube-ide-services',
      },
    });

    // Create a service account for the application
    this.serviceAccount = this.cluster.addServiceAccount('IdeServicesServiceAccount', {
      name: 'ide-services-sa',
      namespace: 'kube-ide-services',
    });

    // Attach CloudWatchAgentServerPolicy to the service account
    this.serviceAccount.role.addManagedPolicy(
      iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy')
    );

    // Make sure the service account depends on the namespace
    this.serviceAccount.node.addDependency(namespace);

    // Install the metrics-server EKS add-on
    const metricsAddon = new eks.CfnAddon(this, 'metrics-server', {
      clusterName: this.cluster.clusterName,
      addonName: 'metrics-server',
      addonVersion: 'v0.7.2-eksbuild.3',
      serviceAccountRoleArn: this.serviceAccount.role.roleArn,
      resolveConflicts: 'OVERWRITE',
    });
    metricsAddon.node.addDependency(this.cluster);

    // Enable CloudWatch Container Insights
    const cloudwatchAddon = new eks.CfnAddon(this, 'cloudwatch-observability', {
      clusterName: this.cluster.clusterName,
      addonName: 'amazon-cloudwatch-observability',
      addonVersion: 'v4.0.1-eksbuild.1',
      serviceAccountRoleArn: this.serviceAccount.role.roleArn,
      resolveConflicts: 'OVERWRITE',
    });
    cloudwatchAddon.node.addDependency(this.cluster);

    // Install External Secrets Operator
    const externalSecretsChart = this.cluster.addHelmChart('ExternalSecretsOperator', {
      chart: 'external-secrets',
      repository: 'https://charts.external-secrets.io',
      namespace: 'external-secrets',
      release: 'external-secrets',
      createNamespace: true,
      wait: true,
      timeout: cdk.Duration.minutes(15),
      values: {
        // Ensure webhook is enabled
        webhook: {
          create: true
        }
      }
    });

    // Create a security group for the RDS instance
    const dbSecurityGroup = new ec2.SecurityGroup(this, 'DatabaseSecurityGroup', {
      vpc,
      description: 'Security group for IDE Services PostgreSQL database',
      allowAllOutbound: true,
    });

    // Allow inbound traffic from the EKS cluster to the RDS instance
    dbSecurityGroup.addIngressRule(
      ec2.Peer.securityGroupId(this.cluster.clusterSecurityGroup.securityGroupId),
      ec2.Port.tcp(5432),
      'Allow PostgreSQL access from EKS cluster'
    );

    // Get DB instance type configuration from context or use defaults.
    // NOTE(review): confirm the default class (C6G) is offered for RDS PostgreSQL
    // in the target region.
    const dbInstanceType = this.instanceTypeFromContext(
      'dbInstanceClass', 'C6G',
      'dbInstanceSize', 'MEDIUM',
    );

    // Create a PostgreSQL RDS instance
    const dbInstance = new rds.DatabaseInstance(this, 'IdeServicesDatabase', {
      engine: rds.DatabaseInstanceEngine.postgres({
        version: rds.PostgresEngineVersion.VER_17_4,
      }),
      instanceType: dbInstanceType,
      vpc,
      vpcSubnets: {
        subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
      },
      securityGroups: [dbSecurityGroup],
      allocatedStorage: 20,
      storageType: rds.StorageType.GP3,
      databaseName: 'ideservices',
      credentials: rds.Credentials.fromGeneratedSecret('postgres'), // Auto-generate and store credentials in Secrets Manager
      multiAz: true, // Enable Multi-AZ for high availability
      backupRetention: cdk.Duration.days(7), // Retain backups for 7 days
      deletionProtection: false, // Deletion protection is disabled
      removalPolicy: cdk.RemovalPolicy.SNAPSHOT, // Create a snapshot before deleting the database
      parameters: {
        'rds.force_ssl': '0', // Allow non-SSL connections
        'password_encryption': 'md5', // Use MD5 password encryption (more compatible)
      },
    });

    // The credentials secret is referenced in several places below; resolve it
    // once and fail loudly instead of silently emitting empty secret keys.
    const dbSecret = dbInstance.secret;
    if (!dbSecret) {
      throw new Error('IdeServicesDatabase did not expose a generated credentials secret');
    }

    // Create an S3 bucket for general-purpose storage
    this.dataBucket = new s3.Bucket(this, 'IdeServicesDataBucket', {
      versioned: true,
      encryption: s3.BucketEncryption.S3_MANAGED,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      enforceSSL: true,
      removalPolicy: cdk.RemovalPolicy.RETAIN, // Retain the bucket when the stack is deleted
    });

    // Grant the service account access to the RDS secret
    this.grantSecretAccess(dbSecret);

    // Grant the EKS service account role access to the RDS instance
    dbInstance.grantConnect(this.serviceAccount);

    // Grant the EKS service account role access to the S3 bucket
    this.dataBucket.grantReadWrite(this.serviceAccount);

    // Create a SecretStore to access AWS Secrets Manager
    const secretStore = this.cluster.addManifest('AWSSecretStore', {
      apiVersion: 'external-secrets.io/v1',
      kind: 'SecretStore',
      metadata: {
        name: 'aws-secretstore',
        namespace: 'kube-ide-services',
      },
      spec: {
        provider: {
          aws: {
            service: 'SecretsManager',
            region: this.region,
            auth: {
              jwt: {
                serviceAccountRef: {
                  name: 'ide-services-sa',
                  namespace: 'kube-ide-services',
                },
              },
            },
          },
        },
      },
    });

    // Explicitly add dependency on the namespace, the Helm chart and the SA
    secretStore.node.addDependency(namespace);
    secretStore.node.addDependency(externalSecretsChart);
    secretStore.node.addDependency(this.serviceAccount);

    // Create an ExternalSecret to fetch RDS credentials
    this.externalSecret = this.cluster.addManifest('RDSExternalSecret', {
      apiVersion: 'external-secrets.io/v1',
      kind: 'ExternalSecret',
      metadata: {
        name: 'rds-credentials',
        namespace: 'kube-ide-services',
      },
      spec: {
        refreshInterval: '1h',
        secretStoreRef: {
          name: 'aws-secretstore',
          kind: 'SecretStore',
        },
        target: {
          name: 'ide-services-rds-credentials',
          creationPolicy: 'Owner',
        },
        data: [
          {
            secretKey: 'host',
            remoteRef: {
              key: dbSecret.secretName,
              property: 'host',
            },
          },
          {
            secretKey: 'database',
            remoteRef: {
              key: dbSecret.secretName,
              property: 'dbname',
            },
          },
          {
            secretKey: 'user',
            remoteRef: {
              key: dbSecret.secretName,
              property: 'username',
            },
          },
          {
            secretKey: 'password',
            remoteRef: {
              key: dbSecret.secretName,
              property: 'password',
            },
          },
        ],
      },
    });

    // Ensure the ExternalSecret is created after the SecretStore
    this.externalSecret.node.addDependency(namespace);
    this.externalSecret.node.addDependency(secretStore);

    // Create mellum secrets if mellum is enabled
    if (useMellum) {
      const mellumSecret = this.generateAndStoreRsaKeys();

      // Grant the service account access to the mellum secret
      this.grantSecretAccess(mellumSecret);

      // Create an ExternalSecret to fetch mellum RSA keys
      this.mellumExternalSecret = this.cluster.addManifest('MellumExternalSecret', {
        apiVersion: 'external-secrets.io/v1',
        kind: 'ExternalSecret',
        metadata: {
          name: 'mellum-rsa-keys',
          namespace: 'kube-ide-services',
        },
        spec: {
          refreshInterval: '1h',
          secretStoreRef: {
            name: 'aws-secretstore',
            kind: 'SecretStore',
          },
          target: {
            name: 'ide-services-mellum-credentials',
            creationPolicy: 'Owner',
          },
          data: [
            {
              secretKey: 'jwtPublicKey',
              remoteRef: {
                key: mellumSecret.secretName,
                property: 'public_key',
              },
            },
            {
              secretKey: 'jwtPrivateKey',
              remoteRef: {
                key: mellumSecret.secretName,
                property: 'private_key',
              },
            },
          ],
        },
      });

      // Ensure the ExternalSecret is created after the SecretStore
      this.mellumExternalSecret.node.addDependency(namespace);
      this.mellumExternalSecret.node.addDependency(secretStore);

      // Create a secret in AWS Secrets Manager for mellum token.
      // The token comes from context and is embedded as plain text in the template.
      const mellumTokenSecret = new secretsmanager.Secret(this, 'MellumTokenSecret', {
        secretName: `ide-services-mellum-token-${this.stackName}`,
        description: 'Mellum token for registry pull secrets',
        secretStringValue: cdk.SecretValue.unsafePlainText(
          this.contextString('mellumToken', '')
        )
      });

      // Grant the service account access to the mellum token secret
      this.grantSecretAccess(mellumTokenSecret);

      // Create an ExternalSecret for mellum registry pull secrets
      const mellumPullSecretsExternalSecret = this.cluster.addManifest('MellumPullSecretsExternalSecret', {
        apiVersion: 'external-secrets.io/v1',
        kind: 'ExternalSecret',
        metadata: {
          name: 'mellum-pull-secrets',
          namespace: 'kube-ide-services',
        },
        spec: {
          refreshInterval: '1h',
          secretStoreRef: {
            name: 'aws-secretstore',
            kind: 'SecretStore',
          },
          target: {
            // NOTE(review): 'medium' may be a typo for 'mellum' — confirm against
            // whatever consumes this pull secret before renaming.
            name: 'ide-services-medium-registry-credentials',
            creationPolicy: 'Owner',
            template: {
              type: 'kubernetes.io/dockerconfigjson',
              data: {
                '.dockerconfigjson': `{"auths":{"docker.io":{"auth":"{{ print "mellum:" .token | b64enc }}"}}}`
              }
            }
          },
          data: [
            {
              secretKey: 'token',
              remoteRef: {
                key: mellumTokenSecret.secretName,
              },
            },
          ],
        },
      });

      // Ensure the ExternalSecret is created after the SecretStore
      mellumPullSecretsExternalSecret.node.addDependency(namespace);
      mellumPullSecretsExternalSecret.node.addDependency(secretStore);
    }

    // Create an IAM policy for Bedrock access.
    // NOTE(review): verify these model IDs against the Bedrock model catalogue.
    // The statement below already allows invocation on all resources, so this
    // scoped list does not currently restrict anything.
    const bedrockPolicy = new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: [
        'bedrock:InvokeModel',
        'bedrock:InvokeModelWithResponseStream',
      ],
      resources: [
        // Claude 3.5 Sonnet v2
        `arn:aws:bedrock:${this.region}::foundation-model/anthropic.claude-3-5-sonnet-20240620-v2:0`,
        // Claude 3.5 Haiku
        `arn:aws:bedrock:${this.region}::foundation-model/anthropic.claude-3-5-haiku-20240307-v1:0`,
      ],
    });

    // Create an IAM policy for Bedrock model discovery (also grants invoke on '*')
    const bedrockReadOnlyPolicy = new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: [
        'bedrock:GetFoundationModel',
        'bedrock:ListFoundationModels',
        'bedrock:InvokeModel',
        'bedrock:InvokeModelWithResponseStream'
      ],
      resources: ['*'],
    });

    // Create an IAM user for Bedrock access
    this.bedrockUser = new iam.User(this, 'BedrockUser', {
      userName: `ide-services-bedrock-user-${id}`,
    });

    // Attach the Bedrock policies to the user
    this.bedrockUser.addToPrincipalPolicy(bedrockPolicy);
    this.bedrockUser.addToPrincipalPolicy(bedrockReadOnlyPolicy);

    // Create access key for the Bedrock user
    this.bedrockAccessKey = new iam.CfnAccessKey(this, 'BedrockUserAccessKey', {
      userName: this.bedrockUser.userName,
    });

    // Allow writing logs to CloudWatch
    const cloudWatchPolicy = new iam.PolicyStatement({
      actions: [
        'logs:PutLogEvents',
        'logs:CreateLogStream',
        'logs:DescribeLogStreams',
        'logs:DescribeLogGroups',
        'logs:CreateLogGroup',
      ],
      resources: ['*'], // Replace '*' with the specific ARN for the log group if you know it
    });
    this.serviceAccount.addToPrincipalPolicy(cloudWatchPolicy);
  }

  /**
   * Reads a string context value.
   *
   * @param key Context key to look up.
   * @param fallback Value used when the key is unset or empty.
   * @returns The context value as a string, or `fallback`.
   */
  private contextString(key: string, fallback: string): string {
    const raw: unknown = this.node.tryGetContext(key);
    return raw ? String(raw) : fallback;
  }

  /**
   * Reads a numeric context value. Values passed on the command line arrive as
   * strings, so they are coerced; an explicit `0` is preserved rather than
   * being replaced by the fallback.
   *
   * @param key Context key to look up.
   * @param fallback Value used when the key is unset or an empty string.
   * @returns The parsed number, or `fallback`.
   * @throws Error when the value is present but not a finite number.
   */
  private contextNumber(key: string, fallback: number): number {
    const raw: unknown = this.node.tryGetContext(key);
    if (raw === undefined || raw === null || raw === '') {
      return fallback;
    }
    const parsed = Number(raw);
    if (!Number.isFinite(parsed)) {
      throw new Error(`Context value '${key}' must be a number, got '${String(raw)}'`);
    }
    return parsed;
  }

  /**
   * Reads a boolean context flag. Strings such as `"false"`, `"0"`, `"no"` and
   * `"off"` (case-insensitive) are treated as disabled; any other non-empty
   * value enables the flag.
   *
   * @param key Context key to look up.
   * @returns `true` when the flag is enabled, `false` otherwise (including unset).
   */
  private contextFlag(key: string): boolean {
    const raw: unknown = this.node.tryGetContext(key);
    if (typeof raw === 'string') {
      return !['', 'false', '0', 'no', 'off'].includes(raw.trim().toLowerCase());
    }
    return Boolean(raw);
  }

  /**
   * Builds an EC2 instance type from two context keys holding the *names* of
   * `ec2.InstanceClass` / `ec2.InstanceSize` members (for example `C6G` and `MEDIUM`).
   *
   * @param classKey Context key for the instance class name.
   * @param defaultClass Class name used when `classKey` is unset.
   * @param sizeKey Context key for the instance size name.
   * @param defaultSize Size name used when `sizeKey` is unset.
   * @returns The resolved instance type.
   * @throws Error when either name is not a member of the corresponding enum.
   */
  private instanceTypeFromContext(
    classKey: string,
    defaultClass: string,
    sizeKey: string,
    defaultSize: string,
  ): ec2.InstanceType {
    const className = this.contextString(classKey, defaultClass);
    const sizeName = this.contextString(sizeKey, defaultSize);

    const instanceClass = ec2.InstanceClass[className as keyof typeof ec2.InstanceClass];
    if (typeof instanceClass !== 'string') {
      throw new Error(`Context value '${classKey}' is not a known ec2.InstanceClass: '${className}'`);
    }
    const instanceSize = ec2.InstanceSize[sizeName as keyof typeof ec2.InstanceSize];
    if (typeof instanceSize !== 'string') {
      throw new Error(`Context value '${sizeKey}' is not a known ec2.InstanceSize: '${sizeName}'`);
    }

    return ec2.InstanceType.of(instanceClass, instanceSize);
  }

  /**
   * Lets the application service account read a Secrets Manager secret: the
   * standard read grant plus an explicit statement covering the actions listed
   * below.
   *
   * @param secret Secret the service account should be able to read.
   */
  private grantSecretAccess(secret: secretsmanager.ISecret): void {
    secret.grantRead(this.serviceAccount);
    this.serviceAccount.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [
        'secretsmanager:GetResourcePolicy',
        'secretsmanager:GetSecretValue',
        'secretsmanager:DescribeSecret',
        'secretsmanager:ListSecretVersionIds'
      ],
      resources: [secret.secretArn]
    }));
  }

  /**
   * Generates an RSA key pair through a Lambda-backed custom resource and
   * stores both keys in a Secrets Manager secret.
   *
   * @returns The secret holding `public_key` and `private_key`.
   */
  private generateAndStoreRsaKeys(): secretsmanager.Secret {
    // Create the Lambda function for RSA generation using Node.js runtime
    const rsaGeneratorFunction = new lambda.Function(this, 'RSAGeneratorFunction', {
      runtime: lambda.Runtime.NODEJS_18_X,
      handler: 'index.handler',
      timeout: cdk.Duration.minutes(5),
      code: lambda.Code.fromInline(`
        const crypto = require('crypto');

        exports.handler = async (event, context) => {
          console.log('Event:', JSON.stringify(event, null, 2));

          try {
            const requestType = event.RequestType || 'Create';

            if (requestType === 'Create' || requestType === 'Update') {
              // Generate RSA key pair
              const { publicKey, privateKey } = crypto.generateKeyPairSync('rsa', {
                modulusLength: 2048,
                publicKeyEncoding: {
                  type: 'spki',
                  format: 'pem'
                },
                privateKeyEncoding: {
                  type: 'pkcs8',
                  format: 'pem'
                }
              });

              console.log('RSA key pair generated successfully');

              return {
                PhysicalResourceId: 'RSAKeyPair-' + Date.now(),
                Data: {
                  public_key: publicKey,
                  private_key: privateKey
                }
              };
            } else {
              // For Delete operations, just return success
              return {
                PhysicalResourceId: event.PhysicalResourceId || 'RSAKeyPair'
              };
            }
          } catch (error) {
            console.error('Error generating RSA keys:', error);
            throw new Error(\`Failed to generate RSA keys: \${error.message}\`);
          }
        };
      `),
    });

    // Create a custom resource to generate RSA key pair using the Lambda function
    const rsaGeneratorProvider = new cr.Provider(this, 'RSAGeneratorProvider', {
      onEventHandler: rsaGeneratorFunction,
    });

    const generateRSAKeys = new cdk.CustomResource(this, 'GenerateRSAKeys', {
      serviceToken: rsaGeneratorProvider.serviceToken,
      properties: {
        // Force update by adding timestamp.
        // NOTE(review): the value changes on every synthesis and the handler
        // generates a fresh key pair on Update, so each deployment replaces the
        // keys — confirm that is intended.
        Timestamp: Date.now().toString()
      }
    });

    // Create a secret in AWS Secrets Manager for mellum RSA keys with generated values
    const mellumSecret = new secretsmanager.Secret(this, 'MellumRSAKeys', {
      secretName: `ide-services-mellum-rsa-keys-${this.stackName}`,
      description: 'RSA public and private keys for Mellum authentication',
      secretObjectValue: {
        public_key: cdk.SecretValue.unsafePlainText(generateRSAKeys.getAtt('public_key').toString()),
        private_key: cdk.SecretValue.unsafePlainText(generateRSAKeys.getAtt('private_key').toString())
      }
    });

    return mellumSecret;
  }

  /**
   * Looks up the ID of the AWS-managed CloudFront origin-facing prefix list
   * via an SDK-call custom resource.
   *
   * @returns A deploy-time token resolving to the prefix list ID.
   */
  private getCloudFrontPrefixList(): string {
    // Create a custom resource to fetch the CloudFront managed prefix list.
    // Only `onUpdate` is supplied; per the AwsCustomResource documentation that
    // call is also used on create when `onCreate` is omitted.
    const describePrefixLists = new cr.AwsCustomResource(this, 'DescribePrefixLists', {
      onUpdate: {
        // The AWS service and API call we want to make
        service: 'EC2',
        action: 'describeManagedPrefixLists',
        // Parameters for the API call
        parameters: {
          Filters: [
            {
              Name: 'owner-id',
              Values: ['AWS']
            },
            {
              Name: 'prefix-list-name',
              Values: ['com.amazonaws.global.cloudfront.origin-facing']
            }
          ]
        },
        // Fixed physical ID for the custom resource
        physicalResourceId: cr.PhysicalResourceId.of('CloudFrontPrefixList'),
      },
      // Policy that allows the custom resource to call the API
      policy: cr.AwsCustomResourcePolicy.fromSdkCalls({
        resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE,
      }),
    });

    // Extract the prefix list ID from the custom resource result
    return describePrefixLists.getResponseField('PrefixLists.0.PrefixListId');
  }
}