lib/ide-services-resources.ts (529 lines of code) (raw):
import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';
import * as eks from 'aws-cdk-lib/aws-eks';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as rds from 'aws-cdk-lib/aws-rds';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import * as cr from 'aws-cdk-lib/custom-resources';
import { KubectlV32Layer } from '@aws-cdk/lambda-layer-kubectl-v32';
import {KubernetesManifest} from "aws-cdk-lib/aws-eks/lib/k8s-manifest";
/**
 * Nested stack that provisions the AWS infrastructure for IDE Services.
 *
 * Resources created:
 * - a VPC and an EKS cluster (plus an optional GPU node group for Mellum),
 * - the `kube-ide-services` namespace and an IRSA-backed service account,
 * - the metrics-server and CloudWatch observability EKS add-ons,
 * - the External Secrets Operator with a SecretStore/ExternalSecrets that
 *   project AWS Secrets Manager secrets into the cluster,
 * - a PostgreSQL RDS instance and an S3 data bucket,
 * - an IAM user (with access key) allowed to call Amazon Bedrock.
 *
 * Behaviour is tuned through CDK context keys: `eksDefaultCapacity`,
 * `eksInstanceClass`, `eksInstanceSize`, `dbInstanceClass`, `dbInstanceSize`,
 * `useMellum`, `gpuNodeGroupMinSize`, `gpuNodeGroupMaxSize`,
 * `gpuNodeGroupInstanceClass`, `gpuNodeGroupInstanceSize` and `mellumToken`.
 * Context values supplied on the command line (`-c key=value`) arrive as
 * strings, so every value is parsed explicitly instead of relying on
 * truthiness.
 */
export class IdeServicesResources extends cdk.NestedStack {
  /** Kubernetes namespace that holds the IDE Services workloads and secrets. */
  private static readonly NAMESPACE = 'kube-ide-services';
  /** Name of the Kubernetes service account bound to the IRSA role. */
  private static readonly SERVICE_ACCOUNT_NAME = 'ide-services-sa';
  /** Name of the External Secrets `SecretStore` backed by AWS Secrets Manager. */
  private static readonly SECRET_STORE_NAME = 'aws-secretstore';
  /** API version used for all External Secrets Operator custom resources. */
  private static readonly EXTERNAL_SECRETS_API_VERSION = 'external-secrets.io/v1';
  /** Secrets Manager actions granted on every secret projected into the cluster. */
  private static readonly SECRET_READ_ACTIONS = [
    'secretsmanager:GetResourcePolicy',
    'secretsmanager:GetSecretValue',
    'secretsmanager:DescribeSecret',
    'secretsmanager:ListSecretVersionIds',
  ];

  /** EKS cluster hosting the IDE Services workloads. */
  cluster: eks.Cluster;
  /** ExternalSecret manifest that syncs the RDS credentials into the cluster. */
  externalSecret: KubernetesManifest;
  /** ExternalSecret manifest for the Mellum RSA keys; only set when `useMellum` is enabled. */
  mellumExternalSecret?: KubernetesManifest;
  /** General-purpose S3 bucket the service account can read and write. */
  dataBucket: s3.Bucket;
  /** IAM user allowed to call Amazon Bedrock. */
  bedrockUser: iam.User;
  /** Access key issued for {@link bedrockUser}. */
  bedrockAccessKey: iam.CfnAccessKey;
  /** IRSA-backed Kubernetes service account used by the application. */
  serviceAccount: eks.ServiceAccount;

  /**
   * @param scope Parent construct.
   * @param id Construct id; also used as the suffix of the Bedrock IAM user name.
   * @param props Optional nested stack properties, passed to the base class unchanged.
   * @throws Error when a context value cannot be parsed (non-integer size,
   *         unknown EC2 instance class or size).
   */
  constructor(scope: Construct, id: string, props?: cdk.NestedStackProps) {
    super(scope, id, props);

    // Create a VPC for the EKS cluster
    const vpc = new ec2.Vpc(this, 'IdeServicesVpc', {
      maxAzs: 2,
      natGateways: 1,
    });

    // Create the EKS cluster; capacity and instance type come from context or defaults
    this.cluster = new eks.Cluster(this, 'IdeServicesCluster', {
      version: eks.KubernetesVersion.V1_32,
      albController: {
        version: eks.AlbControllerVersion.V2_8_2,
      },
      vpc,
      defaultCapacity: this.contextNumber('eksDefaultCapacity', 2),
      defaultCapacityInstance: this.contextInstanceType(
        'eksInstanceClass', 'C6G',
        'eksInstanceSize', 'MEDIUM'
      ),
      kubectlLayer: new KubectlV32Layer(this, 'kubectl'),
      authenticationMode: eks.AuthenticationMode.API_AND_CONFIG_MAP,
      bootstrapClusterCreatorAdminPermissions: true,
      // Enable CloudWatch logging for every control-plane log type
      clusterLogging: [
        eks.ClusterLoggingTypes.API,
        eks.ClusterLoggingTypes.AUDIT,
        eks.ClusterLoggingTypes.AUTHENTICATOR,
        eks.ClusterLoggingTypes.CONTROLLER_MANAGER,
        eks.ClusterLoggingTypes.SCHEDULER,
      ],
    });

    // Add a GPU node group if useMellum is enabled
    const useMellum = this.contextFlag('useMellum');
    if (useMellum) {
      this.addGpuNodeGroup();
    }

    // Only CloudFront origin-facing addresses may reach the cluster over HTTP
    this.cluster.clusterSecurityGroup.addIngressRule(
      ec2.Peer.prefixList(this.getCloudFrontPrefixList()),
      ec2.Port.tcp(80),
      'Allow HTTP from CloudFront'
    );

    // Create the namespace for IDE services
    const namespace = this.cluster.addManifest('IdeServicesNamespace', {
      apiVersion: 'v1',
      kind: 'Namespace',
      metadata: {
        name: IdeServicesResources.NAMESPACE,
      },
    });

    // Create a service account for the application
    this.serviceAccount = this.cluster.addServiceAccount('IdeServicesServiceAccount', {
      name: IdeServicesResources.SERVICE_ACCOUNT_NAME,
      namespace: IdeServicesResources.NAMESPACE,
    });
    // Attach CloudWatchAgentServerPolicy to the service account
    this.serviceAccount.role.addManagedPolicy(
      iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy')
    );
    // The service account can only be created once its namespace exists
    this.serviceAccount.node.addDependency(namespace);

    // Install the metrics-server EKS add-on.
    // NOTE(review): both add-ons reuse the application service account's IRSA role;
    // confirm the add-ons' own service accounts are able to assume it.
    const metricsAddon = new eks.CfnAddon(this, 'metrics-server', {
      clusterName: this.cluster.clusterName,
      addonName: 'metrics-server',
      addonVersion: 'v0.7.2-eksbuild.3',
      serviceAccountRoleArn: this.serviceAccount.role.roleArn,
      resolveConflicts: 'OVERWRITE',
    });
    metricsAddon.node.addDependency(this.cluster);

    // Enable CloudWatch Container Insights
    const cloudwatchAddon = new eks.CfnAddon(this, 'cloudwatch-observability', {
      clusterName: this.cluster.clusterName,
      addonName: 'amazon-cloudwatch-observability',
      addonVersion: 'v4.0.1-eksbuild.1',
      serviceAccountRoleArn: this.serviceAccount.role.roleArn,
      resolveConflicts: 'OVERWRITE',
    });
    cloudwatchAddon.node.addDependency(this.cluster);

    // Install External Secrets Operator
    const externalSecretsChart = this.cluster.addHelmChart('ExternalSecretsOperator', {
      chart: 'external-secrets',
      repository: 'https://charts.external-secrets.io',
      namespace: 'external-secrets',
      release: 'external-secrets',
      createNamespace: true,
      wait: true,
      timeout: cdk.Duration.minutes(15),
      values: {
        // Ensure webhook is enabled
        webhook: {
          create: true,
        },
      },
    });

    const dbInstance = this.createDatabase(vpc);

    // Create an S3 bucket for general-purpose storage
    this.dataBucket = new s3.Bucket(this, 'IdeServicesDataBucket', {
      versioned: true,
      encryption: s3.BucketEncryption.S3_MANAGED,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      enforceSSL: true,
      removalPolicy: cdk.RemovalPolicy.RETAIN, // Retain the bucket when the stack is deleted
    });

    // The credentials are generated by RDS, so the secret is expected to exist;
    // fail loudly instead of emitting an ExternalSecret with an empty remote key.
    const dbSecret = dbInstance.secret;
    if (!dbSecret) {
      throw new Error('IdeServicesDatabase has no generated credentials secret');
    }
    // Grant the service account access to the RDS secret
    this.grantSecretAccess(dbSecret);
    // Grant the EKS service account role access to the RDS instance
    dbInstance.grantConnect(this.serviceAccount);
    // Grant the EKS service account role access to the S3 bucket
    this.dataBucket.grantReadWrite(this.serviceAccount);

    // Create a SecretStore to access AWS Secrets Manager
    const secretStore = this.cluster.addManifest('AWSSecretStore', {
      apiVersion: IdeServicesResources.EXTERNAL_SECRETS_API_VERSION,
      kind: 'SecretStore',
      metadata: {
        name: IdeServicesResources.SECRET_STORE_NAME,
        namespace: IdeServicesResources.NAMESPACE,
      },
      spec: {
        provider: {
          aws: {
            service: 'SecretsManager',
            region: this.region,
            auth: {
              jwt: {
                serviceAccountRef: {
                  name: IdeServicesResources.SERVICE_ACCOUNT_NAME,
                  namespace: IdeServicesResources.NAMESPACE,
                },
              },
            },
          },
        },
      },
    });
    // The SecretStore needs the namespace, the operator's CRDs and the service account
    secretStore.node.addDependency(namespace);
    secretStore.node.addDependency(externalSecretsChart);
    secretStore.node.addDependency(this.serviceAccount);

    // Create an ExternalSecret to fetch RDS credentials
    this.externalSecret = this.addExternalSecret({
      constructId: 'RDSExternalSecret',
      name: 'rds-credentials',
      target: {
        name: 'ide-services-rds-credentials',
        creationPolicy: 'Owner',
      },
      remoteSecretName: dbSecret.secretName,
      keys: [
        ['host', 'host'],
        ['database', 'dbname'],
        ['user', 'username'],
        ['password', 'password'],
      ],
      dependsOn: [namespace, secretStore],
    });

    // Create mellum secrets if mellum is enabled
    if (useMellum) {
      this.mellumExternalSecret = this.addMellumSecrets(namespace, secretStore);
    }

    // Create an IAM user for Bedrock access, plus an access key for it
    this.bedrockUser = this.createBedrockUser(id);
    this.bedrockAccessKey = new iam.CfnAccessKey(this, 'BedrockUserAccessKey', {
      userName: this.bedrockUser.userName,
    });

    // Allow writing logs to CloudWatch
    this.serviceAccount.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [
        'logs:PutLogEvents',
        'logs:CreateLogStream',
        'logs:DescribeLogStreams',
        'logs:DescribeLogGroups',
        'logs:CreateLogGroup',
      ],
      resources: ['*'], // Replace '*' with the specific ARN for the log group if you know it
    }));
  }

  /**
   * Reads a string context value, falling back when it is unset or empty.
   */
  private contextString(key: string, fallback: string): string {
    const raw: unknown = this.node.tryGetContext(key);
    if (raw === undefined || raw === null || raw === '') {
      return fallback;
    }
    return String(raw);
  }

  /**
   * Reads a non-negative integer context value.
   *
   * Accepts both JSON numbers (from `cdk.json`) and numeric strings (from
   * `-c key=value`). Unlike `value || fallback`, an explicit `0` is honoured.
   *
   * @throws Error when the value is present but is not a non-negative integer.
   */
  private contextNumber(key: string, fallback: number): number {
    const raw: unknown = this.node.tryGetContext(key);
    if (raw === undefined || raw === null || (typeof raw === 'string' && raw.trim() === '')) {
      return fallback;
    }
    const value: unknown = typeof raw === 'string' ? Number(raw.trim()) : raw;
    if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) {
      throw new Error(`Context value '${key}' must be a non-negative integer, got '${String(raw)}'`);
    }
    return value;
  }

  /**
   * Reads a boolean context flag.
   *
   * Strings such as `"false"`, `"0"`, `"no"` and `"off"` (what the CLI passes
   * for `-c flag=false`) are treated as disabled; plain truthiness would treat
   * them as enabled. Every other value keeps its JavaScript truthiness.
   */
  private contextFlag(key: string): boolean {
    const raw: unknown = this.node.tryGetContext(key);
    if (typeof raw === 'string') {
      return !['', 'false', '0', 'no', 'off'].includes(raw.trim().toLowerCase());
    }
    return Boolean(raw);
  }

  /**
   * Builds an EC2 instance type from two context keys holding the names of
   * `ec2.InstanceClass` / `ec2.InstanceSize` members (case-insensitive).
   *
   * @throws Error when either name is not a member of the corresponding enum.
   */
  private contextInstanceType(
    classKey: string,
    defaultClass: string,
    sizeKey: string,
    defaultSize: string
  ): ec2.InstanceType {
    const className = this.contextString(classKey, defaultClass).toUpperCase();
    const sizeName = this.contextString(sizeKey, defaultSize).toUpperCase();
    const instanceClass = ec2.InstanceClass[className as keyof typeof ec2.InstanceClass];
    const instanceSize = ec2.InstanceSize[sizeName as keyof typeof ec2.InstanceSize];
    if (instanceClass === undefined) {
      throw new Error(`Context value '${classKey}': unknown EC2 instance class '${className}'`);
    }
    if (instanceSize === undefined) {
      throw new Error(`Context value '${sizeKey}': unknown EC2 instance size '${sizeName}'`);
    }
    return ec2.InstanceType.of(instanceClass, instanceSize);
  }

  /**
   * Adds the tainted GPU node group and the NVIDIA device plugin DaemonSet
   * that runs on it.
   */
  private addGpuNodeGroup(): void {
    const gpuNodeGroup = this.cluster.addNodegroupCapacity('IdeServicesGpuNodeGroup', {
      instanceTypes: [this.contextInstanceType(
        'gpuNodeGroupInstanceClass', 'G6',
        'gpuNodeGroupInstanceSize', 'XLARGE'
      )],
      minSize: this.contextNumber('gpuNodeGroupMinSize', 1),
      maxSize: this.contextNumber('gpuNodeGroupMaxSize', 2),
      diskSize: 100,
      labels: {
        'node-type': 'gpu',
        'accelerator': 'gpu',
      },
      // Keep workloads that do not tolerate the taint off the GPU nodes
      taints: [
        {
          effect: eks.TaintEffect.NO_SCHEDULE,
          key: 'nvidia.com/gpu',
          value: 'true',
        },
      ],
    });

    const nvidiaDevicePlugin = this.cluster.addManifest('NvidiaGpuDevicePlugin', {
      apiVersion: 'apps/v1',
      kind: 'DaemonSet',
      metadata: {name: 'nvidia-device-plugin-daemonset', namespace: 'kube-system'},
      spec: {
        selector: {matchLabels: {name: 'nvidia-device-plugin-ds'}},
        template: {
          metadata: {labels: {name: 'nvidia-device-plugin-ds'}},
          spec: {
            priorityClassName: 'system-node-critical',
            // Match the label and tolerate the taint set on the GPU node group above
            nodeSelector: {'accelerator': 'gpu'},
            tolerations: [
              {key: 'nvidia.com/gpu', operator: 'Equal', value: 'true', effect: 'NoSchedule'},
            ],
            containers: [{
              name: 'nvidia-dp-cntr',
              image: 'nvcr.io/nvidia/k8s-device-plugin:v0.16.2',
              args: ['--fail-on-init-error=false'],
              securityContext: {privileged: true},
              volumeMounts: [{name: 'device-plugin', mountPath: '/var/lib/kubelet/device-plugins'}],
            }],
            volumes: [{name: 'device-plugin', hostPath: {path: '/var/lib/kubelet/device-plugins'}}],
          },
        },
      },
    });
    nvidiaDevicePlugin.node.addDependency(gpuNodeGroup);
  }

  /**
   * Creates the PostgreSQL RDS instance and a security group that admits
   * port 5432 traffic from the EKS cluster security group.
   *
   * @param vpc VPC in which the database and its security group are placed.
   * @returns The database instance; its generated credentials live in Secrets Manager.
   */
  private createDatabase(vpc: ec2.IVpc): rds.DatabaseInstance {
    // Create a security group for the RDS instance
    const dbSecurityGroup = new ec2.SecurityGroup(this, 'DatabaseSecurityGroup', {
      vpc,
      description: 'Security group for IDE Services PostgreSQL database',
      allowAllOutbound: true,
    });
    // Allow inbound traffic from the EKS cluster to the RDS instance
    dbSecurityGroup.addIngressRule(
      ec2.Peer.securityGroupId(this.cluster.clusterSecurityGroup.securityGroupId),
      ec2.Port.tcp(5432),
      'Allow PostgreSQL access from EKS cluster'
    );

    // Create a PostgreSQL RDS instance.
    // NOTE(review): the default class/size resolves to db.c6g.medium; confirm that this
    // is an instance class RDS actually offers for PostgreSQL in the target region.
    return new rds.DatabaseInstance(this, 'IdeServicesDatabase', {
      engine: rds.DatabaseInstanceEngine.postgres({
        version: rds.PostgresEngineVersion.VER_17_4,
      }),
      instanceType: this.contextInstanceType(
        'dbInstanceClass', 'C6G',
        'dbInstanceSize', 'MEDIUM'
      ),
      vpc,
      vpcSubnets: {
        subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
      },
      securityGroups: [dbSecurityGroup],
      allocatedStorage: 20,
      storageType: rds.StorageType.GP3,
      databaseName: 'ideservices',
      credentials: rds.Credentials.fromGeneratedSecret('postgres'), // Auto-generate and store credentials in Secrets Manager
      multiAz: true, // Enable Multi-AZ for high availability
      backupRetention: cdk.Duration.days(7), // Retain backups for 7 days
      deletionProtection: false, // Deletion protection is disabled; the snapshot below is the only safeguard
      removalPolicy: cdk.RemovalPolicy.SNAPSHOT, // Create a snapshot before deleting the database
      parameters: {
        'rds.force_ssl': '0', // Allow non-SSL connections
        'password_encryption': 'md5', // Use MD5 password encryption (more compatible)
      },
    });
  }

  /**
   * Lets the application service account read a Secrets Manager secret:
   * the standard read grant plus the extra describe/list actions listed in
   * {@link IdeServicesResources.SECRET_READ_ACTIONS}.
   */
  private grantSecretAccess(secret: secretsmanager.ISecret): void {
    secret.grantRead(this.serviceAccount);
    this.serviceAccount.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [...IdeServicesResources.SECRET_READ_ACTIONS],
      resources: [secret.secretArn],
    }));
  }

  /**
   * Adds an `ExternalSecret` manifest that copies values from one Secrets
   * Manager secret into a Kubernetes secret in the IDE Services namespace.
   *
   * @param options.constructId CDK construct id of the manifest.
   * @param options.name Name of the ExternalSecret resource.
   * @param options.target `spec.target` of the ExternalSecret (target secret name, policy, template).
   * @param options.remoteSecretName Name of the Secrets Manager secret to read.
   * @param options.keys Pairs of `[kubernetesSecretKey, remoteJsonProperty]`; omit the
   *        property to reference the remote secret without a `property` selector.
   * @param options.dependsOn Constructs that must be deployed before this manifest.
   * @returns The created manifest.
   */
  private addExternalSecret(options: {
    constructId: string;
    name: string;
    target: Record<string, unknown>;
    remoteSecretName: string;
    keys: ReadonlyArray<readonly [string, string?]>;
    dependsOn: Construct[];
  }): KubernetesManifest {
    const manifest = this.cluster.addManifest(options.constructId, {
      apiVersion: IdeServicesResources.EXTERNAL_SECRETS_API_VERSION,
      kind: 'ExternalSecret',
      metadata: {
        name: options.name,
        namespace: IdeServicesResources.NAMESPACE,
      },
      spec: {
        refreshInterval: '1h',
        secretStoreRef: {
          name: IdeServicesResources.SECRET_STORE_NAME,
          kind: 'SecretStore',
        },
        target: options.target,
        data: options.keys.map(([secretKey, property]) => ({
          secretKey,
          remoteRef: property === undefined
            ? {key: options.remoteSecretName}
            : {key: options.remoteSecretName, property},
        })),
      },
    });
    manifest.node.addDependency(...options.dependsOn);
    return manifest;
  }

  /**
   * Creates the Mellum secrets (RSA key pair and registry token) and the
   * ExternalSecrets that project them into the cluster.
   *
   * @param namespace Namespace manifest the ExternalSecrets depend on.
   * @param secretStore SecretStore manifest the ExternalSecrets depend on.
   * @returns The ExternalSecret manifest for the RSA keys.
   */
  private addMellumSecrets(namespace: Construct, secretStore: Construct): KubernetesManifest {
    const mellumSecret = this.generateAndStoreRsaKeys();
    // Grant the service account access to the mellum RSA key secret
    this.grantSecretAccess(mellumSecret);

    // Create an ExternalSecret to fetch mellum RSA keys
    const rsaKeysExternalSecret = this.addExternalSecret({
      constructId: 'MellumExternalSecret',
      name: 'mellum-rsa-keys',
      target: {
        name: 'ide-services-mellum-credentials',
        creationPolicy: 'Owner',
      },
      remoteSecretName: mellumSecret.secretName,
      keys: [
        ['jwtPublicKey', 'public_key'],
        ['jwtPrivateKey', 'private_key'],
      ],
      dependsOn: [namespace, secretStore],
    });

    // Create a secret in AWS Secrets Manager for mellum token.
    // NOTE(review): the token is embedded in the CloudFormation template as plain text,
    // and an unset `mellumToken` yields an empty secret value, which Secrets Manager
    // may reject at deploy time. Confirm this is acceptable.
    const mellumTokenSecret = new secretsmanager.Secret(this, 'MellumTokenSecret', {
      secretName: `ide-services-mellum-token-${this.stackName}`,
      description: 'Mellum token for registry pull secrets',
      secretStringValue: cdk.SecretValue.unsafePlainText(
        this.contextString('mellumToken', '')
      ),
    });
    // Grant the service account access to the mellum token secret
    this.grantSecretAccess(mellumTokenSecret);

    // Create an ExternalSecret for mellum registry pull secrets.
    // NOTE(review): the target name says "medium", not "mellum"; it is kept as-is
    // because consumers outside this file may reference it. Confirm before renaming.
    this.addExternalSecret({
      constructId: 'MellumPullSecretsExternalSecret',
      name: 'mellum-pull-secrets',
      target: {
        name: 'ide-services-medium-registry-credentials',
        creationPolicy: 'Owner',
        template: {
          type: 'kubernetes.io/dockerconfigjson',
          data: {
            '.dockerconfigjson': `{"auths":{"docker.io":{"auth":"{{ print "mellum:" .token | b64enc }}"}}}`
          },
        },
      },
      remoteSecretName: mellumTokenSecret.secretName,
      keys: [['token']],
      dependsOn: [namespace, secretStore],
    });

    return rsaKeysExternalSecret;
  }

  /**
   * Creates the IAM user that is allowed to call Amazon Bedrock.
   *
   * @param id Suffix appended to the IAM user name.
   */
  private createBedrockUser(id: string): iam.User {
    // Invoke access scoped to the specific Anthropic models
    const bedrockPolicy = new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: [
        'bedrock:InvokeModel',
        'bedrock:InvokeModelWithResponseStream',
      ],
      resources: [
        // Claude 3.5 Sonnet v2
        `arn:aws:bedrock:${this.region}::foundation-model/anthropic.claude-3-5-sonnet-20241022-v2:0`,
        // Claude 3.5 Haiku
        `arn:aws:bedrock:${this.region}::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0`,
      ],
    });
    // Model discovery.
    // NOTE(review): despite its name this statement also allows the invoke actions
    // on every resource, which makes the scoped statement above redundant. Left
    // unchanged to avoid revoking access callers may depend on.
    const bedrockReadOnlyPolicy = new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: [
        'bedrock:GetFoundationModel',
        'bedrock:ListFoundationModels',
        'bedrock:InvokeModel',
        'bedrock:InvokeModelWithResponseStream',
      ],
      resources: ['*'],
    });

    const user = new iam.User(this, 'BedrockUser', {
      userName: `ide-services-bedrock-user-${id}`,
    });
    // Attach the Bedrock policies to the user
    user.addToPrincipalPolicy(bedrockPolicy);
    user.addToPrincipalPolicy(bedrockReadOnlyPolicy);
    return user;
  }

  /**
   * Generates an RSA key pair with a Lambda-backed custom resource and stores
   * it in Secrets Manager under the JSON keys `public_key` and `private_key`.
   *
   * The custom resource has no changing properties, so the key pair is created
   * once and stays stable across deployments (a per-deployment timestamp would
   * rotate the keys on every deploy). The response is marked `NoEcho` so the
   * key material is masked in CloudFormation output.
   *
   * @returns The Secrets Manager secret holding the key pair.
   */
  private generateAndStoreRsaKeys() {
    // Create the Lambda function for RSA generation using Node.js runtime
    const rsaGeneratorFunction = new lambda.Function(this, 'RSAGeneratorFunction', {
      runtime: lambda.Runtime.NODEJS_22_X,
      handler: 'index.handler',
      timeout: cdk.Duration.minutes(5),
      code: lambda.Code.fromInline(`
const crypto = require('crypto');

exports.handler = async (event, context) => {
  console.log('Event:', JSON.stringify(event, null, 2));
  try {
    const requestType = event.RequestType || 'Create';
    if (requestType !== 'Create' && requestType !== 'Update') {
      // For Delete operations, just return success
      return {
        PhysicalResourceId: event.PhysicalResourceId || 'RSAKeyPair'
      };
    }
    // Generate RSA key pair
    const { publicKey, privateKey } = crypto.generateKeyPairSync('rsa', {
      modulusLength: 2048,
      publicKeyEncoding: {
        type: 'spki',
        format: 'pem'
      },
      privateKeyEncoding: {
        type: 'pkcs8',
        format: 'pem'
      }
    });
    console.log('RSA key pair generated successfully');
    return {
      // Reuse the existing id on Update so CloudFormation does not replace the resource
      PhysicalResourceId: event.PhysicalResourceId || 'RSAKeyPair-' + Date.now(),
      // Mask the key material in CloudFormation output
      NoEcho: true,
      Data: {
        public_key: publicKey,
        private_key: privateKey
      }
    };
  } catch (error) {
    console.error('Error generating RSA keys:', error);
    throw new Error('Failed to generate RSA keys: ' + error.message);
  }
};
`),
    });

    // Create a custom resource to generate RSA key pair using the Lambda function
    const rsaGeneratorProvider = new cr.Provider(this, 'RSAGeneratorProvider', {
      onEventHandler: rsaGeneratorFunction,
    });
    const generateRSAKeys = new cdk.CustomResource(this, 'GenerateRSAKeys', {
      serviceToken: rsaGeneratorProvider.serviceToken,
    });

    // Create a secret in AWS Secrets Manager for mellum RSA keys with generated values.
    // getAtt() yields deploy-time tokens, so the template carries references, not the keys.
    const mellumSecret = new secretsmanager.Secret(this, 'MellumRSAKeys', {
      secretName: `ide-services-mellum-rsa-keys-${this.stackName}`,
      description: 'RSA public and private keys for Mellum authentication',
      secretObjectValue: {
        public_key: cdk.SecretValue.unsafePlainText(generateRSAKeys.getAtt('public_key').toString()),
        private_key: cdk.SecretValue.unsafePlainText(generateRSAKeys.getAtt('private_key').toString()),
      },
    });
    return mellumSecret;
  }

  /**
   * Looks up the id of the AWS-managed prefix list
   * `com.amazonaws.global.cloudfront.origin-facing` at deploy time.
   *
   * @returns A deploy-time token that resolves to the prefix list id.
   */
  private getCloudFrontPrefixList(): string {
    // Create a custom resource to fetch the CloudFront managed prefix list
    const describePrefixLists = new cr.AwsCustomResource(this, 'DescribePrefixLists', {
      onUpdate: {
        // The AWS service and API call we want to make
        service: 'EC2',
        action: 'describeManagedPrefixLists',
        // Restrict the result to the AWS-owned CloudFront origin-facing list
        parameters: {
          Filters: [
            {
              Name: 'owner-id',
              Values: ['AWS'],
            },
            {
              Name: 'prefix-list-name',
              Values: ['com.amazonaws.global.cloudfront.origin-facing'],
            },
          ],
        },
        // Fixed physical id: the lookup always represents the same logical resource
        physicalResourceId: cr.PhysicalResourceId.of('CloudFrontPrefixList'),
      },
      // Policy that allows the custom resource to call the API
      policy: cr.AwsCustomResourcePolicy.fromSdkCalls({
        resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE,
      }),
    });
    // Extract the prefix list ID from the custom resource result
    return describePrefixLists.getResponseField('PrefixLists.0.PrefixListId');
  }
}