in core/src/emr-eks-platform/emr-eks-cluster.ts [112:361]
private constructor(scope: Construct, id: string, props: EmrEksClusterProps) {
  super(scope, id);

  this.clusterName = props.eksClusterName ?? EmrEksCluster.DEFAULT_CLUSTER_NAME;

  // Create the Amazon EKS cluster with default parameters if not provided in the properties.
  // The common cluster properties are factored out so the two branches differ only in
  // whether a user-provided VPC is attached.
  const clusterProps = {
    defaultCapacity: 0,
    clusterName: this.clusterName,
    version: props.kubernetesVersion ?? EmrEksCluster.DEFAULT_EKS_VERSION,
  };
  if (props.eksVpcAttributes != undefined) {
    // Reuse the VPC described by the provided attributes instead of creating a new one
    this.eksVpc = Vpc.fromVpcAttributes(this, 'eksProvidedVpc', props.eksVpcAttributes);
    this.eksCluster = new Cluster(scope, `${this.clusterName}Cluster`, {
      ...clusterProps,
      vpc: this.eksVpc,
    });
  } else {
    // No VPC provided: the EKS construct creates a dedicated VPC
    this.eksCluster = new Cluster(scope, `${this.clusterName}Cluster`, clusterProps);
  }

  // Add the provided Amazon IAM Role as Amazon EKS Admin
  this.eksCluster.awsAuth.addMastersRole(Role.fromRoleArn( this, 'AdminRole', props.eksAdminRoleArn ), 'AdminRole');

  // Create a Kubernetes Service Account for the Cluster Autoscaler with Amazon IAM Role
  const AutoscalerServiceAccount = this.eksCluster.addServiceAccount('Autoscaler', {
    name: 'cluster-autoscaler',
    namespace: 'kube-system',
  });

  // Add the proper Amazon IAM Policy to the Amazon IAM Role for the Cluster Autoscaler
  AutoscalerServiceAccount.addToPrincipalPolicy(
    EmrEksCluster.AUTOSCALING_POLICY,
  );

  // @todo: check if we can create the service account from the Helm Chart
  // @todo: check if there's a workaround to run it with wait:true - at the moment the custom resource times out if you do that.
  // Deploy the Helm Chart for Kubernetes Cluster Autoscaler
  // The service account is created above (create: false below) so the IRSA role binding is managed by CDK
  this.eksCluster.addHelmChart('AutoScaler', {
    chart: 'cluster-autoscaler',
    repository: 'https://kubernetes.github.io/autoscaler',
    namespace: 'kube-system',
    timeout: Duration.minutes(14),
    values: {
      cloudProvider: 'aws',
      awsRegion: Stack.of(this).region,
      autoDiscovery: { clusterName: this.clusterName },
      rbac: {
        serviceAccount: {
          name: 'cluster-autoscaler',
          create: false,
        },
      },
      extraArgs: {
        'skip-nodes-with-local-storage': false,
        'scan-interval': '5s',
        'expander': 'least-waste',
        'balance-similar-node-groups': true,
        'skip-nodes-with-system-pods': false,
      },
    },
  });

  // Tag the Amazon VPC and Subnets of the Amazon EKS Cluster so the
  // Amazon EMR on EKS managed policies can scope to these resources
  Tags.of(this.eksCluster.vpc).add(
    'for-use-with-amazon-emr-managed-policies',
    'true',
  );
  this.eksCluster.vpc.privateSubnets.forEach((subnet) =>
    Tags.of(subnet).add('for-use-with-amazon-emr-managed-policies', 'true'),
  );
  this.eksCluster.vpc.publicSubnets.forEach((subnet) =>
    Tags.of(subnet).add('for-use-with-amazon-emr-managed-policies', 'true'),
  );

  // Create Amazon IAM ServiceLinkedRole for Amazon EMR and add to kubernetes configmap
  // required to add a dependency on the Amazon EMR virtual cluster
  this.emrServiceRole = new CfnServiceLinkedRole(this, 'EmrServiceRole', {
    awsServiceName: 'emr-containers.amazonaws.com',
  });
  // Use Stack.of(this).partition instead of hard-coding 'aws' so the ARN is
  // valid in non-commercial partitions (aws-cn, aws-us-gov)
  this.eksCluster.awsAuth.addMastersRole(
    Role.fromRoleArn(
      this,
      'ServiceRoleForAmazonEMRContainers',
      `arn:${Stack.of(this).partition}:iam::${
        Stack.of(this).account
      }:role/AWSServiceRoleForAmazonEMRContainers`,
    ),
    'emr-containers',
  );

  // Store the OIDC provider for creating execution roles later
  this.eksOidcProvider = new FederatedPrincipal(
    this.eksCluster.openIdConnectProvider.openIdConnectProviderArn,
    [],
    'sts:AssumeRoleWithWebIdentity',
  );

  // Create the custom resource provider for tagging the EC2 Auto Scaling groups
  this.nodegroupAsgTagsProviderServiceToken = new EmrEksNodegroupAsgTagProvider(this, 'AsgTagProvider', {
    eksClusterName: this.clusterName,
  }).provider.serviceToken;

  // Create the Amazon EKS Nodegroup for tooling
  this.addNodegroupCapacity('tooling', EmrEksNodegroup.TOOLING_ALL);

  // Create default Amazon EMR on EKS Nodegroups. This will create one Amazon EKS nodegroup per AZ
  // Also create default configurations and pod templates for these nodegroups
  this.addEmrEksNodegroup('criticalAll', EmrEksNodegroup.CRITICAL_ALL);
  this.addEmrEksNodegroup('sharedDriver', EmrEksNodegroup.SHARED_DRIVER);
  this.addEmrEksNodegroup('sharedExecutor', EmrEksNodegroup.SHARED_EXECUTOR);
  // Add nodegroups for notebooks
  this.addEmrEksNodegroup('notebookDriver', EmrEksNodegroup.NOTEBOOK_DRIVER);
  this.addEmrEksNodegroup('notebookExecutor', EmrEksNodegroup.NOTEBOOK_EXECUTOR);
  this.addEmrEksNodegroup('notebook', EmrEksNodegroup.NOTEBOOK_WITHOUT_PODTEMPLATE);

  // Create an Amazon S3 Bucket for default podTemplate assets
  this.assetBucket = SingletonBucket.getOrCreate(this, `${this.clusterName.toLowerCase()}-emr-eks-assets`);

  // Configure the podTemplate location
  this.podTemplateLocation = {
    bucketName: this.assetBucket.bucketName,
    objectKey: `${this.clusterName}/pod-template`,
  };

  // Upload the default podTemplate to the Amazon S3 asset bucket
  this.uploadPodTemplate('defaultPodTemplates', join(__dirname, 'resources/k8s/pod-template'));

  // Replace the pod template location for driver and executor with the correct Amazon S3 path in the notebook default config
  // NotebookDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-driver.yaml`);
  // NotebookDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-executor.yaml`);
  this.notebookDefaultConfig = JSON.stringify(NotebookDefaultConfig);

  // Replace the pod template location for driver and executor with the correct Amazon S3 path in the critical default config
  CriticalDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-driver.yaml`);
  CriticalDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-executor.yaml`);
  this.criticalDefaultConfig = JSON.stringify(CriticalDefaultConfig);

  // Replace the pod template location for driver and executor with the correct Amazon S3 path in the shared default config
  SharedDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-driver.yaml`);
  SharedDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-executor.yaml`);
  this.sharedDefaultConfig = JSON.stringify(SharedDefaultConfig);

  // Deploy the Helm Chart for the Certificate Manager. Required for EMR Studio ALB.
  const certManager = this.eksCluster.addHelmChart('CertManager', {
    createNamespace: true,
    namespace: 'cert-manager',
    chart: 'cert-manager',
    repository: 'https://charts.jetstack.io',
    version: 'v1.4.0',
    timeout: Duration.minutes(14),
  });

  // Create service account for ALB and install ALB
  const albPolicyDocument = PolicyDocument.fromJson(IamPolicyAlb);
  const albIAMPolicy = new Policy(
    this,
    'AWSLoadBalancerControllerIAMPolicy',
    { document: albPolicyDocument },
  );
  const albServiceAccount = this.eksCluster.addServiceAccount('ALB', {
    name: 'aws-load-balancer-controller',
    namespace: 'kube-system',
  });
  albIAMPolicy.attachToRole(albServiceAccount.role);
  const albService = this.eksCluster.addHelmChart('ALB', {
    chart: 'aws-load-balancer-controller',
    repository: 'https://aws.github.io/eks-charts',
    namespace: 'kube-system',
    timeout: Duration.minutes(14),
    values: {
      clusterName: this.clusterName,
      serviceAccount: {
        name: 'aws-load-balancer-controller',
        create: false,
      },
    },
  });
  // The ALB controller needs its service account (IRSA) and cert-manager (webhook certs) first
  albService.node.addDependency(albServiceAccount);
  albService.node.addDependency(certManager);

  // Add the kubernetes dashboard from helm chart
  this.eksCluster.addHelmChart('KubernetesDashboard', {
    createNamespace: true,
    namespace: 'kubernetes-dashboard',
    chart: 'kubernetes-dashboard',
    repository: 'https://kubernetes.github.io/dashboard/',
    version: 'v5.0.4',
    timeout: Duration.minutes(2),
    values: {
      fullnameOverride: 'kubernetes-dashboard',
      resources: {
        limits: {
          memory: '600Mi',
        },
      },
    },
  });

  // Add the kubernetes dashboard service account
  this.eksCluster.addManifest('kubedashboard', {
    apiVersion: 'v1',
    kind: 'ServiceAccount',
    metadata: {
      name: 'eks-admin',
      namespace: 'kube-system',
    },
  });
  // Add the kubernetes dashboard cluster role binding.
  // rbac.authorization.k8s.io/v1 is used because v1beta1 was removed in Kubernetes 1.22
  // and would make this manifest fail on current EKS versions.
  this.eksCluster.addManifest('kubedashboardrolebinding', {
    apiVersion: 'rbac.authorization.k8s.io/v1',
    kind: 'ClusterRoleBinding',
    metadata: {
      name: 'eks-admin',
    },
    roleRef: {
      apiGroup: 'rbac.authorization.k8s.io',
      kind: 'ClusterRole',
      name: 'cluster-admin',
    },
    subjects: [
      {
        kind: 'ServiceAccount',
        name: 'eks-admin',
        namespace: 'kube-system',
      },
    ],
  });

  // Set the custom resource provider service token here to avoid circular dependencies
  this.managedEndpointProviderServiceToken = new EmrManagedEndpointProvider(this, 'ManagedEndpointProvider').provider.serviceToken;

  // Provide the Kubernetes Dashboard URL in AWS CloudFormation output
  new CfnOutput(this, 'kubernetesDashboardURL', {
    description: 'Access Kubernetes Dashboard via kubectl proxy and this URL',
    value: 'http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:https/proxy/#/login',
  });

  // Provide the podTemplate location on Amazon S3
  new CfnOutput(this, 'podTemplateLocation', {
    description: 'Use podTemplates in Amazon EMR jobs from this Amazon S3 Location',
    value: this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}`),
  });
}