terraform/eks/daemon/entity/main.tf

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

module "common" {
  source = "../../../common"
}

module "basic_components" {
  source = "../../../basic_components"

  region = var.region
}

locals {
  aws_eks = "aws eks --region ${var.region}"
}

data "aws_eks_cluster_auth" "this" {
  name = aws_eks_cluster.this.name
}

# EKS Cluster
resource "aws_eks_cluster" "this" {
  name     = "cwagent-eks-integ-${module.common.testing_id}"
  role_arn = module.basic_components.role_arn
  version  = var.k8s_version

  vpc_config {
    subnet_ids         = module.basic_components.public_subnet_ids
    security_group_ids = [module.basic_components.security_group]
  }
}

# EKS Node Groups
resource "aws_eks_node_group" "this" {
  cluster_name    = aws_eks_cluster.this.name
  node_group_name = "cwagent-eks-integ-node-${module.common.testing_id}"
  node_role_arn   = aws_iam_role.node_role.arn
  subnet_ids      = module.basic_components.public_subnet_ids

  scaling_config {
    desired_size = 1
    max_size     = 1
    min_size     = 1
  }

  ami_type       = var.ami_type
  capacity_type  = "ON_DEMAND"
  disk_size      = 20
  instance_types = [var.instance_type]

  depends_on = [
    aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly,
    aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy,
    aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy,
    aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy,
    aws_iam_role_policy_attachment.node_AWSXRayDaemonWriteAccess
  ]
}

# EKS Node IAM Role
resource "aws_iam_role" "node_role" {
  name = "cwagent-eks-Worker-Role-${module.common.testing_id}"

  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}

resource "aws_iam_role_policy_attachment" "node_AmazonEKSWorkerNodePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
  role       = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_AmazonEKS_CNI_Policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
  role       = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_AmazonEC2ContainerRegistryReadOnly" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
  role       = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_CloudWatchAgentServerPolicy" {
  policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
  role       = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_AWSXRayDaemonWriteAccess" {
  policy_arn = "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess"
  role       = aws_iam_role.node_role.name
}

# TODO: these security groups should be created once and then reused
# EKS Cluster Security Group
resource "aws_security_group" "eks_cluster_sg" {
  name        = "cwagent-eks-cluster-sg-${module.common.testing_id}"
  description = "Cluster communication with worker nodes"
  vpc_id      = module.basic_components.vpc_id
}

resource "aws_security_group_rule" "cluster_inbound" {
  description              = "Allow worker nodes to communicate with the cluster API Server"
  from_port                = 443
  protocol                 = "tcp"
  security_group_id        = aws_security_group.eks_cluster_sg.id
  source_security_group_id = aws_security_group.eks_nodes_sg.id
  to_port                  = 443
  type                     = "ingress"
}

resource "aws_security_group_rule" "cluster_outbound" {
  description              = "Allow cluster API Server to communicate with the worker nodes"
  from_port                = 1024
  protocol                 = "tcp"
  security_group_id        = aws_security_group.eks_cluster_sg.id
  source_security_group_id = aws_security_group.eks_nodes_sg.id
  to_port                  = 65535
type = "egress" } # EKS Node Security Group resource "aws_security_group" "eks_nodes_sg" { name = "cwagent-eks-node-sg-${module.common.testing_id}" description = "Security group for all nodes in the cluster" vpc_id = module.basic_components.vpc_id egress { from_port = 0 to_port = 0 protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } } resource "aws_security_group_rule" "nodes_internal" { description = "Allow nodes to communicate with each other" from_port = 0 protocol = "-1" security_group_id = aws_security_group.eks_nodes_sg.id source_security_group_id = aws_security_group.eks_nodes_sg.id to_port = 65535 type = "ingress" } resource "aws_security_group_rule" "nodes_cluster_inbound" { description = "Allow worker Kubelets and pods to receive communication from the cluster control plane" from_port = 1025 protocol = "tcp" security_group_id = aws_security_group.eks_nodes_sg.id source_security_group_id = aws_security_group.eks_cluster_sg.id to_port = 65535 type = "ingress" } resource "null_resource" "clone_helm_chart" { triggers = { timestamp = "${timestamp()}" # Forces re-run on every apply } provisioner "local-exec" { command = <<-EOT if [ ! -d "./helm-charts" ]; then git clone -b ${var.helm_chart_branch} https://github.com/aws-observability/helm-charts.git ./helm-charts fi EOT } } resource "helm_release" "aws_observability" { name = "amazon-cloudwatch-observability" chart = "./helm-charts/charts/amazon-cloudwatch-observability" namespace = "amazon-cloudwatch" create_namespace = true set { name = "clusterName" value = aws_eks_cluster.this.name } set { name = "region" value = "us-west-2" } depends_on = [ aws_eks_cluster.this, aws_eks_node_group.this, null_resource.clone_helm_chart] } resource "null_resource" "kubectl" { depends_on = [ aws_eks_cluster.this, aws_eks_node_group.this, ] provisioner "local-exec" { command = <<-EOT ${local.aws_eks} update-kubeconfig --name ${aws_eks_cluster.this.name} ${local.aws_eks} list-clusters --output text ${local.aws_eks} describe-cluster --name ${aws_eks_cluster.this.name} --output text EOT } } resource "null_resource" "update_image" { depends_on = [helm_release.aws_observability, null_resource.kubectl] triggers = { timestamp = "${timestamp()}" # Forces re-run on every apply } provisioner "local-exec" { command = <<-EOT kubectl -n amazon-cloudwatch patch AmazonCloudWatchAgent cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${var.cwagent_image_repo}:${var.cwagent_image_tag}"}]' kubectl set image deployment/amazon-cloudwatch-observability-controller-manager -n amazon-cloudwatch manager=public.ecr.aws/cloudwatch-agent/cloudwatch-agent-operator:latest sleep 10 EOT } } resource "kubernetes_pod" "log_generator" { depends_on = [aws_eks_node_group.this] metadata { name = "log-generator" namespace = "default" } spec { container { name = "log-generator" image = "busybox" # Run shell script that generate a log line every second command = ["/bin/sh", "-c"] args = ["while true; do echo \"Log entry at $(date)\"; sleep 1; done"] } restart_policy = "Always" } } resource "kubernetes_pod" "petclinic_instrumentation" { depends_on = [aws_eks_node_group.this, helm_release.aws_observability, null_resource.update_image] metadata { name = "petclinic-instrumentation-default-env" annotations = { "instrumentation.opentelemetry.io/inject-java" = "true" } labels = { app = "petclinic" } } spec { container { name = "petclinic" image = "506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integ-test-petclinic:latest" port { container_port = 8080 } env { name 
= "OTEL_SERVICE_NAME" value = "petclinic-custom-service-name" } } } } resource "kubernetes_pod" "petclinic_custom_env" { depends_on = [aws_eks_node_group.this, helm_release.aws_observability, null_resource.update_image] metadata { name = "petclinic-instrumentation-custom-env" annotations = { "instrumentation.opentelemetry.io/inject-java" = "true" } labels = { app = "petclinic" } } spec { container { name = "petclinic" image = "506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integ-test-petclinic:latest" port { container_port = 8080 } env { name = "OTEL_SERVICE_NAME" value = "petclinic-custom-service-name" } env { name = "OTEL_RESOURCE_ATTRIBUTES" value = "deployment.environment=petclinic-custom-environment" } } } } # Traffic generator pod with bash command resource "kubernetes_pod" "traffic_generator_instrumentation" { depends_on = [kubernetes_pod.petclinic_instrumentation, kubernetes_pod.petclinic_custom_env, kubernetes_service.petclinic_service] metadata { name = "traffic-generator-instrumentation-default-env" } spec { container { name = "traffic-generator" image = "alpine" # Run the curl command as a loop to repeatedly send requests command = ["/bin/sh", "-c"] args = [ "apk add --no-cache curl && while true; do curl -s http://petclinic-service:8080/client-call; sleep 1; done" ] } } } # Service for Petclinic Pods to load-balance traffic resource "kubernetes_service" "petclinic_service" { metadata { name = "petclinic-service" } spec { selector = { app = "petclinic" } port { port = 8080 target_port = 8080 } } } # Get the single instance ID of the node in the node group data "aws_instances" "eks_node" { depends_on = [ aws_eks_node_group.this ] filter { name = "tag:eks:nodegroup-name" values = [aws_eks_node_group.this.node_group_name] } } # Retrieve details of the single instance to get private DNS data "aws_instance" "eks_node_detail" { depends_on = [ data.aws_instances.eks_node ] instance_id = data.aws_instances.eks_node.ids[0] } resource "null_resource" "validator" { depends_on = [ aws_eks_node_group.this, helm_release.aws_observability, null_resource.update_image, kubernetes_pod.log_generator, kubernetes_pod.petclinic_instrumentation, kubernetes_pod.petclinic_custom_env, kubernetes_pod.traffic_generator_instrumentation ] triggers = { always_run = timestamp() } provisioner "local-exec" { command = <<-EOT echo "Validating EKS logs for entity fields" cd ../../../.. go test ${var.test_dir} -timeout 1h -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON -instanceId=${data.aws_instance.eks_node_detail.instance_id} EOT } }