# terraform/eks/daemon/app_signals/main.tf
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT
# Shared test plumbing (testing_id used to uniquify resource names) plus the
# CloudWatch agent image coordinates passed through from root variables.
module "common" {
source = "../../../common"
cwagent_image_repo = var.cwagent_image_repo
cwagent_image_tag = var.cwagent_image_tag
}
# Region-scoped networking/IAM building blocks (VPC, subnets, security group,
# cluster role ARN) consumed by the EKS cluster and node group below.
module "basic_components" {
source = "../../../basic_components"
region = var.region
}
# Short-lived auth token for the cluster created below.
# NOTE(review): not referenced anywhere in this file — presumably consumed by a
# kubernetes provider block in a sibling file; verify before removing.
data "aws_eks_cluster_auth" "this" {
name = aws_eks_cluster.this.name
}
# EKS control plane for the integration test, with all five control-plane log
# types shipped to CloudWatch Logs.
resource "aws_eks_cluster" "this" {
name = "cwagent-eks-integ-${module.common.testing_id}"
role_arn = module.basic_components.role_arn
version = var.k8s_version
enabled_cluster_log_types = [
"api",
"audit",
"authenticator",
"controllerManager",
"scheduler"
]
# NOTE(review): this uses the basic_components security group; the
# eks_cluster_sg/eks_nodes_sg resources defined later in this file are never
# attached to the cluster or node group — confirm whether they are still needed.
vpc_config {
subnet_ids = module.basic_components.public_subnet_ids
security_group_ids = [module.basic_components.security_group]
}
}
# EKS Node Groups
# Single-node managed node group (one on-demand t3.medium) hosting the
# DaemonSet under test.
resource "aws_eks_node_group" "this" {
cluster_name = aws_eks_cluster.this.name
node_group_name = "cwagent-eks-integ-node"
node_role_arn = aws_iam_role.node_role.arn
subnet_ids = module.basic_components.public_subnet_ids
# Fixed at exactly one node so the DAEMON deployment strategy produces a
# single agent pod to validate against.
scaling_config {
desired_size = 1
max_size = 1
min_size = 1
}
ami_type = "AL2_x86_64"
capacity_type = "ON_DEMAND"
disk_size = 20
instance_types = ["t3.medium"]
# Ensure all node policies are attached before instances join the cluster;
# otherwise nodes can register without required permissions.
depends_on = [
aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly,
aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy,
aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy,
aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy,
aws_iam_role_policy_attachment.node_AWSXRayDaemonWriteAccess
]
}
# EKS Node IAM Role
# IAM role assumed by the EC2 worker nodes of the node group above.
resource "aws_iam_role" "node_role" {
  name = "cwagent-eks-Worker-Role-${module.common.testing_id}"

  # jsonencode() is preferred over a raw heredoc for IAM policy documents:
  # Terraform guarantees the result is syntactically valid JSON and whitespace
  # differences can never cause spurious plan diffs. The rendered document is
  # semantically identical to the previous heredoc (EC2 may assume this role).
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}
# Managed policies for the worker-node role: the three standard EKS node
# policies, plus CloudWatch agent and X-Ray write access needed by the
# AppSignals metrics/traces pipeline under test.
resource "aws_iam_role_policy_attachment" "node_AmazonEKSWorkerNodePolicy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
role = aws_iam_role.node_role.name
}
resource "aws_iam_role_policy_attachment" "node_AmazonEKS_CNI_Policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
role = aws_iam_role.node_role.name
}
resource "aws_iam_role_policy_attachment" "node_AmazonEC2ContainerRegistryReadOnly" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
role = aws_iam_role.node_role.name
}
resource "aws_iam_role_policy_attachment" "node_CloudWatchAgentServerPolicy" {
policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
role = aws_iam_role.node_role.name
}
resource "aws_iam_role_policy_attachment" "node_AWSXRayDaemonWriteAccess" {
policy_arn = "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess"
role = aws_iam_role.node_role.name
}
# TODO: these security groups should be created once and then reused
# EKS Cluster Security Group
# Control-plane side security group; rules are attached separately below.
# NOTE(review): not referenced by aws_eks_cluster.this (which uses the
# basic_components security group) — confirm it is actually in use.
resource "aws_security_group" "eks_cluster_sg" {
name = "cwagent-eks-cluster-sg-${module.common.testing_id}"
description = "Cluster communication with worker nodes"
vpc_id = module.basic_components.vpc_id
}
# Nodes -> API server: HTTPS only.
resource "aws_security_group_rule" "cluster_inbound" {
description = "Allow worker nodes to communicate with the cluster API Server"
from_port = 443
protocol = "tcp"
security_group_id = aws_security_group.eks_cluster_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 443
type = "ingress"
}
# API server -> nodes: ephemeral port range for kubelet/webhook callbacks.
resource "aws_security_group_rule" "cluster_outbound" {
description = "Allow cluster API Server to communicate with the worker nodes"
from_port = 1024
protocol = "tcp"
security_group_id = aws_security_group.eks_cluster_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 65535
type = "egress"
}
# EKS Node Security Group
# Worker-node security group: unrestricted egress inline; ingress rules are
# attached separately below.
resource "aws_security_group" "eks_nodes_sg" {
name = "cwagent-eks-node-sg-${module.common.testing_id}"
description = "Security group for all nodes in the cluster"
vpc_id = module.basic_components.vpc_id
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
# Node-to-node traffic within the group (self-referencing rule).
# NOTE(review): with protocol "-1" AWS ignores the port range, so
# from_port/to_port are conventionally both 0 — confirm this mixed form does
# not produce perpetual plan diffs.
resource "aws_security_group_rule" "nodes_internal" {
description = "Allow nodes to communicate with each other"
from_port = 0
protocol = "-1"
security_group_id = aws_security_group.eks_nodes_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 65535
type = "ingress"
}
# Control plane -> kubelet/pods on high ports.
resource "aws_security_group_rule" "nodes_cluster_inbound" {
description = "Allow worker Kubelets and pods to receive communication from the cluster control plane"
from_port = 1025
protocol = "tcp"
security_group_id = aws_security_group.eks_nodes_sg.id
source_security_group_id = aws_security_group.eks_cluster_sg.id
to_port = 65535
type = "ingress"
}
# Namespace that hosts the agent DaemonSet, its ConfigMap, and ServiceAccount.
resource "kubernetes_namespace" "namespace" {
metadata {
name = "amazon-cloudwatch"
}
}
# TODO: how do we support different deployment types? Should they be in separate terraform
# files, and spawn separate tests?
# CloudWatch agent DaemonSet plus an "app-signals-client" sidecar container.
# The sidecar loops forever, POSTing the canned AppSignals metric and trace
# payloads (rendered from the template_file data sources below) to the agent's
# HTTP endpoint on localhost:4316; the null_resource validator then checks the
# resulting CloudWatch data.
resource "kubernetes_daemonset" "service" {
depends_on = [
kubernetes_namespace.namespace,
kubernetes_config_map.cwagentconfig,
kubernetes_service_account.cwagentservice,
aws_eks_node_group.this
]
metadata {
name = "cloudwatch-agent"
namespace = "amazon-cloudwatch"
}
spec {
selector {
match_labels = {
"name" : "cloudwatch-agent"
}
}
template {
metadata {
labels = {
"name" : "cloudwatch-agent"
}
}
spec {
node_selector = {
"kubernetes.io/os" : "linux"
}
# Agent container: image under test, configured via the cwagentconfig
# ConfigMap mounted at /etc/cwagentconfig.
container {
name = "cwagent"
image = "${var.cwagent_image_repo}:${var.cwagent_image_tag}"
image_pull_policy = "Always"
resources {
limits = {
"cpu" : "200m",
"memory" : "200Mi"
}
requests = {
"cpu" : "200m",
"memory" : "200Mi"
}
}
# 4316 is the ingest port the sidecar's curl commands target
# (/v1/metrics and /v1/traces); exposed on the host as well.
port {
container_port = 4316
host_port = 4316
protocol = "TCP"
}
env {
name = "HOST_IP"
value_from {
field_ref {
field_path = "status.hostIP"
}
}
}
env {
name = "HOST_NAME"
value_from {
field_ref {
field_path = "spec.nodeName"
}
}
}
env {
name = "K8S_NAMESPACE"
value_from {
field_ref {
field_path = "metadata.namespace"
}
}
}
volume_mount {
mount_path = "/etc/cwagentconfig"
name = "cwagentconfig"
}
# Read-only host mounts for container/host metrics collection.
volume_mount {
mount_path = "/rootfs"
name = "rootfs"
read_only = true
}
volume_mount {
mount_path = "/var/run/docker.sock"
name = "dockersock"
read_only = true
}
volume_mount {
mount_path = "/var/lib/docker"
name = "varlibdocker"
read_only = true
}
volume_mount {
mount_path = "/run/containerd/containerd.sock"
name = "containerdsock"
read_only = true
}
volume_mount {
mount_path = "/sys"
name = "sys"
read_only = true
}
volume_mount {
mount_path = "/dev/disk"
name = "devdisk"
read_only = true
}
}
volume {
name = "cwagentconfig"
config_map {
name = "cwagentconfig"
}
}
volume {
name = "rootfs"
host_path {
path = "/"
}
}
volume {
name = "dockersock"
host_path {
path = "/var/run/docker.sock"
}
}
volume {
name = "varlibdocker"
host_path {
path = "/var/lib/docker"
}
}
volume {
name = "containerdsock"
host_path {
path = "/run/containerd/containerd.sock"
}
}
volume {
name = "sys"
host_path {
path = "/sys"
}
}
volume {
name = "devdisk"
host_path {
path = "/dev/disk"
}
}
# Traffic-generator sidecar: uses a golang image so it can `go run` the
# rendered traceid_generator.go each iteration.
container {
name = "app-signals-client"
image = "public.ecr.aws/docker/library/golang:latest"
image_pull_policy = "Always"
resources {
limits = {
"cpu" : "50m",
"memory" : "300Mi"
}
requests = {
"cpu" : "50m",
"memory" : "300Mi"
}
}
# The commands between `while` and `do` form the loop's condition list,
# so START_TIME and TRACE_ID are regenerated on every iteration; the
# loop body substitutes them into the JSON payloads and POSTs metrics
# then traces to the agent on 127.0.0.1:4316, sleeping 1s per cycle.
command = [
"/bin/sh",
"-c",
"while true; echo '${data.template_file.traceid_generator.rendered}' > traceid_generator.go && chmod +x traceid_generator.go; export START_TIME=$(date +%s%N); export TRACE_ID=$(go run ./traceid_generator.go); do echo '${data.template_file.server_consumer.rendered}' | sed -e \"s/START_TIME/$START_TIME/\" > server_consumer.json; curl -H 'Content-Type: application/json' -d @server_consumer.json -i http://127.0.0.1:4316/v1/metrics --verbose; echo '${data.template_file.client_producer.rendered}' | sed -e \"s/START_TIME/$START_TIME/\" > client_producer.json; curl -H 'Content-Type: application/json' -d @client_producer.json -i http://127.0.0.1:4316/v1/metrics --verbose; echo '${data.template_file.traces.rendered}' | sed -e \"s/START_TIME/$START_TIME/\" | sed -e \"s/TRACE_ID/$TRACE_ID/\" > traces.json; curl -H 'Content-Type: application/json' -d @traces.json -i http://127.0.0.1:4316/v1/traces --verbose; sleep 1; done"
]
env {
name = "HOST_IP"
value_from {
field_ref {
field_path = "status.hostIP"
}
}
}
env {
name = "HOST_NAME"
value_from {
field_ref {
field_path = "spec.nodeName"
}
}
}
env {
name = "K8S_NAMESPACE"
value_from {
field_ref {
field_path = "metadata.namespace"
}
}
}
volume_mount {
mount_path = "/etc/cwagentconfig"
name = "cwagentconfig"
}
}
service_account_name = "cloudwatch-agent"
termination_grace_period_seconds = 60
}
}
}
}
##########################################
# Template Files
##########################################
# Paths (relative to this module) of the agent config, canned payloads, and
# the trace-id generator program, all rooted at var.test_dir in the repo.
locals {
cwagent_config = "../../../../${var.test_dir}/agent_configs/config.json"
server_consumer = "../../../../${var.test_dir}/resources/metrics/server_consumer.json"
client_producer = "../../../../${var.test_dir}/resources/metrics/client_producer.json"
traces = "../../../../${var.test_dir}/resources/traces/traces.json"
traceid_generator = "../../../../${var.test_dir}/resources/traceid_generator.go"
}
# CloudWatch agent configuration content; no variables are substituted, so the
# (optional) empty `vars` map is omitted. NOTE(review): the hashicorp/template
# provider is deprecated — since nothing is interpolated, referencing
# file(local.cwagent_config) directly (or templatefile()) would drop the
# provider dependency.
data "template_file" "cwagent_config" {
  template = file(local.cwagent_config)
}
# Agent configuration mounted into the DaemonSet at /etc/cwagentconfig.
resource "kubernetes_config_map" "cwagentconfig" {
depends_on = [
kubernetes_namespace.namespace,
kubernetes_service_account.cwagentservice
]
metadata {
name = "cwagentconfig"
namespace = "amazon-cloudwatch"
}
data = {
"cwagentconfig.json" : data.template_file.cwagent_config.rendered
}
}
# Canned request payloads and the trace-id generator program embedded into the
# app-signals-client sidecar's shell command. None of them substitute template
# vars, so the (optional) empty `vars` maps are omitted. NOTE(review): the
# hashicorp/template provider is deprecated; file(...) locals would serve
# equally well here.
data "template_file" "server_consumer" {
  template = file(local.server_consumer)
}

data "template_file" "client_producer" {
  template = file(local.client_producer)
}

data "template_file" "traces" {
  template = file(local.traces)
}

data "template_file" "traceid_generator" {
  template = file(local.traceid_generator)
}
# ServiceAccount the DaemonSet pods run as (bound to the ClusterRole below).
resource "kubernetes_service_account" "cwagentservice" {
depends_on = [kubernetes_namespace.namespace]
metadata {
name = "cloudwatch-agent"
namespace = "amazon-cloudwatch"
}
}
# RBAC permissions the agent needs: watch workload objects, read node stats
# via the kubelet proxy, publish events, and use a ConfigMap
# ("cwagent-clusterleader") for leader election.
resource "kubernetes_cluster_role" "clusterrole" {
depends_on = [kubernetes_namespace.namespace]
metadata {
name = "cloudwatch-agent-role"
}
rule {
verbs = ["list", "watch"]
resources = ["pods", "nodes", "endpoints", "services"]
api_groups = [""]
}
rule {
verbs = ["list", "watch"]
resources = ["replicasets"]
api_groups = ["apps"]
}
rule {
verbs = ["list", "watch"]
resources = ["jobs"]
api_groups = ["batch"]
}
rule {
verbs = ["get"]
resources = ["nodes/proxy", "configmaps"]
api_groups = [""]
}
rule {
verbs = ["create"]
resources = ["nodes/stats", "configmaps", "events"]
api_groups = [""]
}
# Leader-election lock ConfigMap.
rule {
verbs = ["get", "update"]
resource_names = ["cwagent-clusterleader"]
resources = ["configmaps"]
api_groups = [""]
}
rule {
verbs = ["list", "watch", "get"]
resources = ["endpointslices"]
api_groups = ["discovery.k8s.io"]
}
}
# Binds the agent ServiceAccount to the ClusterRole above.
resource "kubernetes_cluster_role_binding" "rolebinding" {
  depends_on = [kubernetes_namespace.namespace]

  metadata {
    name = "cloudwatch-agent-role-binding"
  }

  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "ClusterRole"
    # Reference the resource instead of repeating the literal name: the value
    # is identical ("cloudwatch-agent-role"), but the reference gives Terraform
    # a dependency edge so the ClusterRole is always created before the
    # binding, which the hard-coded string did not guarantee.
    name = kubernetes_cluster_role.clusterrole.metadata[0].name
  }

  subject {
    kind = "ServiceAccount"
    # Same idea for the ServiceAccount ("cloudwatch-agent"/"amazon-cloudwatch").
    name      = kubernetes_service_account.cwagentservice.metadata[0].name
    namespace = kubernetes_service_account.cwagentservice.metadata[0].namespace
  }
}
# Runs the Go validation suite against the live cluster once the DaemonSet and
# its RBAC objects are in place; `cd ../../../..` moves to the repo root where
# `go test` can resolve var.test_dir.
resource "null_resource" "validator" {
depends_on = [
aws_eks_node_group.this,
kubernetes_daemonset.service,
kubernetes_cluster_role_binding.rolebinding,
kubernetes_service_account.cwagentservice,
]
provisioner "local-exec" {
command = <<-EOT
echo "Validating EKS metrics/traces for AppSignals"
cd ../../../..
go test ${var.test_dir} -timeout 1h -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON
EOT
}
}