otelcollector/shared/helpers.go (233 lines of code) (raw):

package shared import ( "fmt" "log" "os" "os/exec" "regexp" "strings" "time" ) func GetEnv(key, defaultValue string) string { value := os.Getenv(key) if value == "" { return defaultValue } return value } func GetControllerType() string { // Get CONTROLLER_TYPE environment variable controllerType := os.Getenv("CONTROLLER_TYPE") // Convert controllerType to lowercase and trim spaces controllerTypeLower := strings.ToLower(strings.TrimSpace(controllerType)) return controllerTypeLower } func IsValidRegex(input string) bool { _, err := regexp.Compile(input) return err == nil } func DetermineConfigFiles(controllerType, clusterOverride string) (string, string) { var meConfigFile, fluentBitConfigFile string switch { case strings.ToLower(controllerType) == "replicaset": fluentBitConfigFile = "/opt/fluent-bit/fluent-bit.yaml" if clusterOverride == "true" { meConfigFile = "/usr/sbin/me_internal.config" } else { meConfigFile = "/usr/sbin/me.config" } case os.Getenv("OS_TYPE") != "windows": fluentBitConfigFile = "/opt/fluent-bit/fluent-bit-daemonset.yaml" if clusterOverride == "true" { meConfigFile = "/usr/sbin/me_ds_internal.config" } else { meConfigFile = "/usr/sbin/me_ds.config" } default: fluentBitConfigFile = "/opt/fluent-bit/fluent-bit-windows.conf" if clusterOverride == "true" { meConfigFile = "/opt/metricextension/me_ds_internal_win.config" } else { meConfigFile = "/opt/metricextension/me_ds_win.config" } } return meConfigFile, fluentBitConfigFile } func LogVersionInfo() { if meVersion, err := ReadVersionFile("/opt/metricsextversion.txt"); err == nil { FmtVar("ME_VERSION", meVersion) } else { log.Printf("Error reading ME version file: %v\n", err) } if golangVersion, err := ReadVersionFile("/opt/goversion.txt"); err == nil { FmtVar("GOLANG_VERSION", golangVersion) } else { log.Printf("Error reading Golang version file: %v\n", err) } if otelCollectorVersion, err := exec.Command("/opt/microsoft/otelcollector/otelcollector", "--version").Output(); err == nil { FmtVar("OTELCOLLECTOR_VERSION", string(otelCollectorVersion)) } else { log.Printf("Error getting otelcollector version: %v\n", err) } if prometheusVersion, err := ReadVersionFile("/opt/microsoft/otelcollector/PROMETHEUS_VERSION"); err == nil { FmtVar("PROMETHEUS_VERSION", prometheusVersion) } else { log.Printf("Error reading Prometheus version file: %v\n", err) } } func StartTelegraf() { fmt.Println("Starting Telegraf") if telemetryDisabled := os.Getenv("TELEMETRY_DISABLED"); telemetryDisabled != "true" { if os.Getenv("OS_TYPE") == "linux" { controllerType := os.Getenv("CONTROLLER_TYPE") azmonOperatorEnabled := os.Getenv("AZMON_OPERATOR_ENABLED") var telegrafConfig string switch { case controllerType == "ReplicaSet" && azmonOperatorEnabled == "true": telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ta-enabled.conf" case controllerType == "ReplicaSet": telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector.conf" default: telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ds.conf" } telegrafCmd := exec.Command("/usr/bin/telegraf", "--config", telegrafConfig) telegrafCmd.Stdout = os.Stdout telegrafCmd.Stderr = os.Stderr if err := telegrafCmd.Start(); err != nil { fmt.Println("Error starting telegraf:", err) return } telegrafVersion, _ := os.ReadFile("/opt/telegrafversion.txt") fmt.Printf("TELEGRAF_VERSION=%s\n", string(telegrafVersion)) } } else { telegrafPath := "C:\\opt\\telegraf\\telegraf.exe" configPath := "C:\\opt\\telegraf\\telegraf-prometheus-collector-windows.conf" // Install Telegraf service installCmd := exec.Command(telegrafPath, "--service", "install", "--config", configPath) if err := installCmd.Run(); err != nil { log.Fatalf("Error installing Telegraf service: %v\n", err) } // Set delayed start if POD_NAME is set serverName := os.Getenv("POD_NAME") if serverName != "" { setDelayCmd := exec.Command("sc.exe", fmt.Sprintf("\\\\%s", serverName), "config", "telegraf", "start= delayed-auto") if err := setDelayCmd.Run(); err != nil { log.Printf("Failed to set delayed start for Telegraf: %v\n", err) } else { fmt.Println("Successfully set delayed start for Telegraf") } } else { fmt.Println("Failed to get environment variable POD_NAME to set delayed Telegraf start") } // Run Telegraf in test mode testCmd := exec.Command(telegrafPath, "--config", configPath, "--test") testCmd.Stdout = os.Stdout testCmd.Stderr = os.Stderr if err := testCmd.Run(); err != nil { log.Printf("Error running Telegraf in test mode: %v\n", err) } // Start Telegraf service startCmd := exec.Command(telegrafPath, "--service", "start") if err := startCmd.Run(); err != nil { log.Printf("Error starting Telegraf service: %v\n", err) } // Check if Telegraf is running, retry if necessary for { statusCmd := exec.Command("sc.exe", "query", "telegraf") output, err := statusCmd.CombinedOutput() if err != nil { log.Printf("Error checking Telegraf service status: %v\n", err) time.Sleep(30 * time.Second) continue } if string(output) != "" { fmt.Println("Telegraf is running") break } fmt.Println("Trying to start Telegraf again in 30 seconds, since it might not have been ready...") time.Sleep(30 * time.Second) startCmd := exec.Command(telegrafPath, "--service", "start") if err := startCmd.Run(); err != nil { log.Printf("Error starting Telegraf service again: %v\n", err) } } } } func SetEnvVariablesForWindows() { // Set Windows version (Microsoft Windows Server 2019 Datacenter or 2022 Datacenter) out, err := exec.Command("wmic", "os", "get", "Caption").Output() if err != nil { log.Fatalf("Failed to get Windows version: %v", err) } windowsVersion := strings.TrimSpace(string(out)) windowsVersion = strings.Split(windowsVersion, "\n")[1] // Extract version name // Set environment variables for process and machine os.Setenv("windowsVersion", windowsVersion) SetEnvAndSourceBashrcOrPowershell("windowsVersion", windowsVersion, true) // Resource ID override mac := os.Getenv("MAC") cluster := os.Getenv("CLUSTER") nodeName := os.Getenv("NODE_NAME") if mac == "" { if cluster == "" { fmt.Printf("CLUSTER is empty or not set. Using %s as CLUSTER\n", nodeName) os.Setenv("customResourceId", nodeName) SetEnvAndSourceBashrcOrPowershell("customResourceId", nodeName, true) } else { os.Setenv("customResourceId", cluster) SetEnvAndSourceBashrcOrPowershell("customResourceId", cluster, true) } } else { SetEnvAndSourceBashrcOrPowershell("customResourceId", cluster, true) aksRegion := os.Getenv("AKSREGION") SetEnvAndSourceBashrcOrPowershell("customRegion", aksRegion, true) // Set variables for Telegraf SetTelegrafVariables(aksRegion, cluster) } // Set monitoring-related variables SetMonitoringVariables() // Handle custom environment settings customEnvironment := strings.ToLower(os.Getenv("customEnvironment")) mcsEndpoint, mcsGlobalEndpoint := GetMcsEndpoints(customEnvironment) // Set MCS endpoint environment variables SetEnvAndSourceBashrcOrPowershell("MCS_AZURE_RESOURCE_ENDPOINT", mcsEndpoint, true) SetEnvAndSourceBashrcOrPowershell("MCS_GLOBAL_ENDPOINT", mcsGlobalEndpoint, true) } func SetTelegrafVariables(aksRegion, cluster string) { SetEnvAndSourceBashrcOrPowershell("AKSREGION", aksRegion, true) SetEnvAndSourceBashrcOrPowershell("CLUSTER", cluster, true) azmonClusterAlias := os.Getenv("AZMON_CLUSTER_ALIAS") SetEnvAndSourceBashrcOrPowershell("AZMON_CLUSTER_ALIAS", azmonClusterAlias, true) } func SetMonitoringVariables() { SetEnvAndSourceBashrcOrPowershell("MONITORING_ROLE_INSTANCE", "cloudAgentRoleInstanceIdentity", true) SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_OsType", "Windows", true) SetEnvAndSourceBashrcOrPowershell("MONITORING_VERSION", "2.0", true) SetEnvAndSourceBashrcOrPowershell("MONITORING_ROLE", "cloudAgentRoleIdentity", true) SetEnvAndSourceBashrcOrPowershell("MONITORING_IDENTITY", "use_ip_address", true) SetEnvAndSourceBashrcOrPowershell("MONITORING_USE_GENEVA_CONFIG_SERVICE", "false", true) SetEnvAndSourceBashrcOrPowershell("SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH", "true", true) SetEnvAndSourceBashrcOrPowershell("ENABLE_MCS", "true", true) SetEnvAndSourceBashrcOrPowershell("MDSD_USE_LOCAL_PERSISTENCY", "false", true) SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_Location", os.Getenv("AKSREGION"), true) SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_ResourceId", os.Getenv("CLUSTER"), true) SetEnvAndSourceBashrcOrPowershell("MCS_CUSTOM_RESOURCE_ID", os.Getenv("CLUSTER"), true) } func GetMcsEndpoints(customEnvironment string) (string, string) { var mcsEndpoint, mcsGlobalEndpoint string switch customEnvironment { case "azurepubliccloud": aksRegion := strings.ToLower(os.Getenv("AKSREGION")) if aksRegion == "eastus2euap" || aksRegion == "centraluseuap" { mcsEndpoint = "https://monitor.azure.com/" mcsGlobalEndpoint = "https://global.handler.canary.control.monitor.azure.com" } else { mcsEndpoint = "https://monitor.azure.com/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.com" } case "azureusgovernmentcloud": mcsEndpoint = "https://monitor.azure.us/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.us" case "azurechinacloud": mcsEndpoint = "https://monitor.azure.cn/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.cn" case "usnat": mcsEndpoint = "https://monitor.azure.eaglex.ic.gov/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.eaglex.ic.gov" case "ussec": mcsEndpoint = "https://monitor.azure.microsoft.scloud/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.microsoft.scloud/" default: fmt.Printf("Unknown customEnvironment: %s, setting mcs endpoint to default azurepubliccloud values\n", customEnvironment) mcsEndpoint = "https://monitor.azure.com/" mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.com" } return mcsEndpoint, mcsGlobalEndpoint }