internal/scripts/troubleshoot/TroubleshootError.ps1 (659 lines of code) (raw):

#
# TroubleshootError.ps1
#
<#
    .DESCRIPTION
    Troubleshoots Managed Prometheus on an AKS cluster and classifies the error the user is facing.
    A transcript of the run is written to .\TroubleshootDump.txt.

    .PARAMETER ClusterResourceId
    Fully qualified resource id of the AKS (Azure Kubernetes Service) cluster.
    Expected format:
    /subscriptions/<subId>/resourceGroups/<rgName>/providers/Microsoft.ContainerService/managedClusters/<clusterName>
#>
param(
    [Parameter(Mandatory = $true)]
    [string]$ClusterResourceId
)

# Any cmdlet error is treated as terminating for the rest of the script.
$ErrorActionPreference = "Stop"

# Record everything written to the console for this run.
Start-Transcript -Path .\TroubleshootDump.txt -Force

# Documentation and support links used in user-facing messages.
$AksOptOutLink = "https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-metrics-disable"
$AksOptInLink = "https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-metrics-enable?tabs=azure-portal#enable-prometheus-metric-collection"
$contactUSMessage = "Please contact us by creating a support ticket in Azure if you need any help. Use this link: https://azure.microsoft.com/en-us/support/create-ticket"

# Directory (relative to the current location) that receives the collected log files.
$debuglogsDir = "debuglogs"
$debugLogsDirExists = Test-Path -Path $debuglogsDir -PathType Container
if (!$debugLogsDirExists) {
    New-Item -Path $debuglogsDir -ItemType Directory
}

Write-Host "ClusterResourceId: '$ClusterResourceId' "

# The id must split into exactly 9 '/'-separated segments and name a managedClusters resource.
$isValidClusterId = $false
if ($null -ne $ClusterResourceId) {
    $hasExpectedSegmentCount = ($ClusterResourceId.Split("/").Length -eq 9)
    $isManagedClusterId = $ClusterResourceId.ToLower().Contains("microsoft.containerservice/managedclusters")
    $isValidClusterId = $hasExpectedSegmentCount -and $isManagedClusterId
}

if (-not $isValidClusterId) {
    $invalidIdMessages = @(
        "Provided Cluster resource id should be fully qualified resource id of AKS or ARO cluster",
        "Resource Id Format for AKS cluster is : /subscriptions/<subId>/resourceGroups/<rgName>/providers/Microsoft.ContainerService/managedClusters/<clusterName>"
    )
    foreach ($invalidIdMessage in $invalidIdMessages) {
        Write-Host $invalidIdMessage -ForegroundColor Red
    }
    Stop-Transcript
    exit 1
}

$ClusterRegion = ""
$ClusterType = "AKS"

#
# Probe for the required PowerShell modules; the user is asked for permission
# to install any that are missing.
#
$azAccountModule = Get-Module -ListAvailable -Name Az.Accounts
#
# Main troubleshooting flow for Managed Prometheus on AKS.
#
# Steps, in order:
#   1. Probe the remaining required Az modules and offer to install / import missing ones.
#   2. Make sure an Azure login exists and the cluster's subscription is selected.
#   3. Verify the resource group, the cluster, and that Managed Prometheus is enabled on it.
#   4. Verify a data collection rule with the Microsoft-PrometheusMetrics stream is associated.
#   5. Verify the ama-metrics deployment (with or without HPA) and the node daemonsets.
#   6. Collect agent logs into $debuglogsDir and zip them.
#
# Every failure path stops the transcript and exits with code 1.
# Reads from the script preamble: $ClusterResourceId, $ClusterType, $azAccountModule,
# $debuglogsDir, $AksOptInLink and $contactUSMessage.
#
$azResourcesModule = Get-Module -ListAvailable -Name Az.Resources
$azOperationalInsights = Get-Module -ListAvailable -Name Az.OperationalInsights
$azAksModule = Get-Module -ListAvailable -Name Az.Aks
$azARGModule = Get-Module -ListAvailable -Name Az.ResourceGraph
$azMonitorModule = Get-Module -ListAvailable -Name Az.Monitor

# Stops the transcript (best effort) and ends the script with the given exit code.
function Stop-Troubleshooting {
    param([int]$ExitCode = 1)
    try {
        Stop-Transcript
    }
    catch {
        # A transcript problem must not hide the exit code of the real failure.
        Write-Host("Could not stop the transcript: " + $_) -ForegroundColor Yellow
    }
    exit $ExitCode
}

# Builds the "install it yourself" hint shown when a module cannot be installed or imported.
function Get-ModuleInstallHint {
    param($Module)
    $repositoryArgument = ""
    if ($Module.Repository) {
        $repositoryArgument = " -Repository " + $Module.Repository
    }
    return "Close other powershell logins and try installing the latest modules for " + $Module.Name +
        " in a new powershell window: eg. 'Install-Module " + $Module.Name + $repositoryArgument + " -Force'"
}

# Returns the status object of the single daemonset with the given name, or $null when
# kubectl does not return exactly one match.
function Get-DaemonSetStatus {
    param([string]$Namespace, [string]$Name)
    $daemonSets = kubectl get ds -n $Namespace -o json --field-selector "metadata.name=$Name" | ConvertFrom-Json
    if (($null -eq $daemonSets) -or ($null -eq $daemonSets.Items) -or ($daemonSets.Items.Length -ne 1)) {
        return $null
    }
    return $daemonSets.Items[0].status
}

# True when every desired daemonset pod is scheduled, available and ready.
function Test-DaemonSetHealthy {
    param($Status)
    return (
        ($Status.currentNumberScheduled -eq $Status.desiredNumberScheduled) -and
        ($Status.numberAvailable -eq $Status.currentNumberScheduled) -and
        ($Status.numberAvailable -eq $Status.numberReady)
    )
}

# Returns the names of the pods matching a label selector; blank entries (no pods) are dropped
# so callers never invoke kubectl with an empty pod name.
function Get-PodNamesBySelector {
    param([string]$Namespace, [string]$Selector)
    kubectl get pods -n $Namespace -l $Selector -o jsonpath='{.items[*].metadata.name}' |
        ForEach-Object { $_.Trim() -split '\s+' } |
        Where-Object { -not [string]::IsNullOrWhiteSpace($_) }
}

# Copies the agent log files out of a kube-system pod and saves the prometheus-collector and
# token-adapter container logs into $OutputDirectory. Collection is best effort: a failed copy
# is reported but does not stop the script.
function Save-PodDebugLogs {
    param(
        [string]$PodName,
        [string]$OutputDirectory,
        # mdsd.<kind> files to copy from /opt/microsoft/linuxmonagent; pass @() to skip them.
        [string[]]$MdsdLogKinds = @("qos", "info", "warn", "err"),
        [string]$TokenAdapterContainer = "addon-token-adapter",
        [string]$TokenAdapterFileSuffix = "addontokenadapter"
    )

    $filesToCopy = @(
        @{ Source = "/MetricsExtensionConsoleDebugLog.log"; Target = "MetricsExtensionConsoleDebugLog_$($PodName).log" }
    )
    foreach ($kind in $MdsdLogKinds) {
        $filesToCopy += @{ Source = "/opt/microsoft/linuxmonagent/mdsd.$kind"; Target = "mdsd_$($kind)_$($PodName).log" }
    }

    foreach ($file in $filesToCopy) {
        kubectl cp "kube-system/$($PodName):$($file.Source)" "./$OutputDirectory/$($file.Target)"
        # Only claim success when kubectl actually succeeded.
        if ($LASTEXITCODE -eq 0) {
            Write-Host("$($file.Target) copied to debuglogs directory.") -ForegroundColor Green
        }
        else {
            Write-Host("Could not copy $($file.Source) from pod $PodName (kubectl exit code $LASTEXITCODE).") -ForegroundColor Yellow
        }
    }

    kubectl logs $PodName -n kube-system -c prometheus-collector > "$OutputDirectory/$($PodName)_promcollector.log"
    kubectl logs $PodName -n kube-system -c $TokenAdapterContainer > "$OutputDirectory/$($PodName)_$($TokenAdapterFileSuffix).log"
}

#
# Platform detection. Done unconditionally because the login step below also depends on it.
# Windows PowerShell (Desktop edition) has no Platform entry and is always Windows; on the
# Core edition Platform is "Win32NT" on Windows, so a prefix match is required.
#
$isWindowsMachine = $true
if ($PSVersionTable -and ($PSVersionTable.PSEdition -eq "Core") -and ($PSVersionTable.Platform -notlike "Win*")) {
    $isWindowsMachine = $false
}

#
# Required modules, in the order they are installed / imported. Repository is $null where the
# module is installed from the default repository. ImportFailure is the message printed when
# the user declines installation and the import fails.
#
$requiredModules = @(
    @{ Name = "Az.ResourceGraph"; Found = $azARGModule; Repository = $null; ImportFailure = "Could not Import Az.ResourceGraph..."; HintOnImportFailure = $true }
    @{ Name = "Az.Aks"; Found = $azAksModule; Repository = $null; ImportFailure = "Could not Import Az.Aks..."; HintOnImportFailure = $true }
    @{ Name = "Az.Resources"; Found = $azResourcesModule; Repository = "PSGallery"; ImportFailure = "Could not import Az.Resources..."; HintOnImportFailure = $true }
    @{ Name = "Az.Accounts"; Found = $azAccountModule; Repository = "PSGallery"; ImportFailure = "Could not import Az.Accounts..."; HintOnImportFailure = $true }
    @{ Name = "Az.OperationalInsights"; Found = $azOperationalInsights; Repository = "PSGallery"; ImportFailure = "Could not import Az.OperationalInsights... Please reinstall this Module"; HintOnImportFailure = $false }
    @{ Name = "Az.Monitor"; Found = $azMonitorModule; Repository = "PSGallery"; ImportFailure = "Could not import Az.Monitor... Please reinstall this Module"; HintOnImportFailure = $false }
)
$missingModules = @($requiredModules | Where-Object { $null -eq $_.Found })

if ($missingModules.Count -gt 0) {

    # On Windows the script insists on an elevated session before offering to install modules.
    if ($isWindowsMachine) {
        $currentPrincipal = New-Object Security.Principal.WindowsPrincipal([Security.Principal.WindowsIdentity]::GetCurrent())
        if ($currentPrincipal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)) {
            Write-Host("Running script as an admin...")
            Write-Host("")
        }
        else {
            Write-Host("Please re-launch the script with elevated administrator") -ForegroundColor Red
            Stop-Troubleshooting
        }
    }

    $message = "This script will try to install the latest versions of the following Modules : `n" +
        "Az.Aks, Az.ResourceGraph, Az.Resources, Az.Accounts, Az.OperationalInsights and Az.Monitor using the command`n" +
        "'Install-Module {Insert Module Name} -Repository PSGallery -Force -AllowClobber -ErrorAction Stop -WarningAction Stop'`n" +
        "If you do not have the latest version of these Modules, this troubleshooting script may not run."
    $question = "Do you want to Install the modules and run the script or just run the script?"

    $choices = New-Object Collections.ObjectModel.Collection[Management.Automation.Host.ChoiceDescription]
    $choices.Add((New-Object Management.Automation.Host.ChoiceDescription -ArgumentList '&Yes, Install and run'))
    $choices.Add((New-Object Management.Automation.Host.ChoiceDescription -ArgumentList '&Continue without installing the Module'))
    $choices.Add((New-Object Management.Automation.Host.ChoiceDescription -ArgumentList '&Quit'))

    $decision = $Host.UI.PromptForChoice($message, $question, $choices, 0)

    switch ($decision) {
        0 {
            # Install every missing module; stop at the first failure.
            foreach ($module in $missingModules) {
                try {
                    Write-Host("Installing " + $module.Name + "...")
                    $installArguments = @{ Name = $module.Name; Force = $true; AllowClobber = $true; ErrorAction = "Stop" }
                    if ($module.Repository) {
                        $installArguments.Repository = $module.Repository
                    }
                    Install-Module @installArguments
                }
                catch {
                    Write-Host (Get-ModuleInstallHint $module) -ForegroundColor Red
                    Stop-Troubleshooting
                }
            }
        }
        1 {
            # Continue without installing: try to import what was reported missing.
            foreach ($module in $missingModules) {
                try {
                    Import-Module $module.Name -ErrorAction Stop
                }
                catch {
                    Write-Host($module.ImportFailure) -ForegroundColor Red
                    if ($module.HintOnImportFailure) {
                        Write-Host (Get-ModuleInstallHint $module) -ForegroundColor Red
                    }
                    Stop-Troubleshooting
                }
            }
        }
        2 {
            Write-Host("")
            Stop-Troubleshooting
        }
    }
}

$resourceIdSegments = $ClusterResourceId.Split("/")
$ClusterSubscriptionId = $resourceIdSegments[2]
$ClusterResourceGroupName = $resourceIdSegments[4]
$ClusterName = $resourceIdSegments[8]

#
# Subscription existence and access check
#
$account = $null
try {
    # Current login context, if any; $null means nobody is logged in yet.
    $account = Get-AzContext -ErrorAction Stop
}
catch {
    # Treat an unreadable context as "not logged in" so the login prompt below runs.
    Write-Host("Could not read the current Az login context: " + $_) -ForegroundColor Yellow
    $account = $null
}

if ($null -eq $account.Account) {
    try {
        Write-Host("Please login...")
        if ($isWindowsMachine) {
            Connect-AzAccount -SubscriptionId $ClusterSubscriptionId
        }
        else {
            Connect-AzAccount -SubscriptionId $ClusterSubscriptionId -UseDeviceAuthentication
        }
    }
    catch {
        Write-Host("")
        Write-Host("Could not select subscription with ID : " + $ClusterSubscriptionId + ". Please make sure the SubscriptionId you entered is correct and you have access to the Subscription" ) -ForegroundColor Red
        Write-Host("")
        Stop-Troubleshooting
    }
}
else {
    Write-Host $account.Subscription.Id
    if ($account.Subscription.Id -eq $ClusterSubscriptionId) {
        Write-Host("Subscription: $ClusterSubscriptionId is already selected. Account details: ")
        $account
    }
    else {
        try {
            Write-Host("Current Subscription:")
            $account
            Write-Host("Changing to subscription: $ClusterSubscriptionId")
            Select-AzSubscription -SubscriptionId $ClusterSubscriptionId
        }
        catch {
            Write-Host("")
            Write-Host("Could not select subscription with ID : " + $ClusterSubscriptionId + ". Please make sure the SubscriptionId you entered is correct and you have access to the Subscription" ) -ForegroundColor Red
            Write-Host("")
            Stop-Troubleshooting
        }
    }
}

#
# Resource group existence and access check
#
Write-Host("Checking resource group details...")
Get-AzResourceGroup -Name $ClusterResourceGroupName -ErrorVariable notPresent -ErrorAction SilentlyContinue
if ($notPresent) {
    Write-Host("")
    Write-Host("Could not find RG. Please make sure that the resource group name: '" + $ClusterResourceGroupName + "' is correct and you have access to the Resource Group") -ForegroundColor Red
    Write-Host("")
    Stop-Troubleshooting
}
Write-Host("Successfully checked resource groups details...") -ForegroundColor Green

#
# Cluster existence check and Managed Prometheus onboarding check
#
Write-Host("Checking '" + $ClusterType + "' Cluster details...")
$ResourceDetailsArray = $null
try {
    $ResourceDetailsArray = Get-AzResource -ResourceGroupName $ClusterResourceGroupName -Name $ClusterName -ResourceType "Microsoft.ContainerService/managedClusters" -ExpandProperties -ErrorAction Stop -WarningAction Stop
    if ($null -eq $ResourceDetailsArray) {
        Write-Host("")
        Write-Host("Could not fetch cluster details: Please make sure that the '" + $ClusterType + "' Cluster name: '" + $ClusterName + "' is correct and you have access to the cluster") -ForegroundColor Red
        Write-Host("")
        Stop-Troubleshooting
    }

    Write-Host("Successfully checked '" + $ClusterType + "' Cluster details...") -ForegroundColor Green
    $ClusterRegion = $ResourceDetailsArray.Location
    Write-Host("ClusterRegion: " + $ClusterRegion)

    foreach ($ResourceDetail in $ResourceDetailsArray) {
        if ($ResourceDetail.ResourceType -ne "Microsoft.ContainerService/managedClusters") {
            continue
        }

        # Property access is case-insensitive and yields $null for any missing level, so a
        # cluster without an azureMonitorProfile is reported as "not onboarded" instead of
        # failing on a null value. The comparison is case-insensitive and covers both a
        # boolean and a string "true".
        $metricsEnabled = $ResourceDetail.Properties.azureMonitorProfile.metrics.enabled
        if ("true" -ne "$metricsEnabled") {
            Write-Host("Your cluster isn't onboarded to Managed Prometheus. Please refer to the following documentation to onboard:") -ForegroundColor Red
            $clusterProperties = ($ResourceDetail.Properties | ConvertTo-Json)
            Write-Host("Cluster Properties found: " + $clusterProperties) -ForegroundColor Red
            Write-Host($AksOptInLink) -ForegroundColor Red
            Write-Host("")
            Stop-Troubleshooting
        }

        Write-Host("AKS Cluster ResourceId: '" + $ResourceDetail.ResourceId + "' has Managed Prometheus enabled in the AKS-RP")
        break
    }
}
catch {
    Write-Host("")
    Write-Host("Could not fetch cluster details: Please make sure that the '" + $ClusterType + "' Cluster name: '" + $ClusterName + "' is correct and you have access to the cluster") -ForegroundColor Red
    Write-Host("")
    Stop-Troubleshooting
}

#
# Data collection rule associations: at least one associated rule must carry the
# Microsoft-PrometheusMetrics stream.
#
try {
    $dcraList = Get-AzDataCollectionRuleAssociation -TargetResourceId $ClusterResourceId -ErrorAction Stop -WarningAction SilentlyContinue

    $prometheusMetricsTuples = @()

    foreach ($dcra in $dcraList) {
        # Filter out "configurationAccessEndpoint" entries
        if ($dcra.Name -eq "configurationAccessEndpoint") {
            Write-Host "Skipping configurationAccessEndpoint DCRA: $($dcra.Name)" -ForegroundColor Yellow
            continue
        }

        Write-Output "DCRA ID: $($dcra.Id)"
        Write-Output "DCRA Name: $($dcra.Name)"
        Write-Output "Data Collection Rule ID: $($dcra.DataCollectionRuleId)"
        Write-Output "Target Resource ID: $($dcra.TargetResourceId)"
        Write-Output "Provisioning State: $($dcra.ProvisioningState)"
        Write-Output "Additional Properties:"
        $dcra.Properties | Format-Table -AutoSize

        # An association without a rule id cannot be resolved to a data collection rule.
        if ([string]::IsNullOrWhiteSpace($dcra.DataCollectionRuleId)) {
            Write-Host "Skipping DCRA with no DataCollectionRuleId: $($dcra.Name)" -ForegroundColor Yellow
            continue
        }

        # Get the Data Collection Rule details based on its ID
        $dataCollectionRule = Get-AzResource -ResourceId $dcra.DataCollectionRuleId -ErrorAction SilentlyContinue
        if ($null -eq $dataCollectionRule) {
            Write-Host "Unable to fetch Data Collection Rule details for ID: $($dcra.DataCollectionRuleId)" -ForegroundColor Yellow
            continue
        }

        foreach ($dataflow in $dataCollectionRule.Properties.DataFlows) {
            if ($dataflow.streams -match "Microsoft-PrometheusMetrics") {
                Write-Host "Microsoft-PrometheusMetrics is present in the Dataflow."
                $prometheusMetricsTuples += [Tuple]::Create($dcra.Id, $dcra.DataCollectionRuleId, $dataCollectionRule.Properties.destinations.monitoringAccounts.accountResourceId)
            }
        }

        Write-Output "--------------------------------------------------"
    }

    Write-Output "Prometheus Metrics Tuple:"
    $prometheusMetricsTuples

    if ($prometheusMetricsTuples.Count -eq 0) {
        Write-Host "No entries with Microsoft-PrometheusMetrics found in the Data Collection Rule" -ForegroundColor Red
        Write-Host("")
        Stop-Troubleshooting
    }
}
catch {
    Write-Host("")
    Write-Host("Could not fetch DC* details. Please make sure that the '" + $ClusterType + "' Cluster name: '" + $ClusterName + "' is correct and you have access to the cluster") -ForegroundColor Red
    Write-Host("")
    Stop-Troubleshooting
}

#
# Check Agent pods running as expected with or without HPA
#
try {
    Write-Host("Getting Kubeconfig of the cluster...")
    Import-AzAksCredential -Id $ClusterResourceId -Force -ErrorAction Stop
    Write-Host("Successfully got the Kubeconfig of the cluster.")

    Write-Host("Switching to cluster context:", $ClusterName)
    kubectl config use-context $ClusterName
    # A native command failure does not throw; without this check the remaining kubectl
    # calls would silently run against whatever context was previously selected.
    if ($LASTEXITCODE -ne 0) {
        throw "kubectl could not switch to context '$ClusterName' (exit code $LASTEXITCODE)"
    }
    Write-Host("Successfully switched current context of the k8s cluster to:", $ClusterName)

    Write-Host("Checking if HPA is configured for ama-metrics deployment...")
    # No HPA object means no stdout, so ConvertFrom-Json yields $null.
    $hpa = kubectl get hpa ama-metrics-hpa -n kube-system -o json 2>$null | ConvertFrom-Json

    if ($null -eq $hpa) {
        # Without HPA the deployment is expected to have at least 2 replicas.
        $rsPod = kubectl get deployments ama-metrics -n kube-system -o json | ConvertFrom-Json
        if ($null -eq $rsPod) {
            Write-Host("ama-metrics replicaset pod not scheduled or failed to schedule.") -ForegroundColor Red
            Write-Host("Please refer to the following documentation to onboard and validate:") -ForegroundColor Red
            Write-Host($AksOptInLink) -ForegroundColor Red
            Write-Host($contactUSMessage)
            Stop-Troubleshooting
        }

        $rsPodStatus = $rsPod.status
        $hasEnoughReplicas = ($rsPodStatus.availableReplicas -ge 2) -and ($rsPodStatus.readyReplicas -ge 2) -and ($rsPodStatus.replicas -ge 2)
        if (-not $hasEnoughReplicas) {
            Write-Host("ama-metrics replicaset pods not scheduled or failed to schedule.") -ForegroundColor Red
            Write-Host("Available ama-metrics replicas:", $rsPodStatus.availableReplicas)
            Write-Host("Ready ama-metrics replicas:", $rsPodStatus.readyReplicas)
            Write-Host("Total ama-metrics replicas:", $rsPodStatus.replicas)
            Write-Host($rsPod) -ForegroundColor Red

            Write-Host("get ama-metrics rs pod details ...")
            $failingRsPods = kubectl get pods -n kube-system -l rsName=ama-metrics -o json | ConvertFrom-Json
            foreach ($pod in $failingRsPods.items) {
                Write-Host("status of the ama-metrics rs pod is:", $pod.status.conditions) -ForegroundColor Red
            }
            Write-Host("successfully got ama-metrics rs pod details ...")

            Write-Host("Please refer to the following documentation to onboard and validate:") -ForegroundColor Red
            Write-Host($AksOptInLink) -ForegroundColor Red
            Write-Host($contactUSMessage)
            Stop-Troubleshooting
        }

        $rsPods = kubectl get pods -n kube-system -l rsName=ama-metrics -o json | ConvertFrom-Json
        Write-Host("All ama-metrics replicaset pods are running OK.") -ForegroundColor Green
    }
    else {
        Write-Host("Fetching HPA status for ama-metrics...")
        $hpaStatus = $hpa.status
        $currentReplicas = $hpaStatus.currentReplicas
        $desiredReplicas = $hpaStatus.desiredReplicas

        Write-Host("Current replicas:", $currentReplicas)
        Write-Host("Desired replicas:", $desiredReplicas)

        if ($currentReplicas -ne $desiredReplicas) {
            # A warning rather than Write-Error: with $ErrorActionPreference = "Stop" an
            # error would abort here and skip the more specific checks below.
            Write-Warning "Mismatch detected! Current replicas ($currentReplicas) do not match desired replicas ($desiredReplicas)."
        }
        else {
            Write-Host "Replica counts match. No issues detected."
        }

        if ($currentReplicas -lt $hpa.spec.minReplicas) {
            Write-Host("Current replicas are less than the minimum replicas configured.") -ForegroundColor Red
            Stop-Troubleshooting
        }

        Write-Host("Checking the status of pods for ama-metrics deployment...")
        $rsPods = kubectl get pods -n kube-system -l rsName=ama-metrics -o json | ConvertFrom-Json
        if ($null -eq $rsPods.Items -or $rsPods.Items.Count -lt $currentReplicas) {
            Write-Host("Not all ama-metrics pods are scheduled or running.") -ForegroundColor Red
            Write-Host("Expected replicas:", $currentReplicas)
            Write-Host("Scheduled pods:", $rsPods.Items.Count)
            Stop-Troubleshooting
        }

        foreach ($pod in $rsPods.Items) {
            $readyCondition = $pod.status.conditions | Where-Object { $_.type -eq "Ready" -and $_.status -eq "True" }
            if (-not $readyCondition) {
                Write-Host("Pod $($pod.metadata.name) is not ready.") -ForegroundColor Red
                Stop-Troubleshooting
            }
        }

        Write-Host("All ama-metrics pods are running as expected.") -ForegroundColor Green
    }

    # Log collection is the same for both the HPA and the non-HPA case.
    Write-Host("Collecting logs for debugging...")
    foreach ($pod in $rsPods.Items) {
        $podName = $pod.metadata.name
        Save-PodDebugLogs -PodName $podName -OutputDirectory $debuglogsDir
        Write-Host("Logs for pod $($podName) copied successfully.") -ForegroundColor Green
    }
    Write-Host("Logs for all ama-metrics pods have been successfully copied.") -ForegroundColor Green
}
catch {
    Write-Host("Failed to validate ama-metrics pods: '" + $_ + "'") -ForegroundColor Red
    Stop-Troubleshooting
}

#
# Linux daemonset (ama-metrics-node)
#
Write-Host("Checking whether the ama-metrics-node linux daemonset pod running correctly ...")
try {
    $dsStatus = Get-DaemonSetStatus -Namespace "kube-system" -Name "ama-metrics-node"
    if ($null -eq $dsStatus) {
        Write-Host("ama-metrics daemonset pod not scheduled or failed to schedule. " + $contactUSMessage)
        Stop-Troubleshooting
    }

    if (-not (Test-DaemonSetHealthy $dsStatus)) {
        Write-Host("ama-metrics daemonset pod not scheduled or failed to schedule.") -ForegroundColor Red
        Write-Host($dsStatus)
        Write-Host($contactUSMessage)
        Stop-Troubleshooting
    }

    Write-Host("ama-metrics daemonset pod running OK.") -ForegroundColor Green

    # Logs are collected from at most $maxIterations pods to keep the run short on large clusters.
    $iterationCount = 0
    $maxIterations = 15
    foreach ($podName in (Get-PodNamesBySelector -Namespace "kube-system" -Selector "dsName=ama-metrics-node")) {
        if ($iterationCount -ge $maxIterations) {
            Write-Host "Maximum iteration count reached ($maxIterations) Exiting loop."
            break
        }

        Save-PodDebugLogs -PodName $podName -OutputDirectory $debuglogsDir
        Write-Host ("Logs for $podName have been saved to $($podName)_promcollector.log and $($podName)_addontokenadapter.log")
        $iterationCount++
    }
}
catch {
    Write-Host ("Failed to execute the script : '" + $_ + "' ") -ForegroundColor Red
    Stop-Troubleshooting
}

#
# Windows daemonset (ama-metrics-win-node) and windows exporter, only when the cluster has
# at least one Windows node pool.
#
try {
    $aksCluster = Get-AzAksCluster -ResourceGroupName $ClusterResourceGroupName -Name $ClusterName

    $hasWindowsNodePools = $false
    foreach ($nodePool in $aksCluster.AgentPoolProfiles) {
        if ($nodePool.OsType -eq "Windows") {
            $hasWindowsNodePools = $true
            break
        }
    }

    if ($hasWindowsNodePools) {
        Write-Host("Checking whether the ama-metrics-win-node windows daemonset pod running correctly ...")
        $dsStatus = Get-DaemonSetStatus -Namespace "kube-system" -Name "ama-metrics-win-node"
        if ($null -eq $dsStatus) {
            Write-Host("ama-metrics-win-node daemonset pod not scheduled or failed to schedule. " + $contactUSMessage)
            Stop-Troubleshooting
        }

        if (-not (Test-DaemonSetHealthy $dsStatus)) {
            Write-Host("ama-metrics-win-node daemonset pod not scheduled or failed to schedule.") -ForegroundColor Red
            Write-Host($dsStatus)
            Write-Host($contactUSMessage)
            Stop-Troubleshooting
        }

        Write-Host("ama-metrics-win-node daemonset pod running OK.") -ForegroundColor Green

        $iterationCount = 0
        $maxIterations = 15
        foreach ($podName in (Get-PodNamesBySelector -Namespace "kube-system" -Selector "dsName=ama-metrics-win-node")) {
            if ($iterationCount -ge $maxIterations) {
                Write-Host "Maximum iteration count reached ($maxIterations) Exiting loop."
                break
            }

            # Windows pods: no mdsd files are copied and the token adapter container has a
            # "-win" suffix. MonAgentHost / MonAgentLauncher logs are intentionally not collected.
            Save-PodDebugLogs -PodName $podName -OutputDirectory $debuglogsDir -MdsdLogKinds @() -TokenAdapterContainer "addon-token-adapter-win" -TokenAdapterFileSuffix "addontokenadapterwin"
            Write-Host ("Logs for $podName have been saved to $($podName)_promcollector.log and $($podName)_addontokenadapterwin.log")
            $iterationCount++
        }

        # Collect windows exporter pod logs if the daemonset exists. Problems here are
        # reported but do not stop the script.
        Write-Host("Checking whether the windows exporter pods are running correctly in the monitoring namespace...")
        $exporterStatus = Get-DaemonSetStatus -Namespace "monitoring" -Name "windows-exporter"
        if ($null -eq $exporterStatus) {
            Write-Host("windows exporter daemonset pod not scheduled or failed to schedule. " + $contactUSMessage)
        }
        elseif (-not (Test-DaemonSetHealthy $exporterStatus)) {
            Write-Host("windows exporter daemonset pod not scheduled or failed to schedule.") -ForegroundColor Red
            Write-Host($exporterStatus)
        }
        else {
            Write-Host("windows exporter daemonset pod(s) running OK.") -ForegroundColor Green

            $iterationCount = 0
            $maxIterations = 15
            foreach ($podName in (Get-PodNamesBySelector -Namespace "monitoring" -Selector "app=windows-exporter")) {
                if ($iterationCount -ge $maxIterations) {
                    Write-Host "Maximum iteration count reached ($maxIterations) Exiting loop."
                    break
                }

                # Save the windows exporter pod log to a file named after the pod.
                $windowsExporterLogPath = "$debuglogsDir/$($podName).log"
                kubectl logs $podName -n monitoring > $windowsExporterLogPath
                Write-Host ("Logs for $podName have been saved to $($podName).log")
                $iterationCount++
            }
        }
    }
}
catch {
    Write-Host ("Failed to execute the script : '" + $_ + "' ") -ForegroundColor Red
    Stop-Troubleshooting
}

# Zip up the contents of the debuglogs directory
$zipFileName = "debuglogs.zip"
Compress-Archive -Path $debuglogsDir -DestinationPath $zipFileName -Force
Write-Host("Contents of debuglogs directory zipped to $zipFileName.") -ForegroundColor Green

Write-Host("Everything looks good according to this script. Please contact us by creating a support ticket in Azure for help. Use this link: https://azure.microsoft.com/en-us/support/create-ticket") -ForegroundColor Green
Write-Host("")
Stop-Transcript