pkg/diagnostics/analyzers.go (402 lines of code) (raw):
package diagnostics
import (
"fmt"
"path"
"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
"github.com/aws/eks-anywhere/pkg/constants"
)
// logAnalysisAnalyzerPrefix is the leading label placed in the CheckName of the
// log text analyzers defined in this file.
const logAnalysisAnalyzerPrefix = "log analysis:"
// analyzerFactory builds the Analyze definitions used by the diagnostics
// package. It has no fields.
type analyzerFactory struct{}
// NewAnalyzerFactory returns a pointer to a new analyzerFactory.
func NewAnalyzerFactory() *analyzerFactory {
	return new(analyzerFactory)
}
// DefaultAnalyzers returns the analyzers produced by defaultDeploymentAnalyzers,
// copied into a new slice.
func (a *analyzerFactory) DefaultAnalyzers() []*Analyze {
	deployments := a.defaultDeploymentAnalyzers()
	return append([]*Analyze(nil), deployments...)
}
// defaultDeploymentAnalyzers returns a deployment analyzer for the "coredns"
// deployment in constants.KubeSystemNamespace, expecting 2 replicas.
func (a *analyzerFactory) defaultDeploymentAnalyzers() []*Analyze {
	coreDNS := eksaDeployment{
		Name:             "coredns",
		Namespace:        constants.KubeSystemNamespace,
		ExpectedReplicas: 2,
	}
	return a.generateDeploymentAnalyzers([]eksaDeployment{coreDNS})
}
// ManagementClusterAnalyzers returns the management cluster deployment
// analyzers followed by the management cluster CRD analyzers.
func (a *analyzerFactory) ManagementClusterAnalyzers() []*Analyze {
	combined := append([]*Analyze(nil), a.managementClusterDeploymentAnalyzers()...)
	combined = append(combined, a.managementClusterCrdAnalyzers()...)
	return combined
}
// managementClusterCrdAnalyzers returns CRD analyzers for the "clusters" and
// "bundles" resources in the v1alpha1 API group.
func (a *analyzerFactory) managementClusterCrdAnalyzers() []*Analyze {
	group := v1alpha1.GroupVersion.Group
	return a.generateCrdAnalyzers([]string{
		fmt.Sprintf("clusters.%s", group),
		fmt.Sprintf("bundles.%s", group),
	})
}
// PackageAnalyzers returns the package deployment analyzers followed by the
// package CRD analyzers.
func (a *analyzerFactory) PackageAnalyzers() []*Analyze {
	combined := append([]*Analyze(nil), a.packageDeploymentAnalyzers()...)
	combined = append(combined, a.packageCrdAnalyzers()...)
	return combined
}
// packageCrdAnalyzers returns one CRD analyzer for each of the four
// packages.eks.amazonaws.com CRD names listed below.
func (a *analyzerFactory) packageCrdAnalyzers() []*Analyze {
	return a.generateCrdAnalyzers([]string{
		"packagebundlecontrollers.packages.eks.amazonaws.com",
		"packagebundles.packages.eks.amazonaws.com",
		"packagecontrollers.packages.eks.amazonaws.com",
		"packages.packages.eks.amazonaws.com",
	})
}
// packageDeploymentAnalyzers returns a deployment analyzer for the
// "eks-anywhere-packages" deployment, expecting 1 replica. The namespace is
// taken from constants.EksaPackagesName.
func (a *analyzerFactory) packageDeploymentAnalyzers() []*Analyze {
	packagesController := eksaDeployment{
		Name:             "eks-anywhere-packages",
		Namespace:        constants.EksaPackagesName,
		ExpectedReplicas: 1,
	}
	return a.generateDeploymentAnalyzers([]eksaDeployment{packagesController})
}
// managementClusterDeploymentAnalyzers returns one deployment analyzer for each
// controller deployment listed below, each expecting 1 replica.
//
// Every deployment is listed exactly once so that the same check is not
// generated (and reported) twice.
func (a *analyzerFactory) managementClusterDeploymentAnalyzers() []*Analyze {
	d := []eksaDeployment{
		{
			Name:             "capt-controller-manager",
			Namespace:        constants.CaptSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capv-controller-manager",
			Namespace:        constants.CapvSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capc-controller-manager",
			Namespace:        constants.CapcSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capx-controller-manager",
			Namespace:        constants.CapxSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "cert-manager-webhook",
			Namespace:        constants.CertManagerNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "cert-manager-cainjector",
			Namespace:        constants.CertManagerNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "cert-manager",
			Namespace:        constants.CertManagerNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capi-controller-manager",
			Namespace:        constants.CapiSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capi-kubeadm-control-plane-controller-manager",
			Namespace:        constants.CapiKubeadmControlPlaneSystemNamespace,
			ExpectedReplicas: 1,
		}, {
			Name:             "capi-kubeadm-bootstrap-controller-manager",
			Namespace:        constants.CapiKubeadmBootstrapSystemNamespace,
			ExpectedReplicas: 1,
		},
	}
	return a.generateDeploymentAnalyzers(d)
}
// EksaGitopsAnalyzers returns a CRD analyzer for the "gitopsconfigs" resource
// in the v1alpha1 API group.
func (a *analyzerFactory) EksaGitopsAnalyzers() []*Analyze {
	gitopsCrd := fmt.Sprintf("gitopsconfigs.%s", v1alpha1.GroupVersion.Group)
	return a.generateCrdAnalyzers([]string{gitopsCrd})
}
// EksaOidcAnalyzers returns a CRD analyzer for the "oidcconfigs" resource in
// the v1alpha1 API group.
func (a *analyzerFactory) EksaOidcAnalyzers() []*Analyze {
	oidcCrd := fmt.Sprintf("oidcconfigs.%s", v1alpha1.GroupVersion.Group)
	return a.generateCrdAnalyzers([]string{oidcCrd})
}
// EksaExternalEtcdAnalyzers returns deployment analyzers for the etcdadm
// controller manager and the etcdadm bootstrap provider controller manager,
// each expecting 1 replica.
func (a *analyzerFactory) EksaExternalEtcdAnalyzers() []*Analyze {
	etcdadmController := eksaDeployment{
		Name:             "etcdadm-controller-controller-manager",
		Namespace:        constants.EtcdAdmControllerSystemNamespace,
		ExpectedReplicas: 1,
	}
	etcdadmBootstrap := eksaDeployment{
		Name:             "etcdadm-bootstrap-provider-controller-manager",
		Namespace:        constants.EtcdAdmBootstrapProviderSystemNamespace,
		ExpectedReplicas: 1,
	}
	return a.generateDeploymentAnalyzers([]eksaDeployment{etcdadmController, etcdadmBootstrap})
}
// DataCenterConfigAnalyzers returns the provider-specific analyzers selected by
// datacenter.Kind. It returns nil when the kind matches none of the vSphere,
// Docker, CloudStack, Snow, or Nutanix datacenter kinds.
func (a *analyzerFactory) DataCenterConfigAnalyzers(datacenter v1alpha1.Ref) []*Analyze {
	// Stays nil unless one of the cases below matches.
	var analyzers []*Analyze

	switch kind := datacenter.Kind; kind {
	case v1alpha1.VSphereDatacenterKind:
		analyzers = a.eksaVsphereAnalyzers()
	case v1alpha1.DockerDatacenterKind:
		analyzers = a.eksaDockerAnalyzers()
	case v1alpha1.CloudStackDatacenterKind:
		analyzers = a.eksaCloudstackAnalyzers()
	case v1alpha1.SnowDatacenterKind:
		analyzers = a.eksaSnowAnalyzers()
	case v1alpha1.NutanixDatacenterKind:
		analyzers = a.eksaNutanixAnalyzers()
	}

	return analyzers
}
// eksaVsphereAnalyzers returns CRD analyzers for the vSphere datacenter and
// machine config resources, followed by the vSphere diagnostic analyzers.
func (a *analyzerFactory) eksaVsphereAnalyzers() []*Analyze {
	group := v1alpha1.GroupVersion.Group
	crdNames := []string{
		fmt.Sprintf("vspheredatacenterconfigs.%s", group),
		fmt.Sprintf("vspheremachineconfigs.%s", group),
	}

	result := append([]*Analyze(nil), a.generateCrdAnalyzers(crdNames)...)
	return append(result, a.vsphereDiagnosticAnalyzers()...)
}
// eksaCloudstackAnalyzers returns CRD analyzers for the CloudStack datacenter
// and machine config resources, followed by the control plane IP analyzer.
func (a *analyzerFactory) eksaCloudstackAnalyzers() []*Analyze {
	group := v1alpha1.GroupVersion.Group
	crdNames := []string{
		fmt.Sprintf("cloudstackdatacenterconfigs.%s", group),
		fmt.Sprintf("cloudstackmachineconfigs.%s", group),
	}
	return append(a.generateCrdAnalyzers(crdNames), a.validControlPlaneIPAnalyzer())
}
// eksaSnowAnalyzers returns CRD analyzers for the Snow datacenter and machine
// config resources, followed by the control plane IP analyzer.
func (a *analyzerFactory) eksaSnowAnalyzers() []*Analyze {
	group := v1alpha1.GroupVersion.Group
	crdNames := []string{
		fmt.Sprintf("snowdatacenterconfigs.%s", group),
		fmt.Sprintf("snowmachineconfigs.%s", group),
	}
	return append(a.generateCrdAnalyzers(crdNames), a.validControlPlaneIPAnalyzer())
}
// eksaDockerAnalyzers returns the CRD analyzer for the Docker datacenter config
// resource, followed by a deployment analyzer for "local-path-provisioner"
// expecting 1 replica.
func (a *analyzerFactory) eksaDockerAnalyzers() []*Analyze {
	crdNames := []string{
		fmt.Sprintf("dockerdatacenterconfigs.%s", v1alpha1.GroupVersion.Group),
	}
	localPathProvisioner := eksaDeployment{
		Name:             "local-path-provisioner",
		Namespace:        constants.LocalPathStorageNamespace,
		ExpectedReplicas: 1,
	}

	result := append([]*Analyze(nil), a.generateCrdAnalyzers(crdNames)...)
	return append(result, a.generateDeploymentAnalyzers([]eksaDeployment{localPathProvisioner})...)
}
// eksaNutanixAnalyzers returns CRD analyzers for the Nutanix datacenter and
// machine config resources, followed by the control plane IP analyzer.
func (a *analyzerFactory) eksaNutanixAnalyzers() []*Analyze {
	group := v1alpha1.GroupVersion.Group
	crdNames := []string{
		fmt.Sprintf("nutanixdatacenterconfigs.%s", group),
		fmt.Sprintf("nutanixmachineconfigs.%s", group),
	}
	return append(a.generateCrdAnalyzers(crdNames), a.validControlPlaneIPAnalyzer())
}
// EksaLogTextAnalyzers walks the given collectors and, for every collector that
// has a Logs section, appends the log text analyzers registered for that
// collector's namespace in namespaceLogTextAnalyzersMap. Collectors without a
// Logs section, or whose namespace has no registered analyzers, contribute
// nothing. The result is nil when nothing matched.
func (a *analyzerFactory) EksaLogTextAnalyzers(collectors []*Collect) []*Analyze {
	byNamespace := a.namespaceLogTextAnalyzersMap()

	var matched []*Analyze
	for _, c := range collectors {
		if c.Logs == nil {
			continue
		}
		if forNamespace, found := byNamespace[c.Logs.Namespace]; found {
			matched = append(matched, forNamespace...)
		}
	}
	return matched
}
// namespaceLogTextAnalyzersMap associates log text analyzers with the logs
// collected from a specific namespace. The map key is the namespace name and
// the value is the list of log text analyzers for that namespace.
func (a *analyzerFactory) namespaceLogTextAnalyzersMap() map[string][]*Analyze {
	analyzersByNamespace := make(map[string][]*Analyze, 1)
	analyzersByNamespace[constants.CapiKubeadmControlPlaneSystemNamespace] = a.capiKubeadmControlPlaneSystemLogAnalyzers()
	return analyzersByNamespace
}
// capiKubeadmControlPlaneSystemLogAnalyzers returns a single text analyzer that
// searches the capi-kubeadm-control-plane-controller-manager pod logs for the
// message reporting a missing kube-apiserver pod.
//
// The Fail outcome is tagged "true" and the Pass outcome "false"; presumably
// these refer to whether the regex matched (confirm against the analyzer
// runner).
func (a *analyzerFactory) capiKubeadmControlPlaneSystemLogAnalyzers() []*Analyze {
	const podPattern = "capi-kubeadm-control-plane-controller-manager-*"
	logFile := podPattern + ".log"
	logPath := path.Join(logpath(constants.CapiKubeadmControlPlaneSystemNamespace), logFile)

	failure := &outcome{
		Fail: &singleOutcome{
			When:    "true",
			Message: fmt.Sprintf("Node failed to launch correctly; API server pod is missing. See %s", logPath),
		},
	}
	success := &outcome{
		Pass: &singleOutcome{
			When:    "false",
			Message: "API server pods launched correctly",
		},
	}

	apiServerMissing := &Analyze{
		TextAnalyze: &textAnalyze{
			analyzeMeta: analyzeMeta{
				CheckName: fmt.Sprintf("%s: API server pod missing. Log: %s", logAnalysisAnalyzerPrefix, logPath),
			},
			FileName:     logPath,
			RegexPattern: `machine (.*?) reports APIServerPodHealthy condition is false \(Error, Pod kube-apiserver-(.*?) is missing\)`,
			Outcomes:     []*outcome{failure, success},
		},
	}
	return []*Analyze{apiServerMissing}
}
// eksaDeployment describes a deployment for which a deployment analyzer is
// generated.
type eksaDeployment struct {
	// Name and Namespace are copied into the generated deployment analyzer.
	Name, Namespace string
	// ExpectedReplicas is the threshold used in the analyzer's fail condition.
	ExpectedReplicas int
}
// generateDeploymentAnalyzers builds one deployment analyzer per entry of
// deployments, in the same order. It returns nil when deployments is empty.
func (a *analyzerFactory) generateDeploymentAnalyzers(deployments []eksaDeployment) []*Analyze {
	var result []*Analyze
	for i := range deployments {
		result = append(result, a.deploymentAnalyzer(deployments[i]))
	}
	return result
}
// deploymentAnalyzer builds a deployment status analyzer for the given
// deployment. The Fail outcome carries the condition "< ExpectedReplicas" and
// the Pass outcome carries no condition.
func (a *analyzerFactory) deploymentAnalyzer(deployment eksaDeployment) *Analyze {
	notReady := &outcome{
		Fail: &singleOutcome{
			When:    fmt.Sprintf("< %d", deployment.ExpectedReplicas),
			Message: fmt.Sprintf("%s is not ready.", deployment.Name),
		},
	}
	running := &outcome{
		Pass: &singleOutcome{
			Message: fmt.Sprintf("%s is running.", deployment.Name),
		},
	}

	status := &deploymentStatus{
		Name:      deployment.Name,
		Namespace: deployment.Namespace,
		Outcomes:  []*outcome{notReady, running},
	}
	return &Analyze{DeploymentStatus: status}
}
// generateCrdAnalyzers builds one CRD analyzer per name in crds, in the same
// order. It returns nil when crds is empty.
func (a *analyzerFactory) generateCrdAnalyzers(crds []string) []*Analyze {
	var result []*Analyze
	for i := range crds {
		result = append(result, a.crdAnalyzer(crds[i]))
	}
	return result
}
// crdAnalyzer builds a custom resource definition analyzer for crdName. The
// name is used both as the check name and as the CRD to look up. The Fail
// outcome carries the condition "< 1" and the Pass outcome carries no
// condition.
func (a *analyzerFactory) crdAnalyzer(crdName string) *Analyze {
	missing := &outcome{
		Fail: &singleOutcome{
			When:    "< 1",
			Message: fmt.Sprintf("%s is not present on cluster", crdName),
		},
	}
	present := &outcome{
		Pass: &singleOutcome{
			Message: fmt.Sprintf("%s is present on the cluster", crdName),
		},
	}

	crd := &customResourceDefinition{
		analyzeMeta:                  analyzeMeta{CheckName: crdName},
		Outcomes:                     []*outcome{missing, present},
		CustomResourceDefinitionName: crdName,
	}
	return &Analyze{CustomResourceDefinition: crd}
}
// vsphereDiagnosticAnalyzers returns the diagnostic analyzers used for vSphere
// clusters: the control plane IP analyzer followed by the vCenter session
// validate permission analyzer.
func (a *analyzerFactory) vsphereDiagnosticAnalyzers() []*Analyze {
	controlPlaneIP := a.validControlPlaneIPAnalyzer()
	sessionPermission := a.vcenterSessionValidatePermissionAnalyzer()
	return []*Analyze{controlPlaneIP, sessionPermission}
}
// validControlPlaneIPAnalyzer returns a text analyzer that checks whether a
// valid control plane IP is used to connect to the API server. It looks for
// "exit code: 0" in the ping-host-ip pod log.
//
// The Fail outcome is tagged "false" and the Pass outcome "true"; presumably
// these refer to whether the regex matched (confirm against the analyzer
// runner).
func (a *analyzerFactory) validControlPlaneIPAnalyzer() *Analyze {
	const pingPod = "ping-host-ip"
	pingLogPath := path.Join(pingPod, fmt.Sprintf("%s.log", pingPod))

	unreachable := &outcome{
		Fail: &singleOutcome{
			When:    "false",
			Message: fmt.Sprintf("The control plane endpoint host is unavailable. See %s", pingLogPath),
		},
	}
	verified := &outcome{
		Pass: &singleOutcome{
			When:    "true",
			Message: "Control plane IP verified.",
		},
	}

	check := &textAnalyze{
		analyzeMeta: analyzeMeta{
			CheckName: fmt.Sprintf("%s: Destination Host Unreachable. Log: %s", logAnalysisAnalyzerPrefix, pingLogPath),
		},
		FileName:     pingLogPath,
		RegexPattern: `exit code: 0`,
		Outcomes:     []*outcome{unreachable, verified},
	}
	return &Analyze{TextAnalyze: check}
}
// vcenterSessionValidatePermissionAnalyzer returns a text analyzer that checks
// whether the vCenter user has the Sessions.ValidateSession permission, which
// CAPV needs to look up existing valid sessions and reuse them instead of
// creating new ones. It searches the capv-controller-manager pod logs for the
// permission-denied error logged while checking whether a session is active.
//
// The Fail outcome is tagged "true" and the Pass outcome "false"; presumably
// these refer to whether the regex matched (confirm against the analyzer
// runner).
func (a *analyzerFactory) vcenterSessionValidatePermissionAnalyzer() *Analyze {
	const podPattern = "capv-controller-manager-*"
	logFile := podPattern + ".log"
	logPath := path.Join(logpath(constants.CapvSystemNamespace), logFile)

	denied := &outcome{
		Fail: &singleOutcome{
			When:    "true",
			Message: fmt.Sprintf("VCenter user doesn't have Sessions.ValidateSession permission. See %s", logPath),
		},
	}
	granted := &outcome{
		Pass: &singleOutcome{
			When:    "false",
			Message: "VCenter user has Sessions.ValidateSession permission.",
		},
	}

	check := &textAnalyze{
		analyzeMeta: analyzeMeta{
			CheckName: fmt.Sprintf("%s: Session Validate permission missing. Log: %s", logAnalysisAnalyzerPrefix, logPath),
		},
		FileName:     logPath,
		RegexPattern: `session "msg"="error checking if session is active" "error"="ServerFaultCode: Permission to perform this operation was denied."`,
		Outcomes:     []*outcome{denied, granted},
	}
	return &Analyze{TextAnalyze: check}
}
// vmsAccessAnalyzer returns a text analyzer that checks whether VMs have access
// to the vSphere API server. It searches the check-cloud-controller pod log for
// consecutive client-creation failures caused by connection timeouts.
//
// It is not used yet, but will be once the workflows are updated to support
// this use case.
//
// The Fail outcome is tagged "true" and the Pass outcome "false"; presumably
// these refer to whether the regex matched (confirm against the analyzer
// runner).
func (a *analyzerFactory) vmsAccessAnalyzer() *Analyze { //nolint:unused
	runBashPod := "check-cloud-controller"
	runBashPodLog := fmt.Sprintf("%s.log", runBashPod)
	vSphereCloudControllerPodLogPath := path.Join(runBashPod, runBashPodLog)
	return &Analyze{
		TextAnalyze: &textAnalyze{
			analyzeMeta: analyzeMeta{
				CheckName: fmt.Sprintf("%s: Virtual Machine has no access to vSphere API server. Logs: %s", logAnalysisAnalyzerPrefix, vSphereCloudControllerPodLogPath),
			},
			FileName:     vSphereCloudControllerPodLogPath,
			RegexPattern: `Failed to create new client. err: Post (.*) dial tcp (.*) connect: connection timed out\n(.*)Failed to create govmomi client. err: Post (.*) dial tcp (.*) connect: connection timed out`,
			Outcomes: []*outcome{
				{
					Fail: &singleOutcome{
						When: "true",
						// Spelling of "Virtual" corrected in this user-facing message.
						Message: fmt.Sprintf("Failed to create client, Virtual Machines have no access to vSphere API server. See the cloud controller log in control plane node: %s", vSphereCloudControllerPodLogPath),
					},
				},
				{
					Pass: &singleOutcome{
						When:    "false",
						Message: fmt.Sprintf("Virtual Machines have access to vSphere API server. See %s \nPlease ignore the result when this analyzer is running on bootstrap cluster", vSphereCloudControllerPodLogPath),
					},
				},
			},
		},
	}
}