in pkg/k8s-client/eks.go [561:795]
// checkHealth runs a fixed sequence of health probes against the cluster and
// returns the first failure it encounters, or nil when every probe passes.
//
// The probes, in order:
//  1. validate the required config fields and that the kubeconfig and kubectl
//     files exist (and that kubectl is executable);
//  2. run "kubectl version" and call e.fetchServerVersion;
//  3. read <endpoint>/version and match it against ServerVersion, falling back
//     to UpgradeServerVersion when that is set;
//  4. run "kubectl cluster-info" (output must contain "is running at");
//  5. run "kubectl get cs";
//  6. read <endpoint>/healthz?verbose (must contain "healthz check passed");
//  7. list namespaces and the pods in kube-system;
//  8. fetch /metrics, optionally write it to MetricsRawOutputDirKubeAPIServer
//     and upload it to S3, and, when EncryptionEnabled is set, require that an
//     encryption-related counter is non-zero.
//
// Probe output is printed to stdout and logged through e.cfg.Logger.
func (e *eks) checkHealth() error {
	if e.cfg == nil {
		return errors.New("nil EKSConfig")
	}
	if e.cfg.KubectlPath == "" {
		return errors.New("empty EKSConfig.KubectlPath")
	}
	if e.cfg.KubeConfigPath == "" {
		return errors.New("empty EKSConfig.KubeConfigPath")
	}
	if e.cfg.ClusterAPIServerEndpoint == "" {
		return errors.New("empty EKSConfig.ClusterAPIServerEndpoint")
	}
	if !fileutil.Exist(e.cfg.KubeConfigPath) {
		return fmt.Errorf("%q not found", e.cfg.KubeConfigPath)
	}
	if !fileutil.Exist(e.cfg.KubectlPath) {
		return fmt.Errorf("%q not found", e.cfg.KubectlPath)
	}
	if err := fileutil.EnsureExecutable(e.cfg.KubectlPath); err != nil {
		return fmt.Errorf("cannot execute %q (%w)", e.cfg.KubectlPath, err)
	}

	// kubectl runs one kubectl subcommand against the configured kubeconfig
	// with a 15-second timeout and returns its trimmed combined output.
	kubectl := func(args ...string) (string, error) {
		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
		defer cancel()
		cmdArgs := append([]string{"--kubeconfig=" + e.cfg.KubeConfigPath}, args...)
		raw, err := exec.New().CommandContext(ctx, e.cfg.KubectlPath, cmdArgs...).CombinedOutput()
		return strings.TrimSpace(string(raw)), err
	}

	out, err := kubectl("version")
	if err != nil {
		return fmt.Errorf("'kubectl version' failed %w (output %q)", err, out)
	}
	fmt.Printf("\n\"kubectl version\" output:\n%s\n\n", out)

	vf, err := e.fetchServerVersion()
	if err != nil {
		return fmt.Errorf("fetch version info failed %w", err)
	}
	fmt.Printf("\n\"kubectl version\" info output:\n%s\n\n", vf.String())

	// NOTE(review): httputil.ReadInsecure presumably skips TLS verification —
	// confirm against the helper's implementation.
	ep := e.cfg.ClusterAPIServerEndpoint + "/version"
	body, err := httputil.ReadInsecure(e.cfg.Logger, ioutil.Discard, ep)
	if err != nil {
		return err
	}
	out = strings.TrimSpace(string(body))
	fmt.Printf("\n\n\"%s\" output:\n%s\n\n", ep, out)

	// The reported gitVersion must start with ServerVersion; during an upgrade
	// it may instead start with UpgradeServerVersion.
	var verErr error
	if e.cfg.ServerVersion != "" && !strings.Contains(out, fmt.Sprintf(`"gitVersion": "v%s`, e.cfg.ServerVersion)) {
		verErr = fmt.Errorf("%q does not contain version %q", out, e.cfg.ServerVersion)
	}
	if verErr != nil && e.cfg.UpgradeServerVersion != "" {
		if strings.Contains(out, fmt.Sprintf(`"gitVersion": "v%s`, e.cfg.UpgradeServerVersion)) {
			verErr = nil
		} else {
			verErr = fmt.Errorf("%w; does not contain version %q either", verErr, e.cfg.UpgradeServerVersion)
		}
	}
	if verErr != nil {
		return verErr
	}

	out, err = kubectl("cluster-info")
	if err != nil {
		return fmt.Errorf("'kubectl cluster-info' failed %w (output %q)", err, out)
	}
	if !strings.Contains(out, "is running at") {
		return fmt.Errorf("'kubectl cluster-info' not ready (output %q)", out)
	}
	fmt.Printf("\n\"kubectl cluster-info\" output:\n%s\n\n", out)

	out, err = kubectl("get", "cs")
	if err != nil {
		return fmt.Errorf("'kubectl get cs' failed %w (output %q)", err, out)
	}
	fmt.Printf("\n\"kubectl get cs\" output:\n%s\n\n", out)

	ep = e.cfg.ClusterAPIServerEndpoint + "/healthz?verbose"
	body, err = httputil.ReadInsecure(e.cfg.Logger, ioutil.Discard, ep)
	if err != nil {
		return err
	}
	out = strings.TrimSpace(string(body))
	if !strings.Contains(out, "healthz check passed") {
		return fmt.Errorf("%q does not contain 'healthz check passed'", out)
	}
	fmt.Printf("\n\n\"%s\" output (\"kubectl get --raw /healthz?verbose\"):\n%s\n", ep, out)

	fmt.Printf("\n\"kubectl get namespaces\" output:\n")
	// NOTE(review): the numeric arguments look like a batch limit and a
	// per-batch interval — confirm against listNamespaces.
	ns, err := e.listNamespaces(150, 5*time.Second)
	if err != nil {
		return fmt.Errorf("failed to list namespaces %w", err)
	}
	for _, v := range ns {
		e.cfg.Logger.Info("namespace", zap.String("name", v.GetName()))
	}
	// fmt.Println keeps the separator on stdout with the rest of the report;
	// the built-in println writes to stderr.
	fmt.Println()

	fmt.Printf("\n\"kubectl get pods -n=kube-system\" output:\n")
	pods, err := e.listPods("kube-system", 150, 5*time.Second, 3)
	if err != nil {
		return fmt.Errorf("failed to list pods %w", err)
	}
	for _, v := range pods {
		// Report the last condition whose status is True, or "Pending" when
		// the pod has none.
		cond := "Pending"
		for _, cv := range v.Status.Conditions {
			if cv.Status != v1.ConditionTrue {
				continue
			}
			cond = string(cv.Type)
		}
		e.cfg.Logger.Info("kube-system pod", zap.String("name", v.GetName()), zap.String("condition", cond))
	}
	fmt.Println()

	fmt.Printf("\n\"curl -sL http://localhost:8080/metrics | grep storage_\" output:\n")
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	metrics, err := e.getClient().
		CoreV1().
		RESTClient().
		Get().
		RequestURI("/metrics").
		Do(ctx).
		Raw()
	cancel()
	if err != nil {
		return fmt.Errorf("failed to fetch /metrics (%w)", err)
	}

	if e.cfg.MetricsRawOutputDirKubeAPIServer != "" {
		if !fileutil.Exist(e.cfg.MetricsRawOutputDirKubeAPIServer) {
			if err = os.MkdirAll(e.cfg.MetricsRawOutputDirKubeAPIServer, 0700); err != nil {
				e.cfg.Logger.Warn("failed to mkdir", zap.String("dir", e.cfg.MetricsRawOutputDirKubeAPIServer), zap.Error(err))
				return fmt.Errorf("failed to mkdir %q (%w)", e.cfg.MetricsRawOutputDirKubeAPIServer, err)
			}
		}
		// One file per check, named by the current UTC timestamp.
		name := time.Now().UTC().Format(time.RFC3339Nano)
		fpath := filepath.Join(e.cfg.MetricsRawOutputDirKubeAPIServer, name)
		// The dump is plain data: it needs neither execute bits nor
		// world-write permission.
		if err := ioutil.WriteFile(fpath, metrics, 0644); err != nil {
			e.cfg.Logger.Warn("failed to write /metrics", zap.String("path", fpath), zap.Error(err))
			return err
		}
		if e.cfg.S3API != nil {
			if err := aws_s3.Upload(
				e.cfg.Logger,
				e.cfg.S3API,
				e.cfg.S3BucketName,
				path.Join(e.cfg.S3MetricsRawOutputDirKubeAPIServer, name),
				fpath,
			); err != nil {
				return err
			}
		}
		e.cfg.Logger.Info("wrote /metrics", zap.String("path", fpath))
	}

	// parseCount extracts the sample value that follows the metric name on a
	// label-less exposition line. The Prometheus text format serializes values
	// as floats, so large counters can appear in exponent form (for example
	// "1.5e+06"); those are accepted in addition to plain integers. Only the
	// first field after the name is read, so an optional trailing timestamp
	// is ignored.
	parseCount := func(line, name string) (int64, error) {
		fields := strings.Fields(strings.TrimPrefix(line, name))
		if len(fields) == 0 {
			return 0, fmt.Errorf("no value for metric %q", name)
		}
		if n, err := strconv.ParseInt(fields[0], 10, 64); err == nil {
			return n, nil
		}
		f, err := strconv.ParseFloat(fields[0], 64)
		if err != nil {
			return 0, err
		}
		// Written as a negated range test so that NaN (for which every
		// comparison is false) is rejected together with out-of-range values.
		if !(f >= 0 && f < 1<<63) {
			return 0, fmt.Errorf("metric %q value %q is not a valid count", name, fields[0])
		}
		return int64(f), nil
	}

	var dekGenCnt, cacheMissCnt int64
	scanner := bufio.NewScanner(bytes.NewReader(metrics))
	for scanner.Scan() {
		line := scanner.Text()
		var parseErr error
		switch {
		case strings.HasPrefix(line, "# "):
			continue

		// The DEK generation metric exists under two names; see
		// https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.17.md#deprecatedchanged-metrics
		case strings.HasPrefix(line, metricDEKGenSecondsCount+" "):
			dekGenCnt, parseErr = parseCount(line, metricDEKGenSecondsCount)

		case strings.HasPrefix(line, metricDEKGenMicroSecondsCount+" "):
			dekGenCnt, parseErr = parseCount(line, metricDEKGenMicroSecondsCount)

		case strings.HasPrefix(line, metricEnvelopeCacheMiss+" "):
			cacheMissCnt, parseErr = parseCount(line, metricEnvelopeCacheMiss)
		}
		if parseErr != nil {
			// Best effort: a malformed line is reported but does not abort
			// the scan.
			e.cfg.Logger.Warn("failed to parse",
				zap.String("line", line),
				zap.Error(parseErr),
			)
		}
		// A single non-zero counter is enough evidence; stop scanning early.
		if dekGenCnt > 0 || cacheMissCnt > 0 {
			break
		}
	}
	if err := scanner.Err(); err != nil {
		// A scan error means the remaining lines were not inspected; surface
		// it so a zero count below can be diagnosed.
		e.cfg.Logger.Warn("failed to scan /metrics", zap.Error(err))
	}
	e.cfg.Logger.Info("encryption metrics",
		zap.Int64("dek-gen-count", dekGenCnt),
		zap.Int64("cache-miss-count", cacheMissCnt),
	)
	if e.cfg.EncryptionEnabled {
		if dekGenCnt == 0 && cacheMissCnt == 0 {
			return errors.New("encrypted enabled, unexpected /metrics")
		}
		e.cfg.Logger.Info("successfully checked encryption")
	}

	e.cfg.Logger.Info("successfully checked health")
	return nil
}