func()

in pkg/k8s-client/eks.go [561:795]


// checkHealth runs a fixed sequence of health checks against the cluster
// described by e.cfg and returns the first failure it encounters, or nil when
// every check passes.
//
// The checks, in order:
//  1. e.cfg is non-nil; KubectlPath, KubeConfigPath and
//     ClusterAPIServerEndpoint are non-empty; both files exist and kubectl
//     is executable.
//  2. "kubectl version" succeeds and e.fetchServerVersion succeeds.
//  3. <endpoint>/version is readable and, when ServerVersion is set, its
//     body contains a gitVersion starting with "v"+ServerVersion (or, failing
//     that, "v"+UpgradeServerVersion when that is set).
//  4. "kubectl cluster-info" succeeds and reports "is running at";
//     "kubectl get cs" succeeds.
//  5. <endpoint>/healthz?verbose contains "healthz check passed".
//  6. Namespaces and kube-system pods can be listed.
//  7. /metrics can be fetched through the client; it is optionally written to
//     MetricsRawOutputDirKubeAPIServer and uploaded to S3. When
//     EncryptionEnabled is set, at least one of the DEK-generation or
//     envelope-cache-miss counters must be positive.
//
// Command and endpoint output is printed to standard output along the way.
func (e *eks) checkHealth() error {
	if e.cfg == nil {
		return errors.New("nil EKSConfig")
	}
	if e.cfg.KubectlPath == "" {
		return errors.New("empty EKSConfig.KubectlPath")
	}
	if e.cfg.KubeConfigPath == "" {
		return errors.New("empty EKSConfig.KubeConfigPath")
	}
	if e.cfg.ClusterAPIServerEndpoint == "" {
		return errors.New("empty EKSConfig.ClusterAPIServerEndpoint")
	}

	if !fileutil.Exist(e.cfg.KubeConfigPath) {
		return fmt.Errorf("%q not found", e.cfg.KubeConfigPath)
	}
	if !fileutil.Exist(e.cfg.KubectlPath) {
		return fmt.Errorf("%q not found", e.cfg.KubectlPath)
	}
	if err := fileutil.EnsureExecutable(e.cfg.KubectlPath); err != nil {
		return fmt.Errorf("cannot execute %q (%w)", e.cfg.KubectlPath, err)
	}

	// runKubectl invokes kubectl against the configured kubeconfig with a
	// 15-second timeout and returns its whitespace-trimmed combined output.
	runKubectl := func(args ...string) (string, error) {
		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
		defer cancel()
		kargs := append([]string{"--kubeconfig=" + e.cfg.KubeConfigPath}, args...)
		output, err := exec.New().CommandContext(ctx, e.cfg.KubectlPath, kargs...).CombinedOutput()
		return strings.TrimSpace(string(output)), err
	}

	out, err := runKubectl("version")
	if err != nil {
		return fmt.Errorf("'kubectl version' failed %w (output %q)", err, out)
	}
	fmt.Printf("\n\"kubectl version\" output:\n%s\n\n", out)

	vf, err := e.fetchServerVersion()
	if err != nil {
		return fmt.Errorf("fetch version info failed %w", err)
	}
	fmt.Printf("\n\"kubectl version\" info output:\n%s\n\n", vf.String())

	ep := e.cfg.ClusterAPIServerEndpoint + "/version"
	output, err := httputil.ReadInsecure(e.cfg.Logger, ioutil.Discard, ep)
	if err != nil {
		return err
	}
	out = strings.TrimSpace(string(output))
	fmt.Printf("\n\n\"%s\" output:\n%s\n\n", ep, out)

	// The reported gitVersion must match ServerVersion; during an upgrade it
	// may instead match UpgradeServerVersion. An empty ServerVersion disables
	// the check entirely.
	var versionErr error
	if e.cfg.ServerVersion != "" && !strings.Contains(out, fmt.Sprintf(`"gitVersion": "v%s`, e.cfg.ServerVersion)) {
		versionErr = fmt.Errorf("%q does not contain version %q", out, e.cfg.ServerVersion)
	}
	if versionErr != nil && e.cfg.UpgradeServerVersion != "" {
		if strings.Contains(out, fmt.Sprintf(`"gitVersion": "v%s`, e.cfg.UpgradeServerVersion)) {
			versionErr = nil
		} else {
			versionErr = fmt.Errorf("%v; does not contain version %q either", versionErr, e.cfg.UpgradeServerVersion)
		}
	}
	if versionErr != nil {
		return versionErr
	}

	out, err = runKubectl("cluster-info")
	if err != nil {
		return fmt.Errorf("'kubectl cluster-info' failed %w (output %q)", err, out)
	}
	if !strings.Contains(out, "is running at") {
		return fmt.Errorf("'kubectl cluster-info' not ready (output %q)", out)
	}
	fmt.Printf("\n\"kubectl cluster-info\" output:\n%s\n\n", out)

	out, err = runKubectl("get", "cs")
	if err != nil {
		return fmt.Errorf("'kubectl get cs' failed %w (output %q)", err, out)
	}
	fmt.Printf("\n\"kubectl get cs\" output:\n%s\n\n", out)

	ep = e.cfg.ClusterAPIServerEndpoint + "/healthz?verbose"
	output, err = httputil.ReadInsecure(e.cfg.Logger, ioutil.Discard, ep)
	if err != nil {
		return err
	}
	out = strings.TrimSpace(string(output))
	if !strings.Contains(out, "healthz check passed") {
		return fmt.Errorf("%q does not contain 'healthz check passed'", out)
	}
	fmt.Printf("\n\n\"%s\" output (\"kubectl get --raw /healthz?verbose\"):\n%s\n", ep, out)

	fmt.Printf("\n\"kubectl get namespaces\" output:\n")
	ns, err := e.listNamespaces(150, 5*time.Second)
	if err != nil {
		return fmt.Errorf("failed to list namespaces %w", err)
	}
	for _, v := range ns {
		e.cfg.Logger.Info("namespace", zap.String("name", v.GetName()))
	}
	println()

	fmt.Printf("\n\"kubectl get pods -n=kube-system\" output:\n")
	pods, err := e.listPods("kube-system", 150, 5*time.Second, 3)
	if err != nil {
		return fmt.Errorf("failed to list pods %w", err)
	}
	for _, v := range pods {
		// Report the last condition whose status is True; a pod with no
		// such condition is reported as "Pending".
		cond := "Pending"
		for _, cv := range v.Status.Conditions {
			if cv.Status != v1.ConditionTrue {
				continue
			}
			cond = string(cv.Type)
		}
		e.cfg.Logger.Info("kube-system pod", zap.String("name", v.GetName()), zap.String("condition", cond))
	}
	println()

	fmt.Printf("\n\"curl -sL http://localhost:8080/metrics | grep storage_\" output:\n")
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	output, err = e.getClient().
		CoreV1().
		RESTClient().
		Get().
		RequestURI("/metrics").
		Do(ctx).
		Raw()
	cancel()
	if err != nil {
		return fmt.Errorf("failed to fetch /metrics (%w)", err)
	}
	if e.cfg.MetricsRawOutputDirKubeAPIServer != "" {
		if !fileutil.Exist(e.cfg.MetricsRawOutputDirKubeAPIServer) {
			if err := os.MkdirAll(e.cfg.MetricsRawOutputDirKubeAPIServer, 0700); err != nil {
				e.cfg.Logger.Warn("failed to mkdir", zap.String("dir", e.cfg.MetricsRawOutputDirKubeAPIServer), zap.Error(err))
				return fmt.Errorf("failed to mkdir %q (%w)", e.cfg.MetricsRawOutputDirKubeAPIServer, err)
			}
		}
		// Each snapshot is stored under its own UTC timestamp.
		name := time.Now().UTC().Format(time.RFC3339Nano)
		fpath := filepath.Join(e.cfg.MetricsRawOutputDirKubeAPIServer, name)
		// NOTE(review): 0777 is unusually permissive for a data file written
		// into a 0700 directory; kept as-is, confirm whether it is intended.
		if err := ioutil.WriteFile(fpath, output, 0777); err != nil {
			e.cfg.Logger.Warn("failed to write /metrics", zap.String("path", fpath), zap.Error(err))
			return err
		}
		if e.cfg.S3API != nil {
			if err := aws_s3.Upload(
				e.cfg.Logger,
				e.cfg.S3API,
				e.cfg.S3BucketName,
				path.Join(e.cfg.S3MetricsRawOutputDirKubeAPIServer, name),
				fpath,
			); err != nil {
				return err
			}
		}
		e.cfg.Logger.Info("wrote /metrics", zap.String("path", fpath))
	}

	// parseCount extracts the integer sample value that follows the metric
	// name on line. Parse failures are logged and otherwise ignored; the
	// value returned by strconv.ParseInt is passed through unchanged.
	parseCount := func(line, metric string) int64 {
		vs := strings.TrimSpace(strings.TrimPrefix(line, metric))
		n, err := strconv.ParseInt(vs, 10, 64)
		if err != nil {
			e.cfg.Logger.Warn("failed to parse",
				zap.String("line", line),
				zap.Error(err),
			)
		}
		return n
	}

	dekGenCnt, cacheMissCnt := int64(0), int64(0)
	scanner := bufio.NewScanner(bytes.NewReader(output))
	// Let a single line be as large as the whole payload so that an unusually
	// long line cannot abort the scan with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(output)+1)
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.HasPrefix(line, "# "):
			continue

		// https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.17.md#deprecatedchanged-metrics
		case strings.HasPrefix(line, metricDEKGenSecondsCount+" "):
			dekGenCnt = parseCount(line, metricDEKGenSecondsCount)

		// https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.17.md#deprecatedchanged-metrics
		case strings.HasPrefix(line, metricDEKGenMicroSecondsCount+" "):
			dekGenCnt = parseCount(line, metricDEKGenMicroSecondsCount)

		case strings.HasPrefix(line, metricEnvelopeCacheMiss+" "):
			cacheMissCnt = parseCount(line, metricEnvelopeCacheMiss)
		}

		// One positive counter is enough for the check below, so stop at the
		// first one found; the other counter may therefore still read zero.
		if dekGenCnt > 0 || cacheMissCnt > 0 {
			break
		}
	}
	if err := scanner.Err(); err != nil {
		// Surface a truncated scan instead of silently reporting zero counts.
		e.cfg.Logger.Warn("failed to scan /metrics", zap.Error(err))
	}
	e.cfg.Logger.Info("encryption metrics",
		zap.Int64("dek-gen-count", dekGenCnt),
		zap.Int64("cache-miss-count", cacheMissCnt),
	)
	if e.cfg.EncryptionEnabled {
		if dekGenCnt == 0 && cacheMissCnt == 0 {
			return errors.New("encrypted enabled, unexpected /metrics")
		}
		e.cfg.Logger.Info("successfully checked encryption")
	}

	e.cfg.Logger.Info("successfully checked health")
	return nil
}