pkg/probe/gke.go (162 lines of code) (raw):
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package probe
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"time"
"cloud.google.com/go/compute/metadata"
"github.com/GoogleCloudPlatform/gke-prober/pkg/common"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
)
const (
clusterMetricsApi = "apis/metrics.k8s.io/v1beta1/namespaces/gke-prober-system/pods/"
dnsLookupHost = "kubernetes.default.svc.cluster.local"
)
type PodMetricsList struct {
Kind string `json:"kind"`
APIVersion string `json:"apiVersion"`
Metadata struct {
SelfLink string `json:"selfLink"`
} `json:"metadata"`
Items []struct {
Metadata struct {
Name string `json:"name"`
Namespace string `json:"namespace"`
CreationTimestamp time.Time `json:"creationTimestamp"`
} `json:"metadata"`
Timestamp time.Time `json:"timestamp"`
Window string `json:"window"`
Containers []struct {
Name string `json:"name"`
Usage struct {
CPU string `json:"cpu"`
Memory string `json:"memory"`
} `json:"usage"`
} `json:"containers"`
} `json:"items"`
}
func ClusterProbes() ClusterProbeMap {
return ClusterProbeMap{
"metrics_server": &metricsServerProbe{},
"kube_dns": &kubeDnsProbe{host: dnsLookupHost},
}
}
type metricsServerProbe struct {
}
type kubeDnsProbe struct {
host string
}
func (p *metricsServerProbe) Run(ctx context.Context, clientset *kubernetes.Clientset) Result {
var podmetrics PodMetricsList
var body []byte
var err error
body, err = clientset.RESTClient().Get().AbsPath(clusterMetricsApi).DoRaw(ctx)
if err != nil {
klog.Warningf("Get metrics from metrics-server returned error %s\n", err.Error())
return Result{
Available: "Unhealthy",
Err: err,
}
}
if err = json.Unmarshal(body, &podmetrics); err != nil {
klog.Warningf("Json parser metrics returned error %s\n", err.Error())
return Result{
Available: "Unhealthy",
Err: err,
}
}
if len(podmetrics.Items) == 0 {
err = fmt.Errorf("zero metrics returned\n")
klog.Warningf("Get metrics from metrics-server returned error %s\n", err.Error())
return Result{
Available: "Unhealthy",
Err: err,
}
}
str, _ := json.MarshalIndent(podmetrics, "", "\t")
klog.V(2).Infof("Pod metrics List is %s\n", string(str))
err = fmt.Errorf("Metrics-server is operational health")
return Result{
Available: "Healthy",
Err: err,
}
}
func (p *kubeDnsProbe) Run(context.Context, *kubernetes.Clientset) Result {
ips, err := net.LookupIP(p.host)
if err != nil {
klog.Warningf("Dns Lookup response failed from KubeDNS: %s\n", err.Error())
return Result{
Available: "Unhealthy",
Err: err,
}
}
if len(ips) == 0 {
err = fmt.Errorf("no IPs returned in dns lookup response from KubeDNS")
return Result{
Available: "Unhealthy",
Err: err,
}
}
klog.V(1).Infof("Dns lookup responses from KubeDNS returns %s\n", ips[0])
err = fmt.Errorf("KubeDNS is operational health")
return Result{
Available: "Healthy",
Err: err,
}
}
func NodeProbes() ProbeMap {
return ProbeMap{
"fluentbit": newFluntdProbe(),
"gke-metadata-server": newGkeMetadataServerProbe(),
}
}
func resultFromError(err error) Result {
return Result{
Available: AvailableError,
Err: err,
}
}
type fluentdProbe struct {
}
func newFluntdProbe() *fluentdProbe {
return &fluentdProbe{}
}
func (p *fluentdProbe) Run(ctx context.Context, pod *v1.Pod, addon common.Addon) Result {
ip := pod.Status.HostIP
_, err := fetchFluentbitUptime(ip)
if err != nil {
err = fmt.Errorf("fetchFluentbitUptime returned unexpected error: %v", err)
return resultFromError(err)
}
return defaultProbeResult
}
func fetchFluentbitUptime(ip string) (string, error) {
resp, err := http.Get("http://" + ip + ":2020/api/v1/uptime")
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(body), nil
}
type gkeMetadataServerProbe struct {
}
func newGkeMetadataServerProbe() *gkeMetadataServerProbe {
return &gkeMetadataServerProbe{}
}
func (p *gkeMetadataServerProbe) Run(ctx context.Context, pod *v1.Pod, addon common.Addon) Result {
_, err := metadata.Email("default")
if err != nil {
err = fmt.Errorf("metadata.Email(default) returned %v", err)
return resultFromError(err)
}
return resultSuccessful
}