internal/healthchecks/api_check.go (147 lines of code) (raw):

// Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package healthchecks import ( "context" "errors" "fmt" "time" "cloud.google.com/go/logging" monitoring "cloud.google.com/go/monitoring/apiv3/v2" "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb" "github.com/GoogleCloudPlatform/ops-agent/confgenerator/resourcedetector" "github.com/GoogleCloudPlatform/ops-agent/internal/logs" "github.com/cenkalti/backoff/v4" "github.com/googleapis/gax-go/v2/apierror" metricpb "google.golang.org/genproto/googleapis/api/metric" "google.golang.org/grpc/codes" timestamppb "google.golang.org/protobuf/types/known/timestamppb" ) const ( ServiceDisabled = "SERVICE_DISABLED" AccessTokenScopeInsufficient = "ACCESS_TOKEN_SCOPE_INSUFFICIENT" IamPermissionDenied = "IAM_PERMISSION_DENIED" MaxMonitoringPingRetries = 1 ) func createMonitoringPingRequest(resource resourcedetector.Resource) *monitoringpb.CreateTimeSeriesRequest { metricType := "agent.googleapis.com/agent/ops_agent/enabled_receivers" now := &timestamppb.Timestamp{ Seconds: time.Now().Unix(), } value := &monitoringpb.TypedValue{ Value: &monitoringpb.TypedValue_Int64Value{ Int64Value: int64(0), }, } req := &monitoringpb.CreateTimeSeriesRequest{ Name: "projects/" + resource.ProjectName(), TimeSeries: []*monitoringpb.TimeSeries{{ MetricKind: metricpb.MetricDescriptor_GAUGE, ValueType: metricpb.MetricDescriptor_INT64, Metric: &metricpb.Metric{ Type: metricType, }, Resource: resource.MonitoredResource(), Points: []*monitoringpb.Point{{ Interval: &monitoringpb.TimeInterval{ StartTime: now, EndTime: now, }, Value: value, }}, }}, } return req } // monitoringPing reports whether the client's connection to the monitoring service and the // authentication configuration are valid. To accomplish this, monitoringPing writes a // time series point with empty values to an Ops Agent specific metric. // This method mirrors the "(c *Client) Ping" method in "cloud.google.com/go/logging". func monitoringPing(ctx context.Context, client monitoring.MetricClient, resource resourcedetector.Resource) error { // Points written to a time series must be at least 5 seconds apart. Because `monitoringPing` might // be called multiple times in quick succession, the first attempted request to `CreateTimeSeries` // may fail. We can retry the request >5 seconds later in such cases. // https://cloud.google.com/monitoring/quotas pingBackoff := backoff.WithMaxRetries(backoff.NewConstantBackOff(6*time.Second), MaxMonitoringPingRetries) pingOperation := func() error { return client.CreateTimeSeries(ctx, createMonitoringPingRequest(resource)) } return backoff.Retry(pingOperation, pingBackoff) } func runLoggingCheck(logger logs.StructuredLogger, resource resourcedetector.Resource) error { ctx := context.Background() // New Logging Client logClient, err := logging.NewClient(ctx, resource.ProjectName()) if err != nil { return err } defer logClient.Close() logger.Infof("logging client was created successfully") if err := logClient.Ping(ctx); err != nil { logger.Infof(err.Error()) var apiErr *apierror.APIError if errors.As(err, &apiErr) { switch apiErr.Reason() { case ServiceDisabled: return LogApiDisabledErr case AccessTokenScopeInsufficient: return LogApiScopeErr case IamPermissionDenied: return LogApiPermissionErr } switch apiErr.GRPCStatus().Code() { case codes.PermissionDenied: return LogApiPermissionErr case codes.Unauthenticated: return LogApiUnauthenticatedErr case codes.DeadlineExceeded: return LogApiConnErr case codes.Unavailable: return LogApiConnErr } } if errors.Is(err, context.DeadlineExceeded) { return LogApiConnErr } return err } return nil } func runMonitoringCheck(logger logs.StructuredLogger, resource resourcedetector.Resource) error { ctx := context.Background() // New Monitoring Client monClient, err := monitoring.NewMetricClient(ctx) if err != nil { return err } defer monClient.Close() logger.Infof("monitoring client was created successfully") if err := monitoringPing(ctx, *monClient, resource); err != nil { logger.Infof(err.Error()) var apiErr *apierror.APIError if errors.As(err, &apiErr) { switch apiErr.Reason() { case ServiceDisabled: return MonApiDisabledErr case AccessTokenScopeInsufficient: return MonApiScopeErr case IamPermissionDenied: return MonApiPermissionErr } switch apiErr.GRPCStatus().Code() { case codes.PermissionDenied: return MonApiPermissionErr case codes.Unauthenticated: return MonApiUnauthenticatedErr case codes.DeadlineExceeded: return MonApiConnErr case codes.Unavailable: return MonApiConnErr } } if errors.Is(err, context.DeadlineExceeded) { return MonApiConnErr } return err } return nil } type APICheck struct{} func (c APICheck) Name() string { return "API Check" } func (c APICheck) RunCheck(logger logs.StructuredLogger) error { resource, err := resourcedetector.GetResource() if err != nil { return fmt.Errorf("failed to detect the resource: %v", err) } monErr := runMonitoringCheck(logger, resource) logErr := runLoggingCheck(logger, resource) return errors.Join(monErr, logErr) }