galog_cloudlogging.go (183 lines of code) (raw):
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package galog
import (
"context"
"errors"
"fmt"
"time"
"cloud.google.com/go/logging"
logpb "cloud.google.com/go/logging/apiv2/loggingpb"
"google.golang.org/api/option"
)
// CloudLoggingInitMode is the cloud logging backend initialization mode. It is
// passed to NewCloudBackend to select between CloudLoggingInitModeLazy and
// CloudLoggingInitModeActive.
type CloudLoggingInitMode int
const (
// CloudLoggingInitModeLazy is the lazy initialization mode. In this mode the
// backend object is created but the cloud logging client and logger are not
// initialized until InitClient is called.
CloudLoggingInitModeLazy CloudLoggingInitMode = iota
// CloudLoggingInitModeActive is the active initialization mode. In this mode
// the backend object is created and the cloud logging client and logger are
// initialized immediately by NewCloudBackend.
CloudLoggingInitModeActive
// DefaultCloudLoggingPingTimeout is the default timeout for pinging cloud
// logging. InitClient applies it when CloudOptions.PingTimeout is zero.
DefaultCloudLoggingPingTimeout = time.Second
// DefaultClientErrorInterval is the default interval for logging the client
// errors. InitClient applies it when CloudOptions.ClientErrorInterval is zero.
DefaultClientErrorInterval = time.Minute * 20
// DefaultClientErrorLoggingDisabled is the default value for disabling the
// client error logging. It is false, i.e. client error logging is enabled by
// default.
DefaultClientErrorLoggingDisabled = false
// defaultCloudLoggingQueueSize is the default queue size of the cloud logging
// backend implementation. In general writing to cloud logging should not
// require buffering as cloud logging is async and the logging library takes
// care of flushing.
defaultCloudLoggingQueueSize = 100
)
var (
// errCloudLoggingNotInitialized is the error returned by Log and Shutdown
// when the cloud logging logger has not been initialized yet (InitClient has
// not completed successfully).
errCloudLoggingNotInitialized = errors.New("cloud logging logger is not yet fully initialized")
// errCloudLoggingAlreadyInitialized is the error returned when InitClient is
// called and the cloud logging client is already initialized.
errCloudLoggingAlreadyInitialized = errors.New("cloud logging logger is already initialized")
)
// CloudBackend is a Backend implementation for cloud logging. Its client,
// logger and opts fields stay nil until InitClient succeeds.
type CloudBackend struct {
// backendID is the cloud logging backend implementation's ID.
backendID string
// client is the cloud logging client pointer. It is nil until InitClient
// succeeds.
client *logging.Client
// logger is the cloud logging logger pointer. Log and Shutdown treat a nil
// logger as "not yet initialized".
logger *logging.Logger
// config is a pointer to the generic Config interface implementation.
config *backendConfig
// opts is the cloud logging options passed to InitClient.
opts *CloudOptions
// periodicLogger is the periodic logger used to capture the client errors.
periodicLogger *periodicLogger
// disableClientErrorLogging is whether to disable the client error logging.
// InitClient copies it from CloudOptions.DisableClientErrorLogging.
disableClientErrorLogging bool
}
// CloudOptions defines the cloud logging behavior and setup options. The
// options are consumed by InitClient.
type CloudOptions struct {
// Ident is the logger's ident, or the logger's name. It is used as the name
// of the cloud logging logger.
Ident string
// ProgramName is the program name, it's used on the logging payload.
ProgramName string
// ProgramVersion is the program version, it's used on the logging payload.
ProgramVersion string
// Project is the gcp project name the cloud logging client is created for.
Project string
// Instance is the running instance name. When non-empty it is attached to
// every entry as the "instance_name" label.
Instance string
// UserAgent is the logging user agent option. It is only applied when
// non-empty.
UserAgent string
// FlushCadence is how frequently we should push the log to the server. It is
// passed to the logger as its delay threshold.
FlushCadence time.Duration
// ClientErrorInterval is how frequently we should log the client errors. A
// zero value defaults to [DefaultClientErrorInterval].
ClientErrorInterval time.Duration
// DisableClientErrorLogging disables the client error logging when true.
// When false, any errors from the cloud logging client are logged
// periodically; the period is controlled by the [ClientErrorInterval] option.
// Periodical logging is used to prevent spamming the error logs in case of a
// persistent error. By default client error logging is enabled, see
// [DefaultClientErrorLoggingDisabled]. This periodic logger would attempt to
// log at [WARN] level to every other configured backend (file, serial, etc).
DisableClientErrorLogging bool
// PingTimeout is the timeout for pinging Cloud Logging. A zero value defaults
// to [DefaultCloudLoggingPingTimeout].
//
// This is required currently because the cloud logging flush operation hangs
// indefinitely when the server is unreachable due to having no external IP
// address or private Google access enabled. The timeout is used to attempt
// pinging the Cloud Logging server, and if it's not reachable, we skip the
// flush operation.
PingTimeout time.Duration
// WithoutAuthentication disables authentication for cloud logging operations
// when true: the client is then created with option.WithoutAuthentication.
WithoutAuthentication bool
// ExtraLabels are extra labels to be added to the cloud logging entry. They
// are copied into the logger's common labels.
ExtraLabels map[string]string
}
// CloudEntryPayload contains the data to be sent to cloud logging as the
// entry payload. It's translated from the log subsystem's Entry structure.
type CloudEntryPayload struct {
// Message is the formatted message.
Message string `json:"message"`
// LocalTimestamp is the entry's When field rendered as text. Log formats it
// with the layout "2006-01-02T15:04:05.0000Z07:00", so it is a date-time
// string with a zone offset rather than a numeric unix timestamp.
LocalTimestamp string `json:"localTimestamp"`
// ProgName is the program name - or the binary name. It is omitted from the
// JSON output when empty.
ProgName string `json:"progName,omitempty"`
// ProgVersion is the program version. It is omitted from the JSON output
// when empty.
ProgVersion string `json:"progVersion,omitempty"`
}
// NewCloudBackend returns a Backend implementation that will log out to google
// cloud logging.
//
// Initialization Mode:
//
// If mode is CloudLoggingInitModeLazy the backend object will be allocated and
// only the basic elements will be initialized, all log entries will be queued
// until InitClient is called. If mode is CloudLoggingInitModeActive the client
// is initialized right away through InitClient, and any failure is returned
// (wrapped) together with a nil backend.
//
// Why lazy initialization is important/needed?
//
// The Cloud Logging depends on instance name that's mainly a data fed by - or
// accessed from - metadata server and depending on the application and
// environment the metadata server might not be available at the time of the
// application start - being only available later on. In such cases the early
// initializing and registering the backend will result into entries being
// queued and written/sent to the cloud logging later on - that way no logging
// entries are lost.
func NewCloudBackend(ctx context.Context, mode CloudLoggingInitMode, opts *CloudOptions) (*CloudBackend, error) {
	res := &CloudBackend{
		backendID: "log-backend,cloudlogging",
		// Use the queue size dedicated to this backend rather than the file
		// backend's one.
		config: newBackendConfig(defaultCloudLoggingQueueSize),
	}

	// Only the message itself is rendered for these levels.
	res.config.SetFormat(ErrorLevel, `{{.Message}}`)
	res.config.SetFormat(DebugLevel, `{{.Message}}`)

	if mode == CloudLoggingInitModeActive {
		if err := res.InitClient(ctx, opts); err != nil {
			// %w keeps the cause inspectable with errors.Is/errors.As.
			return nil, fmt.Errorf("failed to initialize cloud logging client: %w", err)
		}
	}

	return res, nil
}
// InitClient initializes the cloud logging client and logger using opts.
//
// It returns errCloudLoggingAlreadyInitialized when the client is already
// initialized (for example when the backend was created in "active" mode), and
// an error when opts is nil or the cloud logging client can't be created.
//
// opts is retained by the backend. A zero opts.PingTimeout is replaced, in
// place, with DefaultCloudLoggingPingTimeout.
func (cb *CloudBackend) InitClient(ctx context.Context, opts *CloudOptions) error {
	if cb.client != nil {
		return errCloudLoggingAlreadyInitialized
	}

	// Everything below dereferences opts; fail cleanly instead of panicking.
	if opts == nil {
		return errors.New("cloud logging options must not be nil")
	}

	var clientOptions []option.ClientOption
	if opts.UserAgent != "" {
		clientOptions = append(clientOptions, option.WithUserAgent(opts.UserAgent))
	}
	if opts.WithoutAuthentication {
		clientOptions = append(clientOptions, option.WithoutAuthentication())
	}

	// Set the default ping timeout if not provided.
	if opts.PingTimeout == 0 {
		opts.PingTimeout = DefaultCloudLoggingPingTimeout
	}

	cb.disableClientErrorLogging = opts.DisableClientErrorLogging

	// Client errors are rate limited: at most one is reported per interval.
	interval := opts.ClientErrorInterval
	if interval == 0 {
		interval = DefaultClientErrorInterval
	}
	cb.periodicLogger = &periodicLogger{
		interval: interval,
	}

	client, err := logging.NewClient(ctx, opts.Project, clientOptions...)
	if err != nil {
		// %w keeps the cause inspectable with errors.Is/errors.As.
		return fmt.Errorf("failed to initialize cloud logging client: %w", err)
	}

	client.OnError = func(err error) {
		if opts.DisableClientErrorLogging {
			return
		}
		cb.periodicLogger.log(err)
	}

	// Copy the extra labels so the caller's map is never modified.
	labels := make(map[string]string, len(opts.ExtraLabels)+1)
	for k, v := range opts.ExtraLabels {
		labels[k] = v
	}
	if opts.Instance != "" {
		labels["instance_name"] = opts.Instance
	}

	loggerOptions := []logging.LoggerOption{
		logging.CommonLabels(labels),
		logging.DelayThreshold(opts.FlushCadence),
	}
	logger := client.Logger(opts.Ident, loggerOptions...)

	// Publish the logger last: Log and Shutdown treat a non-nil logger as
	// "initialized" and then read cb.opts and cb.client.
	// NOTE(review): these fields are not otherwise synchronized - confirm
	// whether InitClient can run concurrently with Log/Shutdown.
	cb.opts = opts
	cb.client = client
	cb.logger = logger
	return nil
}
// Log sends the log entry to cloud logging.
//
// It returns errCloudLoggingNotInitialized while the logger has not been
// initialized, and an error when the entry's message can't be formatted. On
// success the entry is handed over to the cloud logging logger and nil is
// returned.
func (cb *CloudBackend) Log(entry *LogEntry) error {
	// If the logger is nil it means the backend is lazy initialized, we return
	// an error to indicate that the backend is not yet initialized - the entries
	// will be queued.
	if cb.logger == nil {
		return errCloudLoggingNotInitialized
	}

	// Translate the level with a switch so no map is allocated per entry.
	// Levels without a mapping keep the zero Severity value.
	var severity logging.Severity
	switch entry.Level {
	case FatalLevel:
		severity = logging.Critical
	case ErrorLevel:
		severity = logging.Error
	case WarningLevel:
		severity = logging.Warning
	case InfoLevel:
		severity = logging.Info
	case DebugLevel:
		severity = logging.Debug
	}

	format := cb.config.Format(entry.Level)
	message, err := entry.Format(format)
	if err != nil {
		// %w keeps the cause inspectable with errors.Is/errors.As.
		return fmt.Errorf("failed to format log message: %w", err)
	}

	sourceLocation := &logpb.LogEntrySourceLocation{
		File:     entry.File,
		Line:     int64(entry.Line),
		Function: entry.Function,
	}

	payload := &CloudEntryPayload{
		Message:        message,
		LocalTimestamp: entry.When.Format("2006-01-02T15:04:05.0000Z07:00"),
		ProgName:       cb.opts.ProgramName,
		ProgVersion:    cb.opts.ProgramVersion,
	}

	cb.logger.Log(logging.Entry{
		Severity:       severity,
		SourceLocation: sourceLocation,
		Payload:        payload,
	})

	return nil
}
// ID returns the cloud logging backend implementation's ID, i.e. the
// backendID value assigned when the backend was created.
func (cb *CloudBackend) ID() string {
return cb.backendID
}
// Config returns the configuration of the cloud logging backend, exposing the
// backend's config field through the Config interface.
func (cb *CloudBackend) Config() Config {
return cb.config
}
// Shutdown forces the cloud logging backend to flush its content and closes the
// logging client. This operation is skipped if Cloud Logging is unreachable.
//
// It returns errCloudLoggingNotInitialized when the logger was never
// initialized. Reachability is probed with a ping bounded by the configured
// PingTimeout; when the ping fails the client is neither flushed nor closed
// and the ping error is returned wrapped, so callers can inspect the cause
// with errors.Is/errors.As. A failure to close the client is returned wrapped
// as well.
func (cb *CloudBackend) Shutdown(ctx context.Context) error {
	if cb.logger == nil {
		return errCloudLoggingNotInitialized
	}

	pingCtx, cancel := context.WithTimeout(ctx, cb.opts.PingTimeout)
	defer cancel()

	// Ensure we can reach Cloud Logging before attempting to flush.
	if err := cb.client.Ping(pingCtx); err != nil {
		return fmt.Errorf("failed to reach cloud logging, skipping flush: %w", err)
	}

	// Closing the client will flush the logs.
	if err := cb.client.Close(); err != nil {
		return fmt.Errorf("failed to close cloud logging client: %w", err)
	}

	return nil
}
// periodicLogger is a helper struct used to log the client errors periodically.
// It's there to prevent spamming the logs with client errors. If there is a
// persistent error, the client will call the error handler too frequently.
type periodicLogger struct {
// interval is the minimum time between two logged client errors.
interval time.Duration
// lastLog is the last time the client error was logged.
lastLog time.Time
// firstRunPassed is false until the first error has been logged; while it is
// false the next error is logged regardless of interval.
firstRunPassed bool
}
// log reports err as a warning unless an error was already reported less than
// interval ago. The very first call always reports. The returned bool tells
// whether err was reported; it is only consumed by tests.
func (pl *periodicLogger) log(err error) bool {
	// Suppress only when something was reported before and the interval has
	// not elapsed since then.
	suppressed := pl.firstRunPassed && time.Since(pl.lastLog) < pl.interval
	if suppressed {
		return false
	}

	pl.firstRunPassed = true
	pl.lastLog = time.Now()
	Warnf("Cloud Logging Client Error: %v", err)
	return true
}