entrypoint.go (119 lines of code) (raw):
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"
"github.com/GoogleCloudPlatform/run-gmp-sidecar/confgenerator"
)
// Create channel to listen for signals.
var signalChan chan (os.Signal) = make(chan os.Signal, 1)
var userConfigFile = "/etc/rungmp/config.yaml"
var otelConfigFile = "/run/rungmp/otel.yaml"
var configRefreshInterval = 20 * time.Second
var selfMetricsPort = 0
var livenessProbePort = 13133
var livenessProbePath = "/liveness"
var delayLivenessProbe = 5 * time.Second
func getRawUserConfig(userConfigFile string) (string, error) {
_, err := os.Stat(userConfigFile)
if err != nil {
if os.IsNotExist(err) {
return "", nil
}
return "", fmt.Errorf("failed to stat file %q: %v", userConfigFile, err)
}
data, err := ioutil.ReadFile(userConfigFile)
if err != nil {
return "", fmt.Errorf("failed to read file %q: %v", userConfigFile, err)
}
return string(data), nil
}
// Generate OTel config from RunMonitoring config. Returns an error if
// generation of OTel configs failed.
func generateOtelConfig(ctx context.Context, userConfigFile string) error {
// Pick up RunMonitoring configuration from mounted volume that is tied to
// secret manager. Translate it from RunMonitoring to OTel.
c, err := confgenerator.ReadConfigFromFile(ctx, userConfigFile)
if err != nil {
log.Fatal(err)
}
if selfMetricsPort == 0 {
selfMetricsPort, err = confgenerator.GetFreePort()
if err != nil {
return err
}
}
// Create the OTel config and write it to disk
otel, err := c.GenerateOtelConfig(ctx, selfMetricsPort)
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(otelConfigFile), 0755); err != nil {
return fmt.Errorf("failed to create directory for %q: %v", otelConfigFile, err)
}
if err := ioutil.WriteFile(otelConfigFile, []byte(otel), 0644); err != nil {
return fmt.Errorf("failed to write file to %q: %v", otelConfigFile, err)
}
return nil
}
// The container is allocated CPU for the duration of the healthcheck. Delaying
// the response to this probe allows the container to complete telemetry flushes
// that may have been throttled.
//
// TODO(b/342463831): Use a more reliable way of checking if telemetry is being
// flushed instead of using a static sleep.
func healthcheckHandler(_ http.ResponseWriter, _ *http.Request) {
time.Sleep(delayLivenessProbe)
}
func main() {
// SIGINT handles Ctrl+C locally.
// SIGTERM handles Cloud Run termination signal.
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
ctx := context.Background()
lastRawConfig, err := getRawUserConfig(userConfigFile)
if err != nil {
log.Fatal(err)
}
// Generate the OTel config for the first time.
err = generateOtelConfig(ctx, userConfigFile)
if err != nil {
log.Fatal(err)
}
entrypointMux := http.NewServeMux()
entrypointMux.HandleFunc(livenessProbePath, healthcheckHandler)
go func() {
err := http.ListenAndServe(fmt.Sprintf(":%d", livenessProbePort), entrypointMux)
if err != nil && err != http.ErrServerClosed {
log.Fatal(err)
}
}()
// Spin up new-subprocess that runs the OTel collector and store the PID.
// This OTel collector should use the generated config.
var procAttr os.ProcAttr
procAttr.Files = []*os.File{nil, /* stdin is not needed for the collector */
os.Stdout, os.Stderr}
collectorProcess, err := os.StartProcess("./rungmpcol", []string{"./rungmpcol", "--config", otelConfigFile}, &procAttr)
if err != nil {
log.Fatal(err)
}
log.Printf("entrypoint: started OTel successfully")
refreshTicker := time.NewTicker(configRefreshInterval)
for {
select {
case <-refreshTicker.C:
rawConfig, err := getRawUserConfig(userConfigFile)
if err != nil {
log.Fatal(err)
}
// Check if we're using the default config. Only reload if something
// has changed since the last time we checked.
if rawConfig == lastRawConfig {
continue
}
// Something changed since the last time we checked the config.
err = generateOtelConfig(ctx, userConfigFile)
if err != nil {
log.Fatal(err)
}
lastRawConfig = rawConfig
// Signal the OTel collector to reload its config
collectorProcess.Signal(syscall.SIGHUP)
log.Println("entrypoint: reloaded OTel config")
case sig := <-signalChan:
// Wait for signals from Cloud Run. Signal the sub process appropriately
// after making relevant changes to the config and/or health signals.
// TODO(b/307317433): Consider having a timeout to shutdown the subprocess
// non-gracefully.
log.Printf("entrypoint: %s signal caught", sig)
collectorProcess.Signal(sig)
processState, err := collectorProcess.Wait()
if err != nil {
log.Fatalf(processState.String(), err)
}
log.Print("entrypoint: sidecar exited")
return
}
}
}