gce_workload_cert_refresh/main.go (200 lines of code) (raw):
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// gce_workload_cert_refresh downloads and rotates workload certificates for GCE VMs.
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/GoogleCloudPlatform/guest-agent/metadata"
"github.com/GoogleCloudPlatform/guest-logging-go/logger"
)
const (
// trustAnchorsKey is the metadata endpoint that contains a set of trusted certificates for peer X.509 certificate chain validation.
trustAnchorsKey = "instance/gce-workload-certificates/trust-anchors"
// workloadIdentitiesKey is the metadata endpoint that contains identities managed by the GCE control plane.
// It carries the X.509 certificate and the private key for the VM's trust domain.
workloadIdentitiesKey = "instance/gce-workload-certificates/workload-identities"
// configStatusKey is the metadata endpoint that contains the status and any errors in the config values provided via the VM metadata.
configStatusKey = "instance/gce-workload-certificates/config-status"
// enableWorkloadCertsKey is the custom metadata attribute that is set to true to enable automatic provisioning of credentials.
enableWorkloadCertsKey = "instance/attributes/enable-workload-certificate"
// contentDirPrefix is used as the prefix to create certificate directories on refresh, named contentDirPrefix-<time>.
contentDirPrefix = "/run/secrets/workload-spiffe-contents"
// tempSymlinkPrefix is used as the prefix to create temporary symlinks on refresh, named tempSymlinkPrefix-<time>, pointing to content directories.
tempSymlinkPrefix = "/run/secrets/workload-spiffe-symlink"
// symlink points to the directory with the current GCE workload certificates and is always expected to be present.
symlink = "/run/secrets/workload-spiffe-credentials"
)
var (
// mdsClient is the client used to query Metadata server.
mdsClient metadata.MDSClientInterface
// programName is the base name of the invoked binary (os.Args[0]); main uses it as the logger name.
programName = path.Base(os.Args[0])
// timeNow returns the current time formatted as RFC 3339. refreshCreds uses the result as the suffix of the
// content directory and temporary symlink names. Defining it as a variable allows the time to be stubbed during testing.
timeNow = func() string { return time.Now().Format(time.RFC3339) }
)
// init assigns the result of metadata.New() to the package-level mdsClient
// before main runs.
func init() {
mdsClient = metadata.New()
}
// logFormat renders a log entry as "<timestamp>: <message>". The timestamp is
// taken from time.Now() at formatting time and uses the "2006/01/02 15:04:05"
// layout; only the entry's Message field is included in the output.
func logFormat(e logger.LogEntry) string {
	const stampLayout = "2006/01/02 15:04:05"

	stamp := time.Now().Format(stampLayout)
	return fmt.Sprintf("%s: %s", stamp, e.Message)
}
// isEnabled reports whether the enable-workload-certificate metadata attribute
// is present and equal to "true" (compared case-insensitively). Any failure to
// read the attribute is logged at debug level and treated as "not enabled".
func isEnabled(ctx context.Context) bool {
	value, err := getMetadata(ctx, enableWorkloadCertsKey)
	if err == nil {
		return bytes.EqualFold(value, []byte("true"))
	}

	logger.Debugf("Failed to get %q from MDS with error: %v", enableWorkloadCertsKey, err)
	return false
}
// getMetadata fetches the value stored under key from the metadata server via
// mdsClient and returns it as a byte slice. On failure it returns a nil slice
// and an error that wraps the client error and names the key.
func getMetadata(ctx context.Context, key string) ([]byte, error) {
	// GCE Workload Certificate endpoints return 412 Precondition failed if the VM was
	// never configured with valid config values at least once. Without valid config
	// values GCE cannot provision the workload certificates.
	value, getErr := mdsClient.GetKey(ctx, key, nil)
	if getErr != nil {
		wrapped := fmt.Errorf("failed to GET %q from MDS with error: %w", key, getErr)
		return nil, wrapped
	}

	return []byte(value), nil
}
/*
metadata key instance/gce-workload-certificates/workload-identities
MANAGED_WORKLOAD_IDENTITY_SPIFFE is of the format:
spiffe://POOL_ID.global.PROJECT_NUMBER.workload.id.goog/ns/NAMESPACE_ID/sa/MANAGED_IDENTITY_ID
{
"status": "OK", // Status of the response,
"workloadCredentials": { // Credentials for the VM's trust domains
"MANAGED_WORKLOAD_IDENTITY_SPIFFE": {
"certificatePem": "-----BEGIN CERTIFICATE-----datahere-----END CERTIFICATE-----",
"privateKeyPem": "-----BEGIN PRIVATE KEY-----datahere-----END PRIVATE KEY-----"
}
}
}
*/
// WorkloadCredential represents Workload Credentials in metadata. It is the value type of
// the "workloadCredentials" map in the workload-identities response.
type WorkloadCredential struct {
// CertificatePem is decoded from the "certificatePem" JSON field; writeWorkloadIdentities writes it to certificates.pem.
CertificatePem string `json:"certificatePem"`
// PrivateKeyPem is decoded from the "privateKeyPem" JSON field; writeWorkloadIdentities writes it to private_key.pem.
PrivateKeyPem string `json:"privateKeyPem"`
}
// WorkloadIdentities represents Workload Identities in metadata, i.e. the JSON document
// served under the workload-identities metadata key.
type WorkloadIdentities struct {
// Status is decoded from the "status" JSON field of the response.
Status string `json:"status"`
// WorkloadCredentials is decoded from the "workloadCredentials" JSON object; its keys are
// the SPIFFE IDs that writeWorkloadIdentities returns to its caller.
WorkloadCredentials map[string]WorkloadCredential `json:"workloadCredentials"`
}
/*
metadata key instance/gce-workload-certificates/trust-anchors
{
"status": "<status string>", // Status of the response
"trustAnchors": { // Trust bundle for the VM's trust domains
"PEER_SPIFFE_TRUST_DOMAIN_1": {
"trustAnchorsPem": "<Trust bundle containing the X.509 root certificates>"
},
"PEER_SPIFFE_TRUST_DOMAIN_2": {
"trustAnchorsPem": "<Trust bundle containing the X.509 root certificates>"
}
}
}
*/
// TrustAnchor represents one or more certificates in an arbitrary order in the metadata.
// It is the value type of the "trustAnchors" map in the trust-anchors response.
type TrustAnchor struct {
// TrustAnchorsPem is decoded from the "trustAnchorsPem" JSON field; writeTrustAnchors writes it to ca_certificates.pem.
TrustAnchorsPem string `json:"trustAnchorsPem"`
}
// WorkloadTrustedAnchors represents Workload Trusted Root Certs in metadata, i.e. the JSON
// document served under the trust-anchors metadata key.
type WorkloadTrustedAnchors struct {
// Status is decoded from the "status" JSON field of the response.
Status string `json:"status"`
// TrustAnchors is decoded from the "trustAnchors" JSON object; findDomain selects one of its
// keys (trust domains) for a given SPIFFE ID.
TrustAnchors map[string]TrustAnchor `json:"trustAnchors"`
}
// outputOpts is a struct for output directory name and symlink templates.
//
// refreshCreds appends "-<timeNow()>" to contentDirPrefix and tempSymlinkPrefix to
// build the per-refresh content directory and temporary symlink names, and
// treats symlink as the stable link that is rotated to the newest content directory.
type outputOpts struct {
contentDirPrefix, tempSymlinkPrefix, symlink string
}
// main initializes logging to stderr, checks whether workload certificate
// refresh is enabled through instance metadata and, if so, performs a single
// credential refresh using the default output locations.
func main() {
	ctx := context.Background()

	opts := logger.LogOpts{
		LoggerName:     programName,
		FormatFunction: logFormat,
		// No need for syslog.
		DisableLocalLogging: true,
		Writers:             []io.Writer{os.Stderr},
	}

	// The created-by attribute is optional; when it cannot be read the MIG
	// field is simply left unset.
	if createdBy, err := getMetadata(ctx, "/instance/attributes/created-by"); err == nil {
		opts.MIG = string(createdBy)
	}

	if err := logger.Init(ctx, opts); err != nil {
		// The logger is unusable here, so report on stderr (with a trailing
		// newline) rather than stdout.
		fmt.Fprintf(os.Stderr, "Error initializing logger: %v\n", err)
		os.Exit(1)
	}

	// Try flushing logs before exiting, if not flushed logs could go missing.
	defer func() {
		logger.Infof("Done")
		logger.Close()
	}()

	if !isEnabled(ctx) {
		logger.Debugf("GCE Workload Certificate refresh is not enabled, skipping cert generation.")
		return
	}

	out := outputOpts{contentDirPrefix, tempSymlinkPrefix, symlink}
	if err := refreshCreds(ctx, out); err != nil {
		// NOTE(review): logger.Fatalf presumably terminates the process, in
		// which case the deferred flush above does not run — confirm that
		// Fatalf flushes on its own.
		logger.Fatalf("Error refreshCreds: %v", err)
	}
}
// findDomain finds the anchor matching with the domain from spiffeID.
// spiffeID is of the form -
// spiffe://POOL_ID.global.PROJECT_NUMBER.workload.id.goog/ns/NAMESPACE_ID/sa/MANAGED_IDENTITY_ID
// where domain is POOL_ID.global.PROJECT_NUMBER.workload.id.goog
// anchors is a map of various domains and their corresponding trust PEMs.
//
// Resolution order:
//  1. If the anchor map contains a single entry, that entry is returned without
//     any check.
//  2. Otherwise the trust domain is extracted from spiffeID (the text between
//     "spiffe://" and the next "/") and looked up exactly.
//  3. If there is no exact match, the longest anchor key that is a substring of
//     spiffeID is returned (ties broken by lexical order), so the result never
//     depends on map iteration order.
//
// It returns the matching key of anchors, or an error when nothing matches.
func findDomain(anchors map[string]TrustAnchor, spiffeID string) (string, error) {
	if len(anchors) == 1 {
		for k := range anchors {
			return k, nil
		}
	}

	// Exact match on the trust domain. This avoids picking an anchor whose
	// name merely appears inside a longer domain or in the path of the ID.
	domain := strings.TrimPrefix(spiffeID, "spiffe://")
	if i := strings.Index(domain, "/"); i >= 0 {
		domain = domain[:i]
	}
	if _, ok := anchors[domain]; ok {
		return domain, nil
	}

	// Fall back to substring matching for IDs that do not follow the expected
	// layout, choosing the most specific candidate deterministically.
	var (
		best  string
		found bool
	)
	for k := range anchors {
		if !strings.Contains(spiffeID, k) {
			continue
		}
		if !found || len(k) > len(best) || (len(k) == len(best) && k < best) {
			best, found = k, true
		}
	}
	if found {
		return best, nil
	}

	return "", fmt.Errorf("no matching trust anchor found")
}
// writeTrustAnchors parses the input data, finds the domain from spiffeID and writes ca_certificates.pem
// in the destDir for that domain.
//
// wtrcsMd is the raw JSON of the trust-anchors metadata response, destDir is
// the directory the PEM file is written into (mode 0644) and spiffeID is the
// identity whose trust domain selects the anchor. It returns an error when the
// JSON cannot be decoded, no anchor matches, or the file cannot be written.
func writeTrustAnchors(wtrcsMd []byte, destDir, spiffeID string) error {
	wtrcs := WorkloadTrustedAnchors{}
	if err := json.Unmarshal(wtrcsMd, &wtrcs); err != nil {
		return fmt.Errorf("error unmarshaling workload trusted root certs: %w", err)
	}

	// Currently there's only one trust anchor but there could be multiple trust anchors in future.
	// In either case we want the trust anchor with domain matching with the one in SPIFFE ID.
	domain, err := findDomain(wtrcs.TrustAnchors, spiffeID)
	if err != nil {
		return err
	}

	caFile := filepath.Join(destDir, "ca_certificates.pem")
	if err := os.WriteFile(caFile, []byte(wtrcs.TrustAnchors[domain].TrustAnchorsPem), 0644); err != nil {
		return fmt.Errorf("error writing ca_certificates.pem: %w", err)
	}
	return nil
}
// writeWorkloadIdentities parses the input data, writes the certificates.pem, private_key.pem files in the
// destDir, and returns the SPIFFE ID for which it wrote the certificates.
//
// wisMd is the raw JSON of the workload-identities metadata response. An error
// is returned when the JSON cannot be decoded, when it contains no workload
// credentials (so that empty certificate/key files are never produced), or
// when either file cannot be written.
func writeWorkloadIdentities(destDir string, wisMd []byte) (string, error) {
	wis := WorkloadIdentities{}
	if err := json.Unmarshal(wisMd, &wis); err != nil {
		return "", fmt.Errorf("error unmarshaling workload identities response: %w", err)
	}

	if len(wis.WorkloadCredentials) == 0 {
		return "", fmt.Errorf("no workload credentials found in workload identities response, status: %q", wis.Status)
	}

	// Its guaranteed to have single entry in workload credentials map; should
	// there ever be more, an arbitrary one is used.
	var (
		spiffeID string
		cred     WorkloadCredential
	)
	for k, v := range wis.WorkloadCredentials {
		spiffeID, cred = k, v
		break
	}

	if err := os.WriteFile(filepath.Join(destDir, "certificates.pem"), []byte(cred.CertificatePem), 0644); err != nil {
		return "", fmt.Errorf("error writing certificates.pem: %w", err)
	}

	// NOTE(review): the private key is written with mode 0644, i.e. readable
	// by every local user — confirm this is intended for workload access.
	if err := os.WriteFile(filepath.Join(destDir, "private_key.pem"), []byte(cred.PrivateKeyPem), 0644); err != nil {
		return "", fmt.Errorf("error writing private_key.pem: %w", err)
	}

	return spiffeID, nil
}
// refreshCreds downloads the workload certificate material from the metadata
// server into a fresh, timestamped content directory and then switches
// opts.symlink over to that directory.
//
// Steps, in order:
//  1. Read config-status. If that fails the certificates are assumed not to be
//     configured and nil is returned without touching the filesystem.
//  2. Create the content directory and write config_status into it.
//  3. If opts.symlink does not exist yet, point it at the new directory so the
//     config status is visible even if the following steps fail.
//  4. Fetch and write the workload identities and the matching trust anchors.
//  5. Create a temporary symlink to the new directory and rename it over
//     opts.symlink, then remove the directory the link previously pointed to.
//
// NOTE(review): when a step after the directory creation fails, the new
// content directory is left behind on disk; nothing in this function removes it.
func refreshCreds(ctx context.Context, opts outputOpts) error {
	now := timeNow()
	contentDir := fmt.Sprintf("%s-%s", opts.contentDirPrefix, now)
	tempSymlink := fmt.Sprintf("%s-%s", opts.tempSymlinkPrefix, now)

	// Get status first so it can be written even when other endpoints are empty.
	certConfigStatus, err := getMetadata(ctx, configStatusKey)
	if err != nil {
		// Return success when certs are not configured to avoid unnecessary systemd failed units.
		logger.Infof("Error getting config status, workload certificates may not be configured: %v", err)
		return nil
	}

	logger.Infof("Creating timestamp contents dir %s", contentDir)
	if err := os.MkdirAll(contentDir, 0755); err != nil {
		return fmt.Errorf("error creating contents dir: %w", err)
	}

	// Write config_status first even if remaining endpoints are empty.
	if err := os.WriteFile(filepath.Join(contentDir, "config_status"), certConfigStatus, 0644); err != nil {
		return fmt.Errorf("error writing config_status: %w", err)
	}

	// Handles the edge case where the config values provided for the first time may be invalid. This ensures
	// that the symlink directory always exists and contains the config_status to surface config errors to the VM.
	if _, err := os.Stat(opts.symlink); os.IsNotExist(err) {
		logger.Infof("Creating new symlink %s", opts.symlink)
		if err := os.Symlink(contentDir, opts.symlink); err != nil {
			return fmt.Errorf("error creating symlink: %w", err)
		}
	}

	// Now get the rest of the content.
	wisMd, err := getMetadata(ctx, workloadIdentitiesKey)
	if err != nil {
		return fmt.Errorf("error getting workload-identities: %w", err)
	}

	spiffeID, err := writeWorkloadIdentities(contentDir, wisMd)
	if err != nil {
		return fmt.Errorf("failed to write workload identities with error: %w", err)
	}

	wtrcsMd, err := getMetadata(ctx, trustAnchorsKey)
	if err != nil {
		return fmt.Errorf("error getting workload-trust-anchors: %w", err)
	}

	if err := writeTrustAnchors(wtrcsMd, contentDir, spiffeID); err != nil {
		return fmt.Errorf("failed to write trust anchors: %w", err)
	}

	if err := os.Symlink(contentDir, tempSymlink); err != nil {
		return fmt.Errorf("error creating temporary link: %w", err)
	}

	// Remember where the link pointed before rotation so that directory can be
	// cleaned up afterwards. A failure here is not fatal; it only means there
	// is nothing to clean up.
	oldTarget, err := os.Readlink(opts.symlink)
	if err != nil {
		logger.Infof("Error reading existing symlink: %v", err)
		oldTarget = ""
	}

	// Only rotate on success of all steps above.
	logger.Infof("Rotating symlink %s", opts.symlink)
	if err := os.Rename(tempSymlink, opts.symlink); err != nil {
		return fmt.Errorf("error rotating target link: %w", err)
	}

	// Clean up previous contents dir.
	newTarget, err := os.Readlink(opts.symlink)
	if err != nil {
		return fmt.Errorf("error reading new symlink: %w, unable to remove old symlink target", err)
	}
	// Skip removal when the previous target is unknown or is the directory
	// that was just put in place.
	if oldTarget != "" && oldTarget != newTarget {
		logger.Infof("Removing old content dir %s", oldTarget)
		if err := os.RemoveAll(oldTarget); err != nil {
			return fmt.Errorf("failed to remove old symlink target: %w", err)
		}
	}

	return nil
}