google_guest_agent/agentcrypto/mtls_mds.go (217 lines of code) (raw):
// Copyright 2023 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// https://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package agentcrypto provides various cryptography related utility functions for Guest Agent.
package agentcrypto
import (
"context"
"fmt"
"path/filepath"
"sync/atomic"
"time"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/cfg"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/events"
mdsevent "github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/events/metadata"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/scheduler"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/uefi"
"github.com/GoogleCloudPlatform/guest-agent/metadata"
"github.com/GoogleCloudPlatform/guest-logging-go/logger"
"github.com/google/go-tpm-tools/client"
"github.com/google/go-tpm-tools/proto/tpm"
"github.com/google/go-tpm/legacy/tpm2"
"google.golang.org/protobuf/encoding/protojson"
pb "github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/agentcrypto/credentials"
)
const (
// UEFI variables are named in the format {VariableName}-{VendorGUID}.
// googleGUID is Google's (the vendor's / variable owner's) GUID, used to prevent
// name collisions with variables of other vendors.
googleGUID = "a2858e46-a37f-456a-8c79-0c1fe48b65ff"
// googleRootCACertEFIVarName is the predefined name part of the UEFI variable that holds the Root CA cert.
googleRootCACertEFIVarName = "InstanceRootCACertificate"
// clientCertsKey is the metadata server key at which the client identity certificate is exposed.
clientCertsKey = "instance/credentials/mds-client-certificate"
// MTLSSchedulerID is the identifier the job scheduler uses for the credentials job.
// NOTE(review): the value spells "Boostrapper"; it is a runtime string, so it is
// left untouched here — confirm nothing depends on it before correcting.
MTLSSchedulerID = "MTLS_MDS_Credential_Boostrapper"
// MTLSScheduleInterval is the interval at which the credential bootstrapper runs.
MTLSScheduleInterval = 48 * time.Hour
)
var (
// googleRootCACertUEFIVar identifies the UEFI variable (name plus Google's vendor
// GUID) from which Run reads the Root CA cert.
googleRootCACertUEFIVar = uefi.VariableName{Name: googleRootCACertEFIVarName, GUID: googleGUID}
// schedulerInstance is the scheduler on which the credentials job is
// scheduled/unscheduled by mdsSchedulerHandler. Within this file it is assigned
// only in Init, so it is nil until Init has run.
schedulerInstance *scheduler.Scheduler
)
// CredsJob implements the job scheduler interface for generating/rotating credentials.
type CredsJob struct {
// client is the client used for communicating with MDS.
client metadata.MDSClientInterface
// rootCertsInstalled tracks if MDS root certificates were installed successfully
// at least once. This allows skipping the unnecessary work of refreshing root certs,
// which are updated only when the instance stops/starts, which restarts the agent
// as well. Allowing a refresh on agent restarts regardless of instance reboots makes
// it possible to fix any issue encountered with the root certificate without having
// to restart the compute instance.
rootCertsInstalled atomic.Bool
// useNativeStore tracks if the native store should be used for the current run or not.
// It is written by checkUserSettings.
useNativeStore atomic.Bool
// isEnabled tracks if the job is currently enabled or not. It is written by
// checkUserSettings and read by ShouldEnable and mdsSchedulerHandler.
isEnabled atomic.Bool
// failedPrevious tracks if the MDS check in shouldEnableJob has failed previously,
// so that the failure is not logged over and over again.
failedPrevious atomic.Bool
}
// New creates a credentials job backed by a freshly created metadata server
// client. Every status flag of the returned job starts at its zero value (false).
func New() *CredsJob {
	job := new(CredsJob)
	job.client = metadata.New()
	return job
}
// readRootCACert loads the Root CA cert from the UEFI variable identified by name.
//
// The variable content must parse as a certificate; otherwise an error is
// returned and the variable is discarded. On success the variable that was read
// is returned unchanged.
func (j *CredsJob) readRootCACert(name uefi.VariableName) (*uefi.Variable, error) {
	variable, readErr := uefi.ReadVariable(name)
	if readErr != nil {
		return nil, fmt.Errorf("unable to read root CA cert file contents: %w", readErr)
	}

	// The parsed certificate is not needed here, parsing only validates the content.
	_, parseErr := parseCertificate(variable.Content)
	if parseErr != nil {
		return nil, fmt.Errorf("unable to verify Root CA cert: %w", parseErr)
	}

	logger.Infof("Successfully read root CA Cert from %+v", name)
	return variable, nil
}
// getClientCredentials requests the encrypted client credentials from MDS (key
// clientCertsKey) and decodes the JSON answer into a GuestCredentialsResponse.
//
// NOTE(review): when decoding fails, the raw MDS answer is embedded in the
// returned error and will therefore show up wherever that error is logged —
// confirm this is acceptable for a credentials payload.
func (j *CredsJob) getClientCredentials(ctx context.Context) (*pb.GuestCredentialsResponse, error) {
	raw, fetchErr := j.client.GetKey(ctx, clientCertsKey, nil)
	if fetchErr != nil {
		return nil, fmt.Errorf("unable to get client credentials from MDS: %w", fetchErr)
	}

	response := new(pb.GuestCredentialsResponse)
	decodeErr := protojson.Unmarshal([]byte(raw), response)
	if decodeErr != nil {
		return nil, fmt.Errorf("unable to unmarshal MDS response(%+v): %w", raw, decodeErr)
	}

	return response, nil
}
// extractKey decrypts the key cipher text (Key Encryption Key encrypted Data
// Decryption Key) carried by importBlob through the vTPM and returns the key
// (DEK) as plain text.
//
// It opens the TPM, loads the ECC endorsement key and imports the blob with it.
// Both the endorsement key and the TPM channel are released before returning.
func (j *CredsJob) extractKey(importBlob *tpm.ImportBlob) ([]byte, error) {
	tpmChannel, openErr := tpm2.OpenTPM()
	if openErr != nil {
		return nil, fmt.Errorf("unable to open a channel to the TPM: %w", openErr)
	}
	defer tpmChannel.Close()

	endorsementKey, keyErr := client.EndorsementKeyECC(tpmChannel)
	if keyErr != nil {
		return nil, fmt.Errorf("failed to load a key from TPM: %w", keyErr)
	}
	// Deferred calls run in reverse order: the key is closed before the channel.
	defer endorsementKey.Close()

	plainKey, importErr := endorsementKey.Import(importBlob)
	if importErr != nil {
		return nil, fmt.Errorf("failed to decrypt import blob: %w", importErr)
	}

	return plainKey, nil
}
// fetchClientCredentials produces the plain text client credentials:
//
//  1. fetches the encrypted client credentials from MDS,
//  2. recovers the Data Encryption Key (DEK) from the key import blob of the
//     response through the vTPM (see extractKey),
//  3. decrypts the credentials using that DEK, and
//  4. verifies the decrypted credentials against the root CA.
//
// rootCA is handed to verifySign as is; Run passes the path of the root CA cert
// file. On failure the error of the failing step is returned unwrapped together
// with a nil slice.
func (j *CredsJob) fetchClientCredentials(ctx context.Context, rootCA string) ([]byte, error) {
	resp, err := j.getClientCredentials(ctx)
	if err != nil {
		return nil, err
	}

	// The DEK is not usable as delivered; extractKey unwraps it through the TPM.
	dek, err := j.extractKey(resp.GetKeyImportBlob())
	if err != nil {
		return nil, err
	}

	plaintext, err := decrypt(dek, resp.GetEncryptedCredentials(), nil)
	if err != nil {
		return nil, err
	}

	// Credentials that do not verify against the root CA are never handed out.
	if err := verifySign(plaintext, rootCA); err != nil {
		return nil, err
	}

	return plaintext, nil
}
// Run generates the required credentials for the MTLS MDS workflow.
//
//  1. Reads the Root CA cert from the UEFI variable, verifies it and writes it to
//     /run/google-mds-mtls/root.crt. This step is skipped once it has succeeded
//     for this job instance (see rootCertsInstalled).
//  2. Fetches the encrypted client credentials from MDS, decrypts them via the
//     vTPM and writes them to /run/google-mds-mtls/client.key.
//
// Note that these credentials are at `C:\Program Files\Google\Compute Engine\certs\mds` on Windows.
// Additionally the agent also generates a PFX file on Windows that can be used for invoking the HTTPS endpoint.
//
// Example usage of these credentials to call the HTTPS endpoint of MDS:
//
//	curl --cacert /run/google-mds-mtls/root.crt -E /run/google-mds-mtls/client.key -H "Metadata-Flavor: Google" https://169.254.169.254
//
// Windows example:
//
//	$cert = Get-PfxCertificate -FilePath "C:\ProgramData\Google\Compute Engine\mds-mtls-client.key.pfx"
//	or
//	$cert = Get-ChildItem Cert:\LocalMachine\My | Where-Object { $_.Issuer -like "*google.internal*" }
//	Invoke-RestMethod -Uri https://169.254.169.254 -Method Get -Headers @{"Metadata-Flavor"="Google"} -Certificate $cert
//
// The returned bool is true on every path, including failures; the error
// describes the step that failed, if any.
// NOTE(review): the bool is presumably the scheduler's "keep scheduling" signal —
// confirm against the scheduler job interface.
func (j *CredsJob) Run(ctx context.Context) (bool, error) {
	rootCertPath := filepath.Join(defaultCredsDir, rootCACertFileName)

	if installed := j.rootCertsInstalled.Load(); !installed {
		logger.Infof("Fetching Root CA cert...")

		rootCert, readErr := j.readRootCACert(googleRootCACertUEFIVar)
		if readErr != nil {
			return true, fmt.Errorf("failed to read Root CA cert with an error: %w", readErr)
		}

		writeErr := j.writeRootCACert(ctx, rootCert.Content, rootCertPath)
		if writeErr != nil {
			return true, fmt.Errorf("failed to store Root CA cert with an error: %w", writeErr)
		}
	}

	// Reached only when the root certs were installed by this run or an earlier one.
	j.rootCertsInstalled.Store(true)

	logger.Infof("Fetching client credentials...")

	credentials, fetchErr := j.fetchClientCredentials(ctx, rootCertPath)
	if fetchErr != nil {
		return true, fmt.Errorf("failed to generate client credentials with an error: %w", fetchErr)
	}

	clientCredsPath := filepath.Join(defaultCredsDir, clientCredsFileName)
	storeErr := j.writeClientCredentials(credentials, clientCredsPath)
	if storeErr != nil {
		return true, fmt.Errorf("failed to store client credentials with an error: %w", storeErr)
	}

	logger.Infof("Successfully bootstrapped MDS mTLS credentials")
	return true, nil
}
// ID returns the ID for this job, which is always MTLSSchedulerID.
func (j *CredsJob) ID() string {
return MTLSSchedulerID
}
// Interval returns the interval at which the job is executed (MTLSScheduleInterval)
// together with a constant true flag.
// NOTE(review): the meaning of the bool is defined by the scheduler's job
// interface, which is not visible here — presumably it asks for the first run to
// start right away; confirm.
func (j *CredsJob) Interval() (time.Duration, bool) {
return MTLSScheduleInterval, true
}
// ShouldEnable implements the scheduler job interface; it returns true if the job
// should be scheduled, based on the previously cached [isEnabled] value (stored by
// checkUserSettings). It only reads the cached flag — no MDS call is made and
// ctx is unused.
func (j *CredsJob) ShouldEnable(ctx context.Context) bool {
return j.isEnabled.Load()
}
// checkUserSettings evaluates the user settings found in the config file and in
// mds and caches the outcome on the job: whether the OS native store should be
// used (useNativeStore) and whether the job is enabled (isEnabled).
//
// Determining enablement may contact MDS, see shouldEnableJob.
func (j *CredsJob) checkUserSettings(ctx context.Context, mds *metadata.Descriptor) {
	logger.Debugf("Checking user settings for %s", j.ID())

	nativeStore := j.shouldUseNativeStore(mds)
	j.useNativeStore.Store(nativeStore)

	enabled := j.shouldEnableJob(ctx, mds)
	j.isEnabled.Store(enabled)
}
// shouldEnableJob reports whether credential bootstrapping should be scheduled:
// the user must not have disabled it and the MDS endpoint serving the client
// credentials must be reachable on this VM.
//
// The user setting is resolved from three sources, each one overriding the
// previous when present: instance config file, project level attribute,
// instance level attribute. With none of them present the job is disabled.
//
// The MDS endpoint is probed only when the user setting enables the job. A
// failing probe is logged once per streak of failures (tracked by
// failedPrevious); a successful probe resets that tracking.
func (j *CredsJob) shouldEnableJob(ctx context.Context, mds *metadata.Descriptor) bool {
	enabled := false

	if mdsCfg := cfg.Get().MDS; mdsCfg != nil {
		enabled = !mdsCfg.DisableHTTPSMdsSetup
		logger.Debugf("Found instance config file attribute for enable credential refresher set to: %t", enabled)
	}

	if disabled := mds.Project.Attributes.DisableHTTPSMdsSetup; disabled != nil {
		enabled = !*disabled
		logger.Debugf("Found project level attribute for enable credential refresher set to: %t", enabled)
	}

	if disabled := mds.Instance.Attributes.DisableHTTPSMdsSetup; disabled != nil {
		enabled = !*disabled
		logger.Debugf("Found instance level attribute for enable credential refresher set to: %t", enabled)
	}

	// No need to make the MDS call in case the job is disabled by the user.
	if !enabled {
		return false
	}

	if _, probeErr := j.client.GetKey(ctx, clientCertsKey, nil); probeErr != nil {
		// This error is logged only once to prevent raising unnecessary alerts. Repeated
		// logging could be mistaken for a recurring issue, even if mTLS MDS is indeed
		// not supported.
		if alreadyReported := j.failedPrevious.Load(); !alreadyReported {
			logger.Debugf("Skipping scheduling credential generation job, unable to reach client credentials endpoint(%s): %v\nNote that this does not impact any functionality and you might see this message if HTTPS endpoint isn't supported by the Metadata Server on your VM. Refer https://cloud.google.com/compute/docs/metadata/overview#https-mds for more details.", clientCertsKey, probeErr)
			j.failedPrevious.Store(true)
		}
		return false
	}

	// The endpoint is reachable, so a future failure gets reported again.
	j.failedPrevious.Store(false)
	return true
}
// shouldUseNativeStore reports whether the user has configured the agent to keep
// the credentials in the OS native store.
//
// The setting is resolved from three sources, each one overriding the previous
// when present: instance config file, project level attribute, instance level
// attribute. With none of them present the result is false.
func (j *CredsJob) shouldUseNativeStore(mds *metadata.Descriptor) bool {
	native := false

	if mdsCfg := cfg.Get().MDS; mdsCfg != nil {
		native = mdsCfg.HTTPSMDSEnableNativeStore
		logger.Debugf("Found instance config file attribute for use native store set to: %t", native)
	}

	if projectLevel := mds.Project.Attributes.HTTPSMDSEnableNativeStore; projectLevel != nil {
		native = *projectLevel
		logger.Debugf("Found project level attribute for use native store set to: %t", native)
	}

	if instanceLevel := mds.Instance.Attributes.HTTPSMDSEnableNativeStore; instanceLevel != nil {
		native = *instanceLevel
		logger.Debugf("Found instance level attribute for use native store set to: %t", native)
	}

	return native
}
// Init initializes the MDS mTLS credential bootstrapping job and subscribes its
// handler to the MDS long poll event. This allows the handler to enable/disable
// the job based on MDS keys.
//
// Before subscribing, the handler is invoked once with a descriptor fetched
// directly from MDS so that the first evaluation does not have to wait for a
// long poll event. It also sets the package level schedulerInstance.
func Init(ctx context.Context) {
	logger.Infof("Initializing MDS mTLS bootstrapping handler")
	schedulerInstance = scheduler.Get()

	credsJob := New()

	// First run should happen immediately, later runs are driven by the MDS long
	// poll event. A fetch error is forwarded to the handler, which then skips the run.
	descriptor, fetchErr := credsJob.client.Get(ctx)
	firstEvent := &events.EventData{Data: descriptor, Error: fetchErr}
	credsJob.mdsSchedulerHandler(ctx, mdsevent.LongpollEvent, nil, firstEvent)

	logger.Infof("Subscribing to mdsSchedulerHandler to listen %s", mdsevent.LongpollEvent)
	events.Get().Subscribe(mdsevent.LongpollEvent, nil, credsJob.mdsSchedulerHandler)
}
// mdsSchedulerHandler is the event callback that keeps the scheduling state of
// the job in line with the user settings carried by an MDS descriptor.
//
// evData is expected to carry a *metadata.Descriptor. Events carrying an error
// are skipped. For a valid descriptor the user settings are re-evaluated and the
// job is scheduled or unscheduled if its current state differs from the wanted one.
//
// It returns false only for event data of an unexpected type, which per the log
// message un-subscribes the handler; every other path returns true.
func (j *CredsJob) mdsSchedulerHandler(ctx context.Context, evType string, _ interface{}, evData *events.EventData) bool {
	logger.Debugf("Running MDS mTLS scheduler handler callback")

	if eventErr := evData.Error; eventErr != nil {
		logger.Debugf("Not handling MDS mTLS scheduler handler, got an error from %s event: %v", evType, eventErr)
		return true
	}

	descriptor, isDescriptor := evData.Data.(*metadata.Descriptor)
	if !isDescriptor {
		logger.Errorf("Received invalid event data (%+v) of type (%T), ignoring this event and un-subscribing %s", evData.Data, evData.Data, evType)
		return false
	}

	jobID := j.ID()

	// The current state is captured before the settings are re-evaluated.
	wasScheduled := schedulerInstance.IsScheduled(jobID)
	j.checkUserSettings(ctx, descriptor)
	wantScheduled := j.isEnabled.Load()

	switch {
	case wasScheduled && !wantScheduled:
		schedulerInstance.UnscheduleJob(jobID)
	case wantScheduled && !wasScheduled:
		// A scheduling failure is only logged; the handler stays subscribed.
		if err := schedulerInstance.ScheduleJob(ctx, j, true); err != nil {
			logger.Errorf("Failed to schedule job %q: %v", jobID, err)
		}
	}

	return true
}