agent/hibernation/hibernation.go (115 lines of code) (raw):

// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may not // use this file except in compliance with the License. A copy of the // License is located at // // http://aws.amazon.com/apache2.0/ // // or in the "license" file accompanying this file. This file is distributed // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing // permissions and limitations under the License. // Package hibernation is responsible for the agent in hibernate mode. // It depends on health pings in an exponential backoff to check if the agent needs // to move to active mode. package hibernation import ( "sync" "time" "github.com/aws/amazon-ssm-agent/agent/context" "github.com/aws/amazon-ssm-agent/agent/health" "github.com/aws/amazon-ssm-agent/agent/log/logger" "github.com/carlescere/scheduler" "github.com/cihub/seelog" ) // IHibernate holds information about the current agent state type IHibernate interface { ExecuteHibernation(context context.T) health.AgentState } type Hibernate struct { sync.RWMutex currentMode health.AgentState healthModule health.IHealthCheck hibernateJob *scheduler.Job currentPingInterval int maxInterval int scheduleBackOff func(m *Hibernate) schedulePing func(m *Hibernate) seelogger seelog.LoggerInterface isLogged bool done chan struct{} } // modeChan is a channel that tracks the status of the agent var modeChan = make(chan health.AgentState, 10) const ( hibernateLogFile = "hibernate.log" secondsInMinute = 60 multiplier = 2 initialPingRate = 5 * secondsInMinute // Seconds backoffRate = 3 ) // NewHibernateMode creates an object of type NewHibernateMode func NewHibernateMode(healthModule health.IHealthCheck, context context.T) *Hibernate { context.Log().Debug("Initializing agent hibernate mode. Switching log to minimal logging...") maxBackoffInterval := context.AppConfig().Ssm.HibernationMaxBackoffIntervalMinutes * secondsInMinute return &Hibernate{ healthModule: healthModule, currentMode: health.Passive, isLogged: false, currentPingInterval: initialPingRate, maxInterval: maxBackoffInterval, scheduleBackOff: scheduleBackOffStrategy, schedulePing: scheduleEmptyHealthPing, done: make(chan struct{}), } } // ExecuteHibernation Starts the hibernate mode by blocking agent start and by scheduling health pings func (m *Hibernate) ExecuteHibernation(context context.T) health.AgentState { m.seelogger = logger.GetLogger(context.Log(), getHibernateSeelogConfig()) next := time.Duration(initialPingRate) * time.Second m.seelogger.Info("Agent is in hibernate mode. Reducing logging. Logging will be reduced to one log per backoff period") // Wait for initial ping time and then schedule health pings <-time.After(next) m.scheduleBackOff(m) for { status := <-modeChan if status == health.Active { // Agent mode is now active. Agent can start. Exit loop close(m.done) m.stopEmptyPing() m.seelogger.Close() return status // returning status for testing purposes. } } } func (m *Hibernate) getCurrentPingInterval() int { m.RLock() defer m.RUnlock() return m.currentPingInterval } func (m *Hibernate) setCurrentPingInterval(interval int) { m.Lock() defer m.Unlock() m.currentPingInterval = interval } func (m *Hibernate) healthCheck() { status, err := m.healthModule.GetAgentState() if err != nil && !m.isLogged { m.seelogger.Errorf("Health ping failed with error - %v", err.Error()) m.isLogged = true } modeChan <- status } func (m *Hibernate) stopEmptyPing() { if m.hibernateJob != nil { m.hibernateJob.Quit <- true } } func scheduleEmptyHealthPing(m *Hibernate) { var err error if m.hibernateJob, err = scheduler.Every(m.getCurrentPingInterval()).Seconds().Run(m.healthCheck); err != nil { m.seelogger.Errorf("Unable to schedule health update. %v", err) } } func scheduleBackOffStrategy(m *Hibernate) { // Observe initial ping rate for an iteration instead of starting // directly from the backed off rate for the first iteration m.stopEmptyPing() m.schedulePing(m) backoffInterval := m.getCurrentPingInterval() * backoffRate next := time.Duration(backoffInterval) * time.Second m.seelogger.Infof("Backing off health check to every %v seconds for %v seconds.", m.getCurrentPingInterval(), backoffInterval) select { case <-m.done: return case <-time.After(next): // Once backoff reaches the max interval, the scheduler will continue to schedule ping sets at max interval. // This ensure a very low amount of logging remains in the hibernate.log currentInterval := m.getCurrentPingInterval() if currentInterval < m.maxInterval { nextInterval := multiplier * currentInterval if nextInterval > m.maxInterval { nextInterval = m.maxInterval } m.setCurrentPingInterval(nextInterval) } m.isLogged = false go m.scheduleBackOff(m) } }