pkg/monitor/scheduledevent/scheduled-event-monitor.go (121 lines of code) (raw):
// Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.
package scheduledevent
import (
"fmt"
"time"
"github.com/aws/aws-node-termination-handler/pkg/ec2metadata"
"github.com/aws/aws-node-termination-handler/pkg/monitor"
"github.com/aws/aws-node-termination-handler/pkg/node"
"github.com/rs/zerolog/log"
)
const (
// ScheduledEventMonitorKind is a const to define this monitor kind
ScheduledEventMonitorKind = "SCHEDULED_EVENT_MONITOR"
scheduledEventStateCompleted = "completed"
scheduledEventStateCanceled = "canceled"
scheduledEventDateFormat = "2 Jan 2006 15:04:05 GMT"
instanceStopCode = "instance-stop"
systemRebootCode = "system-reboot"
instanceRebootCode = "instance-reboot"
instanceRetirementCode = "instance-retirement"
)
// ScheduledEventMonitor is a struct definition that knows how to process scheduled events from IMDS
type ScheduledEventMonitor struct {
IMDS *ec2metadata.Service
InterruptionChan chan<- monitor.InterruptionEvent
CancelChan chan<- monitor.InterruptionEvent
NodeName string
}
// NewScheduledEventMonitor creates an instance of a scheduled event monitor
func NewScheduledEventMonitor(imds *ec2metadata.Service, interruptionChan chan<- monitor.InterruptionEvent, cancelChan chan<- monitor.InterruptionEvent, nodeName string) ScheduledEventMonitor {
return ScheduledEventMonitor{
IMDS: imds,
InterruptionChan: interruptionChan,
CancelChan: cancelChan,
NodeName: nodeName,
}
}
// Monitor continuously monitors metadata for scheduled events and sends interruption events to the passed in channel
func (m ScheduledEventMonitor) Monitor() error {
interruptionEvents, err := m.checkForScheduledEvents()
if err != nil {
return err
}
for _, interruptionEvent := range interruptionEvents {
if isStateCanceledOrCompleted(interruptionEvent.State) {
m.CancelChan <- interruptionEvent
} else {
m.InterruptionChan <- interruptionEvent
}
}
return nil
}
// Kind denotes the kind of monitor
func (m ScheduledEventMonitor) Kind() string {
return ScheduledEventMonitorKind
}
// checkForScheduledEvents Checks EC2 instance metadata for a scheduled event requiring a node drain
func (m ScheduledEventMonitor) checkForScheduledEvents() ([]monitor.InterruptionEvent, error) {
scheduledEvents, err := m.IMDS.GetScheduledMaintenanceEvents()
if err != nil {
return nil, fmt.Errorf("Unable to parse metadata response: %w", err)
}
events := make([]monitor.InterruptionEvent, 0)
for _, scheduledEvent := range scheduledEvents {
var preDrainFunc monitor.DrainTask
if isRestartEvent(scheduledEvent.Code) && !isStateCanceledOrCompleted(scheduledEvent.State) {
preDrainFunc = uncordonAfterRebootPreDrain
}
notBefore, err := time.Parse(scheduledEventDateFormat, scheduledEvent.NotBefore)
if err != nil {
return nil, fmt.Errorf("Unable to parse scheduled event start time: %w", err)
}
notAfter := notBefore
if len(scheduledEvent.NotAfter) > 0 {
notAfter, err = time.Parse(scheduledEventDateFormat, scheduledEvent.NotAfter)
if err != nil {
notAfter = notBefore
log.Err(err).Msg("Unable to parse scheduled event end time, continuing")
}
}
events = append(events, monitor.InterruptionEvent{
EventID: scheduledEvent.EventID,
Kind: monitor.ScheduledEventKind,
Monitor: ScheduledEventMonitorKind,
Description: fmt.Sprintf("%s will occur between %s and %s because %s\n", scheduledEvent.Code, scheduledEvent.NotBefore, scheduledEvent.NotAfter, scheduledEvent.Description),
State: scheduledEvent.State,
NodeName: m.NodeName,
StartTime: time.Now(),
EndTime: notAfter,
PreDrainTask: preDrainFunc,
})
}
return events, nil
}
func uncordonAfterRebootPreDrain(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
nodeName := interruptionEvent.NodeName
err := n.MarkWithEventID(nodeName, interruptionEvent.EventID)
if err != nil {
return fmt.Errorf("Unable to mark node with event ID: %w", err)
}
err = n.TaintScheduledMaintenance(nodeName, interruptionEvent.EventID)
if err != nil {
return fmt.Errorf("Unable to taint node with taint %s:%s: %w", node.ScheduledMaintenanceTaint, interruptionEvent.EventID, err)
}
// if the node is already marked as unschedulable, then don't do anything
unschedulable, err := n.IsUnschedulable(nodeName)
if err == nil && unschedulable {
log.Debug().Msg("Node is already marked unschedulable, not taking any action to add uncordon label.")
return nil
} else if err != nil {
return fmt.Errorf("Encountered an error while checking if the node is unschedulable. Not setting an uncordon label: %w", err)
}
err = n.MarkForUncordonAfterReboot(nodeName)
if err != nil {
return fmt.Errorf("Unable to mark the node for uncordon: %w", err)
}
log.Info().Msg("Successfully applied uncordon after reboot action label to node.")
return nil
}
func isStateCanceledOrCompleted(state string) bool {
return state == scheduledEventStateCanceled ||
state == scheduledEventStateCompleted
}
func isRestartEvent(maintenanceCode string) bool {
return maintenanceCode == instanceStopCode ||
maintenanceCode == instanceRetirementCode ||
maintenanceCode == instanceRebootCode ||
maintenanceCode == systemRebootCode
}