in updater/main.go [47:181]
func _main() error {
flag.Parse()
switch {
case *flagCluster == "":
flag.Usage()
return errors.New("cluster is required")
case *flagRegion == "":
flag.Usage()
return errors.New("region is required")
case *flagCheck == "":
flag.Usage()
return errors.New("check-document is required")
case *flagApply == "":
flag.Usage()
return errors.New("apply-document is required")
case *flagReboot == "":
flag.Usage()
return errors.New("reboot-document is required")
}
sess := session.Must(session.NewSession(&aws.Config{
Region: aws.String(*flagRegion),
}))
u := &updater{
cluster: *flagCluster,
checkDocument: *flagCheck,
applyDocument: *flagApply,
rebootDocument: *flagReboot,
ecs: ecs.New(sess, aws.NewConfig()),
ssm: ssm.New(sess, aws.NewConfig()),
ec2: ec2.New(sess, aws.NewConfig()),
}
family, err := taskDefFamily()
if err != nil {
log.Printf("Failed to parse updater task definition arn: %v", err)
log.Printf("Ignoring check for already running updater")
} else {
ok, err := u.alreadyRunning(family)
if err != nil {
return fmt.Errorf("Cannot determine running updater tasks, therefore stopping this run to avoid risk of multiple runs: %w", err)
}
if ok {
log.Printf("Another updater is running, therefore exiting this run.")
return nil
}
}
listedInstances, err := u.listContainerInstances()
if err != nil {
return fmt.Errorf("Failed to get container instances in cluster %q: %w", u.cluster, err)
}
if len(listedInstances) == 0 {
log.Print("Zero instances in the cluster")
return nil
}
bottlerocketInstances, err := u.filterBottlerocketInstances(listedInstances)
if err != nil {
return fmt.Errorf("Failed to filter Bottlerocket instances: %w", err)
}
if len(bottlerocketInstances) == 0 {
log.Printf("No Bottlerocket instances detected")
return nil
}
candidates, err := u.filterAvailableUpdates(bottlerocketInstances)
if err != nil {
return fmt.Errorf("Failed to check updates: %w", err)
}
if len(candidates) == 0 {
log.Printf("No instances to update")
return nil
}
log.Printf("Instances ready for update: %#q", candidates)
summary := make(map[string]string)
for _, i := range candidates {
eligible, err := u.eligible(i.containerInstanceID)
if err != nil {
log.Printf("Failed to determine eligibility for update of instance %#q: %v", i, err)
summary[i.instanceID] = fmt.Sprintf("Failed to determine eligibility for update: %v", err)
continue
}
if !eligible {
log.Printf("Instance %#q is not eligible for updates because it contains non-service task", i)
summary[i.instanceID] = "Instance is not eligible for updates because it contains non-service task(s)"
continue
}
log.Printf("Instance %q is eligible for update", i)
err = u.drainInstance(i.containerInstanceID)
if err != nil {
log.Printf("Failed to drain instance %#q: %v", i, err)
summary[i.instanceID] = fmt.Sprintf("Failed to drain: %v", err)
continue
}
log.Printf("Instance %#q successfully drained!", i)
updateErr := u.updateInstance(i)
activateErr := u.activateInstance(i.containerInstanceID)
if updateErr != nil && activateErr != nil {
log.Printf("Failed to update instance %#q: %v", i, updateErr)
return fmt.Errorf("instance %#q failed to re-activate after failing to update: %w", i, activateErr)
} else if updateErr != nil {
log.Printf("Failed to update instance %#q: %v", i, updateErr)
summary[i.instanceID] = fmt.Sprintf("Failed to update: %v", updateErr)
continue
} else if activateErr != nil {
return fmt.Errorf("instance %#q failed to re-activate after update: %w", i, activateErr)
}
// Reboots are not immediate, and initiating an SSM command races with reboot. Add some
// sleep time to allow the reboot to progress before we verify update.
time.Sleep(20 * time.Second)
ok, err := u.verifyUpdate(i)
if err != nil {
log.Printf("Failed to verify update for instance %#q: %v", i, err)
}
if !ok {
log.Printf("Update failed for instance %#q", i)
summary[i.instanceID] = "Update failed"
} else {
log.Printf("Instance %#q updated successfully!", i)
summary[i.instanceID] = "Instance updated successfully"
}
}
log.Printf("After action summary:")
for k, v := range summary {
log.Printf("%s: %s", k, v)
}
log.Printf("Update operations complete!")
return nil
}