internal/ssm/daemon.go (103 lines of code) (raw):

// Package ssm provides the daemon.Daemon implementation that manages the
// amazon-ssm-agent service.
package ssm

import (
	"context"
	"fmt"
	"os"
	"regexp"
	"time"

	"go.uber.org/zap"

	"github.com/aws/eks-hybrid/internal/api"
	"github.com/aws/eks-hybrid/internal/daemon"
	"github.com/aws/eks-hybrid/internal/system"
)

var (
	// Compile-time check that *ssm satisfies daemon.Daemon.
	_ daemon.Daemon = (*ssm)(nil)

	// SsmDaemonName is the service unit name of the SSM agent. It starts with
	// the default name and may be overwritten by setDaemonName (called from
	// NewSsmDaemon) with an OS-specific unit name.
	SsmDaemonName = "amazon-ssm-agent"

	// The patterns below are matched against the text of the error returned
	// by registerMachine. MatchString is unanchored, so a bare literal matches
	// anywhere in the error text.
	activationExpiredRegex = regexp.MustCompile(`ActivationExpired`)
	invalidActivationRegex = regexp.MustCompile(`InvalidActivation`)
)

const (
	// defaultAWSConfigPath is the target of the symlink created in PostLaunch.
	defaultAWSConfigPath   = "/root/.aws"
	awsCredentialsFilePath = defaultAWSConfigPath + "/credentials"
	// eksHybridPath is the directory that holds the symlink.
	eksHybridPath = "/eks-hybrid"
	// symlinkedAWSConfigPath is the symlink that points at defaultAWSConfigPath.
	symlinkedAWSConfigPath = eksHybridPath + "/.aws"
)

// ssm implements daemon.Daemon for the SSM agent.
type ssm struct {
	daemonManager daemon.DaemonManager
	nodeConfig    *api.NodeConfig
	logger        *zap.Logger
}

// NewSsmDaemon returns a daemon.Daemon for the SSM agent.
//
// As a side effect it calls setDaemonName, which may overwrite the
// package-level SsmDaemonName based on the detected operating system.
func NewSsmDaemon(daemonManager daemon.DaemonManager, cfg *api.NodeConfig, logger *zap.Logger) daemon.Daemon {
	setDaemonName()
	return &ssm{
		daemonManager: daemonManager,
		nodeConfig:    cfg,
		logger:        logger,
	}
}

// Configure registers the machine through registerMachine using the node
// configuration. When the registration error text indicates an expired or
// invalid activation, the error is replaced with a message telling the user
// what to fix; any other error is returned unchanged.
func (s *ssm) Configure() error {
	err := s.registerMachine(s.nodeConfig)
	if err == nil {
		return nil
	}

	switch msg := err.Error(); {
	case activationExpiredRegex.MatchString(msg):
		return fmt.Errorf("SSM activation expired. Please use a valid activation")
	case invalidActivationRegex.MatchString(msg):
		return fmt.Errorf("invalid SSM activation. Please use a valid activation code, activation id and region")
	default:
		return err
	}
}

// EnsureRunning enables the SSM agent unit, restarts it, and then waits until
// the daemon manager reports it as running. The restart phase and the wait
// phase each get their own 5 minute timeout derived from ctx.
func (s *ssm) EnsureRunning(ctx context.Context) error {
	if err := s.daemonManager.EnableDaemon(SsmDaemonName); err != nil {
		return err
	}

	restartCtx, cancelRestart := context.WithTimeout(ctx, 5*time.Minute)
	defer cancelRestart()

	s.logger.Info("Restarting SSM agent...")
	// When the restart operation fails, it's usually because there are many operations running
	// for the same service, and we get rate limited. That's why we use a big backoff time.
	if err := daemon.RetryOperation(restartCtx, s.daemonManager.RestartDaemon, SsmDaemonName, 20*time.Second); err != nil {
		return fmt.Errorf("restarting SSM agent: %w", err)
	}

	runningCtx, cancelRunning := context.WithTimeout(ctx, 5*time.Minute)
	defer cancelRunning()

	s.logger.Info("Waiting for SSM agent to be running...")
	if err := daemon.WaitForStatus(runningCtx, s.logger, s.daemonManager, SsmDaemonName, daemon.DaemonStatusRunning, 5*time.Second); err != nil {
		return fmt.Errorf("waiting for SSM agent to be running: %w", err)
	}

	s.logger.Info("SSM agent is running")
	return nil
}

// PostLaunch creates the symlink symlinkedAWSConfigPath -> defaultAWSConfigPath
// when the node configuration has Spec.Hybrid.EnableCredentialsFile set.
// Otherwise it does nothing.
func (s *ssm) PostLaunch() error {
	if !s.nodeConfig.Spec.Hybrid.EnableCredentialsFile {
		return nil
	}

	s.logger.Info("Creating symlink for AWS credentials", zap.String("Symbolic link path", symlinkedAWSConfigPath))

	if err := os.MkdirAll(eksHybridPath, 0o755); err != nil {
		return fmt.Errorf("creating path: %w", err)
	}

	// Clear whatever currently occupies the link path; os.Symlink fails if the
	// new name already exists.
	if err := os.RemoveAll(symlinkedAWSConfigPath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("removing directory %s: %w", symlinkedAWSConfigPath, err)
	}

	if err := os.Symlink(defaultAWSConfigPath, symlinkedAWSConfigPath); err != nil {
		return fmt.Errorf("creating symlink: %w", err)
	}
	return nil
}

// Stop asks the daemon manager to stop the SSM agent unit.
//
// NOTE(review): the previous comment said the unit is stopped "only if it is
// loaded and running"; any such check would live inside StopDaemon and is not
// visible here — confirm against the DaemonManager implementation.
func (s *ssm) Stop() error {
	return s.daemonManager.StopDaemon(SsmDaemonName)
}

// Name returns the current value of SsmDaemonName.
func (s *ssm) Name() string {
	return SsmDaemonName
}

// setDaemonName overwrites the package-level SsmDaemonName with the unit name
// for the OS reported by system.GetOsName. If the OS is not in the table,
// SsmDaemonName keeps its current value.
func setDaemonName() {
	osToDaemonName := map[string]string{
		system.UbuntuOsName: "snap.amazon-ssm-agent.amazon-ssm-agent",
		system.RhelOsName:   "amazon-ssm-agent",
		system.AmazonOsName: "amazon-ssm-agent",
	}
	if daemonName, ok := osToDaemonName[system.GetOsName()]; ok {
		SsmDaemonName = daemonName
	}
}