in agent/app/agent.go [343:534]
// doStart runs the agent's start-up sequence using the supplied dependencies
// and returns a process exit code.
//
// In order, it: verifies the Docker version, optionally initializes the ECS
// cgroup root, gathers host resources (adding a "GPU" string-set entry),
// creates the task engine, starts the termination handler, optionally waits
// for the instance to go in service, loads the pause container, sets up Task
// ENI dependencies or VPC/subnet details, registers the container instance,
// and finally starts the background routines and the ACS session.
//
// Any step that fails returns early with an exit code (exitcodes.ExitTerminal,
// exitcodes.ExitError, or the code reported by verifyRequiredDockerVersion);
// otherwise the return value is whatever startACSSession returns.
func (agent *ecsAgent) doStart(containerChangeEventStream *eventstream.EventStream,
	credentialsManager credentials.Manager,
	state dockerstate.TaskEngineState,
	imageManager engine.ImageManager,
	client ecs.ECSClient,
	execCmdMgr execcmd.Manager) int {
	// check docker version >= 1.9.0, exit agent if older
	if exitcode, ok := agent.verifyRequiredDockerVersion(); !ok {
		return exitcode
	}

	// Conditionally create '/ecs' cgroup root
	if agent.cfg.TaskCPUMemLimit.Enabled() {
		if err := agent.cgroupInit(); err != nil {
			seelog.Criticalf("Unable to initialize cgroup root for ECS: %v", err)
			return exitcodes.ExitTerminal
		}
	}

	hostResources, err := client.GetHostResources()
	if err != nil {
		// Include the underlying error so the failure can be diagnosed from logs.
		seelog.Criticalf("Unable to fetch host resources: %v", err)
		return exitcodes.ExitError
	}

	gpuIDs := []string{}
	if agent.cfg.GPUSupportEnabled {
		if err := agent.initializeGPUManager(); err != nil {
			seelog.Criticalf("Could not initialize Nvidia GPU Manager: %v", err)
			return exitcodes.ExitError
		}
		// Find GPUs (if any) on the instance
		for _, device := range agent.getPlatformDevices() {
			if device.Type == types.PlatformDeviceTypeGpu {
				gpuIDs = append(gpuIDs, *device.Id)
			}
		}
	}
	// The GPU resource entry is always set, even when the ID list is empty.
	hostResources["GPU"] = types.Resource{
		Name:           utils.Strptr("GPU"),
		Type:           utils.Strptr("STRINGSET"),
		StringSetValue: gpuIDs,
	}

	// Create the task engine
	taskEngine, currentEC2InstanceID, err := agent.newTaskEngine(
		containerChangeEventStream, credentialsManager, state, imageManager, hostResources, execCmdMgr,
		agent.serviceconnectManager, agent.daemonManagers)
	if err != nil {
		seelog.Criticalf("Unable to initialize new task engine: %v", err)
		return exitcodes.ExitTerminal
	}

	// Start termination handler in goroutine
	go agent.terminationHandler(state, agent.dataClient, taskEngine, agent.cancel)

	// If part of ASG, wait until instance is being set up to go in service before registering with cluster
	if agent.cfg.WarmPoolsSupport.Enabled() {
		err := agent.waitUntilInstanceInService(asgLifecyclePollWait, asgLifecyclePollMax, targetLifecycleMaxRetryCount)
		// An error whose message equals blackholed is tolerated; anything else is terminal.
		if err != nil && err.Error() != blackholed {
			seelog.Criticalf("Could not determine target lifecycle of instance: %v", err)
			return exitcodes.ExitTerminal
		}
	}

	// A pause container load failure is only logged here; it becomes fatal
	// below when Task ENI is enabled.
	loadPauseErr := agent.loadPauseContainer()
	if loadPauseErr != nil {
		seelog.Errorf("Failed to load pause container: %v", loadPauseErr)
	}

	var vpcSubnetAttributes []types.Attribute
	// Check if Task ENI is enabled
	if agent.cfg.TaskENIEnabled.Enabled() {
		// check pause container image load
		if loadPauseErr != nil {
			if loader.IsNoSuchFileError(loadPauseErr) || loader.IsUnsupportedPlatform(loadPauseErr) {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}

		err, terminal := agent.initializeTaskENIDependencies(state, taskEngine)
		switch err {
		case nil:
			// No error, we can proceed with the rest of initialization
			// Set vpc and subnet id attributes
			vpcSubnetAttributes = agent.constructVPCSubnetAttributes()
		case instanceNotLaunchedInVPCError:
			// We have ascertained that the EC2 Instance is not running in a VPC
			// No need to stop the ECS Agent in this case; all we need to do is
			// update the config to disable the TaskENIEnabled flag and move on
			seelog.Warnf("Unable to detect VPC ID for the Instance, disabling Task ENI capability: %v", err)
			agent.cfg.TaskENIEnabled = config.BooleanDefaultFalse{Value: config.ExplicitlyDisabled}
		default:
			// Encountered an error initializing dependencies for dealing with
			// ENIs for Tasks. Exit with the appropriate error code
			seelog.Criticalf("Unable to initialize Task ENI dependencies: %v", err)
			if terminal {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	} else if !agent.cfg.External.Enabled() {
		// Set VPC and Subnet IDs for the EC2 instance
		err, terminal := agent.setVPCSubnet()
		switch err {
		case nil:
			// No error so do nothing
		case instanceNotLaunchedInVPCError:
			// We have ascertained that the EC2 Instance is not running in a VPC
			// No need to stop the ECS Agent in this case
			logger.Info("Unable to detect VPC ID for the instance as it was not launched in VPC mode.")
		default:
			// Encountered an error initializing VPC ID and Subnet
			seelog.Criticalf("Unable to detect VPC ID and Subnet: %v", err)
			if terminal {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	}

	// Register the container instance
	err = agent.registerContainerInstance(client, vpcSubnetAttributes)
	if err != nil {
		if isTerminal(err) {
			// On unrecoverable error codes, agent should terminally exit.
			logger.Critical("Agent will terminally exit, unable to register container instance:", logger.Fields{
				field.Error: err,
			})
			return exitcodes.ExitTerminal
		}
		// Other errors are considered recoverable and will be retried.
		return exitcodes.ExitError
	}

	// Load Managed Daemon images asynchronously
	agent.loadManagedDaemonImagesAsync(imageManager)

	scManager := agent.serviceconnectManager
	scManager.SetECSClient(client, agent.containerInstanceARN)
	// NOTE(review): the error returned by IsLoaded is discarded; only a
	// positive result adds the image to the cleanup exclusion list.
	if loaded, _ := scManager.IsLoaded(agent.dockerClient); loaded {
		imageManager.AddImageToCleanUpExclusionList(scManager.GetLoadedImageName())
	}

	// Add container instance ARN to metadata manager
	if agent.cfg.ContainerMetadataEnabled.Enabled() {
		agent.metadataManager.SetContainerInstanceARN(agent.containerInstanceARN)
		agent.metadataManager.SetAvailabilityZone(agent.availabilityZone)
		agent.metadataManager.SetHostPrivateIPv4Address(agent.getHostPrivateIPv4AddressFromEC2Metadata())
		agent.metadataManager.SetHostPublicIPv4Address(agent.getHostPublicIPv4AddressFromEC2Metadata())
	}

	if agent.cfg.Checkpoint.Enabled() {
		agent.saveMetadata(data.AgentVersionKey, version.Version)
		agent.saveMetadata(data.AvailabilityZoneKey, agent.availabilityZone)
		agent.saveMetadata(data.ClusterNameKey, agent.cfg.Cluster)
		agent.saveMetadata(data.ContainerInstanceARNKey, agent.containerInstanceARN)
		agent.saveMetadata(data.EC2InstanceIDKey, currentEC2InstanceID)
	}

	// now that we know the container instance ARN, we can create the doctor
	// and pass it on to ACS and TACS
	doctor, doctorCreateErr := agent.newDoctorWithHealthchecks(agent.cfg.Cluster, agent.containerInstanceARN)
	if doctorCreateErr != nil {
		// Start-up continues without healthchecks; report the doctor's own
		// error rather than the (nil) registration error.
		seelog.Warnf("Error starting doctor, healthchecks won't be running: %v", doctorCreateErr)
	} else {
		seelog.Debug("Doctor healthchecks set up properly.")
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetDataClient(agent.dataClient)
	imageManager.SetDataClient(agent.dataClient)
	taskEngine.MustInit(agent.ctx)

	// Start back ground routines, including the telemetry session
	deregisterInstanceEventStream := eventstream.NewEventStream(
		deregisterContainerInstanceEventStreamName, agent.ctx)
	deregisterInstanceEventStream.StartListening()
	taskHandler := eventhandler.NewTaskHandler(agent.ctx, agent.dataClient, state, client)
	attachmentEventHandler := eventhandler.NewAttachmentEventHandler(agent.ctx, agent.dataClient, client)
	agent.startAsyncRoutines(containerChangeEventStream, credentialsManager, imageManager,
		taskEngine, deregisterInstanceEventStream, client, taskHandler, attachmentEventHandler, state, doctor)
	// TODO add EBS watcher to async routines
	agent.startEBSWatcher(state, taskEngine, agent.dockerClient)

	// Start the acs session, which should block doStart
	return agent.startACSSession(credentialsManager, taskEngine,
		deregisterInstanceEventStream, client, state, taskHandler, doctor)
}