func()

in agent/app/agent.go [343:534]


// doStart runs the agent start-up sequence and returns a process exit code.
//
// In order, it: verifies the Docker version, optionally initializes the cgroup
// root, collects host resources (adding a "GPU" string-set resource), creates
// the task engine, starts the termination handler, optionally waits for the
// instance to go in service (warm pools), loads the pause container, sets up
// Task ENI dependencies or VPC/subnet information, registers the container
// instance, wires up service connect / metadata / checkpoint data, creates the
// doctor, initializes the task engine, starts the background routines and
// finally starts the ACS session.
//
// Every early return yields one of the exitcodes values (ExitTerminal or
// ExitError, or the code reported by verifyRequiredDockerVersion); otherwise
// the value returned by startACSSession is returned.
func (agent *ecsAgent) doStart(containerChangeEventStream *eventstream.EventStream,
	credentialsManager credentials.Manager,
	state dockerstate.TaskEngineState,
	imageManager engine.ImageManager,
	client ecs.ECSClient,
	execCmdMgr execcmd.Manager) int {
	// check docker version >= 1.9.0, exit agent if older
	if exitcode, ok := agent.verifyRequiredDockerVersion(); !ok {
		return exitcode
	}

	// Conditionally create '/ecs' cgroup root
	if agent.cfg.TaskCPUMemLimit.Enabled() {
		if err := agent.cgroupInit(); err != nil {
			seelog.Criticalf("Unable to initialize cgroup root for ECS: %v", err)
			return exitcodes.ExitTerminal
		}
	}

	hostResources, err := client.GetHostResources()
	if err != nil {
		// Include the underlying error so the failure can be diagnosed from the log.
		seelog.Criticalf("Unable to fetch host resources: %v", err)
		return exitcodes.ExitError
	}

	gpuIDs := []string{}
	if agent.cfg.GPUSupportEnabled {
		if err := agent.initializeGPUManager(); err != nil {
			seelog.Criticalf("Could not initialize Nvidia GPU Manager: %v", err)
			return exitcodes.ExitError
		}
		// Find GPUs (if any) on the instance
		for _, device := range agent.getPlatformDevices() {
			if device.Type == types.PlatformDeviceTypeGpu {
				gpuIDs = append(gpuIDs, *device.Id)
			}
		}
	}

	// The GPU resource is always added, with an empty set when GPU support is
	// disabled or no GPU devices were found.
	hostResources["GPU"] = types.Resource{
		Name:           utils.Strptr("GPU"),
		Type:           utils.Strptr("STRINGSET"),
		StringSetValue: gpuIDs,
	}

	// Create the task engine
	taskEngine, currentEC2InstanceID, err := agent.newTaskEngine(
		containerChangeEventStream, credentialsManager, state, imageManager, hostResources, execCmdMgr,
		agent.serviceconnectManager, agent.daemonManagers)
	if err != nil {
		seelog.Criticalf("Unable to initialize new task engine: %v", err)
		return exitcodes.ExitTerminal
	}

	// Start termination handler in goroutine
	go agent.terminationHandler(state, agent.dataClient, taskEngine, agent.cancel)

	// If part of ASG, wait until instance is being set up to go in service before registering with cluster
	if agent.cfg.WarmPoolsSupport.Enabled() {
		err := agent.waitUntilInstanceInService(asgLifecyclePollWait, asgLifecyclePollMax, targetLifecycleMaxRetryCount)
		// An error whose message equals the blackholed constant is tolerated
		// and start-up continues; any other error is terminal.
		if err != nil && err.Error() != blackholed {
			seelog.Criticalf("Could not determine target lifecycle of instance: %v", err)
			return exitcodes.ExitTerminal
		}
	}

	// A pause container load failure is only logged here; it becomes fatal
	// below only when Task ENI is enabled.
	loadPauseErr := agent.loadPauseContainer()
	if loadPauseErr != nil {
		seelog.Errorf("Failed to load pause container: %v", loadPauseErr)
	}

	var vpcSubnetAttributes []types.Attribute
	// Check if Task ENI is enabled
	if agent.cfg.TaskENIEnabled.Enabled() {
		// check pause container image load
		if loadPauseErr != nil {
			if loader.IsNoSuchFileError(loadPauseErr) || loader.IsUnsupportedPlatform(loadPauseErr) {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}

		err, terminal := agent.initializeTaskENIDependencies(state, taskEngine)
		switch err {
		case nil:
			// No error, we can proceed with the rest of initialization
			// Set vpc and subnet id attributes
			vpcSubnetAttributes = agent.constructVPCSubnetAttributes()
		case instanceNotLaunchedInVPCError:
			// We have ascertained that the EC2 Instance is not running in a VPC
			// No need to stop the ECS Agent in this case; all we need to do is
			// update the config to disable the TaskENIEnabled flag and move on
			seelog.Warnf("Unable to detect VPC ID for the Instance, disabling Task ENI capability: %v", err)
			agent.cfg.TaskENIEnabled = config.BooleanDefaultFalse{Value: config.ExplicitlyDisabled}
		default:
			// Encountered an error initializing dependencies for dealing with
			// ENIs for Tasks. Exit with the appropriate error code
			seelog.Criticalf("Unable to initialize Task ENI dependencies: %v", err)
			if terminal {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	} else if !agent.cfg.External.Enabled() {
		// Set VPC and Subnet IDs for the EC2 instance
		err, terminal := agent.setVPCSubnet()
		switch err {
		case nil:
			// No error so do nothing
		case instanceNotLaunchedInVPCError:
			// We have ascertained that the EC2 Instance is not running in a VPC
			// No need to stop the ECS Agent in this case
			logger.Info("Unable to detect VPC ID for the instance as it was not launched in VPC mode.")
		default:
			// Encountered an error initializing VPC ID and Subnet
			seelog.Criticalf("Unable to detect VPC ID and Subnet: %v", err)
			if terminal {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	}

	// Register the container instance
	err = agent.registerContainerInstance(client, vpcSubnetAttributes)
	if err != nil {
		if isTerminal(err) {
			// On unrecoverable error codes, agent should terminally exit.
			logger.Critical("Agent will terminally exit, unable to register container instance:", logger.Fields{
				field.Error: err,
			})
			return exitcodes.ExitTerminal
		}
		// Other errors are considered recoverable and will be retried.
		return exitcodes.ExitError
	}

	// Load Managed Daemon images asynchronously
	agent.loadManagedDaemonImagesAsync(imageManager)

	scManager := agent.serviceconnectManager
	scManager.SetECSClient(client, agent.containerInstanceARN)
	// The error from IsLoaded is intentionally ignored: the image is only
	// excluded from cleanup when it is reported as loaded.
	if loaded, _ := scManager.IsLoaded(agent.dockerClient); loaded {
		imageManager.AddImageToCleanUpExclusionList(agent.serviceconnectManager.GetLoadedImageName())
	}

	// Add container instance ARN to metadata manager
	if agent.cfg.ContainerMetadataEnabled.Enabled() {
		agent.metadataManager.SetContainerInstanceARN(agent.containerInstanceARN)
		agent.metadataManager.SetAvailabilityZone(agent.availabilityZone)
		agent.metadataManager.SetHostPrivateIPv4Address(agent.getHostPrivateIPv4AddressFromEC2Metadata())
		agent.metadataManager.SetHostPublicIPv4Address(agent.getHostPublicIPv4AddressFromEC2Metadata())
	}

	if agent.cfg.Checkpoint.Enabled() {
		agent.saveMetadata(data.AgentVersionKey, version.Version)
		agent.saveMetadata(data.AvailabilityZoneKey, agent.availabilityZone)
		agent.saveMetadata(data.ClusterNameKey, agent.cfg.Cluster)
		agent.saveMetadata(data.ContainerInstanceARNKey, agent.containerInstanceARN)
		agent.saveMetadata(data.EC2InstanceIDKey, currentEC2InstanceID)
	}

	// now that we know the container instance ARN, we can create the doctor
	// and pass it on to ACS and TACS
	doctor, doctorCreateErr := agent.newDoctorWithHealthchecks(agent.cfg.Cluster, agent.containerInstanceARN)
	if doctorCreateErr != nil {
		// Doctor creation failure is not fatal; log the actual creation error
		// (not the unrelated, already-nil registration error) and carry on.
		seelog.Warnf("Error starting doctor, healthchecks won't be running: %v", doctorCreateErr)
	} else {
		seelog.Debug("Doctor healthchecks set up properly.")
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetDataClient(agent.dataClient)
	imageManager.SetDataClient(agent.dataClient)
	taskEngine.MustInit(agent.ctx)

	// Start back ground routines, including the telemetry session
	deregisterInstanceEventStream := eventstream.NewEventStream(
		deregisterContainerInstanceEventStreamName, agent.ctx)
	deregisterInstanceEventStream.StartListening()
	taskHandler := eventhandler.NewTaskHandler(agent.ctx, agent.dataClient, state, client)
	attachmentEventHandler := eventhandler.NewAttachmentEventHandler(agent.ctx, agent.dataClient, client)
	agent.startAsyncRoutines(containerChangeEventStream, credentialsManager, imageManager,
		taskEngine, deregisterInstanceEventStream, client, taskHandler, attachmentEventHandler, state, doctor)
	// TODO add EBS watcher to async routines
	agent.startEBSWatcher(state, taskEngine, agent.dockerClient)
	// Start the acs session, which should block doStart
	return agent.startACSSession(credentialsManager, taskEngine,
		deregisterInstanceEventStream, client, state, taskHandler, doctor)
}