app/housekeeping/DeleteLongRunningEC2Instances.scala (59 lines of code) (raw):

package housekeeping

import com.amazonaws.services.ec2.model.Instance
import housekeeping.utils.{BakesRepo, PackerEC2Client}
import models.BakeId
import org.joda.time.DateTime
import org.quartz.{ScheduleBuilder, SimpleScheduleBuilder, Trigger}
import services.Loggable

import scala.jdk.CollectionConverters._
import scala.util.control.NonFatal

// TimeOutLongRunningBakes was failing to delete some long running EC2 instances.
// The issue was that the bake corresponding to the long running EC2 instance would have status Failed.
// This means it wouldn't be considered as a long running bake (status not equal to Running)
// and therefore its corresponding EC2 instance wouldn't be considered for termination.
// To mitigate against this, 'go from the other direction' i.e. query for long running EC2 instances, terminate them
// and then set the status of the corresponding bake to timed out (if its status is Running).
class DeleteLongRunningEC2Instances(
    bakesRepo: BakesRepo,
    packerEC2Client: PackerEC2Client
) extends HousekeepingJob
    with Loggable {
  import DeleteLongRunningEC2Instances._

  /** Quartz schedule for this job: repeat forever at a 20 minute interval. */
  override val schedule: ScheduleBuilder[_ <: Trigger] =
    SimpleScheduleBuilder.repeatMinutelyForever(20)

  /** Lists the running Packer instances whose launch time is strictly before `dateTime`.
    *
    * @param dateTime cut-off; only instances launched before it are returned
    * @return the instances reported by the Packer EC2 client that pass the cut-off
    */
  def getRunningPackerInstancesLaunchedBefore(
      dateTime: DateTime
  ): List[Instance] =
    packerEC2Client.getRunningPackerInstances().filter { instance =>
      val launchTime = new DateTime(instance.getLaunchTime)
      launchTime.isBefore(dateTime)
    }

  /** Terminates every running Packer instance launched before `earliestStartedAt` and asks the
    * bakes repository to time out the bake each instance is tagged with.
    *
    * Each instance is handled independently: a non-fatal failure on one instance is logged and
    * the remaining instances are still processed, so a single instance that cannot be terminated
    * does not block the clean-up of the others. Once every instance has been attempted, the first
    * failure (if any) is rethrown with the later ones attached as suppressed exceptions, so the
    * caller still observes that the run failed.
    *
    * @param earliestStartedAt instances launched before this time are terminated
    */
  def runHouseKeeping(earliestStartedAt: DateTime): Unit = {
    val instancesToTerminate = getRunningPackerInstancesLaunchedBefore(
      earliestStartedAt
    )
    if (instancesToTerminate.isEmpty) {
      log.info("no instances found to terminate")
    } else {
      log.warn(
        s"instances found to terminate that were launched before $earliestStartedAt: " +
          s"${instancesToTerminate.map(_.getInstanceId).mkString(",")}"
      )
      val failures: List[Throwable] = instancesToTerminate.flatMap { instance =>
        try {
          terminateAndTimeOutBake(instance)
          None
        } catch {
          // Fatal errors (OutOfMemoryError, InterruptedException, ...) still propagate immediately.
          case NonFatal(e) =>
            log.error(
              s"failed to clean up instance ${instance.getInstanceId}",
              e
            )
            Some(e)
        }
      }
      failures match {
        case first :: rest =>
          // A Throwable cannot suppress itself, so skip any repeat of the very same object.
          rest.filterNot(_ eq first).foreach(first.addSuppressed)
          throw first
        case Nil => ()
      }
    }
  }

  /** Terminates a single instance, then updates the bake identified by its `BakeId` tag. */
  private def terminateAndTimeOutBake(instance: Instance): Unit = {
    log.info(s"terminating instance ${instance.getInstanceId}")
    packerEC2Client.terminateEC2Instance(instance.getInstanceId)
    getBakeIdFromInstance(instance) match {
      case None =>
        log.warn(
          s"unable to get bake id for instance ${instance.getInstanceId}"
        )
      case Some(id) =>
        log.info(s"updating status of $id to timed out")
        bakesRepo.updateStatusToTimedOutIfRunning(id)
    }
  }

  /** Job entry point: cleans up instances launched more than one hour ago. */
  override def housekeep(): Unit =
    runHouseKeeping(DateTime.now.minusHours(1))
}

object DeleteLongRunningEC2Instances {

  /** Reads the bake id from the instance's `BakeId` tag.
    *
    * @param instance the EC2 instance whose tags are inspected
    * @return the parsed id, or `None` when the tag is absent or its value does not parse
    */
  def getBakeIdFromInstance(instance: Instance): Option[BakeId] = {
    for {
      raw <- instance.getTags.asScala.find(_.getKey == "BakeId")
      id <- BakeId.fromString(raw.getValue).toOption
    } yield id
  }
}