app/housekeeping/TimeOutLongRunningBakes.scala (52 lines of code) (raw):
package housekeeping
import housekeeping.utils.{BakesRepo, PackerEC2Client}
import models.{Bake, BakeStatus}
import org.joda.time.DateTime
import org.quartz.{ScheduleBuilder, SimpleScheduleBuilder, Trigger}
import services.Loggable
// This housekeeping job mitigates the case where an Amigo bake has failed but has not been reported as failed.
// When that happens, the EC2 instance used for the bake is not terminated and incurs unnecessary costs.
// The solution is to update the status (in the database) of running bakes that were launched over 2 hours ago to TimedOut
// and to terminate the EC2 instance associated with each of them.
/** Housekeeping job that times out bakes which have been running for too long.
  *
  * On each run it selects the bakes that are still `Running` and started before a
  * cut-off, terminates the EC2 instance found for each of them (if any) and then asks
  * the repository to mark the bake as timed out.
  *
  * @param bakesRepo       source of the bakes and target of the timed-out status update
  * @param packerEC2Client used to look up and terminate the instance belonging to a bake
  */
class TimeOutLongRunningBakes(
    bakesRepo: BakesRepo,
    packerEC2Client: PackerEC2Client
) extends HousekeepingJob
    with Loggable {

  import scala.util.control.NonFatal

  /** Quartz schedule for this job: repeat forever at a 20 minute interval. */
  override val schedule: ScheduleBuilder[_ <: Trigger] =
    SimpleScheduleBuilder.repeatMinutelyForever(20)

  /** Returns the bakes from the repository that
    * [[TimeOutLongRunningBakes.shouldTimeOutBake]] selects for the given cut-off.
    *
    * @param earliestStartedAt cut-off instant; only bakes started before it are returned
    */
  def getBakesToTimeOut(earliestStartedAt: DateTime): List[Bake.DbModel] =
    bakesRepo.getBakes.filter { bake =>
      TimeOutLongRunningBakes.shouldTimeOutBake(bake, earliestStartedAt)
    }

  /** Times out every long running bake selected by [[getBakesToTimeOut]].
    *
    * Each bake is handled in isolation: a non-fatal failure while handling one bake is
    * logged and does not prevent the remaining bakes from being handled. A bake whose
    * handling failed is not marked as timed out by this run, so a later run will pick
    * it up again while it is still `Running`. Once all bakes have been attempted, the
    * first failure (if any) is rethrown so the caller still observes a failed run.
    *
    * @param earliestStartedAt cut-off instant; running bakes started before it are timed out
    */
  def runHouseKeeping(earliestStartedAt: DateTime): Unit = {
    log.info("scanning for long running bakes to mark as timed out")
    val bakesToTimeout = getBakesToTimeOut(earliestStartedAt)
    log.info(
      s"${bakesToTimeout.size} long running bake(s) found for marking as timed out"
    )

    val failures: List[Throwable] = bakesToTimeout.flatMap { bake =>
      try {
        timeOutBake(bake)
        List.empty[Throwable]
      } catch {
        // Only non-fatal errors are contained; fatal ones still abort the run immediately.
        case NonFatal(e) =>
          log.warn(
            s"failed to time out long running bake ${bake.bakeId}, continuing with remaining bakes: $e"
          )
          List(e)
      }
    }

    // Surface the failure to the caller, but only after every bake has had its turn.
    failures.headOption.foreach(e => throw e)
  }

  /** Terminates the instance found for `bake` (if any) and marks the bake as timed out. */
  private def timeOutBake(bake: Bake.DbModel): Unit = {
    packerEC2Client.getBakeInstance(bake.bakeId) match {
      case None =>
        log.warn(
          s"unable to find instance associated with long running bake ${bake.bakeId}"
        )
      case Some(instance) =>
        val instanceId = instance.getInstanceId
        log.info(
          s"terminating instance $instanceId associated with long running bake ${bake.bakeId}"
        )
        packerEC2Client.terminateEC2Instance(instanceId)
    }
    // Update the status to TimedOut, even if the respective EC2 instance can't be found.
    // This is to handle cases where e.g. the instance was deleted manually.
    log.info(s"marking long running bake $bake as timed out")
    bakesRepo.updateStatusToTimedOutIfRunning(bake.bakeId)
    // The result of the update is intentionally ignored, as it was before.
    ()
  }

  /** Job entry point: times out running bakes that started more than 2 hours ago. */
  override def housekeep(): Unit =
    runHouseKeeping(earliestStartedAt = DateTime.now.minusHours(2))
}
object TimeOutLongRunningBakes {

  /** Decides whether a bake should be timed out.
    *
    * @param bake              the bake to inspect
    * @param earliestStartedAt cut-off instant
    * @return `true` only when the bake's status is `Running` and it started before
    *         `earliestStartedAt`
    */
  def shouldTimeOutBake(
      bake: Bake.DbModel,
      earliestStartedAt: DateTime
  ): Boolean =
    // The start time is only consulted for bakes that are still running.
    if (bake.status == BakeStatus.Running)
      bake.startedAt.isBefore(earliestStartedAt)
    else
      false
}