app/services/Agents.scala (210 lines of code) (raw):

package services import org.apache.pekko.actor.ActorSystem import config.{AMIableConfig, AmiableConfigProvider} import javax.inject.Inject import metrics.{CloudWatch, CloudWatchMetrics} import models.* import org.joda.time.DateTime import play.api.inject.ApplicationLifecycle import play.api.{Environment, Logging, Mode} import prism.{Prism, PrismLogic} import utils.Percentiles import java.util.concurrent.atomic.AtomicReference import scala.concurrent.duration.* import scala.concurrent.{ExecutionContext, Future} case class OldInstanceAccountHistory( date: DateTime, accountName: String, count: Int ) class Agents @Inject() ( amiableConfigProvider: AmiableConfigProvider, lifecycle: ApplicationLifecycle, system: ActorSystem, environment: Environment, cloudWatch: CloudWatch )(implicit exec: ExecutionContext) extends Logging { lazy implicit val conf: AMIableConfig = amiableConfigProvider.conf val refreshInterval = 5.minutes private val amisAgent: AtomicReference[Set[AMI]] = AtomicReference(Set.empty) private val ssasAgent: AtomicReference[Set[SSAA]] = AtomicReference(Set.empty) private val oldProdInstanceCountAgent: AtomicReference[Option[Int]] = AtomicReference(None) private val oldProdInstanceCountHistoryAgent : AtomicReference[List[(DateTime, Double)]] = AtomicReference(Nil) private val amisAgePercentilesAgent: AtomicReference[Option[Percentiles]] = AtomicReference(None) private val amisAgePercentile25thHistoryAgent : AtomicReference[List[(DateTime, Double)]] = AtomicReference(Nil) private val amisAgePercentile50thHistoryAgent : AtomicReference[List[(DateTime, Double)]] = AtomicReference(Nil) private val amisAgePercentile75thHistoryAgent : AtomicReference[List[(DateTime, Double)]] = AtomicReference(Nil) private val oldInstanceCountByAccountAgent : AtomicReference[List[OldInstanceAccountHistory]] = AtomicReference(Nil) def allAmis: Set[AMI] = amisAgent.get def allSSAs: Set[SSAA] = ssasAgent.get def oldProdInstanceCount: Option[Int] = oldProdInstanceCountAgent.get def oldProdInstanceCountHistory: List[(DateTime, Double)] = oldProdInstanceCountHistoryAgent.get def amisAgePercentiles: Option[Percentiles] = amisAgePercentilesAgent.get def amisAgePercentile25thHistory: List[(DateTime, Double)] = amisAgePercentile25thHistoryAgent.get def amisAgePercentile50thHistory: List[(DateTime, Double)] = amisAgePercentile50thHistoryAgent.get def amisAgePercentile75thHistory: List[(DateTime, Double)] = amisAgePercentile75thHistoryAgent.get def oldInstanceCountByAccount: List[OldInstanceAccountHistory] = oldInstanceCountByAccountAgent.get if (environment.mode != Mode.Test) { refreshAmis() refreshSSAs() refreshInstancesInfo() refreshHistory() val prismDataSubscription = system.scheduler.scheduleAtFixedRate( initialDelay = 0.seconds, interval = refreshInterval ) { () => { logger.debug(s"Refreshing agents") refreshAmis() refreshSSAs() refreshInstancesInfo() } }(exec) val cloudwatchDataSubscription = system.scheduler.scheduleAtFixedRate( initialDelay = 0.seconds, interval = 1.hour ) { () => refreshHistory() }(exec) lifecycle.addStopHook { () => prismDataSubscription.cancel() cloudwatchDataSubscription.cancel() Future.successful(()) } } def refreshAmis(): Unit = { Prism .getAMIs() .fold( { err => logger.warn(s"Failed to update AMIs ${err.logString}") }, { amis => logger.debug(s"Loaded ${amis.size} AMIs") amisAgent.set(amis.toSet) } ) } def refreshSSAs(): Unit = { Prism .getInstances(SSAA()) .map(PrismLogic.instanceSSAAs) .fold( { err => logger.warn(s"Failed to update SSAs ${err.logString}") }, { ssaas => logger.debug(s"Loaded ${ssaas.size} SSAA combinations") ssasAgent.set(ssaas.toSet) } ) } def refreshProdInstancesInfo( instancesWithAmis: List[(Instance, Option[AMI])] ): Unit = { val prodInstancesWithAmis = instancesWithAmis.filter(_._1.stage.contains("PROD")) val oldProdInstances = PrismLogic.oldInstances(prodInstancesWithAmis) logger.debug( s"Found ${oldProdInstances.size} PROD instances running on an out-of-date AMI" ) oldProdInstanceCountAgent.set(Some(oldProdInstances.size)) val prodAgePercentiles = PrismLogic.instancesAmisAgePercentiles(prodInstancesWithAmis) logger.debug( s"Found AMIs age percentiles (p25: ${prodAgePercentiles.p25}, p50: ${prodAgePercentiles.p50}, p75: ${prodAgePercentiles.p75})" ) amisAgePercentilesAgent.set(Some(prodAgePercentiles)) } def refreshOldInstanceCountInfo( instancesWithAmis: List[(Instance, Option[AMI])] ): Unit = { for { accounts <- Prism.getAccounts } yield { val now = DateTime.now val oldInstancesForAccount = PrismLogic .oldInstances(instancesWithAmis) .groupBy(_.meta.origin.accountName.getOrElse("unknown-account")) val oldInstanceCountsByAccount = accounts.map(account => { val numberOfOldInstancesForAccount = oldInstancesForAccount.getOrElse(account.accountName, List()).length OldInstanceAccountHistory( now, account.accountName, numberOfOldInstancesForAccount ) }) oldInstanceCountByAccountAgent.set(oldInstanceCountsByAccount) } } def refreshInstancesInfo(): Unit = { Prism .instancesWithAmis(SSAA()) .fold( { err => logger.warn(s"Failed to update old instance count ${err.logString}") }, { instancesWithAmis => refreshProdInstancesInfo(instancesWithAmis) refreshOldInstanceCountInfo(instancesWithAmis) } ) } def refreshHistory(): Unit = { refreshHistory( oldProdInstanceCountHistoryAgent, CloudWatchMetrics.OldCount.name ) refreshHistory( amisAgePercentile25thHistoryAgent, CloudWatchMetrics.AmisAgePercentile25th.name ) refreshHistory( amisAgePercentile50thHistoryAgent, CloudWatchMetrics.AmisAgePercentile50th.name ) refreshHistory( amisAgePercentile75thHistoryAgent, CloudWatchMetrics.AmisAgePercentile75th.name ) } private def refreshHistory( agent: AtomicReference[List[(DateTime, Double)]], metricName: String ): Unit = { cloudWatch .get(amiableConfigProvider.cloudwatchReadNamespace, metricName) .fold( { err => logger.warn( s"Failed to update historical data for metric '$metricName': ${err.logString}" ) }, { dataOpt => dataOpt.fold { logger .warn(s"Failed to fetch historical data for metric '$metricName'") } { data => logger.debug( s"Found ${data.size} historical datapoints for metric '$metricName'" ) agent.set(data) } } ) } }