def reduceLoadByDiscardingOldestContent()

in src/main/scala/ophan/google/indexing/observatory/AvailabilityUpdaterService.scala [68:81]


  /**
   * Cuts down the number of records to scan by dropping the oldest content.
   *
   * Sets with fewer than 10 records are returned untouched. For larger sets, a record is kept only
   * if its `firstSeenInSitemap` is strictly later than BOTH of these cut-offs:
   *  - the clock's current instant minus `MaxAgeOfUriToScan` (really old content is not scanned);
   *  - one minute after the earliest `firstSeenInSitemap` in the set (the very first items seen may
   *    already have been published for an unknown length of time before scanning was turned on).
   *
   * @param existingRecordsForUrlsInSitemap the candidate records to thin out
   * @param clock supplies the current instant; defaults to the system UTC clock
   * @return the records that pass the cut-off, or the input itself when it has fewer than 10 records
   */
  def reduceLoadByDiscardingOldestContent(existingRecordsForUrlsInSitemap: Set[AvailabilityRecord])(using clock: Clock = systemUTC()): Set[AvailabilityRecord] = {
    val smallEnoughToKeepWhole = existingRecordsForUrlsInSitemap.size < 10

    if (!smallEnoughToKeepWhole) {
      // Anything first seen at or before this instant is considered too old to be worth scanning.
      val maxAgeCutoff = clock.instant().minus(MaxAgeOfUriToScan)

      // The earliest-seen items are excluded too: a one-minute window after the first sighting.
      val firstBatchCutoff =
        existingRecordsForUrlsInSitemap.iterator
          .map(_.firstSeenInSitemap)
          .minOption
          .map(_.plus(1, MINUTES))

      // Whichever cut-off is later wins, so a surviving record satisfies both constraints.
      val cutoff = (firstBatchCutoff.toList :+ maxAgeCutoff).max

      existingRecordsForUrlsInSitemap.filter(record => record.firstSeenInSitemap > cutoff)
    } else existingRecordsForUrlsInSitemap
  }