in src/main/scala/ophan/google/indexing/observatory/AvailabilityUpdaterService.scala [68:81]
/** Trims the set of known records down to those worth scanning, dropping the oldest content.
  *
  * Sets with fewer than 10 records are returned untouched. Otherwise a record is kept only if its
  * `firstSeenInSitemap` is strictly later than BOTH of these cut-offs:
  *
  *  - the current instant (read from `clock`) minus `MaxAgeOfUriToScan`, so very old items are skipped;
  *  - one minute after the earliest `firstSeenInSitemap` in the set, so the very first batch of items
  *    is skipped - we can't tell how long those had been published before scanning was switched on.
  *
  * @param existingRecordsForUrlsInSitemap records already held for the urls currently in the sitemap
  * @param clock source of the current instant; defaults to the system UTC clock
  * @return the input set itself when it has fewer than 10 records, otherwise the filtered subset
  */
def reduceLoadByDiscardingOldestContent(existingRecordsForUrlsInSitemap: Set[AvailabilityRecord])(using clock: Clock = systemUTC()): Set[AvailabilityRecord] = {
  if (existingRecordsForUrlsInSitemap.size >= 10) {
    // Anything first seen at or before this instant is considered too old to scan.
    val tooOldCutoff = clock.instant().minus(MaxAgeOfUriToScan)

    // One minute past the earliest sighting: records at or before this belong to the initial batch.
    val justAfterEarliestSighting =
      existingRecordsForUrlsInSitemap.map(record => record.firstSeenInSitemap).minOption.map(earliest => earliest.plus(1, MINUTES))

    // The effective cut-off is whichever of the two thresholds is later.
    val cutoff = justAfterEarliestSighting match {
      case Some(initialBatchCutoff) if initialBatchCutoff > tooOldCutoff => initialBatchCutoff
      case _ => tooOldCutoff
    }

    existingRecordsForUrlsInSitemap.filter(record => record.firstSeenInSitemap > cutoff)
  } else {
    // Small sets are cheap to scan in full, so nothing is discarded.
    existingRecordsForUrlsInSitemap
  }
}