in src/main/scala/ophan/google/indexing/observatory/AvailabilityUpdaterService.scala [83:104]
/**
 * Selects the batch of records that should be checked next for the given site.
 *
 * The candidates are the records that survive `reduceLoadByDiscardingOldestContent` and for
 * which `needsCheckingNow()` holds. They are split into two groups:
 *
 *  - records whose `missing` value is defined, ordered ascending by that value
 *    (presumably a timestamp - confirm against `AvailabilityRecord`);
 *  - records whose `missing` value is empty, ordered by `firstSeenInSitemap`, latest first.
 *
 * Up to five records from the first group are placed ahead of the second group, and the
 * combined list is capped at five. With both caps equal, records from the second group are
 * only selected when fewer than five records from the first group need checking.
 *
 * A sample of the URIs at each stage is logged together with the site URL.
 *
 * @param existingRecords all availability records currently held for the site
 * @param site            the site being processed; its URL is included in the log context
 * @return the selected records, in the order described above
 */
def mostUrgent(existingRecords: Set[AvailabilityRecord])(using site: Site): Seq[AvailabilityRecord] = {
  val maxAlreadyMissing = 5
  val maxSelected = 5

  val dueForCheck =
    reduceLoadByDiscardingOldestContent(existingRecords).filter(record => record.needsCheckingNow())

  // Left  = record with no `missing` value
  // Right = (`missing` value, record), so the right-hand group can be sorted by that value
  val (neverScanned, missingWithTimes) =
    dueForCheck
      .map(record => record.missing.map(_ -> record).toRight(record))
      .toSeq
      .separate

  val alreadyMissingInPriorityOrder =
    missingWithTimes
      .sortBy { case (missingTime, _) => missingTime }
      .map { case (_, record) => record }

  // Sort ascending, then flip, so the latest `firstSeenInSitemap` comes first.
  val neverScannedNewestFirst =
    neverScanned.sortBy(record => record.firstSeenInSitemap).reverse

  val prioritised = alreadyMissingInPriorityOrder.take(maxAlreadyMissing) ++ neverScannedNewestFirst
  val selected = prioritised.take(maxSelected)

  logger.info(
    Map("site" -> site.url)
      ++ contextSampleOf("uris.existingRecords", existingRecords.map(_.uri))
      ++ contextSampleOf("uris.recordsNeedingCheck", dueForCheck.map(_.uri))
      ++ contextSampleOf("uris.mostUrgent.alreadyRecordedAsMissingFromGoogle", alreadyMissingInPriorityOrder.map(_.uri))
      ++ contextSampleOf("uris.mostUrgent.selectedForCheck", selected.map(_.uri)),
    "Identified most urgent records"
  )

  selected
}