def fetchExistingRecordsFor()

in src/main/scala/ophan/google/indexing/observatory/DataStore.scala [25:53]


  /**
   * Looks up the records already held in the table for the given uris.
   *
   * Any per-item result for which `toOption` is empty is silently dropped, so the returned set may
   * be smaller than the number of matching items (presumably those are read/decoding failures -
   * TODO confirm against the Scanamo result type).
   *
   * @param uris the uris to look up, matched against the `Field.Uri` key
   * @return the records that were found and successfully read
   */
  def fetchExistingRecordsFor(uris: Set[URI]): Future[Set[AvailabilityRecord]] = {
    val lookup = table.getAll(Field.Uri in uris)
    scanamoAsync.exec(lookup).map { results =>
      results.flatMap(_.toOption)
    }
  }


  /**
   * Stores an initial record for every not-seen-before uri that was successfully resolved.
   *
   * When a record is first written to the DynamoDB table nothing is stored about its availability -
   * just its URL, whether it redirects, and the time we first saw this url.
   *
   * A summary is logged before writing: the site, the total number of uris in the sitemap, and a
   * context entry for each of the unresolved, resolved, resolved-but-not-OK and redirecting subsets.
   * Unresolved uris are only logged, never stored. When there is nothing resolved to store, no write
   * is issued.
   *
   * @param sitemapDownload the sitemap download the uris came from; supplies the site, the full uri
   *                        list and the timestamp used for the new records
   * @param resolutionsForUrisNotSeenBefore resolutions for the uris not seen before
   * @return a future that completes once the new records have been written (or immediately, if there
   *         were none to write)
   */
  def storeNewRecordsFor(sitemapDownload: SitemapDownload, resolutionsForUrisNotSeenBefore: Set[Resolution]): Future[Unit] = {
    // Builds the logging context for one subset of resolutions, keyed by the given suffix.
    def sampleFor[R <: Resolution](suffix: String, subset: Set[R]): Map[String, _] = {
      val originalUris = subset.map(_.redirectPath.originalUri)
      contextSampleOf(s"sitemap.uris.fresh.$suffix", originalUris)
    }

    val freshlyResolved = resolutionsForUrisNotSeenBefore.collect { case resolved: Resolved => resolved }
    val freshlyUnresolved = resolutionsForUrisNotSeenBefore.collect { case unresolved: Unresolved => unresolved }
    val resolvedButNotOk = freshlyResolved.filterNot(_.conclusion.isOk)
    val resolvedWithRedirect = freshlyResolved.filter(_.redirectPath.doesRedirect)

    logger.info(
      Map(
        "site" -> sitemapDownload.site.url,
        "sitemap.uris.all" -> sitemapDownload.allUris.size,
      ) ++
        sampleFor("unresolved", freshlyUnresolved) ++
        sampleFor("resolved", freshlyResolved) ++
        sampleFor("resolved.notOK", resolvedButNotOk) ++
        sampleFor("resolved.redirecting", resolvedWithRedirect),
      s"Storing ${freshlyResolved.size} new resolved uris for ${sitemapDownload.site.url}"
    )

    if (freshlyResolved.nonEmpty) {
      scanamoAsync.exec(
        table.putAll(freshlyResolved.map { r => AvailabilityRecord(r, sitemapDownload.timestamp) })
      )
    } else {
      // Nothing to store, so skip the table write altogether.
      Future.successful(())
    }
  }