in src/main/scala/ophan/google/indexing/observatory/SitemapDownloader.scala [26:46]
/**
  * Downloads every sitemap listed in `site.sitemaps` and gathers the URIs they contain.
  *
  * One asynchronous GET is issued per sitemap URL. Each response body is passed to
  * `SitemapParser.parse`, and the per-sitemap results are flattened into a single
  * `SitemapDownload` stamped with the instant at which this method was invoked.
  *
  * Because the requests are combined with `Future.traverse`, the returned future fails
  * if any single request (or the parsing of its response) fails.
  *
  * @param site the site whose sitemaps should be fetched
  * @return a future holding the site, the start timestamp and all URIs found
  */
def fetchSitemapEntriesFor(site: Site): Future[SitemapDownload] = {
  import java.nio.charset.StandardCharsets

  val start = Instant.now()
  Future.traverse(site.sitemaps) { sitemapUrl =>
    val request = HttpRequest.newBuilder(sitemapUrl).GET().build()
    client.sendAsync(request, BodyHandlers.ofString()).asScala.map { response =>
      // NOTE(review): the status code is only logged; a non-2xx body is still handed to the
      // parser below - confirm that this is intended.
      logger.info(Map(
        "site" -> site.url,
        "sitemap.url" -> sitemapUrl,
        "sitemap.response.statusCode" -> response.statusCode()
      ), s"Received HTTP ${response.statusCode()} response for $sitemapUrl sitemap")
      // The body has already been decoded to a String, so re-encode it with an explicit charset.
      // The no-arg getBytes() uses the JVM's platform default, which corrupts non-ASCII
      // characters whenever that default is not UTF-8.
      val bodyBytes = response.body.getBytes(StandardCharsets.UTF_8)
      val uris: Set[URI] = SitemapParser.parse(new ByteArrayInputStream(bodyBytes), site.url)
      logger.info(Map(
        "site" -> site.url,
        "sitemap.url" -> sitemapUrl,
        "sitemap.uris.size" -> uris.size
      ), s"Found ${uris.size} uris in $sitemapUrl sitemap")
      uris
    }
  }.map { results =>
    SitemapDownload(site, start, results.flatten)
  }
}