in build_docs.pl [591:669]
sub build_sitemap {
#===================================
    # Regenerate sitemap.xml inside $dir.
    #
    # Arguments:
    #   $dir     - directory object for the root of the built docs; it must
    #              support ->file and ->recurse (presumably a
    #              Path::Class::Dir -- confirm against the callers).
    #   $changed - NUL-separated list of changed paths. Entries that start
    #              with "html/" are treated as changed files (prefix removed);
    #              all other entries are ignored.
    #
    # Dates for unchanged files are carried over from the existing sitemap;
    # changed files get the value returned by timestamp().
    #
    # Dies if an existing <url> entry lacks <loc> or <lastmod>, if a file
    # that should be listed has no known modification date, or if the
    # sitemap cannot be written.
    my ( $dir, $changed ) = @_;

    my $base_url = 'https://www.elastic.co/guide/';
    my $sitemap  = $dir->file('sitemap.xml');
    my $now      = timestamp();
    my %dates;

    # Seed the dates from the previous sitemap, if there is one.
    if ( -e $sitemap ) {
        my $doc = XML::LibXML->load_xml( location => $sitemap );

        # documentElement is the root element even when a comment or
        # processing instruction comes before it in the file.
        for my $url_node ( $doc->documentElement->childNodes ) {
            next unless $url_node->nodeName eq 'url';
            my $loc;
            my $lastmod;
            for my $child ( $url_node->childNodes ) {
                my $name = $child->nodeName;
                $loc     = $child->to_literal if $name eq 'loc';
                $lastmod = $child->to_literal if $name eq 'lastmod';
            }
            die "Didn't find <loc> in $url_node"     unless $loc;
            die "Didn't find <lastmod> in $url_node" unless $lastmod;

            # Only strip the site prefix when it really is a prefix, and
            # match it literally rather than as a pattern.
            $loc =~ s{\A\Q$base_url\E}{};
            $dates{$loc} = $lastmod;
        }
    }

    # Files reported as changed get the current timestamp. An undefined
    # $changed is treated as "nothing changed".
    my $changed_list = defined $changed ? $changed : '';
    for my $path ( split /\0/, $changed_list ) {
        next unless $path =~ s|^html/||;
        $dates{$path} = $now;
    }

    # Build a list of the files we're going to index and sort it so entries
    # in the sitemap don't "jump around".
    my @files;
    $dir->recurse(
        callback => sub {
            my $item = shift;
            return unless $item->is_dir && $item->basename eq 'current';

            my $toc   = $item->file('toc.html');
            my $index = $item->file('index.html');
            if ( -e $toc ) {
                # Every href in the table of contents becomes an entry.
                my $content = $toc->slurp( iomode => '<:encoding(UTF-8)' );
                for my $href ( $content =~ /href="([^"]+)"/g ) {
                    push @files, $item->file($href);
                }
            }
            elsif ( -e $index ) {
                push @files, $index;
            }

            # Returned only for "current" directories; presumably tells
            # recurse not to descend into them -- confirm against the
            # directory class' documentation.
            return $item->PRUNE;
        }
    );

    # Render every entry *before* touching the file on disk: if a date is
    # missing we die here and the previous sitemap is left intact instead of
    # being truncated half way through.
    my @entries;
    for my $file ( sort @files ) {
        my $loc  = $file->relative($dir);
        my $url  = "$base_url$loc";
        my $date = $dates{$loc};
        die "Couldn't find a modified time for $loc" unless $date;
        push @entries, <<ENTRY;
<url>
<loc>$url</loc>
<lastmod>$date</lastmod>
<changefreq>weekly</changefreq>
<priority>0.5</priority>
</url>
ENTRY
    }

    open my $fh, '>', $sitemap or die "Couldn't create $sitemap: $!";
    say $fh <<SITEMAP_START;
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
SITEMAP_START
    say {$fh} $_ for @entries;
    say $fh "</urlset>";
    close $fh or die "Couldn't close $sitemap: $!";
}