sub build_sitemap()

in build_docs.pl [591:669]


sub build_sitemap {
#===================================
    # Regenerate "sitemap.xml" at the top of $dir.
    #
    # Arguments:
    #   $dir     - directory object for the root of the built docs. It must
    #              support ->file and ->recurse (presumably a
    #              Path::Class::Dir - confirm against the caller).
    #   $changed - NUL-separated list of changed paths. Only entries that
    #              start with "html/" are used, with that prefix stripped.
    #
    # Dies if the existing sitemap has a <url> without <loc> or <lastmod>,
    # if a file to be listed has no known modification date, or if the
    # sitemap cannot be written.
    my ( $dir, $changed ) = @_;

    # Build the sitemap by iterating over all of the toc and index files. Uses
    # the old sitemap to populate the dates for files that haven't changed.
    # Use "now" for files that have.

    my $base_url = 'https://www.elastic.co/guide/';
    my $sitemap  = $dir->file('sitemap.xml');
    my $now      = timestamp();
    my %dates;

    if ( -e $sitemap ) {
        my $doc = XML::LibXML->load_xml( location => $sitemap );

        # Use the root element rather than the document's first child: a
        # leading comment or processing instruction would otherwise be
        # mistaken for <urlset> and every old date silently dropped.
        for my $url_node ( $doc->documentElement->childNodes ) {
            next unless $url_node->nodeName eq 'url';
            my $loc;
            my $lastmod;
            for my $child ( $url_node->childNodes ) {
                my $name = $child->nodeName;
                $loc     = $child->to_literal if $name eq 'loc';
                $lastmod = $child->to_literal if $name eq 'lastmod';
            }
            die "Didn't find <loc> in $url_node"     unless $loc;
            die "Didn't find <lastmod> in $url_node" unless $lastmod;

            # Strip the site prefix only at the start of the URL, and match
            # it literally (the dots must not act as regex wildcards).
            $loc =~ s|\A\Q$base_url\E||;
            $dates{$loc} = $lastmod;
        }
    }

    # Files reported as changed get the current timestamp, overriding
    # whatever date the old sitemap carried. An undefined $changed is
    # treated as "nothing changed".
    for my $path ( split /\0/, $changed // '' ) {
        next unless $path =~ s|^html/||;
        $dates{$path} = $now;
    }

    # Build a list of the files we're going to index and sort it so entries in
    # the sitemap don't "jump around".
    my @files;
    $dir->recurse(
        callback => sub {
            my $item = shift;

            # Only directories named "current" are indexed.
            return unless $item->is_dir && $item->basename eq 'current';

            my $toc   = $item->file('toc.html');
            my $index = $item->file('index.html');
            if ( -e $toc ) {
                # Every href in the table of contents becomes an entry.
                my $content = $toc->slurp( iomode => '<:encoding(UTF-8)' );
                push @files, $item->file($_)
                    for ( $content =~ /href="([^"]+)"/g );
            }
            elsif ( -e $index ) {
                push @files, $index;
            }

            # Don't descend any further below a "current" directory.
            return $item->PRUNE;
        }
    );
    @files = sort @files;

    # The XML declaration promises UTF-8, so encode on output; hrefs were
    # decoded to characters when toc.html was read.
    open my $fh, '>:encoding(UTF-8)', $sitemap
        or die "Couldn't create $sitemap: $!";
    say $fh <<SITEMAP_START;
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
SITEMAP_START

    for my $file (@files) {
        my $loc  = $file->relative($dir);
        my $url  = "$base_url$loc";
        my $date = $dates{$loc};
        die "Couldn't find a modified time for $loc" unless $date;
        say $fh <<ENTRY;
<url>
    <loc>$url</loc>
    <lastmod>$date</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
</url>
ENTRY
    }

    say $fh "</urlset>";

    # Buffered write errors only surface at close, so check it.
    close $fh or die "Couldn't close $sitemap: $!";
}