lib/ES/LinkCheck.pm (113 lines of code) (raw):
package ES::LinkCheck;
use strict;
use warnings;
use v5.10;
use ES::Util qw(run);
our $Link_Re = qr{
(?:https?://(?:www.)?elastic.co|[\s"])/guide/
([^"\#<>\s]+) # path
(?:\#([^"<>\s]+))? # fragment
}x;
#===================================
sub new {
#===================================
my $class = shift;
my $root = shift or die "No root dir specified";
bless { root => $root, seen => {}, bad => {} }, $class;
}
#===================================
sub check {
#===================================
my $self = shift;
my $dir = $self->root;
$self->root->recurse(
callback => sub {
my $item = shift;
if ( $item->is_dir ) {
return $item->PRUNE
if $item->basename eq 'images';
return;
}
$self->check_file($item)
if $item->basename =~ /\.html$/;
}
);
return $self->has_bad;
}
#===================================
sub check_file {
#===================================
my ( $self, $file, $extract, $file_descr ) = @_;
$file_descr ||= "$file";
my $source = $file->slurp( iomode => '<:encoding(UTF-8)' );
return $self->check_source( $source, $extract, $file_descr );
}
#===================================
sub check_source {
#===================================
my ( $self, $source, $extract, $file_descr ) = @_;
$extract ||= \&_link_extractor;
my $link_it = $extract->($source);
my $seen = $self->seen;
while ( my ( $path, $fragment ) = $link_it->() ) {
my $dest = $self->root->file($path);
unless ( $self->_file_exists( $dest, $path ) ) {
# Check if the path contains 'main' or 'master'
if ($path =~ /main|master/) {
# Warn (not error!) on broken main/master links
warn "Warning: $file_descr contains a broken link to $path\n";
} else {
# Add non-main/master broken links to the list of bad links
# A list length greater than zero will result in a build failure
$self->add_bad( $file_descr, $path );
}
next;
}
next unless $fragment;
unless ( $self->_fragment_exists( $dest, $path, $fragment ) ) {
# Check if the path contains 'main' or 'master'
if ($path =~ /main|master/) {
# Warn (not error!) on broken main/master links
warn "Warning: $file_descr contains a broken link to $path#$fragment\n";
} else {
# Add non-main/master broken links to the list of bad links
# A list length greater than zero will result in a build failure
$self->add_bad( $file_descr, "$path#$fragment" );
}
}
}
}
#===================================
sub _link_extractor {
#===================================
my $contents = shift;
return sub {
while ( $contents =~ m{$Link_Re}g ) {
return ( $1, $2 );
}
return;
};
}
#===================================
sub report {
#===================================
my $self = shift;
my $bad = $self->bad;
return "All cross-document links OK"
unless keys %$bad;
my @error = "Bad cross-document links:";
for my $file ( sort keys %$bad ) {
push @error, " $file contains broken links to:";
push @error, map {" - $_"} sort keys %{ $bad->{$file} };
}
return join "\n", @error, '';
}
#===================================
sub _file_exists {
#===================================
my ( $self, $file, $path ) = @_;
my $seen = $self->seen;
$seen->{$path} = -e $file
unless exists $seen->{$path};
return $seen->{$path};
}
#===================================
sub _fragment_exists {
#===================================
my ( $self, $file, $path, $frag ) = @_;
my $seen = $self->seen;
unless ( exists $seen->{"$path#$frag"} ) {
my $content = $file->slurp( iomode => '<:encoding(UTF-8)' );
$content =~ s{.+<!-- start body -->}{}s;
$content =~ s{<!-- end body -->.+}{}s;
while ( $content =~ m{<\w+ [^>]*id="([^"]+)"}g ) {
$seen->{"$path#$1"} = 1;
}
}
return $seen->{"$path#$frag"} ||= 0;
}
#===================================
sub add_bad {
#===================================
my ( $self, $file, $id ) = @_;
$self->bad->{$file}{$id} = 1;
}
#===================================
sub root { shift->{root} }
sub seen { shift->{seen} }
sub bad { shift->{bad} }
sub has_bad { !keys %{ shift->bad } }
#===================================
1