lib/ES/LinkCheck.pm (113 lines of code) (raw):

package ES::LinkCheck; use strict; use warnings; use v5.10; use ES::Util qw(run); our $Link_Re = qr{ (?:https?://(?:www.)?elastic.co|[\s"])/guide/ ([^"\#<>\s]+) # path (?:\#([^"<>\s]+))? # fragment }x; #=================================== sub new { #=================================== my $class = shift; my $root = shift or die "No root dir specified"; bless { root => $root, seen => {}, bad => {} }, $class; } #=================================== sub check { #=================================== my $self = shift; my $dir = $self->root; $self->root->recurse( callback => sub { my $item = shift; if ( $item->is_dir ) { return $item->PRUNE if $item->basename eq 'images'; return; } $self->check_file($item) if $item->basename =~ /\.html$/; } ); return $self->has_bad; } #=================================== sub check_file { #=================================== my ( $self, $file, $extract, $file_descr ) = @_; $file_descr ||= "$file"; my $source = $file->slurp( iomode => '<:encoding(UTF-8)' ); return $self->check_source( $source, $extract, $file_descr ); } #=================================== sub check_source { #=================================== my ( $self, $source, $extract, $file_descr ) = @_; $extract ||= \&_link_extractor; my $link_it = $extract->($source); my $seen = $self->seen; while ( my ( $path, $fragment ) = $link_it->() ) { my $dest = $self->root->file($path); unless ( $self->_file_exists( $dest, $path ) ) { # Check if the path contains 'main' or 'master' if ($path =~ /main|master/) { # Warn (not error!) on broken main/master links warn "Warning: $file_descr contains a broken link to $path\n"; } else { # Add non-main/master broken links to the list of bad links # A list length greater than zero will result in a build failure $self->add_bad( $file_descr, $path ); } next; } next unless $fragment; unless ( $self->_fragment_exists( $dest, $path, $fragment ) ) { # Check if the path contains 'main' or 'master' if ($path =~ /main|master/) { # Warn (not error!) on broken main/master links warn "Warning: $file_descr contains a broken link to $path#$fragment\n"; } else { # Add non-main/master broken links to the list of bad links # A list length greater than zero will result in a build failure $self->add_bad( $file_descr, "$path#$fragment" ); } } } } #=================================== sub _link_extractor { #=================================== my $contents = shift; return sub { while ( $contents =~ m{$Link_Re}g ) { return ( $1, $2 ); } return; }; } #=================================== sub report { #=================================== my $self = shift; my $bad = $self->bad; return "All cross-document links OK" unless keys %$bad; my @error = "Bad cross-document links:"; for my $file ( sort keys %$bad ) { push @error, " $file contains broken links to:"; push @error, map {" - $_"} sort keys %{ $bad->{$file} }; } return join "\n", @error, ''; } #=================================== sub _file_exists { #=================================== my ( $self, $file, $path ) = @_; my $seen = $self->seen; $seen->{$path} = -e $file unless exists $seen->{$path}; return $seen->{$path}; } #=================================== sub _fragment_exists { #=================================== my ( $self, $file, $path, $frag ) = @_; my $seen = $self->seen; unless ( exists $seen->{"$path#$frag"} ) { my $content = $file->slurp( iomode => '<:encoding(UTF-8)' ); $content =~ s{.+<!-- start body -->}{}s; $content =~ s{<!-- end body -->.+}{}s; while ( $content =~ m{<\w+ [^>]*id="([^"]+)"}g ) { $seen->{"$path#$1"} = 1; } } return $seen->{"$path#$frag"} ||= 0; } #=================================== sub add_bad { #=================================== my ( $self, $file, $id ) = @_; $self->bad->{$file}{$id} = 1; } #=================================== sub root { shift->{root} } sub seen { shift->{seen} } sub bad { shift->{bad} } sub has_bad { !keys %{ shift->bad } } #=================================== 1