build_docs.pl (920 lines of code) (raw):
#!/usr/bin/env perl
# Flush on every print even if we're writing to a pipe (like docker).
$| = 1;
use strict;
use warnings;
use v5.10;
binmode( STDIN, ":utf8" );
binmode( STDOUT, ":utf8" );
binmode( STDERR, ":utf8" );
our ($Old_Pwd);
our @Old_ARGV = @ARGV;
use Cwd;
use FindBin;
use Data::Dumper;
use XML::LibXML;
BEGIN {
$Old_Pwd = Cwd::cwd();
chdir "$FindBin::RealBin/";
}
use lib 'lib';
use ES::Util qw(
run $Opts
build_chunked build_single
proc_man
timestamp
write_html_redirect
write_nginx_redirects
write_nginx_test_config
write_nginx_preview_config
start_web_resources_watcher
start_preview
build_web_resources
);
use Getopt::Long qw(:config no_auto_abbrev no_ignore_case no_getopt_compat);
use YAML qw(LoadFile);
use Path::Class qw(dir file);
use Sys::Hostname;
use ES::BranchTracker();
use ES::DocsRepo();
use ES::Repo();
use ES::Book();
use ES::TargetRepo();
use ES::Toc();
use ES::LinkCheck();
# Parse the command line into the shared $Opts hash. If parsing fails the
# usage text is printed and we exit with whatever usage() returned.
GetOptions( $Opts, @{ command_line_opts() } ) || exit usage();
check_opts();

# Locate and load the YAML configuration before doing anything else.
our $ConfPath = pick_conf();
our $Conf     = LoadFile($ConfPath);

# We no longer support running outside of our "standard" docker container.
# `build_docs` signals to us that it is in the standard docker container by
# passing this argument.
unless ( $Opts->{in_standard_docker} ) {
    die 'build_docs.pl is unsupported. Use build_docs instead';
}

# Both of these flags are retired: they only print a message now.
if ( $Opts->{asciidoctor} ) {
    say <<MSG;
The Asciidoctor migration is complete! --asciidoctor will emit this message
forever in honor of our success but otherwise doesn't do anything.
MSG
}
if ( $Opts->{direct_html} ) {
    say <<MSG;
The direct_html migration is complete! --direct_html will emit this message
forever in honor of our success but otherwise doesn't do anything.
MSG
}

init_env();

# Dispatch on the requested mode. --doc wins over --all, which wins over
# --preview; with none of them we just print the usage text.
if ( $Opts->{doc} ) {
    build_local();
}
elsif ( $Opts->{all} ) {
    build_all();
}
elsif ( $Opts->{preview} ) {
    preview();
}
else {
    usage();
}
#===================================
sub build_local {
#===================================
    # Build a single book from the file named by --doc.
    #
    # Reads everything it needs from $Opts. Output goes to --out (or
    # `html_docs`), resolved against the directory the script was started
    # from; the intermediate output goes to its `raw` subdirectory. With
    # --open the result is served through a local preview once built.
    # Dies if the --doc file is missing or an --alternatives value is
    # malformed.
    my $doc   = $Opts->{doc};
    my $index = file($doc)->absolute($Old_Pwd);
    die "File <$doc> doesn't exist" unless -f $index;
    say "Building HTML from $doc";

    my $dir     = dir( $Opts->{out} || 'html_docs' )->absolute($Old_Pwd);
    my $raw_dir = $dir->subdir('raw');

    # Resource directories are given relative to where the user ran us.
    $Opts->{resource}
        = [ map { dir($_)->absolute($Old_Pwd) } @{ $Opts->{resource} || [] } ];

    _guess_opts($index);
    $Opts->{roots}{docs} = '/docs_build' unless $Opts->{roots}{docs};

    # Each --alternatives value is <source_lang>:<alternative_lang>:<dir>.
    my @alternatives;
    for my $spec ( @{ $Opts->{alternatives} || [] } ) {
        my @parts = split /:/, $spec;
        unless ( @parts == 3 ) {
            die "alternatives must contain exactly two :s but was [$spec]";
        }
        push @alternatives, {
            source_lang      => $parts[0],
            alternative_lang => $parts[1],
            dir              => $parts[2],
        };
    }

    # Get a head start on web resources if we're going to need them.
    # The declaration is kept separate from the conditional assignment
    # because `my $x = ... if COND` has undefined behaviour in Perl.
    my $web_resources_pid;
    $web_resources_pid = start_web_resources_watcher() if $Opts->{open};

    my $latest = !$Opts->{suppress_migration_warnings};
    if ( $Opts->{single} ) {
        build_single( $index, $raw_dir, $dir, %$Opts,
            latest       => $latest,
            alternatives => \@alternatives,
            relativize   => 0,
        );
    }
    else {
        build_chunked( $index, $raw_dir, $dir, %$Opts,
            latest       => $latest,
            alternatives => \@alternatives,
            relativize   => 0,
        );
    }
    say "Done";

    if ( $Opts->{open} ) {
        my $preview_pid = start_preview( 'fs', $raw_dir, 'template.html', 0 );
        serve_local_preview( $dir, 0, $web_resources_pid, $preview_pid );
    }
}
#===================================
sub _guess_opts {
#===================================
    # Fill in $Opts->{branch}, $Opts->{roots} and $Opts->{edit_urls} by
    # inspecting the git checkout that contains the book's index file and
    # the checkout containing each entry of $Opts->{resource}.
    #
    # $index - the book's index file; only its `parent` method is used here
    my $index = shift;

    $Opts->{edit_urls} = {};
    $Opts->{roots}     = {};

    # Collect everything we can guess about the checkout containing a path.
    my $probe = sub {
        my ($start)  = @_;
        my $top      = _find_toplevel($start);
        my $origin   = _pick_best_remote($top);
        my $on       = _guess_branch($top);
        my $guessed  = _guess_repo_name($origin);
        return ( $top, $origin, $on, $guessed );
    };

    # Remember a checkout root and the edit url that goes with it.
    my $record = sub {
        my ( $top, $origin, $on, $guessed ) = @_;
        $Opts->{roots}{$guessed} = $top;
        $Opts->{edit_urls}{$top} = ES::Repo::edit_url_for_url_and_branch(
            $origin || 'unknown', $on
        );
    };

    my ( $toplevel, $remote, $branch, $repo_name ) = $probe->( $index->parent );

    # We couldn't find the top level so lets make a wild guess.
    $toplevel ||= $index->parent;
    printf "Guessed toplevel=[%s] remote=[%s] branch=[%s] repo=[%s]\n",
        $toplevel, $remote, $branch, $repo_name;
    $Opts->{branch} = $branch;
    $record->( $toplevel, $remote, $branch, $repo_name );

    for my $resource ( @{ $Opts->{resource} } ) {
        my ( $res_top, $res_remote, $res_branch, $res_name )
            = $probe->($resource);

        # Same wild guess as above when the resource isn't in a checkout.
        $record->( $res_top || $resource, $res_remote, $res_branch, $res_name );
    }
}
#===================================
sub _find_toplevel {
#===================================
    # Ask git for the top level directory of the checkout containing
    # $docpath.
    #
    # Returns whatever `git rev-parse --show-toplevel` produced, or 0 (after
    # logging a message) when git fails or $docpath can't be entered.
    # Dies if the original working directory can't be restored, because
    # the rest of the run would otherwise resolve paths from the wrong place.
    my $docpath      = shift;
    my $original_pwd = Cwd::cwd();
    my $toplevel;

    # Only ask git when we really are inside $docpath. If the chdir fails
    # we would otherwise report the toplevel of whatever directory we
    # happened to be in.
    if ( chdir $docpath ) {
        $toplevel = eval { run qw(git rev-parse --show-toplevel) };
        chdir $original_pwd
            or die "Couldn't return to $original_pwd: $!";
    }

    say "Couldn't find repo toplevel for $docpath" unless $toplevel;
    return $toplevel || 0;
}
#===================================
sub _pick_best_remote {
#===================================
    # Pick the remote url to use when generating edit links for the
    # checkout at $toplevel.
    #
    # Preference order:
    #   1. a remote in the elastic or elasticsearch-cn organization whose
    #      repository is named after the working directory and ends in .git
    #   2. any remote in one of those organizations
    #   3. the first thing that looks like a remote at all (logged)
    # Returns 0 when $toplevel is false or nothing matches.
    my $toplevel = shift;
    return 0 unless $toplevel;

    local $ENV{GIT_DIR} = dir($toplevel)->subdir('.git');
    my $projectName = dir($toplevel)->basename;
    my $remotes     = eval { run qw(git remote -v) } || '';

    # We prefer either an elastic or elasticsearch-cn organization. All
    # but two books are in elastic but elasticsearch-cn is special.
    #
    # Prefer a remote with the same name as the working directory, if it
    # exists. The directory name is quoted so that names containing regex
    # metacharacters (`.`, `+`, ...) are matched literally instead of
    # over-matching or blowing up the pattern.
    if ( $remotes =~ m|\s+(\S+[/:]elastic(?:search-cn)?/\Q$projectName\E)\.git| ) {
        return $1;
    }

    # Otherwise, take any remote from one of the preferred organizations.
    if ( $remotes =~ m|\s+(\S+[/:]elastic(?:search-cn)?/\S+)| ) {
        return $1;
    }

    say "Couldn't find an Elastic remote for $toplevel. Generating edit links targeting the first remote instead.";
    if ( $remotes =~ m|\s+(\S+[/:]\S+/\S+)| ) {
        return $1;
    }
    return 0;
}
#===================================
sub _guess_branch {
#===================================
    # Guess which documentation branch the checkout at $toplevel targets.
    # Returns 'master' when there is no checkout, when git can't tell us
    # the current branch, or when the branch name has no version suffix.
    my $toplevel = shift;
    unless ($toplevel) {
        return 'master';
    }

    local $ENV{GIT_DIR} = dir($toplevel)->subdir('.git');
    my $real_branch = eval { run qw(git rev-parse --abbrev-ref HEAD) } || 'master';

    # Detects common branch patterns like:
    #   7.x
    #   7.1
    #   18.5
    # Also picks the version suffix out of backport style patterns like:
    #   blah_blah_7.x
    #   bort_foo_7_x
    #   zip_zop_12.8
    #   qux_12_8
    # The suffix is returned exactly as it appears in the branch name, so
    # `bort_foo_7_x` yields `7_x`.
    if ( $real_branch =~ /(\d+[\._][\dx]+)$/ ) {
        return $1;
    }

    # Otherwise we just assume we're targeting master. This'll be right when
    # the branch is actually 'master' and when this is a feature branch. It
    # obviously won't always be right, but for the most part that *should* be
    # ok because we have pull request builds which will double check the links.
    return 'master';
}
#===================================
sub _guess_repo_name {
#===================================
    # Derive a repository name from a remote url: the last path component
    # with any trailing `.git` removed. Falls back to 'repo' when no remote
    # was found.
    my ($remote) = @_;
    return 'repo' unless $remote;

    ( my $name = dir($remote)->basename ) =~ s/\.git$//;
    return $name;
}
#===================================
sub build_all {
#===================================
    # Build every book listed in the configuration into the target repo
    # checkout, check links, and optionally push and/or serve the result.
    # Driven entirely by $Opts and $Conf.
    $Opts->{target_repo} ||= 'git@github.com:elastic/built-docs.git';

    my ( $repos_dir, $temp_dir, $reference_dir ) = init_dirs();

    say "Updating repositories";
    my $target_repo
        = init_target_repo( $repos_dir, $temp_dir, $reference_dir );
    my $tracker
        = init_repos( $repos_dir, $temp_dir, $reference_dir, $target_repo );

    my $build_dir = $target_repo->destination->subdir('html');
    $build_dir->mkpath;
    my $raw_build_dir = $target_repo->destination->subdir('raw');

    my $contents = $Conf->{contents}
        or die "Missing <contents> configuration section";

    # An optional extra TOC file is resolved relative to the config file.
    my $toc_extra = 0;
    if ( $Conf->{toc_extra} ) {
        $toc_extra = $ConfPath->parent->file( $Conf->{toc_extra} );
    }
    my $toc = ES::Toc->new( $Conf->{contents_title} || 'Guide', $toc_extra );
    my $redirects = $target_repo->destination->file('redirects.conf');

    if ( !$Opts->{linkcheckonly} ) {
        say "Building docs";
        build_entries(
            $raw_build_dir, $build_dir, $temp_dir, $toc, $tracker, @$contents
        );

        say "Writing main TOC";
        $toc->write( $raw_build_dir, $build_dir, $temp_dir, 0 );
        build_web_resources( $target_repo->destination );

        say "Writing extra HTML redirects";
        for my $redirect ( @{ $Conf->{redirects} } ) {
            write_html_redirect(
                $build_dir->subdir( $redirect->{prefix} ),
                $redirect->{redirect}
            );
        }

        say "Writing nginx redirects";
        write_nginx_redirects( $redirects, $build_dir, $temp_dir );
    }
    else {
        say "Skipping documentation builds.";
    }

    if ( !$Opts->{skiplinkcheck} ) {
        say "Checking links";
        check_links($build_dir);
    }
    else {
        say "Skipped Checking links";
    }

    $tracker->prune_out_of_date;
    if ( $Opts->{push} ) {
        push_changes( $build_dir, $target_repo, $tracker );
    }
    if ( $Opts->{open} ) {
        serve_local_preview( $build_dir, $redirects, 0, 0 );
    }
    $temp_dir->rmtree;
}
#===================================
sub check_links {
#===================================
    # Check the cross-document links in the built docs under $build_dir,
    # plus the links Kibana points at when a kibana repo is configured.
    #
    # The report is always emitted. Broken links are fatal unless
    # --warnlinkcheck was given, in which case the report is only printed.
    #
    # NOTE(review): this relies on ES::LinkCheck::has_bad returning true
    # when broken links were found, as its name suggests. The previous
    # condition was inverted relative to that reading: it died when there
    # were *no* bad links and merely printed when there were some.
    my $build_dir    = shift;
    my $link_checker = ES::LinkCheck->new($build_dir);

    $link_checker->check;
    check_kibana_links( $build_dir, $link_checker )
        if exists $Conf->{repos}{kibana};

    # Comment out due to build errors
    # check_elasticsearch_links( $build_dir, $link_checker ) if exists $Conf->{repos}{elasticsearch};

    my $fatal = $link_checker->has_bad && !$Opts->{warnlinkcheck};
    die $link_checker->report if $fatal;
    say $link_checker->report;
}
#===================================
sub check_kibana_links {
#===================================
    # Check the documentation links that Kibana's source code points at.
    #
    # For every built Kibana version directory under `en/kibana` (except
    # `current` and anything older than 5) this fetches Kibana's doc links
    # source file from the matching branch, extracts the links from it and
    # hands them to the link checker.
    #
    # $build_dir    - root of the built html
    # $link_checker - the link checker that collects the results
    #
    # Dies if none of the known locations of the links file can be read for
    # some version.
    my $build_dir    = shift;
    my $link_checker = shift;
    my $branch;
    my $version;

    say "Checking Kibana links";

    # Builds the iterator handed to the link checker. Each call of the
    # returned sub yields the next link found in $contents (split into path
    # and fragment), "" for links we don't want checked, or nothing when
    # the contents are exhausted. It closes over $version, which the loop
    # below sets before the extractor is used.
    my $extractor = sub {
        my $contents = shift;
        return sub {
            while ( $contents =~ m!`(\$\{(?:baseUrl|ELASTIC.+|KIBANA_DOCS|PLUGIN_DOCS|FLEET_DOCS|APM_DOCS|STACK_DOCS|SECURITY_SOLUTION_DOCS|STACK_GETTING_STARTED|APP_SEARCH_DOCS|ENTERPRISE_SEARCH_DOCS|INTEGRATIONS_DEV_DOCS|WORKPLACE_SEARCH_DOCS|SERVERLESS_DOCS)\}[^`]+)`!g ) {
                my $path = $1;
                $path =~ s/\$\{(?:DOC_LINK_VERSION|urlVersion)\}/$version/;
                $path =~ s/\$\{(?:ECS_VERSION)\}/current/;
                # In older versions, the variable `${ELASTIC_DOCS}` referred to
                # the Elasticsearch Guide. In newer branches, the
                # variable is called `${ELASTICSEARCH_DOCS}`
                $path =~ s!\$\{ELASTIC_DOCS\}!en/elasticsearch/reference/$version/!;
                $path =~ s!\$\{ELASTICSEARCH_DOCS\}!en/elasticsearch/reference/$version/!;
                $path =~ s!\$\{KIBANA_DOCS\}!en/kibana/$version/!;
                $path =~ s!\$\{PLUGIN_DOCS\}!en/elasticsearch/plugins/$version/!;
                $path =~ s!\$\{OBSERVABILITY_DOCS\}!en/observability/$version/!;
                $path =~ s!\$\{FLEET_DOCS\}!en/fleet/$version/!;
                $path =~ s!\$\{APM_DOCS\}!en/apm/!;
                $path =~ s!\$\{STACK_DOCS\}!en/elastic-stack/$version/!;
                $path =~ s!\$\{SECURITY_SOLUTION_DOCS\}!en/security/$version/!;
                $path =~ s!\$\{STACK_GETTING_STARTED\}!en/elastic-stack-get-started/$version/!;
                $path =~ s!\$\{APP_SEARCH_DOCS\}!en/app-search/$version/!;
                $path =~ s!\$\{ENTERPRISE_SEARCH_DOCS\}!en/enterprise-search/$version/!;
                $path =~ s!\$\{WORKPLACE_SEARCH_DOCS\}!en/workplace-search/$version/!;
                $path =~ s!\$\{MACHINE_LEARNING_DOCS\}!en/machine-learning/$version/!;
                $path =~ s!\$\{INTEGRATIONS_DEV_DOCS}!en/integrations-developer/current/!;
                $path =~ s!\$\{SERVERLESS_DOCS}!/en/serverless/current/!;
                # Replace the "https://www.elastic.co/guide/" URL prefix so that
                # it becomes a file path in the built docs.
                $path =~ s!\$\{(?:baseUrl|ELASTIC_WEBSITE_URL)\}guide/!!;
                # We don't want to check any links to www.elastic.co that aren't
                # part of the docs.
                return "" if $path =~ m/\$\{(?:baseUrl|ELASTIC_WEBSITE_URL|ELASTIC_GITHUB|API_DOCS|ELASTICSEARCH_APIS|ELASTICSEARCH_SERVERLESS_APIS|KIBANA_APIS|KIBANA_SERVERLESS_APIS)\}.*/;
                # Otherwise, return the link to check
                return ( split /#/, $path );
            }
            return;
        };
    };

    # Every place the links file has lived, in the order we try them.
    my $src_path    = 'src/ui/public/documentation_links/documentation_links';
    my $legacy_path = 'src/legacy/ui/public/documentation_links/documentation_links';
    my @candidates  = (
        "src/platform/packages/shared/kbn-doc-links/src/get_doc_links.ts",
        "packages/kbn-doc-links/src/get_doc_links.ts",
        $src_path . ".js",
        $src_path . ".ts",
        $legacy_path . ".js",
        $legacy_path . ".ts",
        "src/core/packages/doc-links/core-doc-links-browser-internal/src/doc_links_service.ts",
        "packages/core/doc-links/core-doc-links-browser-internal/src/doc_links_service.ts",
        "src/core/public/doc_links/doc_links_service.ts",
    );

    my $repo     = ES::Repo->get_repo('kibana');
    my @versions = sort map { $_->basename }
        grep { $_->is_dir } $build_dir->subdir('en/kibana')->children;
    my $link_check_name = 'link-check-kibana';

    for my $built_version (@versions) {
        $version = $built_version;
        next if $version eq 'current';

        # Skip everything before 5. The major version is compared as a
        # number: a string comparison would also skip 10 and later.
        next if $version =~ /^(\d+)/ && $1 < 5;

        # @versions is looping through the directories in the output (which
        # still contains `master`), but we need to look in the `main` branch of
        # the Kibana repo for this file. Likewise the `8.x` docs are read
        # from the `8.19` branch.
        #
        # TODO: remove as part of
        # https://github.com/elastic/docs/issues/2264
        $branch
            = $version eq "master" ? "main"
            : $version eq "8.x"    ? "8.19"
            :                        $version;
        say " Branch: $branch, Version: $version";

        # Use the first candidate that can be read and isn't empty.
        my $links_file;
        my $source;
        for my $candidate (@candidates) {
            $links_file = $candidate;
            $source     = eval {
                $repo->show_file( $link_check_name, $branch, $links_file );
            };
            last if $source;
        }
        die "failed to find kibana links file;\n$@" unless $source;

        $link_checker->check_source( $source, $extractor,
            "Kibana [$version]: $links_file" );

        # Mark the file that we need for the link check done so we can use
        # --keep_hash with it during some other build.
        $repo->mark_done( $link_check_name, $branch, $links_file, 0 );
    }
}
#===================================
sub check_elasticsearch_links {
#===================================
    # Check the documentation links that Elasticsearch's source code points
    # at, for every built reference version from 8.7 on (plus any version
    # directory that isn't numeric, other than `current`).
    #
    # $build_dir    - root of the built html
    # $link_checker - the link checker that collects the results
    #
    # Dies if the links file can't be read from either known location.
    my $build_dir    = shift;
    my $link_checker = shift;
    my $branch;
    my $version;

    say "Checking Elasticsearch links";

    # Grab URLs from the JSON file. This is lame, but we sort of need to parse
    # using regexes because that's what the rest of the infrastructure expects.
    # So we grab all quoted strings that contain `html`. This *should* be fine
    # for a while because the keys in the file are all in SHOUTING_SNAKE_CASE
    # so even if one contains "html" it'll contain "HTML" which doesn't match.
    my $json_extractor = sub {
        my $contents = shift;
        return sub {
            while ( $contents =~ m!"([^"\#]+)(?:\#([^"]+))?"!g ) {
                my $path = $1;
                next unless $path =~ m!html!;
                return "en/elasticsearch/reference/$version/$path";
            }
            return;
        };
    };

    # Same idea for the tab delimited flavour of the file: everything after
    # the first tab inside a quoted string is taken as the link.
    my $tabdelim_extractor = sub {
        my $contents = shift;
        return sub {
            while ( $contents =~ m!"[^\t]+\t(.*)"!g ) {
                return "en/elasticsearch/reference/$version/$1";
            }
            return;
        };
    };

    # Known locations of the links file, each with the extractor that
    # understands its format, in the order we try them.
    my @candidates = (
        [   'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json',
            $json_extractor,
        ],
        [   'libs/core/src/main/resources/org/elasticsearch/core/reference-docs-links.txt',
            $tabdelim_extractor,
        ],
    );

    my $repo     = ES::Repo->get_repo('elasticsearch');
    my @versions = sort map { $_->basename }
        grep { $_->is_dir }
        $build_dir->subdir('en/elasticsearch/reference')->children;
    my $link_check_name = 'link-check-elasticsearch';

    for my $built_version (@versions) {
        $version = $built_version;
        next if $version eq 'current';

        # check versions after 8.6. The parts are compared as numbers: as
        # strings "10" sorts before "7", which would wrongly skip 8.10+.
        if ( $version =~ /^(\d+)\.(\d+)/ ) {
            next if $1 < 8 || ( $1 == 8 && $2 < 7 );
        }

        # @versions is looping through the directories in the output (which
        # still contains `master`), but we need to look in the `main` branch of
        # the ES repo for this file.
        #
        # TODO: remove as part of
        # https://github.com/elastic/docs/issues/2264
        $branch = $version eq "master" ? "main" : $version;
        say " Branch: $branch, Version: $version";

        # Use the first candidate that can be read and isn't empty.
        my $links_file;
        my $extractor;
        my $source;
        for my $candidate (@candidates) {
            ( $links_file, $extractor ) = @$candidate;
            $source = eval {
                $repo->show_file( $link_check_name, $branch, $links_file );
            };
            last if $source;
        }
        die "failed to find elasticsearch links file;\n$@" unless $source;

        # Report and mark the file we actually read, not just the first
        # location we tried.
        $link_checker->check_source( $source, $extractor,
            "Elasticsearch [$version]: $links_file" );

        # Mark the file that we need for the link check done so we can use
        # --keep_hash with it during some other build.
        $repo->mark_done( $link_check_name, $branch, $links_file, 0 );
    }
}
#===================================
sub build_entries {
#===================================
    # Walk a list of configuration entries, building each book and
    # recursing into each section, and add the results to $toc.
    #
    # $raw_build, $build - raw and final output directories for this level
    # $temp_dir          - scratch directory passed on to books and tocs
    # $toc               - table of contents that receives the entries
    # $tracker           - told about every book that is built
    # @entries           - entry hashes; each needs a `title`
    #
    # Returns $toc. Dies on an entry without a title.
    my ( $raw_build, $build, $temp_dir, $toc, $tracker, @entries ) = @_;

    while ( my $entry = shift @entries ) {
        my $title = $entry->{title}
            or die "Missing title for entry: " . Dumper($entry);
        my $sections = $entry->{sections};

        # A plain entry is a book: build it and move on.
        unless ($sections) {
            my $book = ES::Book->new(
                dir      => $build,
                raw_dir  => $raw_build,
                temp_dir => $temp_dir,
                %$entry
            );
            $toc->add_entry( $book->build( $Opts->{rebuild}, $ConfPath ) );
            $tracker->allowed_book($book);
            next;
        }

        # Otherwise it is a section that groups further entries, possibly
        # in its own sub directory and with its own extra TOC file.
        my $base_dir      = $entry->{base_dir} || '';
        my $raw_sub_build = $raw_build->subdir($base_dir);
        my $sub_build     = $build->subdir($base_dir);
        my $toc_extra     = 0;
        if ( $entry->{toc_extra} ) {
            $toc_extra = $ConfPath->parent->file( $entry->{toc_extra} );
        }
        my $section_toc = build_entries(
            $raw_sub_build, $sub_build, $temp_dir,
            ES::Toc->new( $title, $toc_extra, $entry->{lang} ),
            $tracker, @$sections
        );

        if ( !$base_dir ) {
            # No directory of its own: nest the section's toc in ours.
            $toc->add_entry($section_toc);
        }
        else {
            # The section gets its own index page which we link to.
            $section_toc->write( $raw_sub_build, $sub_build, $temp_dir );
            $toc->add_entry(
                {   title => $title,
                    url   => "$base_dir/index.html",
                }
            );
        }
    }
    return $toc;
}
#===================================
sub build_sitemap {
#===================================
    # Write `sitemap.xml` into $dir.
    #
    # $dir     - directory containing the built html
    # $changed - NUL separated list of changed paths; entries under `html/`
    #            get "now" as their modification time
    #
    # Build the sitemap by iterating over all of the toc and index files. Uses
    # the old sitemap to populate the dates for files that haven't changed.
    # Use "now" for files that have. Dies if the old sitemap is malformed, if
    # a file has no known modification time, or if the new sitemap can't be
    # written.
    my ( $dir, $changed ) = @_;
    my $sitemap = $dir->file('sitemap.xml');
    my $now     = timestamp();
    my %dates;

    # Seed the dates from the sitemap we wrote last time, if there is one.
    if ( -e $sitemap ) {
        my $doc = XML::LibXML->load_xml( location => $sitemap );
        for my $url_node ( $doc->firstChild->childNodes ) {
            next unless $url_node->nodeName eq 'url';
            my $loc;
            my $lastmod;
            for my $child ( $url_node->childNodes ) {
                $loc     = $child->to_literal if $child->nodeName eq 'loc';
                $lastmod = $child->to_literal if $child->nodeName eq 'lastmod';
            }
            die "Didn't find <loc> in $url_node"     unless $loc;
            die "Didn't find <lastmod> in $url_node" unless $lastmod;

            # Dates are keyed by path relative to the guide root. The
            # prefix is quoted so that its dots only match dots.
            $loc =~ s|\Qhttps://www.elastic.co/guide/\E||;
            $dates{$loc} = $lastmod;
        }
    }

    # Anything that changed in this build was modified "now".
    for my $path ( split /\0/, $changed ) {
        next unless $path =~ s|^html/||;
        $dates{$path} = $now;
    }

    # Build a list of the files we're going to index and sort it so entries in
    # the sitemap don't "jump around". Only `current` directories are
    # indexed, and we don't descend into them.
    my @files;
    $dir->recurse(
        callback => sub {
            my $item = shift;
            return unless $item->is_dir && $item->basename eq 'current';
            if ( -e $item->file('toc.html') ) {
                my $content = $item->file('toc.html')
                    ->slurp( iomode => '<:encoding(UTF-8)' );
                push @files, $item->file($_)
                    for ( $content =~ /href="([^"]+)"/g );
            }
            elsif ( -e $item->file('index.html') ) {
                push @files, $item->file('index.html');
            }
            return $item->PRUNE;
        }
    );
    @files = sort @files;

    # The file declares itself UTF-8 and the hrefs were decoded from UTF-8
    # above, so encode on the way out as well.
    open my $fh, '>:encoding(UTF-8)', $sitemap
        or die "Couldn't create $sitemap: $!";
    say {$fh} <<SITEMAP_START;
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
SITEMAP_START
    for my $file (@files) {
        my $loc  = $file->relative($dir);
        my $url  = "https://www.elastic.co/guide/$loc";
        my $date = $dates{$loc};
        die "Couldn't find a modified time for $loc" unless $date;
        say {$fh} <<ENTRY;
<url>
<loc>$url</loc>
<lastmod>$date</lastmod>
<changefreq>weekly</changefreq>
<priority>0.5</priority>
</url>
ENTRY
    }
    say {$fh} "</urlset>";
    close $fh or die "Couldn't close $sitemap: $!";
}
#===================================
sub init_dirs {
#===================================
    # Prepare the directories a build works in and return them as
    # ( $repos_dir, $temp_dir, $reference_dir ).
    #
    # The repository cache (--reposcache, default `.repos`) is created if
    # needed. The scratch directory `/tmp/docsbuild` is wiped and recreated.
    # The --reference directory must already exist when it is given.

    my $repos_dir = dir( $Opts->{reposcache} || '.repos' )->absolute;
    $repos_dir->mkpath;

    # Always start from an empty scratch directory.
    my $temp_dir = dir('/tmp/docsbuild')->absolute;
    $temp_dir->rmtree;
    $temp_dir->mkpath;

    my $reference_dir = dir( $Opts->{reference} );
    if ($reference_dir) {
        $reference_dir = $reference_dir->absolute;
        unless ( -e $reference_dir ) {
            die "Missing reference directory $reference_dir";
        }
    }

    return ( $repos_dir, $temp_dir, $reference_dir );
}
#===================================
sub init_target_repo {
#===================================
    # Create the ES::TargetRepo object for the repository that built docs
    # are committed to, update it from its remote and return it.
    #
    # $repos_dir     - directory holding the git dirs; ours is `target_repo.git`
    # $temp_dir      - scratch directory; the checkout goes to `target_repo` in it
    # $reference_dir - passed through as the `reference` argument
    my ( $repos_dir, $temp_dir, $reference_dir ) = @_;

    my @settings = (
        git_dir     => $repos_dir->subdir('target_repo.git'),
        url         => $Opts->{target_repo},
        reference   => $reference_dir,
        destination => dir("$temp_dir/target_repo"),
        branch      => $Opts->{target_branch} || 'master',
    );
    my $target_repo = ES::TargetRepo->new(@settings);
    $target_repo->update_from_remote;
    return $target_repo;
}
#===================================
sub init_repos {
#===================================
# Set up every repository named in the <repos> section of the configuration
# and return the ES::BranchTracker that is shared between them.
#
# $repos_dir     - directory holding the git dirs; stale children are removed
# $temp_dir      - scratch directory
# $reference_dir - passed to each ES::Repo as its `reference`
# $target_repo   - the already initialized target repo
#
# Dies when <repos> is missing from the configuration or when a --sub_dir
# option is malformed or names a directory that doesn't exist.
my ( $repos_dir, $temp_dir, $reference_dir, $target_repo ) = @_;
printf(" - %20s: Checking out minimal\n", 'target_repo');
$target_repo->checkout_minimal();
# Start by assuming every child of the repos dir is stale. Each directory
# that is still in use is struck from this list below.
my %child_dirs = map { $_ => 1 } $repos_dir->children;
delete $child_dirs{ $temp_dir->absolute };
my $conf = $Conf->{repos}
or die "Missing <repos> in config";
my @repo_names = sort keys %$conf;
delete $child_dirs{ $target_repo->git_dir->absolute };
my $tracker_path = $target_repo->destination . '/html/branches.yaml';
# check out all remaining repos in parallel
my $tracker = ES::BranchTracker->new( file($tracker_path), @repo_names );
# NOTE(review): proc_man comes from ES::Util and isn't visible here. The way
# it is used below looks like the Parallel::ForkManager convention where
# `start` returns true in the parent and false in the forked child — confirm.
my $pm = proc_man( $Opts->{procs} * 3 );
unless ( $pm->start('target_repo') ) {
printf(" - %20s: Checking out remaining\n", 'target_repo');
$target_repo->checkout_all();
$pm->finish;
}
for my $name (@repo_names) {
# The docs repo is set up separately at the end of this sub.
next if $name eq 'docs';
my $url = $conf->{$name};
# We always use ssh-style urls regardless of conf.yaml so we can use
# our ssh key for the cloning.
$url =~ s|https://([^/]+)/|git\@$1:|;
my $repo = ES::Repo->new(
name => $name,
git_dir => $repos_dir->subdir("$name.git"),
tracker => $tracker,
url => $url,
reference => $reference_dir,
keep_hash => $Opts->{keep_hash} || 0,
);
delete $child_dirs{ $repo->git_dir->absolute };
if ( $Opts->{linkcheckonly} ){
say "Skipping fetching repo $name."
}
else {
$pm->start($name) and next;
$repo->update_from_remote();
$pm->finish;
}
}
$pm->wait_all_children;
# Parse the --sub_dir options and attach them to the repo. Each one has the
# form <repo>:<branch>:<dir>.
# NOTE(review): %sub_dirs is declared but never used in this sub.
my %sub_dirs = ();
foreach (@{ $Opts->{sub_dir} }) {
die "invalid --sub_dir $_"
unless /(?<repo>[^:]+):(?<branch>[^:]+):(?<dir>.+)/;
my $dir = dir($+{dir})->absolute;
die "--sub_dir $dir doesn't exist" unless -e $dir;
ES::Repo->get_repo($+{repo})->add_sub_dir($+{branch}, $dir);
}
# Whatever is left in %child_dirs isn't used by this configuration any more.
for ( keys %child_dirs ) {
my $dir = dir($_);
next unless -d $dir;
say "Removing old repo <" . $dir->basename . ">";
$dir->rmtree;
}
# Setup the docs repo
# We support configuring the remote for the docs repo for testing
ES::DocsRepo->new(
tracker => $tracker,
dir => $conf->{docs} || '/docs_build',
keep_hash => $Opts->{keep_hash} || 0
);
return $tracker;
}
#===================================
sub preview {
#===================================
# Serve a preview of the already built docs: nginx in a forked child, the
# preview application via start_preview, and (unless --gapped) an endless
# loop in the parent that fetches the target repo once a second.
# This sub doesn't return: the parent exits and the child execs nginx.
$Opts->{target_repo} = 'git@github.com:elastic/built-docs.git' unless ( $Opts->{target_repo} );
my $nginx_config = file('/tmp/nginx.conf');
write_nginx_preview_config( $nginx_config );
# NOTE(review): fork returns undef on failure, which lands in the else
# branch and would exec nginx in this process — confirm that is acceptable.
if ( my $nginx_pid = fork ) {
# parent
my ( $repos_dir, $temp_dir, $reference_dir ) = init_dirs();
my $target_repo;
# With --gapped nothing is cloned, so $target_repo stays undefined.
unless ( $Opts->{gapped} ) {
say "Cloning built docs";
$target_repo = init_target_repo( $repos_dir, $temp_dir, $reference_dir );
}
say "Built docs are ready";
my $default_template = $Opts->{gapped} ? "air_gapped_template.html" : "template.html";
my $preview_pid = start_preview(
'git', '/docs_build/.repos/target_repo.git', $default_template, $Opts->{gapped}
);
$SIG{TERM} = sub {
# We should be a good citizen and shut down the subprocesses.
# This isn't so important in k8s or docker because we shoot
# the entire container when we're done, but it is nice when
# testing.
say 'Terminating preview services...nginx';
kill 'TERM', $nginx_pid;
wait;
say 'Terminating preview services...preview';
kill 'TERM', $preview_pid;
wait;
say 'Terminated preview services';
exit 0;
};
if ( $Opts->{gapped} ) {
# Nothing to keep up to date: just block until a child exits.
wait;
} else {
# Keep fetching forever, logging both results and errors. Only the
# TERM handler above gets us out of this loop.
while (1) {
sleep 1;
my $fetch_result = eval { $target_repo->fetch };
say $fetch_result if $fetch_result;
say $@ if $@;
}
}
exit;
} else {
# child: detach from our stdin and become nginx
close STDIN;
open( STDIN, "</dev/null" );
exec( qw(nginx -c), $nginx_config );
}
}
#===================================
sub push_changes {
#===================================
    # Commit and push the freshly built docs, but only when the tracker or
    # the target repo reports that something changed. Before committing,
    # the branch tracker is saved and the sitemap is rebuilt.
    #
    # $build_dir   - directory containing the built html
    # $target_repo - repository the docs are committed to
    # $tracker     - branch tracker for this build
    my ( $build_dir, $target_repo, $tracker ) = @_;
    my $outstanding = $target_repo->outstanding_changes;

    if ( !$tracker->has_non_local_changes && !$outstanding ) {
        say "No changes to push";
    }
    else {
        say "Saving branch tracker";
        $tracker->write;

        say "Building sitemap";
        build_sitemap( $build_dir, $outstanding );

        say "Commiting changes";
        $target_repo->commit;

        say "Pushing changes";
        $target_repo->push_changes;

        my $preview_host = $Opts->{announce_preview};
        if ($preview_host) {
            say "A preview will soon be available at " . $preview_host;
        }
    }
}
#===================================
sub init_env {
#===================================
    # Prepare the process environment for running inside docker:
    #   1. optionally wait for ssh auth forwarding to be set up
    #   2. (not for --preview) fork; the parent only supervises the child
    #      and exits with it, the child carries on with the build
    #   3. in the child, fake a passwd entry for our uid with nss wrapper
    # Dies if the forwarded ssh auth socket never shows up or the override
    # passwd file can't be written.
    if ( exists $ENV{SSH_AUTH_SOCK}
        && $ENV{SSH_AUTH_SOCK} eq '/tmp/forwarded_ssh_auth' )
    {
        print "Waiting for ssh auth to be forwarded to " . hostname . "\n";
        while (<>) {
            # Read from stdin waiting for the signal that we're ready. We
            # use stdin here because it prevents us from leaving the docker
            # container running if something goes wrong with the forwarding
            # process. The mechanism of action is that when something goes
            # wrong build_docs will die, closing stdin. That will cause us
            # to drop out of this loop and cause the process to terminate.
            last if ( $_ eq "ready\n" );
        }
        die '/tmp/forwarded_ssh_auth is missing'
            unless ( -e '/tmp/forwarded_ssh_auth' );
        print "Found ssh auth\n";
    }

    if ( $Opts->{preview} ) {
        # `--preview` is run in k8s, where it doesn't *want* a tty,
        # so it should avoid doing the housekeeping below.
        return;
    }

    # If we're in docker we're relying on closing stdin to cause an orderly
    # shutdown because it is really the only way for us to know for sure
    # that the python build_docs process that's on the host is dead.
    # Since perl's threads are "not recommended" we fork early in the run
    # process and have the parent synchronously wait and read from stdin. A
    # few things can happen here and each has a comment below:
    if ( my $child_pid = fork ) {
        $SIG{CHLD} = sub {
            # The child process exits so we should exit with whatever
            # exit code it gave us. This can also come about because the
            # child process is killed.
            use POSIX ":sys_wait_h";
            my $child_status = 'missing';
            while ( ( my $child = waitpid( -1, WNOHANG ) ) > 0 ) {
                my $status = $? >> 8;
                if ( $child == $child_pid ) {
                    $child_status = $status;
                }
                else {
                    # Some other subprocess died on us. The calling code
                    # will handle it.
                }
            }
            exit $child_status unless ( $child_status eq 'missing' );
        };
        $SIG{INT} = sub {
            # We're interrupted. This'll happen if we somehow end up in
            # the foreground. It isn't likely, but if it does happen we
            # should interrupt the child just in case it wasn't already
            # interrupted and then exit with whatever code the child exits
            # with.
            kill 'INT', $child_pid;
            wait;
            exit $? >> 8;
        };
        $SIG{TERM} = sub {
            # We're terminated. We should pass on the love to the
            # child process and return its exit code.
            kill 'TERM', $child_pid;
            wait;
            exit $? >> 8;
        };
        while (<>) { }

        # STDIN is closed. This'll happen if the python build_docs process
        # on the host dies for some reason. When the host process dies we
        # should do our best to die too so the docker container exits and
        # is removed. We do that by terminating the child and exiting with
        # whatever exit code it exits with.
        kill 'TERM', $child_pid;
        wait;
        exit $? >> 8;
    }

    # If we're running in docker then we won't have a useful username
    # so we hack one into place with nss wrapper.
    # Failing to open the file must be fatal; this used to call `dir(...)`
    # by mistake, which silently ignored the failure.
    open( my $override, '>', '/tmp/passwd' )
        or die "Couldn't write override user file: $!";

    # We use the `id` command here because it fetches the id. The native
    # perl way to do this (getpwuid($<)) doesn't work because it needs a
    # complete user. And we *aren't* one.
    my $uid = `id -u`;
    my $gid = `id -g`;
    chomp($uid);
    chomp($gid);
    print $override "docker:x:$uid:$gid:docker:/tmp:/bin/bash\n";

    # Buffered write errors only show up on close, so check it.
    close $override
        or die "Couldn't write override user file: $!";

    $ENV{LD_PRELOAD}         = '/usr/lib/libnss_wrapper.so';
    $ENV{NSS_WRAPPER_PASSWD} = '/tmp/passwd';
    $ENV{NSS_WRAPPER_GROUP}  = '/etc/group';
}
#===================================
sub pick_conf {
#===================================
    # Work out which configuration file to load. Without --conf this is
    # `conf.yaml` relative to the current directory. A relative --conf is
    # resolved against the directory the script was started from. Dies when
    # the requested file doesn't exist.
    my $requested = $Opts->{conf};
    unless ($requested) {
        return file('conf.yaml');
    }

    my $conf = file($requested);
    if ( $conf->is_relative ) {
        $conf = dir($Old_Pwd)->file($requested);
    }

    die "$conf doesn't exist" unless -e $conf;
    return $conf;
}
#===================================
# Serve the documentation that we just built.
#
# docs_dir - directory containing generated docs : Path::Class::dir
# redirects_file - file containing redirects or 0 if there aren't
# - any redirects : Path::Class::file||0
# web_resources_pid - pid of a subprocess that rebuilds the web resources on
# the fly if we're running one or 0
# preview_pid - pid of the preview application or 0 if we're not running it
#===================================
sub serve_local_preview {
#===================================
    # Fork: the child becomes nginx serving the built docs, the parent
    # waits for a child to exit and then exits itself. On INT or TERM the
    # parent first terminates every service it was told about.
    my ( $docs_dir, $redirects_file, $web_resources_pid, $preview_pid ) = @_;

    my $nginx_pid = fork;
    if ( !$nginx_pid ) {
        # child: write the config, detach from stdin and become nginx
        my $nginx_config = file('/tmp/nginx.conf');
        write_nginx_test_config(
            $nginx_config, $docs_dir, $redirects_file,
            $web_resources_pid, $preview_pid
        );
        close STDIN;
        open( STDIN, "</dev/null" );
        exec( qw(nginx -c), $nginx_config );
    }
    else {
        # parent: the services are shut down in this order, skipping any
        # that were never started (pid 0).
        my $shutdown = sub {
            my @services = (
                [ nginx   => $nginx_pid ],
                [ preview => $preview_pid ],
                [ parcel  => $web_resources_pid ],
            );
            for my $service (@services) {
                my ( $label, $pid ) = @$service;
                next unless $pid;
                say "Terminating preview services...$label";
                kill 'TERM', $pid;
                wait;
            }
        };
        $SIG{INT}  = $shutdown;
        $SIG{TERM} = $shutdown;

        wait;
        say 'Terminated preview services';
        exit;
    }
}
#===================================
sub command_line_opts {
#===================================
    # Returns a reference to the list of Getopt::Long option specifications
    # that this script understands, grouped by the mode they belong to.

    # Options only compatible with --doc
    my @doc_only = (
        'doc=s',
        'alternatives=s@',
        'chunk=i',
        'lang=s',
        'lenient',
        'out=s',
        'resource=s@',
        'respect_edit_url_overrides',
        'single',
        'suppress_migration_warnings',
        'toc',
        'private',
    );

    # Options only compatible with --all
    my @all_only = (
        'all',
        'announce_preview=s',
        'target_branch=s',
        'target_repo=s',
        'keep_hash',
        'linkcheckonly',
        'push',
        'rebuild',
        'reference=s',
        'reposcache=s',
        'skiplinkcheck',
        'warnlinkcheck',
        'sub_dir=s@',
        'user=s',
    );

    # Options only compatible with --preview
    my @preview_only = (
        'preview',
        'gapped',
    );

    # Options that do *something* for either --doc or --all or --preview
    my @shared = (
        'asciidoctor',
        'conf=s',
        'direct_html',
        'in_standard_docker',
        'open',
        'procs=i',
        'verbose',
    );

    return [ @doc_only, @all_only, @preview_only, @shared ];
}
#===================================
sub usage {
#===================================
    # Print the command line help to STDOUT and return whatever `say`
    # returns. The option names listed here must match the ones declared in
    # command_line_opts(); the repository cache option is spelled
    # `--reposcache`.
    say <<USAGE;
Build local docs:
build_docs --doc path/to/index.asciidoc [opts]
Opts:
--chunk 1 Also chunk sections into separate files
--alternatives <source_lang>:<alternative_lang>:<dir>
Examples in alternative languages.
--lang Defaults to 'en'
--lenient Ignore linking errors
--out dest/dir/ Defaults to ./html_docs.
--resource Path to image dir - may be repeated
--respect_edit_url_overrides
Respects `:edit_url:` overrides in the book.
--single Generate a single HTML page, instead of
a chunking into a file per chapter
--suppress_migration_warnings
Suppress warnings about Asciidoctor migration
issues. Use this when building "old" branches.
--toc Include a TOC at the beginning of the page.
--private Indicate that the github repo is private.
WARNING: Anything in the `out` dir will be deleted!
Build docs from all repos in conf.yaml:
build_docs --all [opts]
Opts:
--keep_hash Build docs from the same commit hash as last time
--linkcheckonly Skips the documentation builds. Checks links only.
--push Commit the updated docs and push to origin
--announce_preview <host>
Causes the build to log a line about where to find
a preview of the build if anything is pushed.
--rebuild Rebuild all branches of every book regardless of
what has changed
--reference Directory of `--mirror` clones to use as a
local cache
--reposcache Directory to which working repositories are cloned.
Defaults to `<script_dir>/.repos`.
--skiplinkcheck Omit the step that checks for broken links
--warnlinkcheck Checks for broken links but does not fail if they exist
--sub_dir Use a directory as a branch of some repo
(eg --sub_dir elasticsearch:master:~/Code/elasticsearch)
--target_repo Repository to which to commit docs
--target_branch Branch to which to commit docs
--user Specify which GitHub user to use, if not your own
General Opts:
--asciidoctor Emit a happy message.
--conf <ymlfile> Use your own configuration file, defaults to the
bundled conf.yaml
--direct_html Emit a happy message.
--in_standard_docker
Specified by build_docs when running in
its container
--open Open the docs in a browser once built.
--procs Number of processes to run in parallel, defaults
to 3
--verbose Output more logs
USAGE
}
#===================================
sub check_opts {
#===================================
    # Reject option combinations that don't make sense: options that only
    # apply to one mode when that mode wasn't selected, and
    # --skiplinkcheck together with --warnlinkcheck. Dies on the first
    # problem found.

    # Dies for the first of @names that is set, unless $mode_selected.
    my $restrict_to = sub {
        my ( $mode_selected, $mode_label, @names ) = @_;
        return if $mode_selected;
        for my $name (@names) {
            die("--$name only compatible with $mode_label") if $Opts->{$name};
        }
    };

    # Lang will be 'en' even if it isn't specified so we don't check it.
    $restrict_to->(
        $Opts->{doc}, '--doc',
        qw(
            alternatives chunk lenient out resource
            respect_edit_url_overrides single toc private
        )
    );
    $restrict_to->(
        $Opts->{all}, '--all',
        qw(
            keep_hash linkcheckonly push announce_preview rebuild
            reposcache skiplinkcheck warnlinkcheck sub_dir
        )
    );
    $restrict_to->( $Opts->{preview}, '--preview', qw(gapped) );
    $restrict_to->(
        $Opts->{all} || $Opts->{preview}, '--all or --preview',
        qw(reference target_repo)
    );

    if ( $Opts->{skiplinkcheck} && $Opts->{warnlinkcheck} ) {
        die('--warnlinkcheck is incompatible with --skiplinkcheck');
    }
}