versioned_plugins.rb (363 lines of code) (raw):

require "clamp"
require "json"
require "fileutils"
require "time"
require "set"
require "uri"
require "net/http"
require "stud/try"
require "octokit"
require "erb"
require "pmap"

require_relative 'lib/core_ext/erb_result_with_hash'
require_relative 'lib/logstash-docket'
require_relative 'git_helper'

##
# Command-line tool that generates the versioned plugin reference: it clones
# the logstash-docs repository, expands one asciidoc file per released plugin
# version, rewrites the per-plugin / per-alias / per-type index files and,
# unless `--dry-run` is given, commits the result and opens a pull request.
class VersionedPluginDocs < Clamp::Command
  option "--output-path", "OUTPUT", "Path to a directory where logstash-docs repository will be cloned and written to", required: true
  option "--skip-existing", :flag, "Don't generate documentation if asciidoc file exists"
  option "--latest-only", :flag, "Only generate documentation for latest version of each plugin", :default => false
  option "--repair", :flag, "Apply several heuristics to correct broken documentation", :default => false
  option "--plugin-regex", "REGEX", "Only generate if plugin matches given regex", :default => "logstash-(?:codec|filter|input|output|integration)"
  option "--dry-run", :flag, "Don't create a commit or pull request against logstash-docs", :default => false
  option("--since", "STRING", "gems newer than this date", default: nil) { |v| v && Time.parse(v) }
  option("--parallelism", "NUMBER", "for performance", default: 4) { |v| Integer(v) }

  # Repositories in the logstash-plugins org that never get versioned docs.
  PLUGIN_SKIP_LIST = [
    "logstash-codec-example",
    "logstash-input-example",
    "logstash-filter-example",
    "logstash-output-example",
    "logstash-filter-script",
    "logstash-input-java_input_example",
    "logstash-filter-java_filter_example",
    "logstash-output-java_output_example",
    "logstash-codec-java_codec_example"
  ].freeze

  # Plugins under the logstash-plugins org are discovered automatically.
  # Plugins hosted in any other org must be listed here manually, in
  # {org}/{plugin-repo-name} format.
  ADDITIONAL_ORG_PLUGINS = [
    "elastic/logstash-filter-elastic_integration"
  ].freeze

  STACK_VERSIONS_BASE_URL = "https://raw.githubusercontent.com/elastic/docs/master/shared/versions/stack/"

  # @return [String] directory the logstash-docs repository is cloned into
  def logstash_docs_path
    File.join(output_path, "logstash-docs")
  end

  # @return [String] the "docs" directory under the output path
  def docs_path
    File.join(output_path, "docs")
  end

  attr_reader :octo, :stack_version, :ecs_version

  include LogstashDocket

  # Entry point invoked by Clamp: prepares the GitHub client and the docs
  # checkout, generates the docs, then submits a pull request when the
  # checkout has changes and `--dry-run` was not given.
  def execute
    setup_github_client
    check_rate_limit!
    clone_docs_repo
    fetch_stack_versions
    generate_docs

    if new_versions?
      unless dry_run?
        puts "creating pull request.."
        submit_pr
      end
    else
      puts "No new versions detected. Exiting.."
    end
  end

  # Builds the Octokit client (with auto-pagination enabled), using the
  # GITHUB_TOKEN environment variable as access token when it is set.
  def setup_github_client
    Octokit.auto_paginate = true

    if ENV.fetch("GITHUB_TOKEN", "").empty?
      puts "not using a github token"
    else
      puts "using a github token"
    end

    @octo = Octokit::Client.new(:access_token => ENV["GITHUB_TOKEN"])
  end

  # Prints the current GitHub API rate limit and warns (without aborting)
  # when fewer than 100 requests remain.
  def check_rate_limit!
    rate_limit = octo.rate_limit
    puts "Current GitHub rate limit: #{rate_limit.remaining}/#{rate_limit.limit}"
    if rate_limit.remaining < 100
      puts "Warning! Api rate limit is close to being reached, this script may fail to execute"
    end
  end

  # Discovers plugin repositories, decides which of them had a release since
  # the reference timestamp, expands the docs of every tagged release of those
  # repositories, and finally rewrites the alias, plugin and type indices.
  def generate_docs
    puts "writing to #{logstash_docs_path}"

    repo_names = octo.org_repos("logstash-plugins")
                     .map { |repo| repo.name }
                     .select { |repo_name| repo_name.match(plugin_regex) }
    repos = (repo_names - PLUGIN_SKIP_LIST).sort.uniq.map { |repo_name| "logstash-plugins/#{repo_name}" }
    # plugins from other orgs are appended as-is (no regex / skip-list filtering)
    repos = repos.concat(ADDITIONAL_ORG_PLUGINS)

    puts "found #{repos.size} repos"

    # TODO: make less convoluted
    # `$TIMESTAMP_REFERENCE` is set by `clone_docs_repo`; `--since` takes precedence.
    timestamp_reference = since || Time.strptime($TIMESTAMP_REFERENCE, "%a, %d %b %Y %H:%M:%S %Z")
    puts "Generating docs since #{timestamp_reference.inspect}"

    plugins_indexes_to_rebuild = Util::ThreadsafeWrapper.for(Set.new)
    package_indexes_to_rebuild = Util::ThreadsafeWrapper.for(Set.new)

    plugin_version_index = Util::ThreadsafeIndex.new { Util::ThreadsafeWrapper.for(Set.new) }
    plugin_names_by_type = Util::ThreadsafeIndex.new { Util::ThreadsafeWrapper.for(Set.new) }

    # We need to fetch version metadata from repositories that contain plugins
    repos_requiring_rebuild = Util::ThreadsafeWrapper.for(Set.new)

    # we work from a single set of Repository objects
    repositories = repos.map do |repo_name|
      $stderr.puts("[#{repo_name}]: loading releases...")
      source = Source::Github.new(repo: repo_name, octokit: @octo)
      Repository::from_source(source.repo, source)
    end

    # Iterate over the repos to identify which need reindexing.
    # This is a bit complicated because a single repo is not the complete
    # source-of-truth for a plugin (e.g., previously stand-alone plugins
    # being absorbed into an integration plugin package)
    repositories.peach(parallelism) do |repository|
      latest_release = repository.last_release
      if latest_release.nil?
        $stderr.puts("#{repository.desc}: no releases on rubygems.\n")
        next
      end

      # if the repository has no releases, or none since our `timestamp_reference`,
      # it doesn't need to be added to the reindex list here.
      if latest_release.release_date.nil? || latest_release.release_date < timestamp_reference
        $stderr.puts("#{repository.desc}: no new releases.\n")
        next
      end

      # the repository has one or more releases since our `timestamp_reference`, which means
      # it will need to be reindexed.
      $stderr.puts("#{repository.desc}: found new release\n")
      # repos_requiring_rebuild.add(repository.name) &&
      #   $stderr.puts("[repo:#{repository.name}]: marked for reindex\n")

      # if the latest release is an integration plugin, each of the plugins it contains
      # may have previously been sourced in a different repository; add the plugin name
      # to the list of repositories requiring reindexing.
      latest_release.with_embedded_plugins.each do |plugin|
        repos_requiring_rebuild.add(plugin.canonical_name) &&
          $stderr.puts("#{plugin.desc}: marking for reindex\n")
      end
    end

    # Now that we know which repositories require reindexing, we can start the work.
    repositories.peach(parallelism) do |repository|
      unless repos_requiring_rebuild.include?(repository.name)
        $stderr.puts("[repo:#{repository.name}]: rebuild not required. skipping.\n")
        # still list the plugin in its type index when a versions index already exists on disk
        latest_release = repository.last_release
        latest_release && latest_release.with_embedded_plugins.each do |plugin|
          next unless versions_index_exists?(plugin.name, plugin.type)
          plugin_names_by_type.fetch(plugin.type).add(plugin.name)
        end
        next
      end

      $stderr.puts("[repo:#{repository.name}]: rebuilding versioned docs\n")
      repository.source_tagged_releases.each do |released_plugin|
        # The inner `each` evaluates to `false` (via `break false`) as soon as one
        # plugin has no documentation; `|| break` then also stops the outer loop.
        released_plugin.with_embedded_plugins.each do |plugin|
          if expand_plugin_doc(plugin)
            plugins_indexes_to_rebuild.add(plugin.canonical_name)
            plugin_version_index.fetch(plugin.canonical_name).add(plugin)
            plugin_names_by_type.fetch(plugin.type).add(plugin.name)
          else
            $stderr.puts("#{plugin.desc}: documentation not available; skipping remaining releases from repository\n")
            break false
          end
        end || break

        break if latest_only?
      end
    end

    $stderr.puts("REINDEXING PLUGINS, loading plugin aliases...")
    alias_definitions_by_type = Util::AliasDefinitionsLoader.get_alias_definitions

    # add aliases named to the partitioned plugin names collection
    alias_definitions_by_type.each do |type, alias_definitions|
      alias_definitions.each do |alias_definition|
        plugin_names_by_type.fetch(type).add(alias_definition.fetch("alias"))
      end
    end

    # rewrite alias indices if target plugin was changed
    $stderr.puts("REINDEXING PLUGINS ALIASES... #{alias_definitions_by_type.size}\n")
    alias_definitions_by_type.each do |type, alias_definitions|
      alias_definitions.each do |alias_definition|
        $stderr.puts("[plugin:#{alias_definition.fetch("alias")}] reindexing\n")
        write_alias_index(type, alias_definition.fetch("alias"), alias_definition.fetch("from"))
      end
    end

    # rewrite incomplete plugin indices
    $stderr.puts("REINDEXING PLUGINS... #{plugins_indexes_to_rebuild.size}\n")
    plugins_indexes_to_rebuild.each do |canonical_name|
      $stderr.puts("[plugin:#{canonical_name}] reindexing\n")
      # newest version first, as [tag, "YYYY-MM-DD"] pairs
      versions = plugin_version_index.fetch(canonical_name).sort_by(&:version).reverse.map do |plugin|
        [plugin.tag, plugin.release_date.strftime("%Y-%m-%d")]
      end
      _, type, name = canonical_name.split('-', 3)
      write_versions_index(name, type, versions)
    end

    # rewrite integration package indices
    package_indexes_to_rebuild.each do |canonical_name|
      # TODO: build package indices
    end

    # rewrite versions-by-type indices
    $stderr.puts("REINDEXING TYPES... \n")
    plugin_names_by_type.each do |type, names|
      $stderr.puts("[type:#{type}] reindexing\n")
      write_type_index(type, names.sort)
    end
  end

  # Clones logstash-docs into `logstash_docs_path`, checks out the
  # `versioned_plugin_docs` branch and stores the date of its last commit,
  # minus one day, in the `$TIMESTAMP_REFERENCE` global.
  def clone_docs_repo
    # NOTE(review): the path is interpolated into a shell command unquoted;
    # an output path containing spaces or shell metacharacters will break this.
    `git clone git@github.com:elastic/logstash-docs.git #{logstash_docs_path}`
    Dir.chdir(logstash_docs_path) do
      `git checkout versioned_plugin_docs`
      last_commit_date = `git log -1 --date=short --pretty=format:%cd`
      $TIMESTAMP_REFERENCE = (Time.parse(last_commit_date) - 24 * 3600).strftime("%a, %d %b %Y %H:%M:%S %Z")
    end
  end

  # @return [Boolean] `true` when `git diff-index --quiet HEAD` reports
  #   differences in the docs checkout (the leading `!` inverts git's exit status).
  def new_versions?
    Dir.chdir(logstash_docs_path) do
      `git diff --name-status`
      `! git diff-index --quiet HEAD`
      $?.success?
    end
  end

  # Commits the generated docs to the `versioned_docs_new_content` branch and
  # opens a pull request against `versioned_plugin_docs`; does nothing but
  # warn when that branch already exists.
  def submit_pr
    branch_name = "versioned_docs_new_content"
    git_helper = GitHelper.new("elastic/logstash-docs")
    if git_helper.branch_exists?(branch_name)
      puts "WARNING: Branch \"#{branch_name}\" already exists. Not creating a new PR. Please merge the existing PR or delete the PR and the branch."
      return
    end

    pr_title = "auto generated update of versioned plugin documentation"
    git_helper.commit(logstash_docs_path, branch_name, "updated versioned plugin docs")
    git_helper.create_pull_request(branch_name, "versioned_plugin_docs", pr_title, "")
  end

  # @param client [#branch] client used to look the branch up
  # @param branch_name [String]
  # @return [Boolean] `false` when the lookup raises `Octokit::NotFound`, `true` otherwise
  def branch_exists?(client, branch_name)
    client.branch("elastic/logstash-docs", branch_name)
    true
  rescue Octokit::NotFound
    false
  end

  ##
  # Expands and persists docs for the given `VersionedPlugin`, refusing to overwrite if `--skip-existing`.
  # Writes description of plugin with release date to STDOUT on success (e.g., "logstash-filter-mutate@v1.2.3 2017-02-28\n")
  #
  # @param plugin [VersionedPlugin]
  # @return [Boolean]: returns `true` IFF docs exist on disc.
  def expand_plugin_doc(plugin)
    release_tag = plugin.tag
    release_date = plugin.release_date ? plugin.release_date.strftime("%Y-%m-%d") : "unreleased"
    changelog_url = plugin.changelog_url

    output_asciidoc = "#{logstash_docs_path}/docs/versioned-plugins/#{plugin.type}s/#{plugin.name}-#{release_tag}.asciidoc"
    if File.exist?(output_asciidoc) && skip_existing?
      $stderr.puts "[#{plugin.desc}]: skipping - file already exists\n"
      return true
    end

    $stderr.puts "#{plugin.desc}: fetching documentation\n"
    content = plugin.documentation

    if content.nil?
      $stderr.puts("#{plugin.desc}: doc not found\n")
      return false
    end

    content = extract_doc(content, plugin.canonical_name, release_tag, release_date, changelog_url)

    lazy_create_output_folder(output_asciidoc)
    File.write(output_asciidoc, content)
    puts "#{plugin.desc}: #{release_date}"
    true
  end

  def expand_package_doc(package)
    # TODO: expand package-specific doc
  end

  # Turns a plugin's raw asciidoc into its versioned form: substitutes the
  # %VERSION% / %RELEASE_DATE% / %CHANGELOG_URL% placeholders, prefixes ids and
  # cross references with `{version}-`, applies the `--repair` heuristics when
  # enabled, and finally fills in the stack version placeholders.
  #
  # @param doc [String] raw asciidoc of the plugin
  # @param plugin_full_name [String] e.g. "logstash-filter-mutate"
  # @param release_tag [String] substituted for %VERSION%
  # @param release_date [String] substituted for %RELEASE_DATE%
  # @param changelog_url [String] substituted for %CHANGELOG_URL%
  # @return [String] the transformed asciidoc
  def extract_doc(doc, plugin_full_name, release_tag, release_date, changelog_url)
    _, type, name = plugin_full_name.split("-", 3)
    # documenting what variables are used below this point
    # version: string, v-prefixed
    # date: string release date as YYYY-MM-DD
    # type: string e.g., from /\Alogstash-(?<type>input|output|codec|filter)-(?<name>.*)\z/
    # name: string e.g., from /\Alogstash-(?<type>input|output|codec|filter)-(?<name>.*)\z/
    # changelog_url: dynamically created from repository and version

    # Replace %VERSION%, etc
    content = doc
      .gsub("%VERSION%", release_tag)
      .gsub("%RELEASE_DATE%", release_date)
      .gsub("%CHANGELOG_URL%", changelog_url)
      .gsub(":include_path: ../../../../logstash/docs/include", ":include_path: ../include/6.x")

    # append the {version} attribute to the plugin title line
    content = content.sub(/^=== .+? [Pp]lugin$/) do |header|
      "#{header} {version}"
    end

    if repair?
      content = content.gsub(/^====== /, "===== ")
        .gsub(/^\[source\]$/, "[source,shell]")
        .gsub('[id="plugins-{type}-{plugin}', '[id="plugins-{type}s-{plugin}')
        .gsub(":include_path: ../../../logstash/docs/include", ":include_path: ../include/6.x")
        .gsub(/[\t\r ]+$/, "")

      # internal anchors of the Logstash reference become absolute {logstash-ref} links
      content = content
        .gsub("<<string,string>>", "{logstash-ref}/configuration-file-structure.html#string[string]")
        .gsub("<<array,array>>", "{logstash-ref}/configuration-file-structure.html#array[array]")
        .gsub("<<number,number>>", "{logstash-ref}/configuration-file-structure.html#number[number]")
        .gsub("<<boolean,boolean>>", "{logstash-ref}/configuration-file-structure.html#boolean[boolean]")
        .gsub("<<hash,hash>>", "{logstash-ref}/configuration-file-structure.html#hash[hash]")
        .gsub("<<password,password>>", "{logstash-ref}/configuration-file-structure.html#password[password]")
        .gsub("<<path,path>>", "{logstash-ref}/configuration-file-structure.html#path[path]")
        .gsub("<<uri,uri>>", "{logstash-ref}/configuration-file-structure.html#uri[uri]")
        .gsub("<<bytes,bytes>>", "{logstash-ref}/configuration-file-structure.html#bytes[bytes]")
        .gsub("<<event-api,Event API>>", "{logstash-ref}/event-api.html[Event API]")
        .gsub("<<dead-letter-queues>>", '{logstash-ref}/dead-letter-queues.html[dead-letter-queues]')
        .gsub("<<logstash-config-field-references>>", "{logstash-ref}/event-dependent-configuration.html#logstash-config-field-references[Field References]")
    end

    # order matters: each substitution works on the output of the previous one
    content = content.gsub('[id="plugins-', '[id="{version}-plugins-')
      .gsub("<<plugins-{type}s-common-options>>", "<<{version}-plugins-{type}s-{plugin}-common-options>>")
      .gsub("<<plugins-{type}-{plugin}", "<<plugins-{type}s-{plugin}")
      .gsub("<<plugins-{type}s-{plugin}", "<<{version}-plugins-{type}s-{plugin}")
      .gsub("<<plugins-#{type}s-#{name}", "<<{version}-plugins-#{type}s-#{name}")
      .gsub("[[dlq-policy]]", '[id="{version}-dlq-policy"]')
      .gsub("<<dlq-policy>>", '<<{version}-dlq-policy>>')
      .gsub("[Kafka Input Plugin @9.1.0](https://github.com/logstash-plugins/logstash-input-rabbitmq/blob/v9.1.0/CHANGELOG.md)", "[Kafka Input Plugin @9.1.0](https://github.com/logstash-plugins/logstash-input-kafka/blob/v9.1.0/CHANGELOG.md)")
      .gsub("[Kafka Output Plugin @8.1.0](https://github.com/logstash-plugins/logstash-output-rabbitmq/blob/v8.1.0/CHANGELOG.md)", "[Kafka Output Plugin @8.1.0](https://github.com/logstash-plugins/logstash-output-kafka/blob/v8.1.0/CHANGELOG.md)")

    if repair?
      # any `<<plugins-...>>` reference left at this point was not version-prefixed above
      content.gsub!(/<<plugins-.+?>>/) do |link|
        match = link.match(/<<plugins-(?<link_type>\w+)-(?<link_name>\w+)(?:,(?<link_text>.+?))?>>/)
        if match.nil?
          link
        else
          if match[:link_type] == "#{type}s" && match[:link_name] == name
            # do nothing. it's an internal link
            link
          else
            # it's an external link. let's convert it
            if match[:link_text].nil?
              "{logstash-ref}/plugins-#{match[:link_type]}-#{match[:link_name]}.html[#{match[:link_name]} #{match[:link_type][0...-1]} plugin]"
            else
              "{logstash-ref}/plugins-#{match[:link_type]}-#{match[:link_name]}.html[#{match[:link_text]}]"
            end
          end
        end
      end

      # non-codec docs get a common-options anchor in front of the include when they lack one
      match = content.match(/\[id="{version}-plugins-{type}s-{plugin}-common-options"\]/)
      if match.nil? && type != "codec"
        content = content.sub("\ninclude::{include_path}/{type}.asciidoc[]",
                              "[id=\"{version}-plugins-{type}s-{plugin}-common-options\"]\ninclude::{include_path}/{type}.asciidoc[]")
      end

      # codec docs get every mention of the common options (and the include itself) removed
      if type == "codec"
        content = content.sub("This plugin supports the following configuration options plus the <<{version}-plugins-{type}s-{plugin}-common-options>> described later.\n", "")
        content = content.sub("Also see <<{version}-plugins-{type}s-{plugin}-common-options>> for a list of options supported by all\ncodec plugins.\n", "")
        content = content.sub("\n[id=\"{version}-plugins-{type}s-{plugin}-common-options\"]\ninclude::{include_path}/{type}.asciidoc[]", "")
        content = content.sub("\ninclude::{include_path}/{type}.asciidoc[]", "")
      end
    end

    write_stack_versions(content, type)
  end

  # @return [Boolean] whether the versions index file of the plugin exists in the docs checkout
  def versions_index_exists?(name, type)
    File.exist?("#{logstash_docs_path}/docs/versioned-plugins/#{type}s/#{name}-index.asciidoc")
  end

  # Renders the plugin-index template for one plugin.
  #
  # @param name [String] plugin name
  # @param type [String] plugin type
  # @param versions [Array<Array(String, String)>] [tag, release date] pairs
  def write_versions_index(name, type, versions)
    output_asciidoc = "#{logstash_docs_path}/docs/versioned-plugins/#{type}s/#{name}-index.asciidoc"
    lazy_create_output_folder(output_asciidoc)
    template = ERB.new(IO.read("logstash/templates/docs/versioned-plugins/plugin-index.asciidoc.erb"))
    content = template.result_with_hash(name: name, type: type, versions: versions)
    File.write(output_asciidoc, content)
  end

  # Renders the type template listing `plugins` for the given plugin type.
  def write_type_index(type, plugins)
    template = ERB.new(IO.read("logstash/templates/docs/versioned-plugins/type.asciidoc.erb"))
    output_asciidoc = "#{logstash_docs_path}/docs/versioned-plugins/#{type}s-index.asciidoc"
    lazy_create_output_folder(output_asciidoc)
    content = template.result_with_hash(type: type, plugins: plugins)
    File.write(output_asciidoc, content)
  end

  # Renders the alias-index template for `alias_name`, which points at `target`.
  def write_alias_index(type, alias_name, target)
    template = ERB.new(IO.read("logstash/templates/docs/versioned-plugins/alias-index.asciidoc.erb"))
    output_asciidoc = "#{logstash_docs_path}/docs/versioned-plugins/#{type}s/#{alias_name}-index.asciidoc"
    lazy_create_output_folder(output_asciidoc)
    content = template.result_with_hash(type: type, alias_name: alias_name, target: target)
    File.write(output_asciidoc, content)
  end

  # Creates the parent directory of `file_name` unless it already exists.
  def lazy_create_output_folder(file_name)
    directory = File.dirname(file_name)
    FileUtils.mkdir_p(directory) unless File.directory?(directory)
  end

  # Downloads the current stack versions document and memoizes the
  # major.minor stack and ECS versions in `@stack_version` / `@ecs_version`.
  def fetch_stack_versions
    current_stack_versions = resolve_current_versions
    @stack_version = get_stack_version(current_stack_versions)
    puts "Stack version: #{@stack_version}\n"
    @ecs_version = get_ecs_version(current_stack_versions)
    puts "ECS version: #{@ecs_version}\n"
  end

  # @param version [String] document name, without the ".asciidoc" extension
  # @return [String] body of the HTTP response for that stack versions document
  def fetch_stack_versions_doc(version)
    Net::HTTP.get(URI.parse("#{STACK_VERSIONS_BASE_URL}#{version}.asciidoc"))
  end

  # Follows the `include::` directive of the "current" document to the
  # document that holds the actual version attributes.
  #
  # @return [String] content of the included stack versions document
  # @raise [RuntimeError] when "current" holds no `include::<name>.asciidoc` directive
  def resolve_current_versions
    current_stack_versions_ref_doc = fetch_stack_versions_doc("current")
    current_doc_link = current_stack_versions_ref_doc[/include::(.*?)\.asciidoc/m, 1]
    if current_doc_link.nil?
      raise "Unable to resolve the current stack versions document: no include directive found."
    end

    fetch_stack_versions_doc(current_doc_link)
  end

  # @return [String] major.minor taken from the `:version:` attribute
  def get_stack_version(stack_versions)
    get_major_and_minor_versions(stack_versions[/\:version:\s+(.*?)\n/, 1])
  end

  # @return [String] major.minor taken from the `:ecs_version:` attribute
  def get_ecs_version(stack_versions)
    get_major_and_minor_versions(stack_versions[/\:ecs_version:\s+(.*?)\n/, 1])
  end

  # In VPR documentation URLs, only major and minor versions are used.
  #
  # @param full_version [String, nil]
  # @return [String] "<major>.<minor>"
  # @raise [RuntimeError] when `full_version` is nil
  def get_major_and_minor_versions(full_version)
    raise "Stack version cannot be null." if full_version.nil?

    segments = Gem::Version.new(full_version).segments
    "#{segments[0]}.#{segments[1]}"
  end

  # Fills in the %BRANCH% and %ECS_VERSION% placeholders with the stack and
  # ECS versions fetched by `fetch_stack_versions`.
  #
  # BRANCH and ECS_VERSION are newly added, and will be available once every
  # plugin index doc is re-indexed. Until then, placeholders missing from the
  # document are backfilled right after its `:type:` entry; a document without
  # that entry is left without the attributes.
  #
  # @param content [String] asciidoc content (not mutated)
  # @param type [String] plugin type, used to locate the `:type:` entry
  # @return [String] content with the placeholders substituted
  def write_stack_versions(content, type)
    backfill = [":branch: %BRANCH%\n", ":ecs_version: %ECS_VERSION%\n"]
               .reject { |entry| content.include?(entry.chomp) }
               .join

    unless backfill.empty?
      # String pattern => literal match of the first occurrence; the block form
      # keeps the replacement free of backslash interpretation.
      content = content.sub(":type: #{type}\n") { |type_entry| "#{type_entry}#{backfill}" }
    end

    content
      .gsub("%BRANCH%", @stack_version)
      .gsub("%ECS_VERSION%", @ecs_version)
  end
end

if __FILE__ == $0
  VersionedPluginDocs.run
end