tools/collate_minutes.rb

#!/usr/bin/env ruby

#
# Collate the ASF board minutes into one HTML page per report title, plus an
# index page, under SITE_MINUTES.
#
# Usage: collate_minutes.rb [flags] [YYYY_MM_DD-glob] [time-diff-seconds]
#
#   --keep           keep obsolete output files instead of deleting them
#   --force          rerun (and rewrite every page) regardless of timestamps
#   --nostamp        use a fixed timestamp in the pages (for debug compares)
#   --nowarn_layout  omit the layout-change warning (for debug compares)
#   --dump_agenda    print the collected agenda to stdout
#   --dump_pending   print the per-meeting pending reports to stdout
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy.
# Runs of spaces inside string and regexp literals are reproduced exactly as
# found there; compare against version control before relying on them.
#
# N.B. do not add "frozen_string_literal: true": many literals below are
# modified in place with sub!/gsub!.
#

$LOAD_PATH.unshift '/srv/whimsy/lib'

require 'whimsy/asf'
require 'builder'
require 'ostruct'
require 'nokogiri'
require 'net/https'
require 'fileutils'
require 'date' # Date.today / Date.parse are used below
require 'json' # JSON.parse is used below

require 'wunderbar'
Wunderbar.log_level = 'info' unless Wunderbar.logger.info? # try not to override CLI flags

# Add datestamp to log messages (progname is not needed as each prog has its own logfile)
Wunderbar.logger.formatter = proc { |severity, datetime, progname, msg|
  "_#{severity} #{datetime} #{msg}\n"
}

# for monitoring purposes
at_exit do
  if $! and not $!.instance_of? SystemExit
    msg = "#{$!.backtrace.first} #{$!.message}" rescue $!
    puts "\n*** Exception #{$!.class} : #{msg} ***"
  end
  Wunderbar.info "Finished #{__FILE__}"
end

Wunderbar.info "Starting #{__FILE__}"

# destination directory
SITE_MINUTES = ASF::Config.get(:board_minutes) ||
  File.expand_path(File.join('..', '..', 'www', 'board', 'minutes'), __FILE__)

# list of SVN resources needed
SVN_SITE_RECORDS_MINUTES = ASF::SVN['minutes']
BOARD = ASF::SVN['foundation_board']

KEEP = ARGV.delete '--keep' # keep obsolete files?
force = ARGV.delete '--force' # rerun regardless
NOSTAMP = ARGV.delete '--nostamp' # don't add dynamic timestamp to pages (for debug compares)
NOWARN_LAYOUT = ARGV.delete '--nowarn_layout' # don't add layout change warning to pages (for debug compares)
DUMP_AGENDA = ARGV.delete '--dump_agenda' # output agenda details to stdout
DUMP_PENDING = ARGV.delete '--dump_pending' # output pending report details to stdout

STAMP = (NOSTAMP ? Time.new(1970) : Time.now).strftime '%Y-%m-%d %H:%M'

YYYYMMDD = ARGV.shift || '20*' # Allow override of minutes to process
TIME_DIFF = (ARGV.shift || '300').to_i # Allow override of seconds of time diff (WHIMSY-204) for testing

MINUTES_NAME = "board_minutes_#{YYYYMMDD}.txt"
MINUTES_PATH = File.join(SVN_SITE_RECORDS_MINUTES, '*', MINUTES_NAME)

Wunderbar.info "Processing minutes matching #{MINUTES_NAME}"

INDEX_FILE = "#{SITE_MINUTES}/index.html"

# quick exit if everything is up to date
if File.exist? INDEX_FILE
  # newest of: all input minutes, this script, the ASF library
  input = Dir[MINUTES_PATH, "#{BOARD}/board_minutes_20*.txt"].
    map {|name| File.stat(name).mtime}.
    push(File.stat(__FILE__).mtime, ASF.library_mtime).
    max
  indexmtime = File.stat(INDEX_FILE).mtime
  diff = indexmtime - input
  Wunderbar.info "Most recent update: #{input}"
  Wunderbar.info "Index file update: #{indexmtime} Diff: #{diff}"
  # WHIMSY-204: allow for update window
  # TODO: consider storing actual update check time
  if diff >= TIME_DIFF
    Wunderbar.info "All up to date! (#{TIME_DIFF})"
    unless force
      # Add stamp to index page
      page = File.read(INDEX_FILE)
      # The pattern must agree with the "Last ... run:" text emitted by layout
      # for the main index; both the long and the short prefix are accepted so
      # that index pages written with either wording are refreshed.
      File.write(INDEX_FILE,
        page.sub(/(Last (?:collate_minutes\.rb )?run: )\d{4}-\d\d-\d\d \d\d:\d\d(\. The data is extracted from a list of)/,
          "\\1#{STAMP}\\2"))
      exit
    end
  end
end

Wunderbar.info 'Processing input files'

# mapping of committee names to canonical names (generally from ldap)
# unknown names map to themselves
canonical = Hash.new {|hash, name| name}

# extract podling information
site = {}
ASF::Podling.list.each do |podling|
  if podling.display_name.downcase != podling.name
    canonical[podling.display_name.downcase] = podling.name
  end

  # skip podlings that graduated more than 90 days ago
  if podling.status == 'graduated' and podling.enddate
    next if Date.today - podling.enddate > 90
  end

  site[podling.name] = {
    name: podling.display_name,
    status: podling.status,
    link: "http://incubator.apache.org/projects/#{podling.name}.html",
    text: podling.description
  }
end

# get site information
DATAURI = 'https://whimsy.apache.org/public/committee-info.json'
local_copy = File.expand_path('../../www/public/committee-info.json', __FILE__)
if File.exist?(local_copy) && (Time.now - File.stat(local_copy).mtime < 3600)
  # local copy is less than an hour old
  Wunderbar.info "Using #{local_copy}"
  cinfo = JSON.parse(File.read(local_copy))
else
  Wunderbar.info 'Fetching remote copy of committee-info.json'
  response = Net::HTTP.get_response(URI(DATAURI))
  response.value() # Raises error if not OK
  cinfo = JSON.parse(response.body)
end
cinfo['committees'].each do |id,v|
  if v['display_name'].downcase != id
    canonical[v['display_name'].downcase] = id
  end
  site[id] = {:name => v['display_name'], :link => v['site'], :text => v['description']}
end

# parse the calendar for layout info (note: hack for &raquo and &nbsp;)
CALENDAR = URI.parse 'https://www.apache.org/foundation/board/calendar.html'
http = Net::HTTP.new(CALENDAR.host, CALENDAR.port)
http.use_ssl = true
# NOTE(review): certificate verification is disabled here; confirm that this
# is still required, as it allows the template page to be spoofed.
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
get = Net::HTTP::Get.new CALENDAR.request_uri
# Numeric character references are used as replacements so that the
# substitution stays pure ASCII whatever the encoding of the response body.
# NOTE(review): the recovered copy had the entities already decoded (the second
# gsub had become a no-op); confirm these literals against version control.
$calendar = Nokogiri::HTML(http.request(get).body.gsub('&raquo', '&#187;').gsub('&nbsp;', '&#160;'))

# Link to headerlink css
link = Nokogiri::XML::Node.new 'link', $calendar
link.set_attribute('rel', 'stylesheet')
link.set_attribute('href', 'https://www.apache.org/css/headerlink.css')
$calendar.at('head').add_child(link)

# add some style
style = Nokogiri::XML::Node.new 'style', $calendar
style.content = %{
  table {
    border: 1px solid #ccc;
    margin-bottom: 10px;
    width: 100%;
    border-collapse: collapse;
    border-spacing: 0;
  }
  tbody th, tbody td {
    border-bottom: 1px solid #ccc;
    border-top: 1px solid #ccc;
    padding: 0.2em 1em;
  }
  pre.report {
    color: black;
    font-family: Consolas,monospace
  }
}
$calendar.at('head').add_child(style)

# Make links absolute
%w(a img link script).each do |name|
  $calendar.search(name).each do |element|
    element['href'] = (CALENDAR + element['href'].strip).to_s if element['href']
    element['src'] = (CALENDAR + element['src'].strip).to_s if element['src']
  end
end

# handle project name changes
# see also www/board/minutes/.htaccess
# also see parse (Executive) Officer Reports ca. line 670
#
# Modifies +title+ in place (the return value is not meaningful).
# The substitutions are applied in order, and later ones can act on the
# result of earlier ones, so the order must be preserved.
def name_changes(title)
  title.sub! 'Ace', 'ACE' # WHIMSY-31
  title.sub! 'ADF Faces', 'MyFaces' # via Trinidad
  title.sub! 'Amber', 'Oltu'
  title.sub! 'Apache/TCL', 'Tcl'
  title.sub! 'Argus', 'Ranger'
  title.sub! 'ASF Rep. for W3C', 'W3C Relations'
  title.sub! 'Bean Validation', 'BVal'
  title.sub! 'BeanValidation', 'BVal'
  title.sub! 'Bluesky', 'BlueSky'
  title.sub! 'BRPC', 'brpc'
  title.sub! 'Callback', 'Cordova'
  title.sub! 'Conferences', 'Conference Planning'
  title.sub! 'Cxx Standard Library', 'C++ Standard Library'
  title.sub! 'Deft', 'AWF'
  title.sub! 'DLab', 'DataLab'
  title.sub! 'Distributed Release Audit Tool (DRAT)', 'DRAT'
  title.sub! 'Dolphin Scheduler', 'DolphinScheduler' # board_minutes_2019_11_20.txt
  title.sub! 'Easyant', 'EasyAnt'
  title.sub! 'Empire-DB', 'Empire-db'
  title.sub! 'Fleece', 'Johnzon'
  title.sub! 'Geroniomo', 'Geronimo'
  title.sub! 'iBatis', 'iBATIS'
  title.sub! 'infrastructure', 'Infrastructure'
  title.sub! 'ISIS', 'Causeway'
  title.sub! 'Isis', 'Causeway'
  title.sub! 'IVY', 'Ivy'
  title.sub! 'JackRabbit', 'Jackrabbit'
  title.sub! 'James', 'JAMES'
  title.sub! 'Java Community Process', 'JCP'
  title.sub! 'JSecurity', 'Shiro'
  title.sub! 'Juice', 'JuiCE'
  title.sub! 'log4php', 'Log4php'
  title.sub! 'Lucene.NET', 'Lucene.Net'
  title.sub! 'lucene4c', 'Lucene4c'
  title.sub! 'MesaTEE', 'Teaclave'
  title.sub! 'Ode', 'ODE'
  title.sub! 'ODFToolkit', 'ODF Toolkit'
  title.sub! 'Open for Business', 'OFBiz'
  title.sub! 'TomEE (OpenEJB)', 'TomEE'
  title.sub! 'OpenEJB', 'TomEE'
  title.sub! 'Openmeetings', 'OpenMeetings'
  title.sub! 'OpenOffice.org', 'OpenOffice'
  title.sub! 'Optiq', 'Calcite'
  title.sub! 'Orc', 'ORC'
  title.sub! 'Oscar', 'Felix'
  title.sub! 'PonyMail', 'Pony Mail'
  title.sub! 'PRC', 'Public Relations'
  title.sub! 'Public Relations Commitee', 'Public Relations'
  title.sub! 'Quarks', 'Edgent'
  title.sub! 'SensSoft', 'Flagon'
  title.sub! 'Servicecomb', 'ServiceComb'
  title.sub! 'Singa', 'SINGA'
  title.sub! 'Socialsite', 'SocialSite'
  title.sub! 'stdcxx', 'C++ Standard Library'
  title.sub! 'STDCXX', 'C++ Standard Library'
  title.sub! 'Steve', 'STeVe'
  title.sub! 'Stratosphere', 'Flink'
  title.sub! 'SystemML', 'SystemDS'
  title.sub! 'TCL', 'Tcl'
  title.sub! 'TubeMQ', 'InLong'
  title.sub! 'Web services', 'Web Services'
  title.sub! 'Zest', 'Polygene'
  title.sub! "Infrastructure (President's)", 'Infrastructure'
  title.sub! %r{\bKi\b}, 'Shiro'
  title.sub! %r{^HTTPD?$}, 'HTTP Server'
  title.sub! %r{^Infrastructure .*}, 'Infrastructure'
  title.sub! %r{^Labs .*}, 'Labs'
  title.sub! %r{^Logging$}, 'Logging Services'
  title.sub! %r{APR$}, 'Portable Runtime (APR)'
  title.sub! %r{CeltiX[Ff]ire}, 'CXF'
  title.sub! %r{Fund[- ][rR]aising}, 'Fundraising'
  title.sub! %r{Perl-Apache( PMC)?}, 'Perl'
  title.sub! %r{Portable Runtime$}, 'Portable Runtime (APR)'
  title.sub! %r{Public Relations Committee}, 'Public Relations'
  title.sub! %r{Security$}, 'Security Team'
end

# title => list of reports (OpenStruct), accumulated over all meetings
agenda = {}

posted = Dir[MINUTES_PATH].sort # approved minutes
unapproved = Dir[File.join(BOARD, MINUTES_NAME)].sort # not yet approved

FileUtils.mkdir_p SITE_MINUTES

seen={} # meeting date => file it was read from
(posted+unapproved).each do |txt|
  date = $1 if txt =~ /(\d\d\d\d_\d\d_\d\d)/
  next unless date
  if seen.has_key? date
    Wunderbar.warn "Already processed #{seen[date]}; skipping #{txt}"
    next
  end
  Wunderbar.info "Parsing input for #{date}"
  seen[date] = txt

  minutes = File.read(txt)
  pending = {} # reports found in this set of minutes

  # parse Attachments (includes both Officer Reports and Committee Reports)
  minutes.scan(/
    -{41}\n                          # separator
    Attachment\s\s?(\w+):[ ](.+?)\n  # Attachment, Title
    (.)(.*?)\n                       # separator, report
    (?=[-_]{41,}\n(?:End|Attach))    # separator
  /mx).each do |attach,title,cont,text|

    # We need to keep the start of the second line.
    # Otherwise leading spaces in the report body look like a continuation line
    if cont == ' ' # continuation line was not empty; check if it's a continuation
      # join multiline titles
      while text.start_with? ' '
        append, text = text.split("\n", 2)
        title += ' ' + append.strip
      end
    end

    owners = nil
    if title =~ /^Report from the(?: VP of)? (.+)/i
      title = $1
      if title =~ /^(.+?) +\[([^\]]+)\]/
        title = $1
        owners = $2
      end
    end

    # reduce the title to the bare committee or officer name
    title.sub! /Special /, ''
    title.sub! /Requested /, ''
    title.sub! /(^| )Report To The Board( On)?( |$)/i, ''
    title.sub! /^Board Report for /, ''
    title.sub! /^Status [Rr]eport for (the )?/, ''
    title.sub! /^Report from the /i, ''
    title.sub! /^Status report for the /i, ''
    title.sub! /^Apache /, ''
    title.sub! /^\/ /, ''
    title.sub! /\s+\[.*\]\s*$/, ''
    title.sub! /\sTeam$/, ''
    title.sub! /\s[Cc]ommittee?\s*$/, ''
    title.sub! /\s[Pp]roject\s*$/, ''
    title.sub! /\sPMC$/, ''
    title.sub! 'Apache Software Foundation', 'ASF'

    name_changes(title)

    next if title.strip.empty?
    next if text.strip.empty? and title =~ /Intentionally (left )?Blank/i
    next if text.strip.empty? and title =~ /There is No/i

    report = pending[attach] ||= OpenStruct.new
    report.meeting = date
    report.attach = attach
    report.owners ||= owners if owners
    report.title = title.strip #.downcase
    report.text = text

    # file some attachments under a common title
    if title =~ /budget|spending/i
      report.subtitle = title
      report.title = 'Budget'
      report.attach = '@' + attach
    elsif title =~ /Contributor License Agreement/
      report.subtitle = title
      report.title = 'Legal Affairs'
      report.attach = '1' + attach
    elsif title =~ /P(rofit-and-|&)L(oss)? Report/
      report.subtitle = title
      report.title = 'Treasurer'
      report.attach = '1' + attach
    elsif title =~ /alleged JBoss IP infringement/
      report.subtitle = title
      report.title = 'Alleged JBoss IP Infringement'
      report.attach = '@' + attach
    elsif title =~ /Written Consent of the Directors/
      report.attach = '@' + attach
    end

    # split the Incubator report into individual podling reports
    if title == 'Incubator' and text
      sections = text.split(/\nStatus [rR]eport (.*)\n=+\n/)
      # Some early 2012 minutes have a 'Detailed Reports' header before the first podling report
      # i.e. the podling reports follow the line
      # '-------------------- Detailed Reports --------------------'
      # instead of the following
      # '--------------------'
      # Some reports include trailing spaces after the ----
      # podling header may now be prefixed with ## (since June 2019)
      # Also there may be a blank line before the ##
      sections = text.split(/\n[-=][-=]+(?: Detailed Reports ---+)?\s*\n(?:\n?##)?\s*([a-zA-Z].*)\n\n/) if sections.length < 9
      sections = [''] if sections.include? 'FAILED TO REPORT'
      sections = text.split(/\n(\w+)\n-+\n\n/) if sections.length < 9
      sections = text.split(/\n=+\s+([\w.]+)\s+=+\n+/) if sections.length < 9

      prev = nil # last podling report created (for Mentors/Committers sections)
      if sections.length > 1
        # text before the first podling header is the Incubator report proper
        report.text = sections.shift
        sections.each_slice(2) do |title, text|
          title.sub! /^regarding /, ''
          title.sub! /^for /, ''
          title.sub! /^from /, ''
          title.sub! /^the /, ''
          title.sub! /\sPPMC$/, ''

          # some headers are really the first line of the text
          if title =~ /Apache (.*) is a/
            text = title + "\n" + text
            title = $1
          end

          if title =~ /(.*) has been incubating/
            text = title + "\n" + text
            title = $1
          end

          if title =~ /(.*) -- (DID NOT REPORT)/
            text = $2 + "\n" + text
            title = $1
          end

          if title =~ /(.*?) - (.*)/
            text = $2 + "\n" + text
            title = $1
          end

          # literal parentheses: "<name> sponsored incubation (<detail>)"
          if title =~ /(.*? sponsored) incubation \((.*)\)/
            text = $2 + "\n" + text
            title = $1
          end

          next if title == 'April 2011 podling reports'

          name_changes(title)
          title.sub! /\s+\(.*\)$/, '' # drop trailing parenthesised remark
          title.sub! /^Apache(: Project)?/, ''

          # these are sub-sections of the previous podling report
          if %w(Mentors Committers).include? title
            prev.text += "\n== #{title}==\n\n#{text}" if prev
            next
          end

          report = OpenStruct.new
          report.meeting = date
          report.attach = '.' + title
          report.title = title.strip
          report.text = text
          pending[report.attach] = report
          prev = report
        end
      end
    end
  end

  # parse Officer and Committee Reports for owners and comments
  minutes.scan(/
    \[([^\n]+)\]\n\n                 # owners
    \s{7}See\sAttachment\s\s?(\w+)   # attach
    (.*?)\n                          # comments
    \s\s\s\s?\w                      # separator
  /mx).each do |owners,attach,comments|
    report = pending[attach] ||= OpenStruct.new
    report.meeting = date
    report.attach = attach
    report.owners = owners
    cs = comments.strip
    report.comments = cs if cs.length > 0
  end

  # fill in comments from missing reports
  # TODO: temporarily omit Additional Officer processing as it generates some incorrect ownership
  ['Committee', '_Additional Officer_'].each do |section|
    reports = minutes[/^ \d\. #{section} Reports(\s*(\n| .*\n)+)/,1]
    next unless reports
    reports.split(/^ (\w+)\./)[1..-1].each_slice(2) do |attach, comments|
      next if attach.length > 2 # Why?
      next if comments.include? 'See Attachment' # handled above
      owners = comments[/\[([^\n]+)\]/,1]
      comments.sub!(/.*\s+\n/, '')
      next if comments.empty?

      # TODO: This does not work properly
      attach = ('A'..attach).count.to_s if section == 'Additional Officer'

      report = pending[attach] ||= OpenStruct.new
      report.meeting = date
      report.attach = attach
      report.owners = owners
      cs = comments.strip
      report.comments = cs if cs.length > 0
    end
  end

  # parse Action Items
  minutes.scan(/
    \n\s+(\w+)\.\s                   # attach
    Review\sOutstanding\s(Action\sItems)\n\n?
    (.*?)                            # text
    \n\s?\d                          # separator
  /mx).each do |attach, title, text|
    report = OpenStruct.new
    report.title ||= title #.downcase
    report.meeting = date
    report.attach = '+' + title
    text.gsub! /^\s?\d+\.\s.*\s*\Z/, ''
    # remove the indentation of the first line from every line
    report.text = text.gsub Regexp.new('^'+text.match(/^ */)[0]), '' if text
    pending[title] = report
  end

  # parse other agenda items
  establish='' # pick up misplaced PMC creates
  minutes.scan(/
    \n\s*(\w+)\.\s                   # attach
    (Discussion\sItems|Unfinished\sBusiness|New\sBusiness|Announcements)\n
    (.*?)                            # text
    (?=\n\s?\d)                      # separator
  /mx).each do |attach, title, text|
    next if text.strip.empty?
    next if text =~ /\A\s*none\.?\s*\z/i
    next if text =~ /\A\s*no unfinished business\.?\s*\z/i
    if text =~ /Establish the Apache \S+ Project/ # 2012_08_28
      establish += text
      next
    end

    if title !~ /Discussion/ or text !~ /\A\n*\s{3,5}[0-9A-Z]\.\s.*\n\n/
      # treat the section as a single item
      report = OpenStruct.new
      report.title ||= title #.downcase
      report.meeting = date
      report.attach = '+' + title
      report.text = text.strip
      pending[title] = report
    else
      # Discussion Items with lettered/numbered sub-items
      text.scan(/
        \s{3}[\s\d]([0-9A-Z])\.          # agenda item
        \s+(.*?)\n                       # title
        (.*?)                            # text
        (?=\n\s{3,5}\d?[0-9A-Z]\.\s|\z)  # next section
      /mx).each do |attach,title,text|

        # an over-long multi-line title is really title + start of text
        if title.include? "\n" and title.length > 120
          title = title.split("\n")
          text = title[1..-1].join("\n") + "\n" + text
          title = title[0]
        end
        title.sub! 'VP, Data Privacy', 'VP Data Privacy'
        title.sub! /Executive Session \(\d\d.*?\)/, 'Executive Session' # Drop times from titles

        report = OpenStruct.new
        report.title = title.gsub(/\s+/, ' ')
        report.meeting = date
        report.attach = '+' + title
        report.text = text.strip

        if title =~ /budget|spending/i
          report.subtitle = title
          report.title = 'Budget'
          report.attach = '@' + attach
        elsif title =~ /Legal Affairs/
          report.subtitle = title
          report.title = 'Legal Affairs'
          report.attach = '1' + attach
        elsif title =~ /date.+member.+meeting/i || title =~ /member.+meeting.+date/i
          report.subtitle = title
          report.title = 'Set Date for Members Meeting'
          report.attach = '@' + attach
        else
          pmcs = %w{Geronimo iBATIS Santuario}
          pmcs.each do |pmc|
            if title =~ /#{pmc}/i
              report.subtitle = title
              report.title = pmc
              report.attach = '.' + pmc
            end
          end
        end

        pending[title] = report
      end
    end
  end

  # parse Special Orders
  orders = establish + minutes.split(/^ \d\. Special Orders/,2).last.split(/^ \d\./,2).first

  # Some section ids have a leading digit, hence [\s\d]
  orders.scan(/
    \s{3}[\s\d]([A-Z])\.               # agenda item
    \s+(.*?)\n\s*\n                    # title
    (.*?)                              # text
    (?=\n\s{3,4}[\s\d][A-Z]\.\s|\z)    # next section
  /mx).each do |attach,title,text|
    next if title.count("\n")>1
    report = OpenStruct.new
    title.sub! /(^|\n)\s*Resolution R\d:/, ''
    title.sub! 'Standardise the privacy policy for Foundation web sites', 'Standardise privacy policy for foundation websites'
    title.sub!(/^(?:Proposed )?Resolution (\[R\d\]|to|for) ./) {|c| c[-1..-1].upcase}
    title.sub! /\.$/, ''

    report.title ||= title.strip
    report.meeting = date
    report.attach = '@' + title
    report.text = text.strip

    # Columns:
    # Pfx Title Match
    # If Title is a number, then extract that part of the match
    # The first rule whose pattern matches the title wins
    rules = [
      :X, 2, /Terminat(e|ion of) the (.+?) (Project|PMC|Committee)/,
      :X, 1, /Separate (.+?) from the Apache Software Foundation/,

      :E, 1, /Establishing a PMC for a (.*) project/,
      :E, 1, /Establish (.+?) as a top level project/,
      :E, 1, /Establish (AsterixDB)/, # 2016_04_20
      :E, 4, /Estab?lish(ing|ment)? (of )?(the |an )?(.+?) (board )?(PMC|[pP]roject|[cC]ommittee)$/,
      :E, 2, /Creat(e|ion of) the (.+?) (Project|PMC)/,
      :E, 2, /To (re-establish|create) the (.+?) PMC/,
      :E, 2, /Reestablish(ing the)? (.+?)( Project| Committee | Team)/,
      :E, 1, /^Apache (.+?) Project$/,

      :C, 3, /(Change|Appoint).* Vice President of (the )?(.+)/,
      :C, 2, /(Appoint|Establish) a new (.+?) PMC Chair/,
      :C, 1, /New Vice President for the (.+?) PMC/,
      :C, 1, /Appoint.* as the (.*?) of the ASF/,
      :C, 1, /Appointment of (.*?) Committee Chair/,
      :C, 3, /Appoint(ing a)? new [cC]hair (for|of the) (.*?)( Project|$)/,
      :C, 1, /Alter the Chair of the (.+?) Project/,
      :C, 2, /[cC]hange (the )?[cC]hair of the (.+?) (Project|PMC)/,
      :C, 3, /[Cc]hang(e|ing) (to )?the (.+?) (Project |PMC )?Chair/,
      :C, 2, /Change (of|the) (.+?) (PMC |Project |Committee )Chair/,
      :C, 1, /Resolution to change the (.+?) Chair/,
      :C, 1, /PMC chair change for (.+)/,
      :C, 1, /Change PMC [Cc]hair for (.+?) Project/,
      :C, 3, /Appoint a (new )?(chair for |Vice President of )(.+)/,
      :C, 1, /Appoint .*? as (.+?) chairman/,
      :C, 1, /Change Chair for Apache (.+)/,

      :M, 1, /Reboot the (.+?) (PMC|Committee)/,
      :M, 1, /(.+?) election of new PMC/,
      :M, 2, /Update (membership of the )?(.+?) Committee/,
      :M, 1, /Change to the (.*)? Committee Membership/,
      :M, 1, /Change the Apache (.*) Project Name/,
      :M, 1, /Change the Apache (.*) Project Management Committee/,
      1, 1, /Update ?(audit.+?) Membership/i,
      :M, 1, /Update ?(.+?) Membership/,

      :R, 1, /Rename.* to the ?(.+?) Project/,

      '@', 1, /(.*) Renewal/,

      :C, 'Conference Planning', /Conferences? Committee/,

      '@', 'Budget', /Spending Resolution/i,
      '@', 'Budget', /Budget/i,
      '@', 'Bylaws', /Bylaw/i,
      '@', 'Chief Media Officer', /Chief Media Officer/i,

      1, 'JCP', /Java Community Process/,
      1, 'JCP', /JCP/,
      1, 'Public Relations', /Public Relations/i,
      1, 'Marketing and Publicity', /Press/i,
      1, 'Legal Affairs', /License/i,
      1, 'Legal Affairs', /Copyright/i,
      1, 'Legal Affairs', /contributor agreement/i,
      1, 'Legal Affairs', /CLA/,
      1, 'Legal Affairs', /[MG]PL/,
      1, 'Brand Management', /use.*feather/,
      1, 'Brand Management', /Trademark/,
      1, 'Brand Management', /use.*Apache name/,
      1, 'Brand Management', /Brand Management/i,
      1, 'Travel Assistance', /TAC/,
      1, 'Travel Assistance', /Travel Assistance/,
      1, 'Conference Planning', /Conference Planning/,
      1, 'Fundraising', /Fundraising/,
      1, 'Audit', /Audit/i,

      :C, 'Public Relations', /Appoint Brian Fitzpatrick as a Vice President/,

      '@', 'Appoint Executive Officers', /Appoint(ment of)? (new |ASF )?[oO]fficers/,
      '@', 'Appoint Executive Officers', /Election of Officers/,
      '@', 'Appoint Executive Officers', /Officer Appointments/i,
      '@', 'Set Date for Members Meeting', /date.* member'?s meeting/i,
      '@', 'PMC Membership Change Process', /Empower PMC chairs to change the membership/i,
      '@', 'PMC Membership Change Process', /Amend the Procedure for PMC Membership Changes/i,
      '@', 'Secretarial Assistant', /Approve contract with Jon Jagielski/,
      '@', 'Alleged JBoss IP Infringement', /alleged JBoss IP infringe?ment/,
      '@', 'Discussion Items', /^Discuss/
    ]

    rules.each_slice(3) do |prefix, select, pattern|
      match = pattern.match(report.title)
      if match
        report.subtitle = report.title
        if select.is_a? Integer
          report.title = match[select]
        else
          report.title = select
        end
        report.attach = "#{prefix}#{report.attach}"
        break
      end
    end

    report.title.sub! /^Apache /, ''
    name_changes(report.title)
    report.title.sub! 'standing Audit', 'Audit'
    report.title.sub! 'federated identity', 'Federated Identity'
    report.title.sub! 'WSIF', 'Web Services'

    pending[title] = report
  end

  # parse (Executive) Officer Reports
  execs = minutes[/Officer Reports(.*?)\n[[:blank:]]{1,3}\d+\./m,1]
  if execs
    execs.sub! /\s*Executive officer reports approved.*?\n*\Z/, ''
    # attachments start like this:
    att_prefix = '\n[[:blank:]]{1,5}([A-Z])\.[[:blank:]]'
    execs.scan(/
      #{att_prefix}([^\n]*?)\n         # attach, title
      (.*?)                            # text
      (?=#{att_prefix}|\Z)             # separator
    /mx).each do |attach, title, text|
      next unless text
      next unless title
      next if title.start_with? 'This interim budget shows a surplus'
      next if title.start_with? "President's discretionary fund returned to"
      title.sub! 'Executive VP', 'Executive Vice President'
      title.sub! 'Exec. V.P. and Secretary', 'Secretary'
      title.sub! 'Vice Chairman', 'Vice Chair'
      title.sub! 'Acting Chairman', 'Board Chair' # merge report(s) from acting chair
      title.sub! 'Chairman', 'Board Chair'

      report = OpenStruct.new
      # owners may follow the title in square brackets
      if title.include? ' ['
        report.owners = title.split(' [').last.sub(']','').strip
        title = title.split(' [').first
      end
      report.title ||= title.strip #.downcase
      report.title.gsub! /^V\.?P\.? of /, ''
      report.title.gsub! /\/Apache$/, ''
      report.title = 'Infrastructure' if report.title =~ /Infrastructure/
      report.title = 'Treasurer' if report.title =~ /Treasurer/
      report.meeting = date
      report.attach = '*' + title
      report.text = text.dup
      pending[title] = report
    end
  end

  if DUMP_PENDING
    puts 'Dump of pending data for ' + date
    pending.each do |k,v|
      puts "#{k} #{k == v.attach ? '==' : '!='} #{v.attach}"
      puts v.title
      puts "O: #{v.owners}" if v.owners
      puts "S: #{v.subtitle}" if v.subtitle
      p "C: #{v.comments}" if v.comments
      # entries created from owners/comments only have no text
      text = v.text.to_s
      puts "#{text.size} #{text.split("\n",2)[0]}"
      puts ''
    end
  end

  # Add to the running tally
  pending.each_value do |report|
    next if not report.title or report.title.empty?
    # flag unposted reports; exclude unposted special orders
    report.posted = posted.include? txt
    next if not report.posted and (report.attach =~ /^[A-Z]?@/ or report.attach !~ /^[A-Z.]/)
    agenda[report.title] ||= []
    agenda[report.title] << report
  end
end

if DUMP_AGENDA
  puts 'Dump of agenda data for this run'
  agenda.each do |title, reports|
    p [reports.length > 1 ? '>1' : '=1', reports.last.attach[0..1], reports.length, title]
  end
end

Wunderbar.info 'Starting to generate output'

# determine link for each report
link = {}
agenda.each do |title, reports|
  link[title] = title.sub('C++','Cxx').gsub(/\W/,'_') + '.html'
end

# Simplify creating content
# Yields a Builder to the block and returns the nodes of the resulting
# HTML body
def getHTMLbody()
  builder = Builder::XmlMarkup.new :indent => 2
  yield builder
  return Nokogiri::HTML(builder.target!).at('body').children
end

# Combine content produced here with the template fetched previously
# Yields a Builder to the block; the generated content is placed in the
# main section of the $calendar template (which is modified in place).
# title: report title for the page; nil for the main index
# Returns the complete page as HTML text
def layout(title = nil)
  builder = Builder::XmlMarkup.new :indent => 2
  yield builder
  content = Nokogiri::HTML(builder.target!)

  if title
    $calendar.at('title').content = "Board Meeting Minutes - #{title}"
    # $calendar.at('h2').content = "Board Meeting Minutes - #{title}"
  else
    $calendar.at('title').content = 'Board Meeting Minutes'
    # $calendar.at('h2').content = "Board Meeting Minutes"
  end

  # Adjust the page header

  # find the intro para; assume it is the first para with a strong tag
  # then back up to the main container class for the page content
  section = $calendar.at('.container p strong').parent.parent

  # Extract all the paragraphs
  paragraphs = section.search('p')

  # remove all the existing content
  section.children.each {|child| child.remove}

  # Add the replacement first para
  section.add_child getHTMLbody {|x|
    x.p do
      if title
        # must agree with the pattern in remove_date
        x.text! "This was extracted (@ #{STAMP}) from a list of"
      else
        # main index, which is always replaced if any input files have changed
        # text below must agree with code that updates the index when no changes have occurred
        x.text! "Last collate_minutes.rb run: #{STAMP}. The data is extracted from a list of"
      end
      x.a 'minutes', :href => 'http://www.apache.org/foundation/records/minutes/'
      x.text! 'which have been approved by the Board.'
      x.br
      x.strong 'Please Note'
      # squiggly heredoc causes problems for Eclipse plugin, but leading spaces don't matter here
      x.text! <<-EOT
        The Board typically approves the minutes of the previous meeting at the
        beginning of every Board meeting; therefore, the list below does not
        normally contain details from the minutes of the most recent Board meeting.
      EOT
      unless NOWARN_LAYOUT
        x.br
        x.br
        x.strong 'WARNING: these pages may omit some original contents of the minutes.'
        x.br
        # text! appends character data; plain "x.text" would create a <text> element
        x.text! 'This is due to changes in the layout of the source minutes over the years.'
        x.text! ' Fixes are being worked on.'
      end
    end
  }

  # and the second para which is assumed to be the list of years
  section.add_child paragraphs[1]

  section.add_child "\n" # separator to make it easier to read source

  # now add the content provided by the builder block
  content.at('body').children.each {|child| section.add_child child}

  $calendar.to_html
end

# drop (or just report, if --keep) pages that no longer correspond to a report
Dir.entries(SITE_MINUTES).each do |p|
  next unless p.end_with? '.html'
  next if p == 'index.html'
  unless link.has_value? p
    if KEEP
      Wunderbar.info "Outdated? #{p}"
    else
      Wunderbar.info "Dropping #{p}"
      File.delete(File.join(SITE_MINUTES,p))
    end
  end
end

# remove variable date from page
# so that pages which differ only in the extraction stamp compare equal.
# The pattern must agree with the text emitted by layout for report pages;
# the parentheses are literal.
def remove_date(page)
  # '%Y-%m-%d %H:%M'
  page.sub(/This was extracted \(@ \d\d\d\d-\d\d-\d\d \d\d:\d\d\) from a list of/, '')
end

# output each individual report by owner
agenda.sort.each do |title, reports|
  page = layout(title) do |x|
    info = site[canonical[title.downcase]]
    if info
      # site information found, link to it
      x.h1 do
        x.a info[:name], :href => info[:link], :title => info[:text]
      end
    else
      x.h1 title
    end

    # most recent meeting first
    reports.reverse.each do |report|
      _id = report.meeting.gsub('_', '-')
      x.h2 id: _id do
        if report.posted
          href = 'http://apache.org/foundation/records/minutes/' +
            "#{report.meeting[0...4]}/board_minutes_#{report.meeting}.txt"
        else
          href = ASF::SVN.svnpath!('foundation_board', "board_minutes_#{report.meeting}.txt")
        end

        x.a Date.parse(report.meeting.gsub('_','/')).strftime('%d %b %Y'),
          href: href, id: "minutes_#{report.meeting}"

        if report.owners
          x.span "[#{report.owners}]", :style => 'font-size: 14px'
        end
        # Add headerlink marker
        x.a '¶', href: "##{_id}", title: 'Permanent link', :class => 'headerlink'
      end

      x.h3 report.subtitle if report.subtitle

      if report.posted
        # expand leading tabs, drop trailing spaces, and reduce the common indent
        text = report.text.gsub(/^\t+/) {|tabs| ' ' * (8*tabs.length)}
        text.gsub!(/ *$/, '')
        indent = text.scan(/^([ ]+)/).flatten.min.to_s.length - 1
        text.gsub! /^#{' '*indent}/, '' if indent > 0
        # if the first line is not indented, give it the indent of the second
        text = $1 + text if text =~ /\A\w.*\n(\s+)/
        text = text.to_s.rstrip

        # N.B. The syntax "class: report" causes problems for the Eclipse Ruby plugin
        x.pre text, 'class' => 'report' unless text.strip.empty?

        if report.comments and report.comments.strip != ''
          report.comments.split(/\n\s*\n/).each do |p|
            x.p p, :style => 'width: 40em'
          end
        elsif text.strip.empty?
          if report.subtitle and not report.subtitle.empty?
            x.p {x.em 'Discussion Item with no text or minutes'}
          else
            x.p {x.em 'A report was expected, but not received'}
          end
        end
      elsif report.text.strip.empty?
        x.p {x.em 'A report was expected, but not received'}
      else
        x.p do
          x.em 'Report was filed, but display is awaiting the approval ' +
            'of the Board minutes.'
        end
      end
    end
  end

  # only rewrite the page if something other than the stamp has changed
  dest = File.join(SITE_MINUTES, link[title])
  if force or !File.exist?(dest) or (remove_date(File.read(dest)) != remove_date(page))
    Wunderbar.info "Writing #{link[title]}"
    File.write(dest, page)
  # else
  #   Wunderbar.info "Not updating #{link[title]}"
  end
end

# Classification scheme
# Pfx = reports.last.attach[0]
# Count = reports.length
#
# Pfx Count Section
# '*' >1    Executive Officer Reports
# 0-9 >1    Additional Officer Reports
# A-Z >1    Committee Reports
# '.' any   Podling Reports
# '@' >1    Repeating Special Orders
# '+' >1    Other Agenda Items
# !'.' =1   Other Attachments, Special Orders, and Discussions

# output index
agenda = agenda.sort_by {|title, reports| title.downcase}

page = layout do |x|
  x.h2 'Executive Officer Reports', :id => 'executive'
  x.ul do
    agenda.each do |title, reports|
      next unless reports.last.attach =~ /^\*/
      next if reports.length == 1
      x.li do
        x.a title, :href => link[title]
      end
    end
  end

  x.h2 'Additional Officer Reports', :id => 'officer'
  x.ul do
    agenda.each do |title, reports|
      next unless reports.last.attach =~ /^\d/
      next if reports.length == 1
      x.li do
        x.a title, :href => link[title]
      end
    end
  end

  x.h2 'Committee Reports', :id => 'committee'
  list = []
  agenda.each do |title, reports|
    next unless reports.last.attach =~ /^[A-Z]/
    next if reports.length == 1
    list << title
  end

  # lay the list out in columns (entries run down each column)
  cols = 6
  slice = (list.length+cols-1)/cols
  x.table do
    (0...slice).each do |i|
      x.tr do
        (0...cols).each do |j|
          x.td do
            title = list[i+j*slice]
            if title
              info = site[canonical[title.downcase]]
              if info
                x.a title, :href => link[title], :title => info[:text]
              else
                if cinfo['committees'][title]
                  x.em { x.a title, :href => link[title] }
                else
                  x.del { x.a title, :href => link[title] }
                end
              end
            end
          end
        end
      end
    end
  end

  x.h2 'Podling Reports', :id => 'podling'
  list = []
  agenda.each do |title, reports|
    next unless reports.last.attach =~ /^[.]/
    list << title
  end

  cols = 6
  slice = (list.length+cols-1)/cols
  x.table do
    (0...slice).each do |i|
      x.tr do
        (0...cols).each do |j|
          x.td do
            title = list[i+j*slice]
            if title
              info = site[canonical[title.downcase]]
              if info
                if %w{dormant retired}.include? info[:status]
                  x.del do
                    x.a title, :href => link[title], :title => info[:text]
                  end
                else
                  x.a title, :href => link[title], :title => info[:text]
                end
              else
                x.em { x.a title, :href => link[title] }
              end
            end
          end
        end
      end
    end
  end

  x.h2 'Repeating Special Orders', :id => 'orders'
  x.ul do
    agenda.each do |title, reports|
      next unless reports.last.attach =~ /^@/
      next if reports.length == 1
      x.li do
        x.a title, :href => link[title]
      end
    end
  end

  x.h2 'Other Attachments, Special Orders, and Discussions', :id => 'other'
  x.ul do
    other = {} # displayed name => title (which selects the link)
    agenda.each do |title, reports|
      next unless reports.length == 1
      next if reports.last.attach =~ /^[.]/
      other[reports.first.subtitle || title] = title
    end

    other.sort.each do |subtitle, title|
      x.li do
        x.a subtitle, :href => link[title]
      end
    end
  end

  x.h2 'Other Agenda Items', :id => 'agenda'
  x.ul do
    agenda.each do |title, reports|
      next unless reports.last.attach =~ /^\+/
      next if reports.length == 1
      x.li do
        x.a title, :href => link[title]
      end
    end
  end
end

File.write(INDEX_FILE, page)
Wunderbar.info "Wrote #{SITE_MINUTES}/index.html"

tools/collate_minutes.rb (876 lines of code) (raw):