Parser.prototype.extractHeaderInfo = function()

in app/pypi-recent/lib/parser.js [73:187]
67 lines of code
21 McCabe index (conditional complexity)

/**
 * Inspect a candidate header line and try to extract a release version and,
 * when one can be found, a date from it.
 *
 * @param {string[]} context - `context[0]` is the line to analyse;
 *   `context[1]`, when present, is the line directly below it and is only
 *   used to detect a `====` / `----` style underline.
 * @returns {?{version: *, date: *}} The first entry returned by
 *   `findVersions` that does not look like a date, paired with the result of
 *   `this.dateSearch`. Returns `null` when no usable version is found, or
 *   when the line has neither a positive date nor heading markup.
 */
Parser.prototype.extractHeaderInfo = function(context) {
  const linkPattern = /\[([^\]]+)\]\(([^)]+)\)/;

  // Replace every markdown link `[text](href)` with just `text`: hrefs often
  // embed version numbers from tags or branch names, which confuses the
  // version search. Repeats until no link is left; each pass shortens the
  // string, so the loop terminates.
  let target = context[0];
  let link = target.match(linkPattern);
  while (link) {
    target = target.slice(0, link.index) + link[1] + target.slice(link.index + link[0].length);
    link = target.match(linkPattern);
  }

  // Markup weight: 3/2/1 for `###`/`##`/`#` prefixes, 1 for an underlined
  // line, 0 for anything else.
  let markupWeight = 0;
  if (target.startsWith('###')) {
    markupWeight = 3;
  } else if (target.startsWith('##')) {
    markupWeight = 2;
  } else if (target.startsWith('#')) {
    markupWeight = 1;
  } else if (context[1]) {
    // The line below counts as an underline when its first run of `=` or `-`
    // covers more than 80% of that line.
    const runs = context[1].match(/[=-]+/g);
    if (runs && runs[0].length > context[1].length * 0.8) {
      markupWeight = 1;
    }
  }

  // Find a version number in the line.
  const versions = findVersions(target, { loose: true });
  if (!Array.isArray(versions)) {
    return null;
  }

  // Dates such as YYYY.MM.DD or DD.MM.YYYY get picked up as versions; an
  // oversized major or patch component is a good signal for that, so skip
  // those candidates and keep the first plausible one.
  let version;
  for (const candidate of versions) {
    const parsed = parseSemVer(candidate);
    // NOTE(review): guard in case parseSemVer yields null/undefined for an
    // unparsable candidate — confirm against its implementation.
    if (parsed && parsed.major < 1900 && parsed.patch < 1900) {
      version = candidate;
      break;
    }
  }

  if (!version) {
    // Don't bother doing a CPU laden bruteforce search for a date if
    // there is no version number.
    return null;
  }

  // Strip everything version-like from the string handed to the date search
  // so that versions are not misread as dates. `match` returns null when
  // nothing matches, hence the fallback to an empty list.
  let dateTarget = target;
  const looseVersions = dateTarget.match(versionNumber) || [];

  // Full semantic versions first ...
  for (const found of versions) {
    dateTarget = dateTarget.replace(found, '');
  }

  // ... then short forms like 1.2, which the loose parser coerces to 1.2.0
  // and which therefore do not appear verbatim in `versions`.
  for (const found of looseVersions) {
    dateTarget = dateTarget.replace(found, '');
  }

  // Normalise the remainder: collapse runs of spaces, drop brackets and
  // colons, and turn `-` / `.` separators into `/`.
  dateTarget = dateTarget.replace(/ +(?= )/g, '');
  dateTarget = dateTarget.replace(/[():]/g, '');
  dateTarget = dateTarget.replace(/[-.]/g, '/');
  // The date parser is a little too good, so remove the first occurrence of
  // each of these relative-date words.
  dateTarget = dateTarget.replace('now', '');
  dateTarget = dateTarget.replace('week', '');

  // Do a brute force search for a date.
  const date = this.dateSearch(dateTarget);

  // `version` is known to be truthy here; accept the line when it also has
  // either a positive date or heading markup.
  if (date > 0 || markupWeight > 0) {
    return {
      version: version,
      date: date
    };
  }

  return null;
};