Parser.prototype.removeHTMLComments = function()

in app/crawl/lib/parser.js [191:237]


/**
 * Strip HTML comments (`<!-- ... -->`) from a document while leaving
 * anything inside backtick-delimited code untouched.
 *
 * Rules implemented by the scanner:
 *  - A run of three backticks toggles a fenced code block; a single backtick
 *    toggles an inline code span. Whichever kind opened first stays in effect
 *    until the same kind of delimiter is seen again.
 *  - Outside of code, `<!--` opens a comment and `-->` closes one. Openers
 *    are counted, so nested comments need a matching number of closers.
 *  - A `-->` found outside code with no comment open is dropped from the
 *    output.
 *  - Nothing is emitted while a comment is open, including code delimiters.
 *    Code delimiters inside a comment still toggle the code state, so a
 *    `-->` that sits inside code within a comment does not close it.
 *
 * @param {string} doc - The raw document text.
 * @returns {string} The document with HTML comments removed.
 */
Parser.prototype.removeHTMLComments = function(doc) {
  var docLength = doc.length;
  var strippedContent = '';
  var commentDepth = 0;
  // false when outside code, otherwise the delimiter that opened the code:
  // '`' for an inline span or '```' for a fenced block.
  var inCodeBlock = false;
  var i = 0;

  // Every branch that consumes a multi-character token advances the index
  // itself and restarts the scan at the following character. This keeps the
  // index from running past the end of the document and ensures the
  // character after a token is examined like any other.
  while (i < docLength) {
    if (doc.slice(i, i + 3) === '```') {
      if (inCodeBlock === '```') {
        inCodeBlock = false;
      } else if (inCodeBlock === false) {
        inCodeBlock = '```';
      }

      // A fence that sits inside a comment is part of the comment.
      if (commentDepth === 0) {
        strippedContent += '```';
      }
      i += 3;
      continue;
    }

    if (doc[i] === '`') {
      if (inCodeBlock === '`') {
        inCodeBlock = false;
      } else if (inCodeBlock === false) {
        inCodeBlock = '`';
      }
      // Fall through: the backtick itself is emitted below.
    } else if (!inCodeBlock) {
      if (doc.slice(i, i + 4) === '<!--') {
        commentDepth++;
        i += 4;
        continue;
      }

      if (doc.slice(i, i + 3) === '-->') {
        if (commentDepth > 0) {
          commentDepth--;
        }
        i += 3;
        continue;
      }
    }

    // Not inside an HTML comment
    if (commentDepth === 0) {
      strippedContent += doc[i];
    }
    i++;
  }

  return strippedContent;
};