List _cleanTokens()

in lib/src/license_detection/tokenizer.dart [100:144]


/// Builds the cleaned token sequence from the raw [tokens].
///
/// The returned list is a new list; each emitted [Token] receives a fresh
/// sequential ID starting at 0. While walking [tokens]:
///
/// * tokens matching [_newLineRegex] are dropped and mark the start of a
///   new line;
/// * tokens at the start of a line for which [_isListItem] is true are
///   dropped;
/// * every other token value is passed through [_cleanToken];
/// * when a cleaned value has an entry in [_remainingEquivalentWords] and
///   the entry's first element equals the cleaned value of the following
///   token, both tokens are consumed and the entry's second element is
///   emitted in their place.
List<Token> _cleanTokens(List<Token> tokens) {
  final cleaned = <Token>[];
  final count = tokens.length;
  var nextId = 0;
  var atLineStart = true;
  var cursor = 0;

  while (cursor < count) {
    // Read the current token and advance; from here on `cursor` points at
    // the token that follows `current`.
    final current = tokens[cursor++];

    // Ignore new line tokens for now.
    // If accuracy of detection is low apply
    // Guideline 2.1.4: Text that can be omitted from license.
    if (_newLineRegex.hasMatch(current.value)) {
      atLineStart = true;
      continue;
    }

    // Drop list items at the start of a line. `atLineStart` is left set, so
    // a list item directly following a dropped one is dropped as well.
    if (atLineStart && _isListItem(current.value)) {
      continue;
    }
    atLineStart = false;

    final word = _cleanToken(current.value);
    final equivalent = _remainingEquivalentWords[word];

    // Common case: no two-word phrase starts here, emit the word as is.
    if (equivalent == null ||
        cursor >= count ||
        equivalent[0] != _cleanToken(tokens[cursor].value)) {
      cleaned.add(Token(word, nextId++, current.span));
      continue;
    }

    // `word` plus the following token form a known phrase: consume the
    // follower so it is not visited again.
    final follower = tokens[cursor++];
    if (word == 'copyright') {
      // 'copyright' is kept as its own token; the replacement word is
      // emitted separately and carries the follower's span.
      cleaned.add(Token(word, nextId++, current.span));
      cleaned.add(Token(equivalent[1], nextId++, follower.span));
    } else {
      // Both tokens collapse into a single replacement token, which keeps
      // the span of the first token only.
      cleaned.add(Token(equivalent[1], nextId++, current.span));
    }
  }

  return cleaned;
}