in lib/src/license_detection/tokenizer.dart [100:144]
/// Produces the cleaned-up token list for [tokens].
///
/// New line tokens and list-item markers found at the start of a line are
/// dropped, every remaining token has its text passed through [_cleanToken],
/// and two-token sequences listed in [_remainingEquivalentWords] are
/// rewritten. The returned tokens are numbered consecutively starting at 0.
List<Token> _cleanTokens(List<Token> tokens) {
  final cleaned = <Token>[];
  final count = tokens.length;
  var nextId = 0;
  var atLineStart = true;
  var index = 0;

  while (index < count) {
    // After this fetch `index` already points at the token that follows
    // `current`.
    var current = tokens[index++];

    // New line tokens are dropped for now. Should detection accuracy turn
    // out to be low, consider applying Guideline 2.1.4 (text that can be
    // omitted from license).
    if (_newLineRegex.hasMatch(current.value)) {
      atLineStart = true;
      continue;
    }

    // A list-item marker opening a line is skipped; the line-start flag is
    // deliberately left set afterwards.
    if (atLineStart && _isListItem(current.value)) {
      continue;
    }
    atLineStart = false;

    var word = _cleanToken(current.value);

    // An entry is read as [word expected next, replacement text].
    final equivalent = _remainingEquivalentWords[word];
    if (equivalent != null && index < count) {
      final following = tokens[index];
      if (equivalent[0] == _cleanToken(following.value)) {
        if (word == 'copyright') {
          // 'copyright' is kept as its own token; only the following token
          // is replaced, and it keeps its own span.
          cleaned.add(Token(word, nextId++, current.span));
          current = following;
        }
        // Otherwise the pair collapses into a single token that carries the
        // span of the first token.
        word = equivalent[1];
        index++; // The following token has been consumed.
      }
    }

    cleaned.add(Token(word, nextId++, current.span));
  }

  return cleaned;
}