in core/src/main/java/org/apache/sling/cms/core/readability/impl/ReadabilityServiceImpl.java [180:202]
/**
 * Breaks a sentence into {@link Word} entries using a word-level
 * {@link BreakIterator} for the configured locale.
 *
 * <p>Every segment reported by the iterator is lower-cased, passed through
 * {@code deduplicateVowels} and {@code stripWordStem}, and trimmed. A segment
 * is kept only when its normalized form is non-empty and it either has a
 * non-zero syllable count or matches {@code isWordExpression}. Segments kept
 * via the expression match alone are recorded with a syllable count of one.
 *
 * @param sentence the text to split into words
 * @return the accepted words, in the order they occur in the sentence
 */
private List<Word> extractWords(String sentence) {
    List<Word> result = new ArrayList<>();

    BreakIterator boundaries = BreakIterator.getWordInstance(locale);
    boundaries.setText(sentence);

    int segmentStart = boundaries.first();
    int segmentEnd = boundaries.next();
    while (segmentEnd != BreakIterator.DONE) {
        String rawSegment = sentence.substring(segmentStart, segmentEnd);
        String normalized = stripWordStem(deduplicateVowels(rawSegment.toLowerCase(locale))).trim();
        int counted = countSylables(normalized);

        // The iterator also reports whitespace and punctuation segments;
        // the checks below are what filter those out.
        boolean accepted = !normalized.isEmpty()
                && (counted != 0 || isWordExpression.matcher(normalized).matches());
        if (accepted) {
            // A segment accepted only through the expression match still counts as one syllable.
            int effectiveSylables = (counted == 0) ? 1 : counted;
            // NOTE(review): the last argument presumably marks the word as complex - confirm against Word.
            result.add(new Word(rawSegment, normalized, effectiveSylables, effectiveSylables >= complexityMin));
        }

        segmentStart = segmentEnd;
        segmentEnd = boundaries.next();
    }
    return result;
}