in extensions/vw/tabular/pdf/src/main/java/org/apache/causeway/extensions/tabular/pdf/factory/internal/Tokenizer.java [48:286]
/**
 * Splits {@code text} into a flat list of tokens: plain text runs, possible wrap points and
 * tokens for the small set of recognized markup tags.
 *
 * <p>Recognized tags (case-sensitive, no attributes): {@code <i>}, {@code <b>}, {@code <br>},
 * {@code <br/>}, {@code <br />}, {@code <p>}, {@code <ol>}, {@code <ul>}, {@code <li>} and the
 * closing tags {@code </i>}, {@code </b>}, {@code </p>}, {@code </ol>}, {@code </ul>},
 * {@code </li>}. Any other {@code '<'} is kept as literal text.
 *
 * <p>Wrap point indices are taken from {@code findWrapPoints(text)} when
 * {@code wrappingFunction} is {@code null}, otherwise from
 * {@code findWrapPointsWithFunction(text, wrappingFunction)}. The returned stack is expected to
 * pop indices in ascending order. A wrap point whose index lies inside a consumed tag is
 * discarded, so that the wrap points following the tag are still honored.
 *
 * @param text the text to tokenize, may be {@code null}
 * @param wrappingFunction optional custom wrapping function, may be {@code null}
 * @return an immutable empty list if {@code text} is {@code null}; otherwise a new list that
 *         always ends with a possible-wrap-point token
 */
public static List<Token> tokenize(final String text, final WrappingFunction wrappingFunction) {
    if (text == null) return Collections.emptyList();

    // Recognized markup, paired by position with the token emitted for it.
    // No tag is a prefix of another one, so the lookup order is irrelevant.
    final String[] tags = {
            "<i>", "<b>", "<br>", "<br/>", "<br />", "<p>",
            "<ol>", "<ul>", "<li>",
            "</i>", "</b>", "</p>",
            "</ol>", "</ul>", "</li>"
    };
    final Token[] tagTokens = {
            OPEN_TAG_I, OPEN_TAG_B, WRAP_POINT_BR, WRAP_POINT_BR, WRAP_POINT_BR, WRAP_POINT_P,
            OPEN_TAG_OL, OPEN_TAG_UL, WRAP_POINT_LI,
            CLOSE_TAG_I, CLOSE_TAG_B, CLOSE_TAG_P,
            CLOSE_TAG_OL, CLOSE_TAG_UL, CLOSE_TAG_LI
    };

    final List<Token> tokens = new ArrayList<>();
    final Stack<Integer> possibleWrapPoints = wrappingFunction == null
            ? findWrapPoints(text)
            : findWrapPointsWithFunction(text, wrappingFunction);
    final StringBuilder sb = new StringBuilder();

    // Sentinel that can never equal a valid text index; used once the stack is exhausted,
    // instead of letting pop() throw an EmptyStackException.
    final int noMoreWrapPoints = Integer.MAX_VALUE;
    int currentWrapPoint = possibleWrapPoints.isEmpty()
            ? noMoreWrapPoints
            : possibleWrapPoints.pop();

    int textIndex = 0;
    while (textIndex < text.length()) {
        // Consuming a tag advances textIndex by several characters at once. A wrap point
        // located inside that tag can no longer be hit by the equality check below, and
        // leaving it pending would hide every wrap point after it, so drop it here.
        while (currentWrapPoint < textIndex) {
            currentWrapPoint = possibleWrapPoints.isEmpty()
                    ? noMoreWrapPoints
                    : possibleWrapPoints.pop();
        }
        if (textIndex == currentWrapPoint) {
            if (sb.length() > 0) {
                tokens.add(Token.text(sb.toString()));
                sb.setLength(0);
            }
            tokens.add(POSSIBLE_WRAP_POINT);
            currentWrapPoint = possibleWrapPoints.isEmpty()
                    ? noMoreWrapPoints
                    : possibleWrapPoints.pop();
        }

        final char c = text.charAt(textIndex);

        // Only a '<' can start a tag; startsWith(prefix, offset) does the bounds checking.
        int matchedTag = -1;
        if (c == '<') {
            for (int i = 0; i < tags.length; i++) {
                if (text.startsWith(tags[i], textIndex)) {
                    matchedTag = i;
                    break;
                }
            }
        }

        if (matchedTag < 0) {
            // ordinary character, or a '<' that does not start a recognized tag
            sb.append(c);
            textIndex++;
            continue;
        }

        // A tag ends the current text run: flush it, emit the tag token, skip the whole tag.
        if (sb.length() > 0) {
            tokens.add(Token.text(sb.toString()));
            sb.setLength(0);
        }
        tokens.add(tagTokens[matchedTag]);
        textIndex += tags[matchedTag].length();
    }

    if (sb.length() > 0) {
        tokens.add(Token.text(sb.toString()));
        sb.setLength(0);
    }
    // the end of the text is always a possible wrap point
    tokens.add(POSSIBLE_WRAP_POINT);
    return tokens;
}