in src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java [134:429]
/**
 * Runs one chunk of characters through the character-level state machine that tells apart
 * plain text, tags, comments, script content, quoted strings and {@code ${...}} expressions.
 *
 * <p>All machine state ({@code parseState}, {@code parseSubState}, {@code tagType},
 * {@code exprType}, {@code quoteChar}, {@code prevParseState}) is kept in fields, so a construct
 * may begin in one call and be completed in a later one. The local {@code start} marks the first
 * character of the current chunk that has not been handed off yet. When the loop ends, the
 * pending tail is either forwarded as characters or appended to {@code buffer} for a later call.
 *
 * @param buf the characters to scan, read from index 0
 * @param len the number of characters of {@code buf} to scan
 * @throws IOException declared for the handler and helper calls made here
 *         (NOTE(review): which of them actually throws is not visible in this method)
 */
private void update(final char[] buf, int len) throws IOException {
    int start = 0;
    final int end = len;
    for (int curr = start; curr < end; curr++) {
        final char c = buf[curr];
        switch (parseState) {
            case OUTSIDE:
                if (c == '<') {
                    // hand off the text preceding the '<' and start a tag candidate at it
                    if (curr > start) {
                        documentHandler.onCharacters(buf, start, curr - start);
                    }
                    start = curr;
                    parseState = PARSE_STATE.TAG;
                    parseSubState = 0;
                    tagType = TT_MAYBE;
                    resetTagName();
                } else if (c == '$') {
                    // possible "${" expression start; confirmed or rejected on the next character
                    exprType = EXPR_MAYBE;
                    parseState = PARSE_STATE.EXPRESSION;
                }
                break;
            case TAG:
                switch (parseSubState) {
                    case -1:
                        // the candidate was rejected as a tag: only track quotes and the closing '>'
                        if (c == '"' || c == '\'') {
                            quoteChar = c;
                            prevParseState = parseState;
                            parseState = PARSE_STATE.STRING;
                            parseSubState = -1;
                        } else if (c == '>') {
                            parseState = PARSE_STATE.OUTSIDE;
                        }
                        break;
                    case 0:
                        // first character after '<'
                        if (c == '!') {
                            parseState = PARSE_STATE.COMMENT;
                            parseSubState = 0;
                            tagType = TT_NONE;
                            // keep the accumulated buffer
                        } else if (c == '"' || c == '\'') {
                            quoteChar = c;
                            prevParseState = parseState;
                            parseState = PARSE_STATE.STRING;
                            parseSubState = -1;
                            tagType = TT_NONE;
                            flushBuffer();
                        } else if (c == '>') {
                            parseState = PARSE_STATE.OUTSIDE;
                            tagType = TT_NONE;
                            flushBuffer();
                        } else if (!Character.isWhitespace(c)) {
                            tagNameBuffer.write(c);
                            parseSubState = 1;
                        } else {
                            parseSubState = -1;
                            tagType = TT_NONE;
                            flushBuffer();
                        }
                        break;
                    case 1:
                        // collecting the tag name
                        if (c == '"' || c == '\'') {
                            tagType = TT_TAG;
                            parseSubState = 2;
                            quoteChar = c;
                            prevParseState = parseState;
                            parseState = PARSE_STATE.STRING;
                        } else if (c == '>') {
                            // processTag's result decides whether script content follows
                            parseState = processTag(buf, start, curr - start + 1)
                                    ? PARSE_STATE.SCRIPT
                                    : PARSE_STATE.OUTSIDE;
                            start = curr + 1;
                            tagType = TT_NONE;
                            parseSubState = 0;
                        } else if (Character.isWhitespace(c)) {
                            tagType = TT_TAG;
                            parseSubState = 2;
                        } else {
                            tagNameBuffer.write(c);
                        }
                        break;
                    case 2:
                        // tag name complete: skip over the rest of the tag up to '>'
                        if (c == '"' || c == '\'') {
                            quoteChar = c;
                            prevParseState = parseState;
                            parseState = PARSE_STATE.STRING;
                        } else if (c == '>') {
                            if (tagType == TT_TAG) {
                                parseState = processTag(buf, start, curr - start + 1)
                                        ? PARSE_STATE.SCRIPT
                                        : PARSE_STATE.OUTSIDE;
                                start = curr + 1;
                            } else {
                                flushBuffer();
                                parseState = "SCRIPT".equalsIgnoreCase(getTagName())
                                        ? PARSE_STATE.SCRIPT
                                        : PARSE_STATE.OUTSIDE;
                            }
                            tagType = TT_NONE;
                            parseSubState = 0;
                        }
                        break;
                    default:
                        break;
                }
                break;
            case COMMENT:
                switch (parseSubState) {
                    case 0:
                    case 1:
                        // after "<!" (0) or "<!-" (1): a '-' advances towards the "<!--" opener,
                        // anything else means this is not a comment
                        if (c == '-') {
                            parseSubState++;
                        } else if (c == '"' || c == '\'') {
                            quoteChar = c;
                            prevParseState = PARSE_STATE.TAG;
                            parseState = PARSE_STATE.STRING;
                            parseSubState = -1;
                            tagType = TT_NONE;
                            flushBuffer();
                        } else if (c == '>') {
                            parseState = PARSE_STATE.OUTSIDE;
                            tagType = TT_NONE;
                            flushBuffer();
                        } else {
                            parseState = PARSE_STATE.TAG;
                            parseSubState = -1;
                            tagType = TT_NONE;
                            flushBuffer();
                        }
                        break;
                    case 2:
                        // inside the comment body, waiting for the first '-' of the terminator
                        if (c == '-') {
                            parseSubState++;
                        }
                        break;
                    case 3:
                        // one '-' seen
                        if (c == '-') {
                            parseSubState++;
                        } else {
                            parseSubState = 2;
                        }
                        break;
                    case 4:
                        // "--" seen
                        if (c == '>') {
                            parseState = PARSE_STATE.OUTSIDE;
                            processComment(buf, start, curr - start + 1);
                            start = curr + 1;
                        } else if (c != '-') {
                            parseSubState = 2;
                        }
                        // on another '-' the last two characters are still "--", so stay here;
                        // this lets a comment ending in "--->" terminate
                        break;
                    default:
                        break;
                }
                break;
            case SCRIPT:
                switch (parseSubState) {
                    case 0:
                        // script content: only a '<' can start the closing tag
                        if (c == '<') {
                            if (curr > start) {
                                documentHandler.onCharacters(buf, start, curr - start);
                            }
                            start = curr;
                            tagType = TT_MAYBE;
                            parseSubState++;
                        }
                        break;
                    case 1:
                    case 2:
                    case 3:
                    case 4:
                    case 5:
                    case 6:
                    case 7: {
                        // sub-state 1 expects '/', sub-states 2..7 expect the letters of "script"
                        // in either case ('/' is its own upper-case form)
                        final char expected = "/script".charAt(parseSubState - 1);
                        if (c == expected || c == Character.toUpperCase(expected)) {
                            parseSubState++;
                        } else {
                            tagType = TT_NONE;
                            parseSubState = 0;
                            flushBuffer();
                            if (c == '<') {
                                // the mismatching '<' may itself open the real closing tag
                                // (e.g. "<</script>"): look at it again in sub-state 0
                                curr--;
                            }
                        }
                        break;
                    }
                    case 8:
                        // "</script" matched: everything up to '>' belongs to the closing tag
                        if (c == '>') {
                            processTag(buf, start, curr - start + 1);
                            start = curr + 1;
                            tagType = TT_NONE;
                            parseState = PARSE_STATE.OUTSIDE;
                        }
                        break;
                    default:
                        break;
                }
                break;
            case STRING:
                // skip everything up to the matching quote, then resume the interrupted state
                if (c == quoteChar) {
                    parseState = prevParseState;
                }
                break;
            case EXPRESSION:
                if (exprType == EXPR_MAYBE && c != '{') {
                    // not a valid expression
                    if (c == '<') {
                        // reset to process tag correctly
                        curr--;
                    }
                    parseState = PARSE_STATE.OUTSIDE;
                } else if (c == '}') {
                    parseState = PARSE_STATE.OUTSIDE;
                }
                exprType = EXPR_NONE;
                break;
            default:
                break;
        }
    }
    if (start < end) {
        // pending tail: forward it as text unless a tag candidate or a comment is still open,
        // in which case it is kept in the buffer
        if (tagType == TT_NONE && parseState != PARSE_STATE.COMMENT) {
            documentHandler.onCharacters(buf, start, end - start);
        } else {
            buffer.write(buf, start, end - start);
        }
    }
}