in java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java [89:175]
/**
 * Runs the NFA over a UTF-8 encoded byte range and reports whether the range matches.
 *
 * <p>The range is decoded one code point at a time. For each code point the set of active states
 * is advanced: a state whose {@code loopback} flag is set stays active, and a state whose
 * {@code match} entry is {@code ANY} or equals the code point activates its successor
 * ({@code state + 1}). Matching starts with state 0 as the only active state.
 *
 * <p>Malformed input never matches: the method returns {@code false} when it meets a byte that is
 * not a valid UTF-8 lead byte, a multi-byte sequence that runs past the end of the range, or a
 * trailing byte that is not a continuation byte ({@code 10xx_xxxx}). Overlong encodings and
 * surrogate code points are not rejected.
 *
 * @param input buffer holding the UTF-8 bytes
 * @param offset index of the first byte to examine
 * @param length number of bytes to examine, starting at {@code offset}
 * @return when {@code exact} is {@code false}, {@code true} as soon as the accept state becomes
 *     active after consuming a code point; otherwise whether the accept state is active after the
 *     last code point. {@code false} if no state remains active, if the input is malformed, or if
 *     the range is empty.
 */
public boolean match(byte[] input, int offset, int length) {
  // seen is also indexed by successor states (state + 1), hence the extra slot.
  boolean[] seen = new boolean[stateCount + 1];
  int[] currentStates = new int[stateCount];
  int[] nextStates = new int[stateCount];
  int currentStatesIndex = 0;
  int nextStatesIndex;

  // Matching always starts in state 0.
  currentStates[currentStatesIndex++] = 0;

  int limit = offset + length;
  int current = offset;
  boolean accept = false;

  while (current < limit) {
    // ---- decode the next UTF-8 code point ----
    int codepoint = INVALID_CODEPOINT;
    int header = input[current] & 0xFF;

    // Number of continuation bytes announced by the lead byte (-1: not a lead byte) and the
    // payload bits carried by the lead byte itself.
    int trailing;
    int payload;
    if (header < 0x80) {
      // 0xxx_xxxx (plain ASCII)
      trailing = 0;
      payload = header;
    } else if ((header & 0b1110_0000) == 0b1100_0000) {
      // 110x_xxxx 10xx_xxxx
      trailing = 1;
      payload = header & 0b0001_1111;
    } else if ((header & 0b1111_0000) == 0b1110_0000) {
      // 1110_xxxx 10xx_xxxx 10xx_xxxx
      trailing = 2;
      payload = header & 0b0000_1111;
    } else if ((header & 0b1111_1000) == 0b1111_0000) {
      // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
      trailing = 3;
      payload = header & 0b0000_0111;
    } else {
      // stray continuation byte or an out-of-range lead byte
      trailing = -1;
      payload = 0;
    }

    // Only decode when the whole sequence lies inside [offset, limit).
    if (trailing >= 0 && current + trailing < limit) {
      boolean wellFormed = true;
      for (int i = 1; i <= trailing; i++) {
        int continuation = input[current + i];
        // Every trailing byte must look like 10xx_xxxx. Without this check a sequence such as
        // 0xC3 'a' would be decoded as U+00E1 and swallow the ASCII byte that follows the lead.
        if ((continuation & 0b1100_0000) != 0b1000_0000) {
          wellFormed = false;
          break;
        }
        payload = (payload << 6) | (continuation & 0b0011_1111);
      }
      if (wellFormed) {
        codepoint = payload;
        current += trailing + 1;
      }
    }

    if (codepoint == INVALID_CODEPOINT) {
      // malformed or truncated input never matches
      return false;
    }

    // ---- advance the set of active states on this code point ----
    accept = false;
    nextStatesIndex = 0;
    Arrays.fill(seen, false);

    for (int i = 0; i < currentStatesIndex; i++) {
      int state = currentStates[i];

      // A loopback state stays active regardless of the code point that was read.
      if (!seen[state] && loopback[state]) {
        nextStates[nextStatesIndex++] = state;
        accept |= state == acceptState;
        seen[state] = true;
      }

      // Move to the successor when this state accepts the code point.
      int next = state + 1;
      if (!seen[next] && (match[state] == ANY || match[state] == codepoint)) {
        nextStates[nextStatesIndex++] = next;
        accept |= next == acceptState;
        seen[next] = true;
      }
    }

    if (nextStatesIndex == 0) {
      // no state survived this code point, so the match can no longer succeed
      return false;
    }

    if (!exact && accept) {
      // a non-exact match is satisfied as soon as the accept state is reached
      return true;
    }

    // Swap the buffers: the states just computed become the current set.
    int[] tmp = currentStates;
    currentStates = nextStates;
    nextStates = tmp;
    currentStatesIndex = nextStatesIndex;
  }

  return accept;
}