public boolean match()

in java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java [89:175]


  /**
   * Simulates the NFA over the UTF-8 bytes in {@code input[offset, offset + length)}.
   *
   * <p>The range is consumed one codepoint at a time. Only the lead byte is inspected to determine
   * the sequence width; continuation bytes are not validated, their low six bits are simply folded
   * into the codepoint.
   *
   * @param input buffer holding UTF-8 encoded text
   * @param offset index of the first byte to examine
   * @param length number of bytes to examine, starting at {@code offset}
   * @return {@code false} if a lead byte matches none of the 1- to 4-byte UTF-8 lead patterns, if a
   *     multi-byte sequence is cut off by the end of the range, or if no state survives a
   *     codepoint; {@code true} as soon as the accept state becomes live when {@code exact} is
   *     {@code false}; otherwise whether the accept state was live after the last codepoint (which
   *     is {@code false} for an empty range)
   */
  public boolean match(byte[] input, int offset, int length) {
    // One extra slot so that the successor index (state + 1) can be probed for every live state.
    final boolean[] visited = new boolean[stateCount + 1];
    int[] active = new int[stateCount];
    int[] pending = new int[stateCount];

    // The simulation starts with state 0 as the only live state.
    active[0] = 0;
    int activeCount = 1;

    final int end = offset + length;
    int position = offset;
    boolean accepted = false;

    while (position < end) {
      final int lead = input[position] & 0xFF;

      // Classify the lead byte: total sequence width and the mask selecting its payload bits.
      // A width of 0 marks a byte that cannot start a sequence.
      final int width;
      final int payloadMask;
      if (lead < 0x80) {
        // 0xxx_xxxx
        width = 1;
        payloadMask = 0x7F;
      } else if ((lead & 0xE0) == 0xC0) {
        // 110x_xxxx 10xx_xxxx
        width = 2;
        payloadMask = 0x1F;
      } else if ((lead & 0xF0) == 0xE0) {
        // 1110_xxxx 10xx_xxxx 10xx_xxxx
        width = 3;
        payloadMask = 0x0F;
      } else if ((lead & 0xF8) == 0xF0) {
        // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
        width = 4;
        payloadMask = 0x07;
      } else {
        width = 0;
        payloadMask = 0;
      }

      // Decode only when the whole sequence lies inside the range; otherwise keep the sentinel.
      int codepoint = INVALID_CODEPOINT;
      if (width != 0 && position + (width - 1) < end) {
        codepoint = lead & payloadMask;
        for (int k = 1; k < width; k++) {
          codepoint = (codepoint << 6) | (input[position + k] & 0x3F);
        }
        position += width;
      }

      if (codepoint == INVALID_CODEPOINT) {
        return false;
      }

      // Compute the set of states that are live after consuming this codepoint.
      accepted = false;
      int pendingCount = 0;
      Arrays.fill(visited, false);
      for (int i = 0; i < activeCount; i++) {
        final int state = active[i];

        // A looping state stays live regardless of the codepoint.
        if (!visited[state] && loopback[state]) {
          pending[pendingCount++] = state;
          visited[state] = true;
          if (state == acceptState) {
            accepted = true;
          }
        }

        // Advance to the following state when this state's expectation is met.
        final int successor = state + 1;
        if (!visited[successor]) {
          final int expected = match[state];
          if (expected == ANY || expected == codepoint) {
            pending[pendingCount++] = successor;
            visited[successor] = true;
            if (successor == acceptState) {
              accepted = true;
            }
          }
        }
      }

      // No live state left: the remaining input cannot lead to a match.
      if (pendingCount == 0) {
        return false;
      }

      // In non-exact mode reaching the accept state once is enough; the rest is not examined.
      if (accepted && !exact) {
        return true;
      }

      // Swap buffers so the freshly computed set becomes the live set for the next codepoint.
      final int[] recycled = active;
      active = pending;
      pending = recycled;
      activeCount = pendingCount;
    }

    return accepted;
  }