public static LikeMatcher compile()

in java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java [58:144]


  public static LikeMatcher compile(String pattern, Optional<Character> escape, boolean optimize) {
    List<Pattern> parsed = parse(pattern, escape);

    // Calculate minimum and maximum size for candidate strings
    // This is used for short-circuiting the match if the size of
    // the input is outside those bounds
    int minSize = 0;
    int maxSize = 0;
    boolean unbounded = false;
    for (Pattern expression : parsed) {
      if (expression instanceof Literal) {
        Literal literal = (Literal) expression;
        int length = literal.getValue().getBytes(UTF_8).length;
        minSize += length;
        maxSize += length;
      } else if (expression instanceof ZeroOrMore) {
        unbounded = true;
      } else if (expression instanceof Any) {
        Any any = (Any) expression;
        int length = any.getLength();
        minSize += length;
        maxSize += length * 4; // at most 4 bytes for a single UTF-8 codepoint
      }
    }

    // Calculate exact match prefix and suffix
    // If the pattern starts and ends with a literal, we can perform a quick
    // exact match to short-circuit DFA evaluation
    byte[] prefix = new byte[0];
    byte[] suffix = new byte[0];

    int patternStart = 0;
    int patternEnd = parsed.size() - 1;
    if (parsed.size() > 0 && parsed.get(0) instanceof Literal) {
      Literal literal = (Literal) parsed.get(0);
      prefix = literal.getValue().getBytes(UTF_8);
      patternStart++;
    }

    if (parsed.size() > 1 && parsed.get(parsed.size() - 1) instanceof Literal) {
      Literal literal = (Literal) parsed.get(parsed.size() - 1);
      suffix = literal.getValue().getBytes(UTF_8);
      patternEnd--;
    }

    // If the pattern (after excluding constant prefix/suffixes) ends with an unbounded match (i.e.,
    // %)
    // we can perform a non-exact match and end as soon as the DFA reaches an accept state -- there
    // is no need to consume the remaining input
    // This section determines whether the pattern is a candidate for non-exact match.
    boolean exact = true; // whether to match to the end of the input
    if (patternStart <= patternEnd && parsed.get(patternEnd) instanceof ZeroOrMore) {
      // guaranteed to be Any or ZeroOrMore because any Literal would've been turned into a suffix
      // above
      exact = false;
      patternEnd--;
    }

    Optional<Matcher> matcher = Optional.empty();
    if (patternStart <= patternEnd) {
      boolean hasAny = false;
      for (int i = patternStart; i <= patternEnd; i++) {
        Pattern item = parsed.get(i);
        if (item instanceof Any) {
          hasAny = true;
          break;
        }
      }

      if (hasAny) {
        if (optimize) {
          matcher = Optional.of(new DenseDfaMatcher(parsed, patternStart, patternEnd, exact));
        } else {
          matcher = Optional.of(new NfaMatcher(parsed, patternStart, patternEnd, exact));
        }
      } else {
        matcher = Optional.of(new FjsMatcher(parsed, patternStart, patternEnd, exact));
      }
    }

    return new LikeMatcher(
        minSize,
        unbounded ? OptionalInt.empty() : OptionalInt.of(maxSize),
        prefix,
        suffix,
        matcher);
  }