in parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/deprecated/PathGlobPattern.java [72:160]
public void set(String glob) {
StringBuilder regex = new StringBuilder();
int setOpen = 0;
int curlyOpen = 0;
int len = glob.length();
hasWildcard = false;
for (int i = 0; i < len; i++) {
char c = glob.charAt(i);
switch (c) {
case BACKSLASH:
if (++i >= len) {
error("Missing escaped character", glob, i);
}
regex.append(c).append(glob.charAt(i));
continue;
case '.':
case '$':
case '(':
case ')':
case '|':
case '+':
// escape regex special chars that are not glob special chars
regex.append(BACKSLASH);
break;
case '*':
if (i + 1 < len && glob.charAt(i + 1) == '*') {
regex.append('.');
i++;
break;
}
regex.append("[^" + PATH_SEPARATOR + "]");
hasWildcard = true;
break;
case '?':
regex.append('.');
hasWildcard = true;
continue;
case '{': // start of a group
regex.append("(?:"); // non-capturing
curlyOpen++;
hasWildcard = true;
continue;
case ',':
regex.append(curlyOpen > 0 ? '|' : c);
continue;
case '}':
if (curlyOpen > 0) {
// end of a group
curlyOpen--;
regex.append(")");
continue;
}
break;
case '[':
if (setOpen > 0) {
error("Unclosed character class", glob, i);
}
setOpen++;
hasWildcard = true;
break;
case '^': // ^ inside [...] can be unescaped
if (setOpen == 0) {
regex.append(BACKSLASH);
}
break;
case '!': // [! needs to be translated to [^
regex.append(setOpen > 0 && '[' == glob.charAt(i - 1) ? '^' : '!');
continue;
case ']':
// Many set errors like [][] could not be easily detected here,
// as []], []-] and [-] are all valid POSIX glob and java regex.
// We'll just let the regex compiler do the real work.
setOpen = 0;
break;
default:
}
regex.append(c);
}
if (setOpen > 0) {
error("Unclosed character class", glob, len);
}
if (curlyOpen > 0) {
error("Unclosed group", glob, len);
}
compiled = Pattern.compile(regex.toString());
}