in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/regex/RegexInit.java [55:159]
/**
 * Inspects a regular expression and reports whether it uses any construct
 * from the three families checked below.
 * <ol>
 *   <li>an unescaped occurrence of one of the entries of {@code javaRegexOnly};</li>
 *   <li>a "complex union", i.e. a character class opened inside another one,
 *       such as {@code [a-m[n-z]]};</li>
 *   <li>an unescaped backslash followed by one of the characters accepted by
 *       {@link #isJavaOnlyEscapeChar(char)} (digits, {@code d}, {@code s},
 *       {@code w}, {@code p}, {@code b}, ...).</li>
 * </ol>
 * The checks run in that order and stop at the first hit.
 *
 * NOTE(review): judging by the name of {@code javaRegexOnly}, a result of 0
 * presumably selects java.util.regex and 1 an alternative engine -- confirm
 * against the caller.
 *
 * @param pattern the regular expression to inspect; must not be null
 * @return 0 if any of the constructs above is present, 1 otherwise
 */
private int determineBestRegexMethod( String pattern ) {
    if( containsUnescapedJavaOnlyToken(pattern)
            || containsNestedCharacterClass(pattern)
            || containsJavaOnlyEscapeSequence(pattern) ) {
        return 0;
    }
    return 1;
}

/**
 * Scans {@code pattern} right-to-left for each entry of {@code javaRegexOnly}
 * and reports whether any occurrence is not escaped.
 */
private boolean containsUnescapedJavaOnlyToken( String pattern ) {
    for( int i = 0; i < javaRegexOnly.length; i++ ) {
        int from = pattern.length();
        while( from > 0 ) {
            int hit = pattern.lastIndexOf(javaRegexOnly[i], from);
            if( hit < 0 ) {
                // No further occurrence of this token
                break;
            }
            if( hit == 0 ) {
                // Nothing can precede index 0, so the token cannot be escaped
                return true;
            }
            int escapes = precedingEscapes(pattern, hit);
            if( escapes % 2 == 0 ) {
                // An even run of backslashes escapes only itself, not the token
                return true;
            }
            // Escaped occurrence: resume the search before its backslash run
            from = hit - escapes;
        }
    }
    return false;
}

/**
 * Returns the index of the first {@code target} at or after {@code from}
 * that is not preceded by an odd number of backslashes, or -1 if none.
 */
private int indexOfUnescaped( String pattern, char target, int from ) {
    int pos = pattern.indexOf(target, from);
    // pos > 0 also covers the "not found" case, so precedingEscapes is never
    // asked about a negative index; a match at index 0 cannot be escaped.
    while( pos > 0 && precedingEscapes(pattern, pos) % 2 == 1 ) {
        pos = pattern.indexOf(target, pos + 1);
    }
    return pos;
}

/**
 * Reports whether an unescaped '[' appears before the unescaped ']' that
 * follows an earlier unescaped '[', e.g. {@code [a-m[n-z]]}.
 */
private boolean containsNestedCharacterClass( String pattern ) {
    int open = indexOfUnescaped(pattern, '[', 0);
    while( open != -1 ) {
        int close = indexOfUnescaped(pattern, ']', open);
        if( close == -1 ) {
            return false;
        }
        int nextOpen = indexOfUnescaped(pattern, '[', open + 1);
        if( nextOpen == -1 ) {
            return false;
        }
        if( nextOpen < close ) {
            // A second class is opened before the current one is closed
            return true;
        }
        open = nextOpen;
    }
    return false;
}

/**
 * Walks the backslash runs of {@code pattern} from right to left and reports
 * whether any run ends in an unescaped backslash that is followed by a
 * character accepted by {@link #isJavaOnlyEscapeChar(char)}.
 */
private boolean containsJavaOnlyEscapeSequence( String pattern ) {
    int slash = pattern.lastIndexOf('\\');
    while( slash >= 0 ) {
        int escapes = precedingEscapes(pattern, slash);
        // The last backslash of a run is itself unescaped only when an even
        // number of backslashes precede it.
        if( escapes % 2 == 0
                && slash + 1 < pattern.length()
                && isJavaOnlyEscapeChar(pattern.charAt(slash + 1)) ) {
            return true;
        }
        // Skip past the whole run of backslashes
        int beforeRun = slash - ( escapes + 1 );
        slash = beforeRun >= 0 ? pattern.lastIndexOf('\\', beforeRun) : -1;
    }
    return false;
}

/**
 * Tells whether {@code c}, when it directly follows an unescaped backslash,
 * forms one of the escape sequences this class treats as java-only. The
 * category labels below follow the java.util.regex.Pattern documentation.
 */
private boolean isJavaOnlyEscapeChar( char c ) {
    switch( c ) {
        // back-references and octal escapes
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        // alert (bell) and escape characters
        case 'a': case 'e':
        // hexadecimal and unicode character escapes
        case 'x': case 'u':
        // control character
        case 'c':
        // start of a quoted section
        case 'Q':
        // predefined character classes
        case 'w': case 'W':
        case 'd': case 'D':
        case 's': case 'S':
        // POSIX / unicode property classes
        case 'p': case 'P':
        // boundary matchers
        case 'b': case 'B':
        case 'A': case 'G':
        case 'z': case 'Z':
            return true;
        default:
            return false;
    }
}