private int determineBestRegexMethod()

in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/regex/RegexInit.java [55:159]


    /**
     * Inspects a regular expression and reports whether it contains any
     * construct from a fixed list.
     * <p>
     * Returns {@code 0} as soon as one of the following is found, and
     * {@code 1} if none is:
     * <ol>
     *   <li>an unescaped occurrence of any entry of {@code javaRegexOnly};</li>
     *   <li>an unescaped {@code '['} that appears before the unescaped
     *       {@code ']'} closing an earlier {@code '['}, e.g.
     *       {@code [a-m[n-z]]};</li>
     *   <li>an unescaped backslash followed by a digit or one of
     *       {@code a e x u c Q w W d D s S p P b B A G z Z}.</li>
     * </ol>
     * A character counts as escaped when it is preceded by an odd number of
     * backslashes, as reported by {@code precedingEscapes}.
     * <p>
     * NOTE(review): judging by the field name {@code javaRegexOnly},
     * {@code 0} presumably selects the java.util.regex based matcher and
     * {@code 1} an alternative one -- confirm against the caller.
     *
     * @param pattern the regular expression to inspect; must not be null
     * @return {@code 0} if any construct listed above is present, else {@code 1}
     */
    private int determineBestRegexMethod( String pattern ) {

        // 1. Search backwards for every javaRegexOnly entry; any occurrence
        //    that is not escaped settles the answer.
        for( int i = 0; i < javaRegexOnly.length; i++ ) {
            int from = pattern.length();
            while( from > 0 ) {
                int at = pattern.lastIndexOf(javaRegexOnly[i], from);
                if( at < 0 ) {
                    break;
                }
                if( at == 0 ) {
                    // Nothing can precede index 0, so it cannot be escaped
                    return 0;
                }
                int escapes = precedingEscapes(pattern, at);
                if( escapes % 2 == 0 ) {
                    return 0;
                }
                // Escaped occurrence: resume the search at the start of the
                // backslash run in front of it
                from = at - escapes;
            }
        }

        // 2. Determine if there are any complex unions in pattern
        //    Complex unions are [a-m[n-z]]
        int open = nextUnescapedIndex(pattern, '[', 0);
        while( open != -1 ) {
            int close = nextUnescapedIndex(pattern, ']', open);
            if( close == -1 ) {
                break;
            }
            int nextOpen = nextUnescapedIndex(pattern, '[', open + 1);
            if( nextOpen == -1 ) {
                break;
            }
            if( nextOpen < close ) {
                // A second '[' opens before the first one is closed
                return 0;
            }
            open = nextOpen;
        }

        // 3. Look for escape sequences such as \d, \D, \s ... by walking the
        //    runs of backslashes from the end of the pattern to its start.
        final String flaggedEscapes = "0123456789aexucQwWdDsSpPbBAGzZ";
        int slash = pattern.lastIndexOf('\\');
        while( slash != -1 ) {
            int escapes = precedingEscapes(pattern, slash);
            // An even count means this backslash is itself unescaped and
            // therefore escapes the character that follows it
            if( escapes % 2 == 0 && ( slash + 1 ) < pattern.length()
                    && flaggedEscapes.indexOf(pattern.charAt(slash + 1)) >= 0 ) {
                return 0;
            }

            // We skip past all the escapes of this run
            int before = slash - ( escapes + 1 );
            slash = ( before >= 0 ) ? pattern.lastIndexOf('\\', before) : -1;
        }
        return 1;
    }

    /**
     * Finds the first occurrence of {@code target} at or after {@code from}
     * that is not preceded by an odd number of backslashes.
     * {@code precedingEscapes} is only consulted for indexes greater than
     * zero: index 0 has nothing in front of it and -1 means "not found".
     *
     * @param pattern the string to search
     * @param target  the character to look for
     * @param from    the index to start searching at
     * @return the index of the first unescaped occurrence, or -1 if none
     */
    private int nextUnescapedIndex( String pattern, char target, int from ) {
        int pos = pattern.indexOf(target, from);
        while( pos > 0 && precedingEscapes(pattern, pos) % 2 == 1 ) {
            pos = pattern.indexOf(target, pos + 1);
        }
        return pos;
    }