size_t CRegex::literalCount()

in lib/core/CRegex.cc [248:363]


size_t CRegex::literalCount() const {
    // Estimates how many literal characters a string must contain in order to
    // match this regex.  Returns 0 (after logging) if the regex has not been
    // initialised.
    //
    // The estimate is deliberately rough: it is a single pass over the pattern
    // text with a handful of heuristics, so there are patterns for which the
    // answer will be wrong.  Refine it if a caller ever needs more precision.
    if (!m_Initialised) {
        LOG_ERROR(<< "Regex not initialised");
        return 0;
    }

    const std::string pattern(m_Regex.str());
    const std::string::size_type patternLen(pattern.length());

    // Literals seen outside any parenthesised group, plus the contribution
    // already added for each group that has been closed
    size_t topLevelLiterals(0);

    // Literals seen in the current alternative of the current group, and the
    // smallest such total over the alternatives of that group seen so far
    size_t branchLiterals(0);
    size_t fewestBranchLiterals(std::numeric_limits<size_t>::max());

    // Characters inside [...] or {...} are never counted as literals
    size_t openSquareBrackets(0);
    size_t openBraces(0);

    // Only a single level of grouping is tracked
    bool insideGroup(false);

    for (std::string::size_type pos = 0; pos < patternLen; ++pos) {
        // Set by the cases below when the current character (or escaped
        // character) might count as a literal
        bool literalCandidate(false);

        switch (pattern[pos]) {
        case '$':
            // Perl can expand variables, so should really skip over
            // variable names at this point
        case '.':
        case '^':
        case '*':
        case '+':
        case '?':
            // Metacharacters never count as literals
            break;
        case '\\': {
            // Step onto the escaped character; the loop increment then moves
            // past it
            ++pos;
            if (pos == patternLen) {
                LOG_ERROR(<< "Inconsistency - backslash at the end of regex");
                return topLevelLiterals;
            }
            const char escaped(pattern[pos]);
            const bool isClassShorthand(escaped == 'd' || escaped == 's' || escaped == 'w' ||
                                        escaped == 'D' || escaped == 'S' || escaped == 'W');
            const bool isDigit(escaped >= '0' && escaped <= '9');
            // Anything other than \d \s \w \D \S \W or backslash-digit is
            // treated as an escaped literal
            literalCandidate = !(isClassShorthand || isDigit);
            break;
        }
        case '[':
            ++openSquareBrackets;
            break;
        case ']':
            if (openSquareBrackets > 0) {
                --openSquareBrackets;
            } else {
                LOG_ERROR(<< "Inconsistency - more ] than [");
            }
            break;
        case '{':
            ++openBraces;
            break;
        case '}':
            if (openBraces > 0) {
                --openBraces;
            } else {
                LOG_ERROR(<< "Inconsistency - more } than {");
            }
            break;
        case '|':
            // End of one alternative within a group: remember the smallest
            // alternative so far and start counting the next one.
            // Alternation outside a group is not accounted for.
            if (insideGroup) {
                if (branchLiterals < fewestBranchLiterals) {
                    fewestBranchLiterals = branchLiterals;
                }
                branchLiterals = 0;
            }
            break;
        case '(':
            insideGroup = true;
            break;
        case ')':
            // The group contributes the literal count of its smallest
            // alternative, after which the per-group state is reset
            insideGroup = false;
            topLevelLiterals += (branchLiterals < fewestBranchLiterals)
                                    ? branchLiterals
                                    : fewestBranchLiterals;
            branchLiterals = 0;
            fewestBranchLiterals = std::numeric_limits<size_t>::max();
            break;
        default:
            literalCandidate = true;
            break;
        }

        if (!literalCandidate || openSquareBrackets != 0 || openBraces != 0) {
            continue;
        }

        // A character immediately followed by *, + or ? is not counted
        const std::string::size_type nextPos(pos + 1);
        if (nextPos < patternLen) {
            const char following(pattern[nextPos]);
            if (following == '*' || following == '+' || following == '?') {
                continue;
            }
        }

        if (insideGroup) {
            ++branchLiterals;
        } else {
            ++topLevelLiterals;
        }
    }

    return topLevelLiterals;
}