const int FARegexpLexer_char::FindToken()

in blingfirecompile.library/src/FARegexpLexer_char.cpp [49:168]


const int FARegexpLexer_char::FindToken (const int Pos, int * pLength) const
{
    DebugLogAssert (pLength);
    DebugLogAssert (0 <= Pos && Pos < m_Length);

    int length = 1;

    // get next symbol
    char c = m_pRegexp [Pos];

    // see whether it is a special symbol
    if (IsSpecial (c) && !IsEscaped (Pos)) {

        // get char type
        const int Type = GetType (c);

        // if it is LRB then int should follow, e.g. <7
        if (FARegexpTree::TYPE_LTRBR == Type && Pos + length != m_Length) {

            c = m_pRegexp [Pos + length];
            while (isdigit ((unsigned char) c)) {
                length++;
                if (Pos + length == m_Length) {
                    FASyntaxError (m_pRegexp, m_Length, Pos, \
                        "Wrong place for extracting bracket.");
                    throw FAException (FAMsg::SyntaxError, __FILE__, __LINE__);
                }
                c = m_pRegexp [Pos + length];
            }
            // skip the following space
            if (IsSpace (c)) {
                length++;
            }
        }

        *pLength = length;
        return Type;

    // process \xHEX labels
    } else if (IsEscaped (Pos) && ('x' == c || 'X' == c) ) {

        c = m_pRegexp [Pos + length];
        while (Pos + length < m_Length && IsHex (c)) {
            length++;
            c = m_pRegexp [Pos + length];
        }
        // skip the following space
        if (IsSpace (c)) {
            length++;
        }

        *pLength = length;
        return FARegexpTree::TYPE_SYMBOL;

    // process ranges [^[:alnum:]Aa0-9\xfff0-\xffff-] as a single label
    } else if (!IsEscaped (Pos) && '[' == c) {

        bool in_spec_range = false;

        c = m_pRegexp [Pos + length];

        while ((Pos + length < m_Length) && \
            (']' != c || IsEscaped (Pos + length) || in_spec_range)) {

            // the beginning of chracter class [:
            if (Pos + length < m_Length && '[' == c && \
                !in_spec_range && ':' == m_pRegexp [Pos + length + 1]) {
                length++;
                in_spec_range = true;
            // the end of the chracter class [:
            } else if (Pos + length < m_Length && ':' == c && \
                in_spec_range && ']' == m_pRegexp [Pos + length + 1]) {
                length++;
                in_spec_range = false;
            // withing the chracter class
            } else if (in_spec_range && ! isalpha ((unsigned char) c)) {
                FASyntaxError (m_pRegexp, m_Length, Pos, \
                    "Bad character class name.");
                throw FAException (FAMsg::SyntaxError, __FILE__, __LINE__);
            }

            length++;
            c = m_pRegexp [Pos + length];

        }
        // did not find the closing ]
        if (Pos + length == m_Length) {
            FASyntaxError (m_pRegexp, m_Length, Pos, \
                "Missing closing bracket for the character range.");
            throw FAException (FAMsg::SyntaxError, __FILE__, __LINE__);
        }

        *pLength = length + 1;
        return FARegexpTree::TYPE_SYMBOL;

    } else if (IsSpace (c) && !IsEscaped (Pos)) {

        // ignore
        return -1;

    } else {

        if (m_UseUtf8) {

            *pLength = FAUtf8Size (m_pRegexp + Pos);

            if (0 >= *pLength) {
                FASyntaxError (m_pRegexp, m_Length, Pos, \
                    "Bad UTF-8 character.");
                throw FAException (FAMsg::SyntaxError, __FILE__, __LINE__);
            }

        } else {

            *pLength = 1;
        }

        return FARegexpTree::TYPE_SYMBOL;
    }
}