int RegularExpression::match()

in src/xercesc/util/regx/RegularExpression.cpp [995:1205]


// Runs the compiled operation chain starting at `operations` against the
// input described by `context`, beginning at position `offset`.
//
// Returns -1 when the chain does not match, otherwise the (non-negative)
// offset reached once the chain has been fully consumed.
//
// Two evaluation strategies are used for O_CLOSURE and O_QUESTION:
//   * when context->fLimit is greater than 256, a heap-allocated stack of
//     (op, offset) pairs is used: the current op and offset are pushed and
//     the loop descends into the child without recursing;
//   * otherwise the child is matched through a recursive call.
// All other compound ops (finite closures, non-greedy ops) always recurse.
//
// NOTE(review): the matchXxx helpers are handed `offset` and the code relies
// on it having moved after a successful match, so they presumably take it by
// reference -- confirm against their declarations.
int RegularExpression::match(Context* const context, const Op* const operations,
                             XMLSize_t offset) const
{
    // Sentinel meaning "this iteration produced no result yet, keep looping".
    // Real results are only -1 (no match) or an offset >= 0.
    const int noResult = -5;

    // The explicit backtracking stack is only created for longer inputs;
    // it is handed to janStack, which presumably releases it on every exit path.
    ValueStackOf<RE_RuntimeContext>* opStack=NULL;
    Janitor<ValueStackOf<RE_RuntimeContext> > janStack(NULL);
    if(context->fLimit > 256)
    {
        opStack=new ValueStackOf<RE_RuntimeContext>(16, context->fMemoryManager);
        janStack.reset(opStack);
    }
    const Op* tmpOp = operations;
    bool ignoreCase = isSet(context->fOptions, IGNORE_CASE);

    while (tmpOp != 0) {
        int doReturn = noResult;

        // Positions outside [fStart, fLimit] can never match.
        if (offset > context->fLimit || offset < context->fStart)
            doReturn = -1;
        else
        {
            switch(tmpOp->getOpType()) {
                // Simple ops: fail the current path, or step to the next op.
                case Op::O_CHAR:
                    if (!matchChar(context, tmpOp->getData(), offset, ignoreCase))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_DOT:
                    if (!matchDot(context, offset))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_RANGE:
                case Op::O_NRANGE:
                    if (!matchRange(context, tmpOp, offset, ignoreCase))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_ANCHOR:
                    if (!matchAnchor(context, tmpOp->getData(), offset))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_BACKREFERENCE:
                    if (!matchBackReference(context, tmpOp->getData(), offset,
                                            ignoreCase))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_STRING:
                    if (!matchString(context, tmpOp->getLiteral(), offset, ignoreCase))
                        doReturn = -1;
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                case Op::O_FINITE_CLOSURE:
                {
                    XMLInt32 id = tmpOp->getData();
                    // if id is not -1, it's a closure with a child token having a minimum length,
                    // where id is the index of the fOffsets array where its status is stored
                    if (id >= 0) {
                        int prevOffset = context->fOffsets[id];
                        if (prevOffset < 0 || prevOffset != (int)offset) {
                            context->fOffsets[id] = (int)offset;
                        }
                        else {
                            // the status didn't change, we haven't found other copies; move on to the next match
                            context->fOffsets[id] = -1;
                            tmpOp = tmpOp->getNextOp();
                            break;
                        }
                    }

                    // keep matching the child until it fails or stops
                    // consuming input (the latter guards against looping
                    // forever on an empty match)
                    int ret;
                    while((ret = match(context, tmpOp->getChild(), offset)) != -1)
                    {
                        if(offset == (XMLSize_t)ret)
                            break;
                        offset = (XMLSize_t)ret;
                    }

                    if (id >= 0) {
                        // loop has ended, reset the status for this closure
                        context->fOffsets[id] = -1;
                    }
                    tmpOp = tmpOp->getNextOp();
                }
                break;
                case Op::O_FINITE_NONGREEDYCLOSURE:
                {
                    // first try the continuation without consuming the child
                    int ret = match(context,tmpOp->getNextOp(),offset);
                    if (ret >= 0)
                        doReturn = ret;
                    else
                    {
                        // continuation failed: consume the child as often as
                        // it matches and makes progress, then carry on
                        while((ret = match(context, tmpOp->getChild(), offset)) != -1)
                        {
                            if(offset == (XMLSize_t)ret)
                                break;
                            offset = (XMLSize_t)ret;
                        }
                        tmpOp = tmpOp->getNextOp();
                    }
                }
                break;
                case Op::O_CLOSURE:
                {
                    XMLInt32 id = tmpOp->getData();
                    // if id is not -1, it's a closure with a child token having a minimum length,
                    // where id is the index of the fOffsets array where its status is stored
                    if (id >= 0) {
                        int prevOffset = context->fOffsets[id];
                        if (prevOffset < 0 || prevOffset != (int)offset) {
                            context->fOffsets[id] = (int)offset;
                        }
                        else {
                            // the status didn't change, we haven't found other copies; move on to the next match
                            context->fOffsets[id] = -1;
                            tmpOp = tmpOp->getNextOp();
                            break;
                        }
                    }

                    if(opStack!=NULL)
                    {
                        // iterative mode: remember where to resume if the
                        // child path fails, then descend into the child
                        opStack->push(RE_RuntimeContext(tmpOp, offset));
                        tmpOp = tmpOp->getChild();
                    }
                    else
                    {
                        // recursive mode: a successful child path is the
                        // final result, otherwise skip the closure
                        int ret = match(context, tmpOp->getChild(), offset);
                        if (id >= 0) {
                            context->fOffsets[id] = -1;
                        }
                        if (ret >= 0)
                            doReturn = ret;
                        else
                            tmpOp = tmpOp->getNextOp();
                    }
                }
                break;
                case Op::O_QUESTION:
                {
                    if(opStack!=NULL)
                    {
                        // iterative mode: same push-and-descend scheme as O_CLOSURE
                        opStack->push(RE_RuntimeContext(tmpOp, offset));
                        tmpOp = tmpOp->getChild();
                    }
                    else
                    {
                        int ret = match(context, tmpOp->getChild(), offset);
                        if (ret >= 0)
                            doReturn = ret;
                        else
                            tmpOp = tmpOp->getNextOp();
                    }
                }
                break;
                case Op::O_NONGREEDYCLOSURE:
                case Op::O_NONGREEDYQUESTION:
                {
                    // prefer the continuation; only enter the child when the
                    // continuation cannot match from here
                    int ret = match(context,tmpOp->getNextOp(),offset);
                    if (ret >= 0)
                        doReturn = ret;
                    else
                        tmpOp = tmpOp->getChild();
                }
                break;
                case Op::O_UNION:
                    // the helper's result is the result of this path
                    doReturn = matchUnion(context, tmpOp, offset);
                    break;
                case Op::O_CAPTURE:
                    // the helper is only used when context->fMatch is set
                    // and the op carries non-zero data; otherwise the op is
                    // stepped over
                    if (context->fMatch != 0 && tmpOp->getData() != 0)
                        doReturn = matchCapture(context, tmpOp, offset);
                    else
                        tmpOp = tmpOp->getNextOp();
                    break;
                default:
                    // An op type without a handler would neither advance
                    // tmpOp nor produce a result, so the loop would never
                    // terminate; treat it as a failed match instead.
                    doReturn = -1;
                    break;
            }
        }
        if (doReturn != noResult) {
            // A path finished. Without pending entries on the explicit
            // stack this is the final answer.
            if (opStack==NULL || opStack->size() == 0)
                return doReturn;
            // Otherwise go back to the most recently entered O_CLOSURE or
            // O_QUESTION and restore the offset it was entered at.
            RE_RuntimeContext ctx = opStack->pop();
            tmpOp = ctx.op_;
            offset = ctx.offs_;
            if (tmpOp->getOpType() == Op::O_CLOSURE) {
                XMLInt32 id = tmpOp->getData();
                if (id >= 0) {
                    // loop has ended, reset the status for this closure
                    context->fOffsets[id] = -1;
                }
            }
            if (tmpOp->getOpType() == Op::O_CLOSURE || tmpOp->getOpType() == Op::O_QUESTION) {
                // the child path matched: that is the overall result
                if (doReturn >= 0)
                    return doReturn;
            }
            // the child path failed: skip the optional/repeated part and
            // continue with whatever follows it
            tmpOp = tmpOp->getNextOp();
        }
    }

    // Ran off the end of the op chain: everything matched up to `offset`.
    return (int)offset;
}