in ragel/src/ragel/parsetree.cpp [299:480]
void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
{
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( 0 );
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* Transfer the first item of non-empty lmAction tables to the item sets
* of the states that follow. Exclude states that have no transitions out.
* This must happen on a separate pass so that on each iteration of the
* next pass we have the item set entries from all lmAction tables. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if ( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert( toState );
/* Can only optimize this if there are no transitions out.
* Note there can be out transitions going nowhere with
* actions and they too must inhibit this optimization. */
if ( toState->outList.length() > 0 ) {
/* Fill the item sets. */
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( lmAct->value );
ms->stateBits &= ~ STB_ISMARKED;
}
}
}
}
}
}
/* The lmItem sets are now filled, telling us which longest match rules
* can succeed in which states. First determine if we need to make sure
* act is defaulted to zero. We need to do this if there are any states
* with lmItemSet.length() > 1 and NULL is included. That is, that the
* switch may get called when in fact nothing has been matched. */
int maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* The actions executed on starting to match a token. */
graph->isolateStartState();
graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
if ( maxItemSetLength > 1 ) {
/* The longest match action switch may be called when tokens are
* matched, in which case act must be initialized, there must be a
* case to handle the error, and the generated machine will require an
* error state. */
lmSwitchHandlesError = true;
pd->lmRequiresErrorState = true;
graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
}
/* The place to store transitions to restart. It maybe possible for the
* restarting to affect the searching through the graph that follows. For
* now take the safe route and save the list of transitions to restart
* until after all searching is done. */
Vector<TransAp*> restartTrans;
/* Set actions that do immediate token recognition, set the longest match part
* id and set the token ending. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if ( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert( toState );
/* Can only optimize this if there are no transitions out.
* Note there can be out transitions going nowhere with
* actions and they too must inhibit this optimization. */
if ( toState->outList.length() == 0 ) {
/* Can execute the immediate action for the longest match
* part. Redirect the action to the start state.
*
* NOTE: When we need to inhibit on_last due to leaving
* actions the above test suffices. If the state has out
* actions then it will fail because the out action will
* have been transferred to an error transition, which
* makes the outlist non-empty. */
trans->actionTable.setAction( lmAct->key,
lmAct->value->actOnLast );
restartTrans.append( trans );
}
else {
/* Look for non final states that have a non-empty item
* set. If these are present then we need to record the
* end of the token. Also Find the highest item set
* length reachable from here (excluding at transtions to
* final states). */
bool nonFinalNonEmptyItemSet = false;
maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
nonFinalNonEmptyItemSet = true;
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* If there are reachable states that are not final and
* have non empty item sets or that have an item set
* length greater than one then we need to set tokend
* because the error action that matches the token will
* require it. */
if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
/* Some states may not know which longest match item to
* execute, must set it. */
if ( maxItemSetLength > 1 ) {
/* There are transitions out, another match may come. */
trans->actionTable.setAction( lmAct->key,
lmAct->value->setActId );
}
}
}
}
}
/* Now that all graph searching is done it certainly safe set the
* restarting. It may be safe above, however this must be verified. */
for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
restart( graph, *pt );
int lmErrActionOrd = pd->curActionOrd++;
/* Embed the error for recognizing a char. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
if ( st->isFinState() ) {
/* On error execute the onActNext action, which knows that
* the last character of the token was one back and restart. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actOnNext, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actOnNext );
st->eofTarget = graph->startState;
}
else {
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actLagBehind, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actLagBehind );
st->eofTarget = graph->startState;
}
}
else if ( st->lmItemSet.length() > 1 ) {
/* Need to use the select. Take note of which items the select
* is needed for so only the necessary actions are included. */
for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
if ( *plmi != 0 )
(*plmi)->inLmSelect = true;
}
/* On error, execute the action select and go to the start state. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&lmActSelect, 1 );
st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
st->eofTarget = graph->startState;
}
}
/* Finally, the start state should be made final. */
graph->setFinState( graph->startState );
}