void LongestMatch::runLongestMatch()

in ragel/src/ragel/parsetree.cpp [299:480]


void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
{
	graph->markReachableFromHereStopFinal( graph->startState );
	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
		if ( ms->stateBits & STB_ISMARKED ) {
			ms->lmItemSet.insert( 0 );
			ms->stateBits &= ~ STB_ISMARKED;
		}
	}

	/* Transfer the first item of non-empty lmAction tables to the item sets
	 * of the states that follow. Exclude states that have no transitions out.
	 * This must happen on a separate pass so that on each iteration of the
	 * next pass we have the item set entries from all lmAction tables. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
			if ( trans->lmActionTable.length() > 0 ) {
				LmActionTableEl *lmAct = trans->lmActionTable.data;
				StateAp *toState = trans->toState;
				assert( toState );

				/* Can only optimize this if there are no transitions out.
				 * Note there can be out transitions going nowhere with
				 * actions and they too must inhibit this optimization. */
				if ( toState->outList.length() > 0 ) {
					/* Fill the item sets. */
					graph->markReachableFromHereStopFinal( toState );
					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
						if ( ms->stateBits & STB_ISMARKED ) {
							ms->lmItemSet.insert( lmAct->value );
							ms->stateBits &= ~ STB_ISMARKED;
						}
					}
				}
			}
		}
	}

	/* The lmItem sets are now filled, telling us which longest match rules
	 * can succeed in which states. First determine if we need to make sure
	 * act is defaulted to zero. We need to do this if there are any states
	 * with lmItemSet.length() > 1 and NULL is included. That is, that the
	 * switch may get called when in fact nothing has been matched. */
	int maxItemSetLength = 0;
	graph->markReachableFromHereStopFinal( graph->startState );
	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
		if ( ms->stateBits & STB_ISMARKED ) {
			if ( ms->lmItemSet.length() > maxItemSetLength )
				maxItemSetLength = ms->lmItemSet.length();
			ms->stateBits &= ~ STB_ISMARKED;
		}
	}

	/* The actions executed on starting to match a token. */
	graph->isolateStartState();
	graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
	graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
	if ( maxItemSetLength > 1 ) {
		/* The longest match action switch may be called when tokens are
		 * matched, in which case act must be initialized, there must be a
		 * case to handle the error, and the generated machine will require an
		 * error state. */
		lmSwitchHandlesError = true;
		pd->lmRequiresErrorState = true;
		graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
	}

	/* The place to store transitions to restart. It maybe possible for the
	 * restarting to affect the searching through the graph that follows. For
	 * now take the safe route and save the list of transitions to restart
	 * until after all searching is done. */
	Vector<TransAp*> restartTrans;

	/* Set actions that do immediate token recognition, set the longest match part
	 * id and set the token ending. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
			if ( trans->lmActionTable.length() > 0 ) {
				LmActionTableEl *lmAct = trans->lmActionTable.data;
				StateAp *toState = trans->toState;
				assert( toState );

				/* Can only optimize this if there are no transitions out.
				 * Note there can be out transitions going nowhere with
				 * actions and they too must inhibit this optimization. */
				if ( toState->outList.length() == 0 ) {
					/* Can execute the immediate action for the longest match
					 * part. Redirect the action to the start state.
					 *
					 * NOTE: When we need to inhibit on_last due to leaving
					 * actions the above test suffices. If the state has out
					 * actions then it will fail because the out action will
					 * have been transferred to an error transition, which
					 * makes the outlist non-empty. */
					trans->actionTable.setAction( lmAct->key, 
							lmAct->value->actOnLast );
					restartTrans.append( trans );
				}
				else {
					/* Look for non final states that have a non-empty item
					 * set. If these are present then we need to record the
					 * end of the token.  Also Find the highest item set
					 * length reachable from here (excluding at transtions to
					 * final states). */
					bool nonFinalNonEmptyItemSet = false;
					maxItemSetLength = 0;
					graph->markReachableFromHereStopFinal( toState );
					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
						if ( ms->stateBits & STB_ISMARKED ) {
							if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
								nonFinalNonEmptyItemSet = true;
							if ( ms->lmItemSet.length() > maxItemSetLength )
								maxItemSetLength = ms->lmItemSet.length();
							ms->stateBits &= ~ STB_ISMARKED;
						}
					}

					/* If there are reachable states that are not final and
					 * have non empty item sets or that have an item set
					 * length greater than one then we need to set tokend
					 * because the error action that matches the token will
					 * require it. */
					if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
						trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );

					/* Some states may not know which longest match item to
					 * execute, must set it. */
					if ( maxItemSetLength > 1 ) {
						/* There are transitions out, another match may come. */
						trans->actionTable.setAction( lmAct->key, 
								lmAct->value->setActId );
					}
				}
			}
		}
	}

	/* Now that all graph searching is done it certainly safe set the
	 * restarting. It may be safe above, however this must be verified. */
	for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
		restart( graph, *pt );

	int lmErrActionOrd = pd->curActionOrd++;

	/* Embed the error for recognizing a char. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
			if ( st->isFinState() ) {
				/* On error execute the onActNext action, which knows that
				 * the last character of the token was one back and restart. */
				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
						&st->lmItemSet[0]->actOnNext, 1 );
				st->eofActionTable.setAction( lmErrActionOrd, 
						st->lmItemSet[0]->actOnNext );
				st->eofTarget = graph->startState;
			}
			else {
				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
						&st->lmItemSet[0]->actLagBehind, 1 );
				st->eofActionTable.setAction( lmErrActionOrd, 
						st->lmItemSet[0]->actLagBehind );
				st->eofTarget = graph->startState;
			}
		}
		else if ( st->lmItemSet.length() > 1 ) {
			/* Need to use the select. Take note of which items the select
			 * is needed for so only the necessary actions are included. */
			for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
				if ( *plmi != 0 )
					(*plmi)->inLmSelect = true;
			}
			/* On error, execute the action select and go to the start state. */
			graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
					&lmActSelect, 1 );
			st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
			st->eofTarget = graph->startState;
		}
	}
	
	/* Finally, the start state should be made final. */
	graph->setFinState( graph->startState );
}