in src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs [748:1081]
/// <summary>
/// Resumes scanning at the current marked position and drives the table-based
/// state machine (<c>ZZ_TRANS</c>, <c>ZZ_ROWMAP</c>, <c>ZZ_CMAP</c>, <c>ZZ_ATTRIBUTE</c>)
/// until a match is recorded, the end of input is seen, or no transition exists.
/// The action selected for the match is then executed: actions that end in
/// <c>break</c> only update scanner state and let the outer loop scan again, while
/// actions that end in <c>return</c> hand a token type back to the caller.
/// </summary>
/// <remarks>
/// Besides the scanner position fields (<c>zzMarkedPos</c>, <c>zzCurrentPos</c>,
/// <c>zzStartRead</c>, <c>zzState</c>, <c>yychar</c>), the actions modify
/// <c>positionInc</c>, <c>numWikiTokensSeen</c>, <c>numLinkToks</c>,
/// <c>numBalanced</c>, <c>currentTokType</c> and switch the lexical state through
/// <c>YyBegin</c>.
/// NOTE(review): <c>ZzScanError</c> is defined outside this block; it presumably
/// throws when nothing matches -- confirm. If it returns normally, the outer loop
/// simply starts another scan.
/// </remarks>
/// <returns>
/// The token type chosen by the matched action (for example <c>ALPHANUM</c> or the
/// current value of <c>currentTokType</c>), or <c>YYEOF</c> when the last input read
/// was <c>YYEOF</c> and <c>zzStartRead</c> equals <c>zzCurrentPos</c>.
/// </returns>
public int GetNextToken()
{
// zzInput: the character (or YYEOF) most recently read.
int zzInput;
// zzAction: last state recorded as a match candidate, or -1 if none was recorded.
int zzAction;
// cached fields:
// Local copies of instance fields and lookup tables used by the scanning loops.
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char[] zzBufferL = zzBuffer;
char[] zzCMapL = ZZ_CMAP;
int[] zzTransL = ZZ_TRANS;
int[] zzRowMapL = ZZ_ROWMAP;
int[] zzAttrL = ZZ_ATTRIBUTE;
// Outer loop: one iteration per scan attempt; it only ends through a "return"
// inside one of the actions below.
while (true)
{
zzMarkedPosL = zzMarkedPos;
// Advance yychar by the distance between the previous start position and the
// marked position before the start position is moved forward.
yychar += zzMarkedPosL - zzStartRead;
zzAction = -1;
// The new scan starts where the previous one left its mark.
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
// Initial state is looked up from the current lexical state.
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ((zzAttributes & 1) == 1)
{
zzAction = zzState;
}
// Inner loop: consume one input character per iteration and follow the
// transition table; left via "goto zzForActionBreak".
while (true)
{
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF)
{
// Buffer exhausted and EOF was already flagged: stop scanning.
zzInput = YYEOF;
goto zzForActionBreak;
}
else
{
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
// NOTE(review): ZzRefill is defined elsewhere; its result is used here as
// "end of input reached" -- confirm.
bool eof = ZzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof)
{
zzInput = YYEOF;
goto zzForActionBreak;
}
else
{
zzInput = zzBufferL[zzCurrentPosL++];
}
}
// Next state = table entry at (row offset of current state + class of the input char).
int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
// -1 means there is no transition for this input: stop and use what was recorded.
if (zzNext == -1) goto zzForActionBreak;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
// Attribute bit 1: record this state as the current match candidate and mark
// the position just past the consumed character.
if ((zzAttributes & 1) == 1)
{
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
// Attribute bit 8: stop scanning immediately after recording this state.
if ((zzAttributes & 8) == 8) goto zzForActionBreak;
}
}
zzForActionBreak:
// store back cached position
zzMarkedPos = zzMarkedPosL;
// Dispatch on the action number of the recorded state. When nothing was recorded
// zzAction is still -1, which has no case label and therefore reaches "default".
// Cases 47-92 are empty and do nothing except leave the switch.
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
{
case 1:
{
numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
case 47: break;
case 2:
{
positionInc = 1; return ALPHANUM;
}
case 48: break;
case 3:
{
positionInc = 1; return CJ;
}
case 49: break;
case 4:
{
numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; YyBegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 50: break;
case 5:
{
positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
case 51: break;
case 6:
{
YyBegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
}
case 52: break;
case 7:
{
YyBegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
}
case 53: break;
case 8:
{ /* Break so we don't hit fall-through warning: */
break;/* ignore */
}
case 54: break;
case 9:
{
// The first token counted by numLinkToks gets position increment 0, later ones 1.
if (numLinkToks == 0) { positionInc = 0; } else { positionInc = 1; }
numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; YyBegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 55: break;
case 10:
{
numLinkToks = 0; positionInc = 0; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 56: break;
case 11:
{
currentTokType = BOLD; YyBegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 57: break;
case 12:
{
currentTokType = ITALICS; numWikiTokensSeen++; YyBegin(STRING); return currentTokType;/*italics*/
}
case 58: break;
case 13:
{
currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; YyBegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 59: break;
case 14:
{
YyBegin(STRING); numWikiTokensSeen++; return currentTokType;
}
case 60: break;
case 15:
{
currentTokType = SUB_HEADING; numWikiTokensSeen = 0; YyBegin(STRING); /* Break so we don't hit fall-through warning: */ break;
}
case 61: break;
case 16:
{
currentTokType = HEADING; YyBegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
}
case 62: break;
case 17:
{
YyBegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
}
case 63: break;
case 18:
{ /* Break so we don't hit fall-through warning: */
break;/* ignore STRING */
}
case 64: break;
case 19:
{
YyBegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
}
case 65: break;
case 20:
{
numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK; YyBegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 66: break;
case 21:
{
YyBegin(STRING); return currentTokType;/*pipe*/
}
case 67: break;
case 22:
{
// numBalanced toggles between 0 and 1 here; TWO_SINGLE_QUOTES_STATE is entered
// only on the 0 -> 1 transition.
numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0) { numBalanced++; YyBegin(TWO_SINGLE_QUOTES_STATE); } else { numBalanced = 0; }/* Break so we don't hit fall-through warning: */
break;
}
case 68: break;
case 23:
{
numWikiTokensSeen = 0; positionInc = 1; YyBegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 69: break;
case 24:
{
numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; YyBegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 70: break;
case 25:
{
numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; YyBegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 71: break;
case 26:
{
YyBegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
}
case 72: break;
case 27:
{
numLinkToks = 0; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 73: break;
case 28:
{
currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; YyBegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 74: break;
case 29:
{
currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; YyBegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 75: break;
case 30:
{
YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 76: break;
case 31:
{
numBalanced = 0; currentTokType = ALPHANUM; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
}
case 77: break;
case 32:
{
numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK; YyBegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 78: break;
case 33:
{
positionInc = 1; return APOSTROPHE;
}
case 79: break;
case 34:
{
positionInc = 1; return HOST;
}
case 80: break;
case 35:
{
positionInc = 1; return NUM;
}
case 81: break;
case 36:
{
positionInc = 1; return COMPANY;
}
case 82: break;
case 37:
{
currentTokType = BOLD_ITALICS; YyBegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 83: break;
case 38:
{
numBalanced = 0; currentTokType = ALPHANUM; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
}
case 84: break;
case 39:
{
numBalanced = 0; currentTokType = ALPHANUM; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
}
case 85: break;
case 40:
{
positionInc = 1; return ACRONYM;
}
case 86: break;
case 41:
{
positionInc = 1; return EMAIL;
}
case 87: break;
case 42:
{
numBalanced = 0; currentTokType = ALPHANUM; YyBegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
}
case 88: break;
case 43:
{
positionInc = 1; numWikiTokensSeen++; YyBegin(EXTERNAL_LINK_STATE); return currentTokType;
}
case 89: break;
case 44:
{
numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; YyBegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 90: break;
case 45:
{
currentTokType = CATEGORY; numWikiTokensSeen = 0; YyBegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 91: break;
case 46:
{
numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY; YyBegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 92: break;
default:
// Reached when no action number matched (including zzAction == -1).
// End of input with the zzCurrentPos field still at the start position:
// flag EOF and report it to the caller.
if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
{
zzAtEOF = true;
return YYEOF;
}
else
{
// No rule matched the input: report it through ZzScanError.
ZzScanError(ZZ_NO_MATCH);
}
break;
}
}
}