in src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs [386:1398]
protected void EmitTryFindNextPossibleStartingPosition()
{
Debug.Assert(_regexTree != null);
_int32LocalsPool?.Clear();
_readOnlySpanCharLocalsPool?.Clear();
LocalBuilder inputSpan = DeclareReadOnlySpanChar();
LocalBuilder pos = DeclareInt32();
bool rtl = (_options & RegexOptions.RightToLeft) != 0;
// Load necessary locals
// int pos = base.runtextpos;
// ReadOnlySpan<char> inputSpan = dynamicMethodArg; // TODO: We can reference the arg directly rather than using another local.
Mvfldloc(s_runtextposField, pos);
Ldarg_1();
Stloc(inputSpan);
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
// It's rare for min required length to be 0, so we don't bother special-casing the check,
// especially since we want the "return false" code regardless. This differs from the source
// generator, where the return false is emitted at the end of the find method, and thus
// avoids the branch for the 0 case.
int minRequiredLength = _regexTree.FindOptimizations.MinRequiredLength;
Debug.Assert(minRequiredLength >= 0);
Label returnFalse = DefineLabel();
Label finishedLengthCheck = DefineLabel();
// if (pos > inputSpan.Length - minRequiredLength) // or pos < minRequiredLength for rtl
// {
// base.runtextpos = inputSpan.Length; // or 0 for rtl
// return false;
// }
Ldloc(pos);
if (!rtl)
{
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
if (minRequiredLength > 0)
{
Ldc(minRequiredLength);
Sub();
}
Ble(finishedLengthCheck);
}
else
{
Ldc(minRequiredLength);
Bge(finishedLengthCheck);
}
MarkLabel(returnFalse);
Ldthis();
if (!rtl)
{
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
}
else
{
Ldc(0);
}
Stfld(s_runtextposField);
Ldc(0);
Ret();
MarkLabel(finishedLengthCheck);
// Emit any anchors.
if (EmitAnchors())
{
return;
}
// Either anchors weren't specified, or they don't completely root all matches to a specific location.
switch (_regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingString_LeftToRight:
case FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
case FindNextStartingPositionMode.FixedDistanceString_LeftToRight:
EmitIndexOfString_LeftToRight();
break;
case FindNextStartingPositionMode.LeadingString_RightToLeft:
EmitIndexOf_RightToLeft();
break;
case FindNextStartingPositionMode.LeadingSet_LeftToRight:
case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
EmitFixedSet_LeftToRight();
break;
case FindNextStartingPositionMode.LeadingSet_RightToLeft:
EmitFixedSet_RightToLeft();
break;
case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight:
EmitLiteralAfterAtomicLoop();
break;
default:
Debug.Fail($"Unexpected mode: {_regexTree.FindOptimizations.FindMode}");
goto case FindNextStartingPositionMode.NoSearch;
case FindNextStartingPositionMode.NoSearch:
// return true;
Ldc(1);
Ret();
break;
}
// Emits any anchors. Returns true if the anchor roots any match to a specific location and thus no further
// searching is required; otherwise, false.
bool EmitAnchors()
{
Label label;
// Anchors that fully implement TryFindNextPossibleStartingPosition, with a check that leads to immediate success or failure determination.
switch (_regexTree.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
// if (pos != 0) goto returnFalse;
// return true;
Ldloc(pos);
Ldc(0);
Bne(returnFalse);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Start:
case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start:
// if (pos != base.runtextstart) goto returnFalse;
// return true;
Ldloc(pos);
Ldthisfld(s_runtextstartField);
Bne(returnFalse);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_EndZ:
// if (pos < inputSpan.Length - 1) base.runtextpos = inputSpan.Length - 1;
// return true;
label = DefineLabel();
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(1);
Sub();
Bge(label);
Ldthis();
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(1);
Sub();
Stfld(s_runtextposField);
MarkLabel(label);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_End:
// if (pos < inputSpan.Length) base.runtextpos = inputSpan.Length;
// return true;
label = DefineLabel();
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Bge(label);
Ldthis();
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Stfld(s_runtextposField);
MarkLabel(label);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning:
// if (pos != 0) base.runtextpos = 0;
// return true;
label = DefineLabel();
Ldloc(pos);
Ldc(0);
Beq(label);
Ldthis();
Ldc(0);
Stfld(s_runtextposField);
MarkLabel(label);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_EndZ:
// if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n') goto returnFalse;
// return true;
label = DefineLabel();
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(1);
Sub();
Blt(returnFalse);
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
BgeUn(label);
Ldloca(inputSpan);
Ldloc(pos);
Call(s_spanGetItemMethod);
LdindU2();
Ldc('\n');
Bne(returnFalse);
MarkLabel(label);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_End:
// if (pos < inputSpan.Length) goto returnFalse;
// return true;
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Blt(returnFalse);
Ldc(1);
Ret();
return true;
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_End:
case FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ:
// Jump to the end, minus the min required length, which in this case is actually the fixed length.
{
int extraNewlineBump = _regexTree.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
label = DefineLabel();
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(_regexTree.FindOptimizations.MinRequiredLength + extraNewlineBump);
Sub();
Bge(label);
Ldthis();
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(_regexTree.FindOptimizations.MinRequiredLength + extraNewlineBump);
Sub();
Stfld(s_runtextposField);
MarkLabel(label);
Ldc(1);
Ret();
return true;
}
}
// Now handle anchors that boost the position but don't determine immediate success or failure.
if (!rtl) // we haven't done the work to validate these optimizations for RightToLeft
{
switch (_regexTree.FindOptimizations.LeadingAnchor)
{
case RegexNodeKind.Bol:
{
// Optimize the handling of a Beginning-Of-Line (BOL) anchor. BOL is special, in that unlike
// other anchors like Beginning, there are potentially multiple places a BOL can match. So unlike
// the other anchors, which all skip all subsequent processing if found, with BOL we just use it
// to boost our position to the next line, and then continue normally with any prefix or char class searches.
label = DefineLabel();
// if (pos > 0...
Ldloc(pos!);
Ldc(0);
Ble(label);
// ... && inputSpan[pos - 1] != '\n') { ... }
Ldloca(inputSpan);
Ldloc(pos);
Ldc(1);
Sub();
Call(s_spanGetItemMethod);
LdindU2();
Ldc('\n');
Beq(label);
// int tmp = inputSpan.Slice(pos).IndexOf('\n');
Ldloca(inputSpan);
Ldloc(pos);
Call(s_spanSliceIntMethod);
Ldc('\n');
Call(s_spanIndexOfChar);
using (RentedLocalBuilder newlinePos = RentInt32Local())
{
Stloc(newlinePos);
// if (newlinePos < 0 || newlinePos + pos + 1 > inputSpan.Length)
// {
// base.runtextpos = inputSpan.Length;
// return false;
// }
Ldloc(newlinePos);
Ldc(0);
Blt(returnFalse);
Ldloc(newlinePos);
Ldloc(pos);
Add();
Ldc(1);
Add();
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Bgt(returnFalse);
// pos += newlinePos + 1;
Ldloc(pos);
Ldloc(newlinePos);
Add();
Ldc(1);
Add();
Stloc(pos);
// We've updated the position. Make sure there's still enough room in the input for a possible match.
// if (pos > inputSpan.Length - minRequiredLength) returnFalse;
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
if (minRequiredLength != 0)
{
Ldc(minRequiredLength);
Sub();
}
Ldloc(pos);
BltFar(returnFalse);
}
MarkLabel(label);
}
break;
}
switch (_regexTree.FindOptimizations.TrailingAnchor)
{
case RegexNodeKind.End or RegexNodeKind.EndZ when _regexTree.FindOptimizations.MaxPossibleLength is int maxLength:
// Jump to the end, minus the max allowed length.
{
int extraNewlineBump = _regexTree.FindOptimizations.FindMode == FindNextStartingPositionMode.TrailingAnchor_FixedLength_LeftToRight_EndZ ? 1 : 0;
label = DefineLabel();
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(maxLength + extraNewlineBump);
Sub();
Bge(label);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
Ldc(maxLength + extraNewlineBump);
Sub();
Stloc(pos);
MarkLabel(label);
break;
}
}
}
return false;
}
// Emits a case-sensitive left-to-right search for a substring or substrings.
void EmitIndexOfString_LeftToRight()
{
RegexFindOptimizations opts = _regexTree.FindOptimizations;
Debug.Assert(opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or
FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight or
FindNextStartingPositionMode.FixedDistanceString_LeftToRight or
FindNextStartingPositionMode.LeadingStrings_LeftToRight or
FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight);
using RentedLocalBuilder i = RentInt32Local();
// int i = inputSpan.Slice(pos)...
Ldloca(inputSpan);
Ldloc(pos);
if (opts.FindMode is FindNextStartingPositionMode.FixedDistanceString_LeftToRight &&
opts.FixedDistanceLiteral is { Distance: > 0 } literal)
{
Ldc(literal.Distance);
Add();
}
Call(s_spanSliceIntMethod);
// ...IndexOf(prefix);
if (opts.FindMode is FindNextStartingPositionMode.LeadingStrings_LeftToRight or FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight)
{
LoadSearchValues(opts.LeadingPrefixes, opts.FindMode is FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
Call(s_spanIndexOfAnySearchValuesString);
}
else
{
string literalString = opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ?
opts.LeadingPrefix :
opts.FixedDistanceLiteral.String!;
LoadSearchValues([literalString], opts.FindMode is FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
Call(s_spanIndexOfAnySearchValuesString);
}
Stloc(i);
// if (i < 0) goto ReturnFalse;
Ldloc(i);
Ldc(0);
BltFar(returnFalse);
// base.runtextpos = pos + i;
// return true;
Ldthis();
Ldloc(pos);
Ldloc(i);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
}
// Emits a case-sensitive right-to-left search for a substring.
void EmitIndexOf_RightToLeft()
{
string prefix = _regexTree.FindOptimizations.LeadingPrefix;
Debug.Assert(!string.IsNullOrEmpty(prefix));
// pos = inputSpan.Slice(0, pos).LastIndexOf(prefix);
Ldloca(inputSpan);
Ldc(0);
Ldloc(pos);
Call(s_spanSliceIntIntMethod);
Ldstr(prefix);
Call(s_stringAsSpanMethod);
Call(s_spanLastIndexOfSpan);
Stloc(pos);
// if (pos < 0) goto ReturnFalse;
Ldloc(pos);
Ldc(0);
BltFar(returnFalse);
// base.runtextpos = pos + prefix.Length;
// return true;
Ldthis();
Ldloc(pos);
Ldc(prefix.Length);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
}
// Emits a search for a set at a fixed position from the start of the pattern,
// and potentially other sets at other fixed positions in the pattern.
void EmitFixedSet_LeftToRight()
{
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
List<RegexFindOptimizations.FixedDistanceSet>? sets = _regexTree.FindOptimizations.FixedDistanceSets;
RegexFindOptimizations.FixedDistanceSet primarySet = sets![0];
const int MaxSets = 4;
int setsToUse = Math.Min(sets.Count, MaxSets);
using RentedLocalBuilder iLocal = RentInt32Local();
using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal();
// ReadOnlySpan<char> span = inputSpan.Slice(pos);
Ldloca(inputSpan);
Ldloc(pos);
Call(s_spanSliceIntMethod);
Stloc(textSpanLocal);
// Use IndexOf{Any} to accelerate the skip loop via vectorization to match the first prefix.
// But we avoid using it for the relatively common case of the starting set being '.', aka anything other than
// a newline, as it's very rare to have long, uninterrupted sequences of newlines. And we avoid using it
// for the case of the starting set being anything (e.g. '.' with SingleLine), as in that case it'll always match
// the first char.
int setIndex = 0;
bool canUseIndexOf =
primarySet.Set != RegexCharClass.NotNewLineClass &&
primarySet.Set != RegexCharClass.AnyClass;
bool needLoop = !canUseIndexOf || setsToUse > 1;
Label checkSpanLengthLabel = default;
Label charNotInClassLabel = default;
Label loopBody = default;
if (needLoop)
{
checkSpanLengthLabel = DefineLabel();
charNotInClassLabel = DefineLabel();
loopBody = DefineLabel();
// for (int i = 0;
Ldc(0);
Stloc(iLocal);
BrFar(checkSpanLengthLabel);
MarkLabel(loopBody);
}
if (canUseIndexOf)
{
setIndex = 1;
if (needLoop)
{
// slice.Slice(iLocal + primarySet.Distance);
Ldloca(textSpanLocal);
Ldloc(iLocal);
if (primarySet.Distance != 0)
{
Ldc(primarySet.Distance);
Add();
}
Call(s_spanSliceIntMethod);
}
else if (primarySet.Distance != 0)
{
// slice.Slice(primarySet.Distance)
Ldloca(textSpanLocal);
Ldc(primarySet.Distance);
Call(s_spanSliceIntMethod);
}
else
{
// slice
Ldloc(textSpanLocal);
}
if (primarySet.Chars is not null)
{
Debug.Assert(primarySet.Chars.Length > 0);
switch (primarySet.Chars.Length)
{
case 1:
// tmp = ...IndexOf(setChars[0]);
Ldc(primarySet.Chars[0]);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptChar : s_spanIndexOfChar);
break;
case 2:
// tmp = ...IndexOfAny(setChars[0], setChars[1]);
Ldc(primarySet.Chars[0]);
Ldc(primarySet.Chars[1]);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptCharChar : s_spanIndexOfAnyCharChar);
break;
case 3:
// tmp = ...IndexOfAny(setChars[0], setChars[1], setChars[2]});
Ldc(primarySet.Chars[0]);
Ldc(primarySet.Chars[1]);
Ldc(primarySet.Chars[2]);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptCharCharChar : s_spanIndexOfAnyCharCharChar);
break;
default:
// tmp = ...IndexOfAny(setChars);
// tmp = ...IndexOfAny(s_searchValues);
EmitIndexOfAnyWithSearchValuesOrLiteral(primarySet.Chars, except: primarySet.Negated);
break;
}
}
else if (primarySet.Range is not null)
{
if (primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive)
{
// tmp = ...IndexOf{AnyExcept}(low);
Ldc(primarySet.Range.Value.LowInclusive);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptChar : s_spanIndexOfChar);
}
else
{
// tmp = ...IndexOfAny{Except}InRange(low, high);
Ldc(primarySet.Range.Value.LowInclusive);
Ldc(primarySet.Range.Value.HighInclusive);
Call(primarySet.Negated ? s_spanIndexOfAnyExceptInRange : s_spanIndexOfAnyInRange);
}
}
else if (RegexCharClass.IsUnicodeCategoryOfSmallCharCount(primarySet.Set, out char[]? setChars, out bool negated, out _))
{
// We have a known set of small number of characters; we can use IndexOfAny{Except}(searchValues).
// tmp = ...IndexOfAny(s_searchValues);
LoadSearchValues(setChars);
Call(negated ? s_spanIndexOfAnyExceptSearchValues : s_spanIndexOfAnySearchValues);
}
else
{
// In order to optimize the search for ASCII characters, we use SearchValues to vectorize a search
// for those characters plus anything non-ASCII (if we find something non-ASCII, we'll fall back to
// a sequential walk). In order to do that search, we actually build up a set for all of the ASCII
// characters _not_ contained in the set, and then do a search for the inverse of that, which will be
// all of the target ASCII characters and all of non-ASCII.
using var asciiChars = new ValueListBuilder<char>(stackalloc char[128]);
for (int i = 0; i < 128; i++)
{
if (!RegexCharClass.CharInClass((char)i, primarySet.Set))
{
asciiChars.Append((char)i);
}
}
using (RentedLocalBuilder span = RentReadOnlySpanCharLocal())
using (RentedLocalBuilder i = RentInt32Local())
{
// ReadOnlySpan<char> span = inputSpan...;
Stloc(span);
// int i = span.
Ldloc(span);
if (asciiChars.Length == 128)
{
// IndexOfAnyExceptInRange('\0', '\u007f');
Ldc(0);
Ldc(127);
Call(s_spanIndexOfAnyExceptInRange);
}
else
{
// IndexOfAnyExcept(searchValuesArray[...]);
LoadSearchValues(asciiChars.AsSpan().ToArray());
Call(s_spanIndexOfAnyExceptSearchValues);
}
Stloc(i);
// if ((uint)i >= span.Length) goto doneSearch;
Label doneSearch = DefineLabel();
Ldloc(i);
Ldloca(span);
Call(s_spanGetLengthMethod);
BgeUnFar(doneSearch);
// if (span[i] <= 0x7f) goto doneSearch;
Ldc(0x7f);
Ldloca(span);
Ldloc(i);
Call(s_spanGetItemMethod);
LdindU2();
BgeUnFar(doneSearch);
Label loop = DefineLabel();
MarkLabel(loop);
// do { ...
// if (CharInClass(span[i])) goto doneSearch;
Ldloca(span);
Ldloc(i);
Call(s_spanGetItemMethod);
LdindU2();
EmitMatchCharacterClass(primarySet.Set);
Brtrue(doneSearch);
// i++;
Ldloc(i);
Ldc(1);
Add();
Stloc(i);
// } while ((uint)i < span.Length);
Ldloc(i);
Ldloca(span);
Call(s_spanGetLengthMethod);
BltUnFar(loop);
// i = -1;
Ldc(-1);
Stloc(i);
MarkLabel(doneSearch);
Ldloc(i);
}
}
if (needLoop)
{
// i += tmp;
// if (tmp < 0) goto returnFalse;
using (RentedLocalBuilder tmp = RentInt32Local())
{
Stloc(tmp);
Ldloc(iLocal);
Ldloc(tmp);
Add();
Stloc(iLocal);
Ldloc(tmp);
Ldc(0);
BltFar(returnFalse);
}
}
else
{
// i = tmp;
// if (i < 0) goto returnFalse;
Stloc(iLocal);
Ldloc(iLocal);
Ldc(0);
BltFar(returnFalse);
}
if (setsToUse > 1)
{
// Of the remaining sets we're going to check, find the maximum distance of any of them.
// If it's further than the primary set we checked, we need a bounds check.
int maxDistance = sets[1].Distance;
for (int i = 2; i < setsToUse; i++)
{
maxDistance = Math.Max(maxDistance, sets[i].Distance);
}
if (maxDistance > primarySet.Distance)
{
// if ((uint)(i + maxDistance) >= slice.Length) goto returnFalse;
if (setsToUse > 1)
{
Debug.Assert(needLoop);
Ldloc(iLocal);
Ldc(maxDistance);
Add();
Ldloca(textSpanLocal);
Call(s_spanGetLengthMethod);
_ilg!.Emit(OpCodes.Bge_Un, returnFalse);
}
}
}
}
// if (!CharInClass(slice[i], prefix[0], "...")) continue;
// if (!CharInClass(slice[i + 1], prefix[1], "...")) continue;
// if (!CharInClass(slice[i + 2], prefix[2], "...")) continue;
// ...
Debug.Assert(setIndex is 0 or 1);
for (; setIndex < setsToUse; setIndex++)
{
Debug.Assert(needLoop);
Ldloca(textSpanLocal);
Ldloc(iLocal);
if (sets[setIndex].Distance != 0)
{
Ldc(sets[setIndex].Distance);
Add();
}
Call(s_spanGetItemMethod);
LdindU2();
EmitMatchCharacterClass(sets[setIndex].Set);
BrfalseFar(charNotInClassLabel);
}
// base.runtextpos = pos + i;
// return true;
Ldthis();
Ldloc(pos);
Ldloc(iLocal);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
if (needLoop)
{
MarkLabel(charNotInClassLabel);
// for (...; ...; i++)
Ldloc(iLocal);
Ldc(1);
Add();
Stloc(iLocal);
// for (...; i < span.Length - (minRequiredLength - 1); ...);
MarkLabel(checkSpanLengthLabel);
Ldloc(iLocal);
Ldloca(textSpanLocal);
Call(s_spanGetLengthMethod);
if (setsToUse > 1 || primarySet.Distance != 0)
{
Ldc(minRequiredLength - 1);
Sub();
}
BltFar(loopBody);
// base.runtextpos = inputSpan.Length;
// return false;
BrFar(returnFalse);
}
}
// Emits a right-to-left search for a set at a fixed position from the start of the pattern.
// (Currently that position will always be a distance of 0, meaning the start of the pattern itself.)
void EmitFixedSet_RightToLeft()
{
Debug.Assert(_regexTree.FindOptimizations.FixedDistanceSets is { Count: > 0 });
RegexFindOptimizations.FixedDistanceSet set = _regexTree.FindOptimizations.FixedDistanceSets![0];
Debug.Assert(set.Distance == 0);
if (set.Chars is { Length: 1 })
{
Debug.Assert(!set.Negated);
// pos = inputSpan.Slice(0, pos).LastIndexOf(set.Chars[0]);
Ldloca(inputSpan);
Ldc(0);
Ldloc(pos);
Call(s_spanSliceIntIntMethod);
Ldc(set.Chars[0]);
Call(s_spanLastIndexOfChar);
Stloc(pos);
// if (pos < 0) goto returnFalse;
Ldloc(pos);
Ldc(0);
BltFar(returnFalse);
// base.runtextpos = pos + 1;
// return true;
Ldthis();
Ldloc(pos);
Ldc(1);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
}
else
{
Label condition = DefineLabel();
// while ((uint)--pos < (uint)inputSpan.Length)
MarkLabel(condition);
Ldloc(pos);
Ldc(1);
Sub();
Stloc(pos);
Ldloc(pos);
Ldloca(inputSpan);
Call(s_spanGetLengthMethod);
BgeUnFar(returnFalse);
// if (!MatchCharacterClass(inputSpan[i], set.Set)) goto condition;
Ldloca(inputSpan);
Ldloc(pos);
Call(s_spanGetItemMethod);
LdindU2();
EmitMatchCharacterClass(set.Set);
Brfalse(condition);
// base.runtextpos = pos + 1;
// return true;
Ldthis();
Ldloc(pos);
Ldc(1);
Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
}
}
// Emits a search for a literal following a leading atomic single-character loop.
void EmitLiteralAfterAtomicLoop()
{
Debug.Assert(_regexTree.FindOptimizations.LiteralAfterLoop is not null);
(RegexNode LoopNode, (char Char, string? String, StringComparison StringComparison, char[]? Chars) Literal) target = _regexTree.FindOptimizations.LiteralAfterLoop.Value;
Debug.Assert(target.LoopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic);
Debug.Assert(target.LoopNode.N == int.MaxValue);
// while (true)
Label loopBody = DefineLabel();
Label loopEnd = DefineLabel();
MarkLabel(loopBody);
// ReadOnlySpan<char> slice = inputSpan.Slice(pos);
using RentedLocalBuilder slice = RentReadOnlySpanCharLocal();
Ldloca(inputSpan);
Ldloc(pos);
Call(s_spanSliceIntMethod);
Stloc(slice);
// Find the literal. If we can't find it, we're done searching.
// int i = slice.IndexOf(literal);
// if (i < 0) break;
using RentedLocalBuilder i = RentInt32Local();
Ldloc(slice);
if (target.Literal.String is string literalString)
{
Ldstr(literalString);
Call(s_stringAsSpanMethod);
if (target.Literal.StringComparison is StringComparison.OrdinalIgnoreCase)
{
Ldc((int)target.Literal.StringComparison);
Call(s_spanIndexOfSpanStringComparison);
}
else
{
Debug.Assert(target.Literal.StringComparison is StringComparison.Ordinal);
Call(s_spanIndexOfSpan);
}
}
else if (target.Literal.Chars is not char[] literalChars)
{
Ldc(target.Literal.Char);
Call(s_spanIndexOfChar);
}
else
{
switch (literalChars.Length)
{
case 2:
Ldc(literalChars[0]);
Ldc(literalChars[1]);
Call(s_spanIndexOfAnyCharChar);
break;
case 3:
Ldc(literalChars[0]);
Ldc(literalChars[1]);
Ldc(literalChars[2]);
Call(s_spanIndexOfAnyCharCharChar);
break;
default:
Ldstr(new string(literalChars));
Call(s_stringAsSpanMethod);
Call(s_spanIndexOfAnySpan);
break;
}
}
Stloc(i);
Ldloc(i);
Ldc(0);
BltFar(loopEnd);
// We found the literal. Walk backwards from it finding as many matches as we can against the loop.
// int prev = i;
using RentedLocalBuilder prev = RentInt32Local();
Ldloc(i);
Stloc(prev);
// while ((uint)--prev < (uint)slice.Length) && MatchCharClass(slice[prev]));
Label innerLoopBody = DefineLabel();
Label innerLoopEnd = DefineLabel();
MarkLabel(innerLoopBody);
Ldloc(prev);
Ldc(1);
Sub();
Stloc(prev);
Ldloc(prev);
Ldloca(slice);
Call(s_spanGetLengthMethod);
BgeUn(innerLoopEnd);
Ldloca(slice);
Ldloc(prev);
Call(s_spanGetItemMethod);
LdindU2();
EmitMatchCharacterClass(target.LoopNode.Str!);
BrtrueFar(innerLoopBody);
MarkLabel(innerLoopEnd);
if (target.LoopNode.M > 0)
{
// If we found fewer than needed, loop around to try again. The loop doesn't overlap with the literal,
// so we can start from after the last place the literal matched.
// if ((i - prev - 1) < target.LoopNode.M)
// {
// pos += i + 1;
// continue;
// }
Label metMinimum = DefineLabel();
Ldloc(i);
Ldloc(prev);
Sub();
Ldc(1);
Sub();
Ldc(target.LoopNode.M);
Bge(metMinimum);
Ldloc(pos);
Ldloc(i);
Add();
Ldc(1);
Add();
Stloc(pos);
BrFar(loopBody);
MarkLabel(metMinimum);
}
// We have a winner. The starting position is just after the last position that failed to match the loop.
// We also store the position after the loop into runtrackpos (an extra, unused field on RegexRunner) in order
// to communicate this position to the match algorithm such that it can skip the loop.
// base.runtextpos = pos + prev + 1;
Ldthis();
Ldloc(pos);
Ldloc(prev);
Add();
Ldc(1);
Add();
Stfld(s_runtextposField);
// base.runtrackpos = pos + i;
Ldthis();
Ldloc(pos);
Ldloc(i);
Add();
Stfld(s_runtrackposField);
// return true;
Ldc(1);
Ret();
// }
MarkLabel(loopEnd);
// base.runtextpos = inputSpan.Length;
// return false;
BrFar(returnFalse);
}
}