in src/Microsoft.Diagnostics.Runtime/Implementation/DACNameParser.cs [36:600]
/// <summary>
/// Parses a DAC type name and returns the name rebuilt from the type name segments discovered while parsing.
/// </summary>
/// <param name="name">The DAC type name to parse. May be <see langword="null"/> or empty.</param>
/// <returns>
/// <see langword="null"/> when <paramref name="name"/> is <see langword="null"/>. The original <paramref name="name"/> instance when it is
/// empty, when the parser determines the input can be returned as-is, or when parsing fails (including when an exception is thrown while
/// parsing). Otherwise, a new string built by appending every parsed top-level <c>TypeNameSegment</c> to a <see cref="StringBuilder"/>.
/// </returns>
public static string? Parse(string? name)
{
    if (name == null)
        return null;

    if (name.Length == 0)
        return name;

    try
    {
        // This is the primary method of the parser. It operates as a simple state machine storing information about types encountered as
        // they are encountered and back-propagating information to types as it is discovered.
        //
        // The BEST way to debug/understand this method is to simply step through it while watching genericArgs and nameSegments in the debugger watch window. The TypeNameSegment
        // has a DebuggerDisplayAttribute so you should see the state of the type as it goes through transitions (i.e. going from simple to generic with a known count of args, as
        // its unknown args are filled in seeing them appear in the debugger display string as we patch in the actual type names, etc...)
        ParsingState currentState = ParsingState.ParsingTypeName;

        // The name segment lists hold types as we are constructing them. These can be complete types (such as System.String) or partial types (like List<T> before we have parsed what T is)
        // or nested types (like the +Entry part of Dictionary<TKey, TEntry>+Entry) which may be later unified with the type they are nested in (we represent parses like Dictionary`2+Entry
        // as two TypeNameSegments (Dictionary`2 and Entry) and unify them later since when we extract Dictionary we don't yet know its generic arity or the fact it is the outer type in
        // a nested type).
        //
        // The nameSegments relate to top-level type names, the genericArgs relate to generic argument lists as we are parsing them. The genericArgs entries are back-propagated both to earlier
        // generic args as well as to types in the nameSegments list, as we complete the argument list parsing.
        List<TypeNameSegment>? nameSegments = null;
        List<TypeNameSegment>? genericArgs = null;

        // Local helper methods to help minimize code duplication. Both lists are created lazily; the helpers return the (now non-null) list so
        // callers can use it directly without suppressing nullable warnings.
        List<TypeNameSegment> EnsureNameSegmentList() => nameSegments ??= new List<TypeNameSegment>();

        List<TypeNameSegment> EnsureGenericArgList() => genericArgs ??= new List<TypeNameSegment>();

        int curPos = 0;
        int parsingNestedClassDepth = 0;
        int parsingGenericArgListDepth = 0;
        Stack<bool> isCurrentArgAssemblyQualified = new Stack<bool>();

        // Removes the entry for the generic arg we have finished with. An empty stack is tolerated (e.g. when resolving the outermost generic, or
        // when the array specifier handler already removed the entry), so a missing entry never aborts the parse with an exception.
        void PopCurrentArgAssemblyQualifiedEntry()
        {
            if (isCurrentArgAssemblyQualified.Count != 0)
                isCurrentArgAssemblyQualified.Pop();
        }

        // local type name parsing method that doesn't modify the currentState variable that drive the state machine. Used by a few of the typename parsing paths that need to
        // have knowledge of their context to operate properly, but don't want to duplicate this code. Returns a non-empty string in the special case where the input string
        // should be the output string, returns string.Empty in all other cases and expects callers to continue to parsing progress by setting currentState/curPos directly
        // or calling DetermineNextStateAndPos.
        string ParseTypeNameNoStateAdvance()
        {
            // We are parsing a type name, this is the initial state as well as one entered into every time we are extracting a single type name through the course of parsing.
            // This handles parsing nested types as well as generic param types.
            //
            // Parsing of type names is non-extractive, i.e. we don't substring the input string or create any new heap allocations to track them (apart from the two local
            // Lists), we simply remember the extents of the name (start/end).
            int start;
            (start, curPos) = GetTypeNameExtent(name, curPos, parsingGenericArgList: parsingGenericArgListDepth != 0);

            // Special case: Check if after parsing the type name we have exhausted the string, if so it means the input string is the output string, so just return it
            // without allocating a copy.
            if (ReturnOriginalDACString(curPos, name.Length, nameSegments))
                return name;

            bool typeIsNestedClass = (parsingNestedClassDepth != 0);
            if (parsingGenericArgListDepth == 0)
            {
                // We are parsing a top-level type name/sequence
                EnsureNameSegmentList().Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingArgDepth: 0));
            }
            else
            {
                // We are parsing a generic list (potentially nested lists in the case where a generic param is itself generic)
                EnsureGenericArgList().Add(new TypeNameSegment(name, (start, curPos), typeIsNestedClass, parsingGenericArgListDepth));
            }

            if (parsingNestedClassDepth != 0)
                parsingNestedClassDepth--;

            return string.Empty;
        }

        while (ShouldContinueParsing(currentState))
        {
            switch (currentState)
            {
                case ParsingState.ParsingTypeName:
                    {
                        string parsedResult = ParseTypeNameNoStateAdvance();
                        if (!string.IsNullOrEmpty(parsedResult))
                            return parsedResult;

                        // Two cases we need to look for, we just finished parsing the type name of an assembly qualified generic arg, in which case we need to transition into the ParsingGenericArgAssemblySpecifier state.
                        // The other is the case of non-traditional/obfuscated generic names. FSharp (and seemingly some obfuscators) will name generic types in ways that do not have arity specifiers. This can easily lead
                        // us to believe, when we encounter a '[' that we must be dealing with an array decl because we haven't yet seen an arity specifier. So we try to detect this below. First, if ANY args/typenames above
                        // us have unfulfilled generics it means we HAVE seen an arity specifier, so we aren't in this special case, so don't look further. If they pass that test then we need to make sure the next
                        // symbol after the [ isn't an ] or an , (as both of those represent sequences in array specifiers). If all those checks pass assume this is a non-traditional generic type and calculate the arity
                        // manually and parse the args.
                        //
                        // The type name may have consumed the rest of the input without qualifying for the 'return the original string' special case. There is
                        // no next character to inspect then, so skip the look-ahead rather than indexing past the end of the string.
                        if (curPos < name.Length)
                        {
                            if (name[curPos] == ArgSeparator)
                            {
                                if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek())
                                {
                                    currentState = ParsingState.ParsingGenericArgAssemblySpecifier;
                                    break;
                                }
                            }
                            else if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier)
                            {
                                if (!DoAnyArgsOrTypeNamesHaveUnfulfilledGenericArguments(nameSegments, genericArgs))
                                {
                                    // It's possible this is an FSharp or obfuscated generic name, lacking an arity specifier (e.g. something like Microsoft.FSharp.Collections.ArrayModule+Parallel+sortingFunc@2439-1[TKey,TValue]).
                                    // Or it could be an array decl, we will want to manually check for the former here (the latter will be correctly detected inside DetermineNextStateAndPos).
                                    if ((curPos != name.Length - 1) && (name[curPos + 1] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier && name[curPos + 1] != ArgSeparator))
                                    {
                                        // This looks like a generic specified without an arity specifier. So let's manually calculate the arity ourselves. It is a little tricky in that the argument list can itself have generics
                                        // in it, but essentially we just need to count the outermost commas, ignoring any commas in nested generic type decls, and apply that to the most recent argument/type name as its expected
                                        // generic param count then force ourselves into the ParsingGenericArgs state to parse the arg list.
                                        int genericArity = ManuallyCalculateArity(name, curPos + 1);

                                        List<TypeNameSegment>? targetList = ((genericArgs != null) && (genericArgs.Count != 0)) ? genericArgs : nameSegments;
                                        if (targetList != null)
                                        {
                                            // NOTE: TypeNameSegment is a struct to avoid heap allocations, that means we have to extract / modify / re-store to ensure the updated state gets back into whatever
                                            // list this came from.
                                            int targetIndex = targetList.Count - 1;
                                            TypeNameSegment seg = targetList[targetIndex];
                                            seg.SetExpectedGenericArgCount(genericArity);
                                            targetList[targetIndex] = seg;
                                        }

                                        currentState = ParsingState.ParsingGenericArgs;
                                        break;
                                    }
                                }
                            }
                        }

                        (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                        break;
                    }
                case ParsingState.ParsingAssemblyQualifiedGenericArgName:
                case ParsingState.ParsingNonAssemblyQualifiedGenericArgName:
                    {
                        // Generic params come in two flavors, one is assembly qualified, like this List`1[[System.Boolean, mscorlib]], the other is non-assembly qualified which will lack the
                        // [ and ] and just be a type name, like this List`1[TFoo]. Of course in multi argument generics these styles can be mixed and of course we can nest generics whose
                        // args are generic whose args are generic, etc... arbitrarily deeply. The primary tricky bit is when consuming a param from the input we need to consume the type name
                        // (in both cases) and potentially the trailing assembly name. We use isCurrentArgAssemblyQualified as a way to track whether the arg we are currently processing
                        // will have a trailing assembly name after it. We need a stack since we can, and frequently do, see generics whose args are generic (often numerous levels deep). Each
                        // arg on the descent of processing such types has its own unique 'is assembly qualified name' state.

                        // NOTE: We ignore the return value, the special case it handles isn't possible here in a well-formed name and we will only get well formed names from the DAC
                        ParseTypeNameNoStateAdvance();

                        isCurrentArgAssemblyQualified.Push(currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName);

                        // After a generic arg type name the only legit next tokens are:
                        //
                        // 1) An argument separator (,)
                        // 2) The end of the argument list (])
                        // 3) A generic arity specifier for this arg (`<some number>)
                        // 4) An array specifier ([)
                        //
                        // Anything else, including running out of input (curPos == name.Length), is an error
                        if (curPos == name.Length ||
                            (name[curPos] != ArgSeparator &&
                             name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier &&
                             name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier &&
                             name[curPos] != GenericAritySpecifier))
                        {
                            currentState = ParsingState.Error;
                            break;
                        }

                        // an argument can itself be generic, so we may encounter an arity specifier while parsing the arg name, make sure we don't get tripped up by that
                        if (name[curPos] == GenericAritySpecifier)
                        {
                            // NOTE: NOT done with this arg so leave our entry on the isCurrentArgAssemblyQualified in place
                            currentState = ParsingState.ParsingGenericArgCount;
                            curPos += 1;
                            break;
                        }

                        if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier)
                        {
                            // This has to be an array, if it were an assembly qualified name we would have first seen an ArgSeparator, if it were a generic arg list we would have first seen a GenericAritySpecifier
                            currentState = ParsingState.ParsingArraySpecifier;
                            break;
                        }

                        // Skip the assembly name portion if one exists
                        if (currentState == ParsingState.ParsingAssemblyQualifiedGenericArgName)
                        {
                            while (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier)
                            {
                                curPos++;
                            }

                            // Since we are now pointing at the end of the assembly qualified name (the ]), advance past it so the checks below will be correct for both
                            // assembly qualified and non-assembly qualified names
                            if (curPos != name.Length)
                            {
                                curPos++;
                            }
                        }

                        // We shouldn't have exhausted our input, if we have, fail
                        if (curPos == name.Length)
                        {
                            // We are done with this arg, so pop our entry
                            PopCurrentArgAssemblyQualifiedEntry();
                            currentState = ParsingState.Error;
                            break;
                        }

                        // We're done with the arg list, so just call into DetermineNextStateAndPos which will do the right thing
                        if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier)
                        {
                            // NOTE: We don't pop our entry because DetermineNextStateAndPos will put us into ResolveParsedGenericList, and we need to know
                            // that info in there. That code will clean up our entry when it finishes processing the arg.
                            (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                            break;
                        }

                        // We have encountered another argument, so figure out our new state and yield back to the main loop
                        if (name[curPos] == ArgSeparator)
                        {
                            // We are done with this arg, so pop our entry
                            PopCurrentArgAssemblyQualifiedEntry();
                            (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                            break;
                        }

                        // Should never get here
                        //
                        // We are done with this arg, so pop our entry
                        PopCurrentArgAssemblyQualifiedEntry();
                        currentState = ParsingState.Error;
                        break;
                    }
                case ParsingState.ParsingNestedClass:
                    {
                        // We are starting to parse a nested type name, just record the nested class depth (we have to handle multiple levels of nested classes), and
                        // transition back to the type name parsing state.
                        parsingNestedClassDepth++;
                        currentState = ParsingState.ParsingTypeName;
                        break;
                    }
                case ParsingState.ParsingGenericArgCount:
                    {
                        // Parse the arity of the generic type. Note: we do this 'in place' i.e. without extracting the count substring, it's unfortunate but int.Parse does not include
                        // an overload that operates in place based on start/length.
                        int genericArgCount;
                        (genericArgCount, curPos) = ParseGenericArityCountFromStringInPlace(name, curPos);

                        List<TypeNameSegment>? targetList = ((genericArgs != null) && (genericArgs.Count != 0)) ? genericArgs : nameSegments;
                        if (targetList != null)
                        {
                            // NOTE: TypeNameSegment is a struct to avoid heap allocations, that means we have to extract / modify / re-store to ensure the updated state gets back into whatever
                            // list this came from.
                            int targetIndex = targetList.Count - 1;
                            TypeNameSegment seg = targetList[targetIndex];
                            seg.SetExpectedGenericArgCount(genericArgCount);
                            targetList[targetIndex] = seg;
                        }
                        else
                        {
                            currentState = ParsingState.Error;
                            break;
                        }

                        // Double check that we aren't looking at non-closed generic type (see Github bug #897).
                        if (curPos != name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier && name[curPos] != NestedClassSpecifier)
                        {
                            int targetIndex = targetList.Count - 1;
                            TypeNameSegment seg = targetList[targetIndex];
                            seg.MarkAsNonClosedGenericType();
                            targetList[targetIndex] = seg;

                            if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek())
                            {
                                Debug.Assert(name[curPos] == ArgSeparator, $"{nameof(isCurrentArgAssemblyQualified)} is true but the non-closed generic we are processing was not followed by a '{ArgSeparator}'.");
                                if (name[curPos] != ArgSeparator)
                                {
                                    currentState = ParsingState.Error;
                                    break;
                                }

                                currentState = ParsingState.ParsingGenericArgAssemblySpecifier;
                                break;
                            }
                        }

                        (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                        break;
                    }
                case ParsingState.ParsingGenericArgAssemblySpecifier:
                    {
                        // Nothing to do here, really, just skip the assembly name specified in the generic arg type
                        while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier)
                            curPos++;

                        if (curPos != name.Length)
                            curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char

                        // Done with this arg, so clean up our stack
                        PopCurrentArgAssemblyQualifiedEntry();

                        (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                        break;
                    }
                case ParsingState.ParsingGenericArgs:
                    {
                        // Start parsing the list of generic types, this just entails marking that we are parsing a generic arg list. NOTE: to support nested generic arg lists we
                        // have to keep track of list count, not just a simple bool are/aren't parsing.
                        parsingGenericArgListDepth++;

                        // We look one character ahead to pick the arg flavor. If the input ends before that character the name is truncated/malformed, so fail
                        // explicitly instead of indexing past the end of the string.
                        if (curPos + 1 >= name.Length)
                        {
                            currentState = ParsingState.Error;
                            break;
                        }

                        if (name[curPos + 1] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier)
                        {
                            currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName;
                            curPos += 2;
                        }
                        else
                        {
                            currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName;
                            curPos += 1;
                        }

                        break;
                    }
                case ParsingState.ParsingArraySpecifier:
                    {
                        // Parse the array specifier, this mainly is to catch multi-dimensional arrays

                        // There is always at least a single dimension in arrays, every comma counts as one more
                        int arrayDimensions = 1;

                        // Calculate the array dimensions
                        while ((curPos < name.Length) && (name[curPos] != ']'))
                        {
                            if (name[curPos] == ArgSeparator)
                                arrayDimensions++;

                            curPos++;
                        }

                        // Consume the final ] of the array specifier, unless we are at the end of the string already
                        if (curPos != name.Length)
                            curPos++;

                        if (parsingGenericArgListDepth != 0 || nameSegments != null)
                        {
                            // NOTE: TypeNameSegment is a struct to avoid heap allocations, that means we have to extract / modify / re-store to ensure the updated state gets back into whatever
                            // list this came from.
                            List<TypeNameSegment>? targetList = parsingGenericArgListDepth != 0 ? genericArgs : nameSegments;
                            if (targetList != null)
                            {
                                int targetIndex = targetList.Count - 1;
                                TypeNameSegment targetSegment = targetList[targetIndex];
                                targetSegment.SetArrayDimensions(arrayDimensions);
                                targetList[targetIndex] = targetSegment;
                            }
                            else
                            {
                                currentState = ParsingState.Error;
                                break;
                            }

                            // NOTE: We have to speculatively determine our next state. This is because in the case that we have an array of array situation
                            // we need to leave our isCurrentArgAssemblyQualified entry on the stack, however if we do NOT have that situation then
                            // we may need to rerun DetermineNextStateAndPos after cleaning up a trailing assembly name if the current isCurrentArgAssemblyQualified
                            // is true.
                            ParsingState potentialNewState;
                            int potentialNewPos;
                            (potentialNewState, potentialNewPos) = DetermineNextStateAndPos(name, curPos);
                            if (potentialNewState != ParsingState.ParsingArraySpecifier && potentialNewState != ParsingState.Done)
                            {
                                if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek())
                                {
                                    // If we aren't still parsing an array specifier then we will need to clean up a trailing assembly name (if there is one), and redo
                                    // DetermineNextStateAndPos, since the presence of ', <some assembly name>]' will fool it into thinking we should be in a
                                    // ParsingNonAssemblyQualifiedGenericArgName state.
                                    while (curPos < name.Length && name[curPos] != GenericArgListAssemblyQualifiedTypeNameOrArrayEndSpecifier)
                                        curPos++;

                                    if (curPos != name.Length)
                                        curPos++; // we hit the ] which closes the fully qualified name, so advance to the next char

                                    (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                                }
                                else
                                {
                                    currentState = potentialNewState;
                                    curPos = potentialNewPos;
                                }

                                // Done with this arg
                                PopCurrentArgAssemblyQualifiedEntry();
                            }
                            else
                            {
                                currentState = potentialNewState;
                                curPos = potentialNewPos;
                            }

                            // NOTE(review): '&&' binds tighter than '||', so the Done check only applies when genericArgs is non-null but empty. The parentheses
                            // below spell out that existing evaluation order; the comment inside suggests '(null || empty) && Done' may have been intended - confirm.
                            if (genericArgs == null || (genericArgs.Count == 0 && currentState == ParsingState.Done))
                            {
                                // Special case: Return original string in cases like this:
                                //
                                // System.String[,,,] or System.Int32[][]
                                if (ReturnOriginalDACString(curPos, name.Length, nameSegments))
                                    return name;
                            }
                        }
                        else
                        {
                            Debug.Fail("Inside ParsingArraySpecifier but we don't think we are parsing generic params and have nothing on the top-level name segment list.");
                            currentState = ParsingState.Error;
                        }

                        break;
                    }
                case ParsingState.ResolveParsedGenericList:
                    {
                        // We are done with this level of arguments in terms of parsing, now we just have to apply them to the types they belong with (from previous parsing levels or the
                        // top-level).
                        parsingGenericArgListDepth--;

                        // Capture this before any entry is removed below, the ArgSeparator handling needs the state of the arg we just closed out
                        bool isArgAssemblyQualified = isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek();

                        if (genericArgs == null || genericArgs.Count == 0)
                        {
                            // Done with this arg so clean up its entry (if it still has one)
                            PopCurrentArgAssemblyQualifiedEntry();

                            // For top-level types with multiple-level generic arg lists (so a type with a generic arg which itself is a generic type) as we unwind the nested generic args
                            // lists we can end up with no more work to do upon exiting a level (because we already propagated the info backwards before getting here), in which case, do nothing.
                            (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                            break;
                        }

                        bool succeeded = ResolveParsedGenericList(parsingGenericArgListDepth, nameSegments, genericArgs);
                        if (!succeeded)
                        {
                            // Done with this arg so clean up its entry
                            PopCurrentArgAssemblyQualifiedEntry();
                            currentState = ParsingState.Error;
                            break;
                        }

                        if (curPos == name.Length)
                        {
                            // It's possible we are resolving the outermost generic, in which case the isCurrentArgAssemblyQualified stack will be empty; the helper guards against that
                            PopCurrentArgAssemblyQualifiedEntry();
                            currentState = ParsingState.Done;
                            break;
                        }

                        if (name[curPos] == ArgSeparator)
                        {
                            // If we are in the middle of parsing some number of possibly nested assembly qualified type names, and we close out a generic arg list and land on a , it either means
                            // it will be followed by the assembly name OR by another type name that itself is or is not qualified. So, account for both cases.
                            if (isArgAssemblyQualified)
                            {
                                // NOTE: We will pop isCurrentArgAssemblyQualified in the handler for ParsingGenericArgAssemblySpecifier
                                //
                                // Should be an assembly name, so skip it
                                currentState = ParsingState.ParsingGenericArgAssemblySpecifier;
                                break;
                            }

                            // Done with this arg so clean up its entry
                            PopCurrentArgAssemblyQualifiedEntry();

                            // move past the comma
                            curPos++;

                            // Advance past any whitespace in the param list
                            int potentiallyNewPos = MoveCurPosPastWhitespaceOrFail(name, curPos);
                            if (potentiallyNewPos < 0)
                            {
                                currentState = ParsingState.Error;
                                break;
                            }

                            curPos = potentiallyNewPos;

                            // We are moving on from the resolved generic and we have encountered a comma. We popped the state of the generic argument already, but it's possible
                            // that beyond this comma lies the assembly qualified name of the previous param (which could be a generic type that the just resolved generic was a param for).
                            // So if we see there is another param still (isCurrentArgAssemblyQualified.Count != 0) and that param is assembly qualified (isCurrentArgAssemblyQualified.Peek())
                            // then we need to enter a state to consume that assembly name
                            if (isCurrentArgAssemblyQualified.Count != 0 && isCurrentArgAssemblyQualified.Peek())
                            {
                                // NOTE: The handler for ParsingGenericArgAssemblySpecifier will handle popping the isCurrentArgAssemblyQualified state for the arg
                                // it will be working on.
                                currentState = ParsingState.ParsingGenericArgAssemblySpecifier;
                                break;
                            }

                            // we either have another qualified name, or a non-qualified one
                            if (name[curPos] == GenericArgListAssemblyQualifiedTypeNameOrArrayStartSpecifier)
                            {
                                currentState = ParsingState.ParsingAssemblyQualifiedGenericArgName;
                                curPos += 1;
                                break;
                            }
                            else
                            {
                                currentState = ParsingState.ParsingNonAssemblyQualifiedGenericArgName;
                                break;
                            }
                        }

                        // Done with this arg so clean up its entry
                        PopCurrentArgAssemblyQualifiedEntry();

                        (currentState, curPos) = DetermineNextStateAndPos(name, curPos);
                        break;
                    }
            }
        }

        if (currentState == ParsingState.Error || nameSegments == null)
        {
            // If we have encountered something we failed on, return the DAC string so at least there is SOMETHING
            Debug.WriteLine($"Failed Parsing DAC Name: {name}");
            return name;
        }

        // Build the final result from all the type name segments we have
        StringBuilder result = new();
        foreach (TypeNameSegment segment in nameSegments)
            segment.ToString(result);

        return result.ToString();
    }
    catch (Exception e)
    {
        // Deliberate best-effort: any unexpected failure while parsing falls back to the unmodified DAC string rather than surfacing to the caller
        Debug.WriteLine($"Encountered an exception while parsing name {name}: {e}");
        return name;
    }
}