src/Shared/FileMatcher.cs (1,175 lines of code) (raw):

// Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. //----------------------------------------------------------------------- // </copyright> // <summary>Functions for matching file names with patterns.</summary> //----------------------------------------------------------------------- using System; using System.IO; using System.Text; using System.Diagnostics; using System.Linq; using System.Text.RegularExpressions; using System.Threading; using System.Globalization; using System.Collections.Generic; namespace Microsoft.Build.Shared { /// <summary> /// Functions for matching file names with patterns. /// </summary> internal static class FileMatcher { private const string recursiveDirectoryMatch = "**"; private const string dotdot = ".."; private static readonly string s_directorySeparator = new string(Path.DirectorySeparatorChar, 1); private static readonly string s_thisDirectory = "." + s_directorySeparator; private static readonly char[] s_wildcardCharacters = { '*', '?' 
}; private static readonly char[] s_wildcardAndSemicolonCharacters = { '*', '?', ';' }; // on OSX both System.IO.Path separators are '/', so we have to use the literals internal static readonly char[] directorySeparatorCharacters = { '/', '\\' }; internal static readonly string[] directorySeparatorStrings = directorySeparatorCharacters.Select(c => c.ToString()).ToArray(); internal static readonly GetFileSystemEntries s_defaultGetFileSystemEntries = new GetFileSystemEntries(GetAccessibleFileSystemEntries); private static readonly DirectoryExists s_defaultDirectoryExists = new DirectoryExists(Directory.Exists); /// <summary> /// Cache of the list of invalid path characters, because this method returns a clone (for security reasons) /// which can cause significant transient allocations /// </summary> private static readonly char[] s_invalidPathChars = Path.GetInvalidPathChars(); internal const RegexOptions DefaultRegexOptions = RegexOptions.IgnoreCase; /// <summary> /// The type of entity that GetFileSystemEntries should return. /// </summary> internal enum FileSystemEntity { Files, Directories, FilesAndDirectories }; /// <summary> /// Delegate defines the GetFileSystemEntries signature that GetLongPathName uses /// to enumerate directories on the file system. /// </summary> /// <param name="entityType">Files, Directories, or Files and Directories</param> /// <param name="path">The path to search.</param> /// <param name="pattern">The file pattern.</param> /// <param name="projectDirectory"></param> /// <param name="stripProjectDirectory"></param> /// <returns>The array of filesystem entries.</returns> internal delegate string[] GetFileSystemEntries(FileSystemEntity entityType, string path, string pattern, string projectDirectory, bool stripProjectDirectory); /// <summary> /// Determines whether the given path has any wild card characters. 
/// </summary>
        /// <param name="filespec">The file spec to inspect.</param>
        /// <returns>True if the spec contains at least one of the wildcard characters ('*' or '?').</returns>
        internal static bool HasWildcards(string filespec)
        {
            int firstWildcard = filespec.IndexOfAny(s_wildcardCharacters);
            return firstWildcard >= 0;
        }

        /// <summary>
        /// Determines whether the given path has any wild card characters, any semicolons,
        /// or one of the sequences "$(" or "@(".
        /// </summary>
        /// <param name="filespec">The file spec to inspect.</param>
        /// <returns>True if any of those markers is present.</returns>
        internal static bool HasWildcardsSemicolonItemOrPropertyReferences(string filespec)
        {
            if (filespec.IndexOfAny(s_wildcardAndSemicolonCharacters) >= 0)
            {
                return true;
            }

            return filespec.Contains("$(") || filespec.Contains("@(");
        }

        /// <summary>
        /// Get the files and/or folders specified by the given path and pattern.
        /// The path is first passed through FileUtilities.FixFilePath, then the request is
        /// dispatched to the helper that matches <paramref name="entityType"/>.
        /// </summary>
        /// <param name="entityType">Whether Files, Directories or both.</param>
        /// <param name="path">The path to search.</param>
        /// <param name="pattern">The pattern to search.</param>
        /// <param name="projectDirectory">The directory for the project within which the call is made (only forwarded for Files).</param>
        /// <param name="stripProjectDirectory">If true the project directory should be stripped (only forwarded for Files).</param>
        /// <returns>The entries found by the selected helper.</returns>
        private static string[] GetAccessibleFileSystemEntries(FileSystemEntity entityType, string path, string pattern, string projectDirectory, bool stripProjectDirectory)
        {
            path = FileUtilities.FixFilePath(path);

            switch (entityType)
            {
                case FileSystemEntity.Files:
                    return GetAccessibleFiles(path, pattern, projectDirectory, stripProjectDirectory);

                case FileSystemEntity.Directories:
                    return GetAccessibleDirectories(path, pattern);

                case FileSystemEntity.FilesAndDirectories:
                    return GetAccessibleFilesAndDirectories(path, pattern);

                default:
                    ErrorUtilities.VerifyThrow(false, "Unexpected filesystem entity type.");
                    return null;
            }
        }

        /// <summary>
        /// Returns an array of file system entries matching the specified search criteria. Inaccessible or non-existent file
        /// system entries are skipped.
/// </summary>
        /// <param name="path">Directory to enumerate.</param>
        /// <param name="pattern">Search pattern handed to Directory.GetFileSystemEntries.</param>
        /// <returns>Array of matching file system entries (can be empty).</returns>
        private static string[] GetAccessibleFilesAndDirectories(string path, string pattern)
        {
            if (!Directory.Exists(path))
            {
                return new string[0];
            }

            try
            {
                return Directory.GetFileSystemEntries(path, pattern) ?? new string[0];
            }
            catch (UnauthorizedAccessException)
            {
                // Denied by OS security: report nothing instead of failing.
                return new string[0];
            }
            catch (System.Security.SecurityException)
            {
                // Denied by code access security: report nothing instead of failing.
                return new string[0];
            }
        }

        /// <summary>
        /// Wrapper around Directory.GetFiles(...) that yields an empty array, rather than
        /// throwing, when access is denied.
        ///
        /// Any other exception propagates to the caller.
        /// </summary>
        /// <param name="path">The path; an empty string means the current directory.</param>
        /// <param name="filespec">The pattern, or null to list every file.</param>
        /// <param name="projectDirectory">The project directory</param>
        /// <param name="stripProjectDirectory">When true, the project directory prefix is removed from the results.</param>
        /// <returns>Files that can be accessed.</returns>
        private static string[] GetAccessibleFiles
        (
            string path,
            string filespec,     // can be null
            string projectDirectory,
            bool stripProjectDirectory
        )
        {
            try
            {
                // No path given: search the current directory.
                string searchRoot = path.Length != 0 ? path : s_thisDirectory;

                string[] found;
                if (filespec != null)
                {
                    found = Directory.GetFiles(searchRoot, filespec);
                }
                else
                {
                    found = Directory.GetFiles(searchRoot);
                }

                if (stripProjectDirectory)
                {
                    // Items based on a relative path must not carry the project directory prefix.
                    RemoveProjectDirectory(found, projectDirectory);
                }
                else if (!path.StartsWith(s_thisDirectory, StringComparison.Ordinal))
                {
                    // Results for the current directory come back prefixed with ".\"; the IDE
                    // expects bare file names, so drop the prefix unless the caller asked for it.
                    RemoveInitialDotSlash(found);
                }

                return found;
            }
            catch (System.Security.SecurityException)
            {
                // Code access security.
                return new string[0];
            }
            catch (System.UnauthorizedAccessException)
            {
                // OS security.
                return new string[0];
            }
        }

        /// <summary>
        /// Wrapper around Directory.GetDirectories(...) that yields an empty array, rather than
        /// throwing, when access is denied.
        ///
        /// Any other exception propagates to the caller.
        /// </summary>
        /// <param name="path">The path; an empty string means the current directory.</param>
        /// <param name="pattern">Pattern to match, or null to list every subdirectory.</param>
        /// <returns>Accessible directories.</returns>
        private static string[] GetAccessibleDirectories
        (
            string path,
            string pattern
        )
        {
            try
            {
                string searchRoot = path.Length != 0 ? path : s_thisDirectory;

                string[] found = pattern != null
                    ? Directory.GetDirectories(searchRoot, pattern)
                    : Directory.GetDirectories(searchRoot);

                // Subdirectories of the current directory come back prefixed with ".\"; the IDE
                // expects bare names, so drop the prefix unless the caller asked for it.
                if (!path.StartsWith(s_thisDirectory, StringComparison.Ordinal))
                {
                    RemoveInitialDotSlash(found);
                }

                return found;
            }
            catch (System.Security.SecurityException)
            {
                // Code access security.
                return new string[0];
            }
            catch (System.UnauthorizedAccessException)
            {
                // OS security.
                return new string[0];
            }
        }

        /// <summary>
        /// Given a path name, get its long version, enumerating the disk with the default
        /// file system delegate.
        /// </summary>
        /// <param name="path">The short path.</param>
        /// <returns>The long path.</returns>
        internal static string GetLongPathName(string path) => GetLongPathName(path, s_defaultGetFileSystemEntries);

        /// <summary>
        /// Given a path name, get its long version.
/// </summary>
        /// <remarks>
        /// Paths without a '~' are returned untouched. A UNC path that has no share component
        /// (fewer than four parts after splitting on directory separators) is also returned
        /// untouched, because there is no root under which to expand anything.
        /// </remarks>
        /// <param name="path">The short path.</param>
        /// <param name="getFileSystemEntries">Delegate used to enumerate the file system.</param>
        /// <returns>The long path.</returns>
        internal static string GetLongPathName
        (
            string path,
            GetFileSystemEntries getFileSystemEntries
        )
        {
            if (path.IndexOf("~", StringComparison.Ordinal) == -1)
            {
                // A path with no '~' must not be a short name.
                return path;
            }

            ErrorUtilities.VerifyThrow(!HasWildcards(path), "GetLongPathName does not handle wildcards and was passed '{0}'.", path);

            string[] parts = path.Split(directorySeparatorCharacters);
            string pathRoot;
            int startingElement;

            bool isUnc = path.StartsWith(s_directorySeparator + s_directorySeparator, StringComparison.Ordinal);
            if (isUnc)
            {
                // parts[0] and parts[1] are the empty strings in front of the two leading
                // separators; the root needs both a server (parts[2]) and a share (parts[3]).
                if (parts.Length < 4)
                {
                    // e.g. "\\serv~1": indexing parts[3] would throw IndexOutOfRangeException.
                    return path;
                }

                pathRoot = s_directorySeparator + s_directorySeparator;
                pathRoot += parts[2];
                pathRoot += s_directorySeparator;
                pathRoot += parts[3];
                pathRoot += s_directorySeparator;
                startingElement = 4;
            }
            else if (path.Length > 2 && path[1] == ':')
            {
                // Drive-rooted, so not relative.
                pathRoot = parts[0] + s_directorySeparator;
                startingElement = 1;
            }
            else
            {
                // Relative.
                pathRoot = String.Empty;
                startingElement = 0;
            }

            // Build up an array of parts. These elements may be "" if there are
            // extra slashes.
            string[] longParts = new string[parts.Length - startingElement];

            string longPath = pathRoot;
            for (int i = startingElement; i < parts.Length; ++i)
            {
                // If there is a zero-length part, then that means there was an extra slash.
                if (parts[i].Length == 0)
                {
                    longParts[i - startingElement] = String.Empty;
                }
                else if (parts[i].IndexOf("~", StringComparison.Ordinal) == -1)
                {
                    // If there's no ~, don't hit the disk.
                    longParts[i - startingElement] = parts[i];
                    longPath = Path.Combine(longPath, parts[i]);
                }
                else
                {
                    // getFileSystemEntries(...) returns an empty array if longPath doesn't exist.
                    string[] entries = getFileSystemEntries(FileSystemEntity.FilesAndDirectories, longPath, parts[i], null, false);

                    if (0 == entries.Length)
                    {
                        // The next part doesn't exist. Therefore, no more of the path will exist.
                        // Just return the rest.
                        for (int j = i; j < parts.Length; ++j)
                        {
                            longParts[j - startingElement] = parts[j];
                        }
                        break;
                    }

                    // Since we know there are no wild cards, this should be length one.
                    ErrorUtilities.VerifyThrow(entries.Length == 1,
                        "Unexpected number of entries ({3}) found when enumerating '{0}' under '{1}'. Original path was '{2}'",
                        parts[i], longPath, path, entries.Length);

                    // Entries[0] contains the full path.
                    longPath = entries[0];

                    // We just want the trailing node.
                    longParts[i - startingElement] = Path.GetFileName(longPath);
                }
            }

            return pathRoot + String.Join(s_directorySeparator, longParts);
        }

        /// <summary>
        /// Given a filespec, split it into left-most 'fixed' dir part, middle 'wildcard' dir part, and filename part.
        /// The filename part may have wildcard characters in it.
        /// The fixed directory part is additionally expanded with GetLongPathName.
        /// </summary>
        /// <param name="filespec">The filespec to be decomposed.</param>
        /// <param name="fixedDirectoryPart">Receives the fixed directory part.</param>
        /// <param name="wildcardDirectoryPart">The wildcard directory part.</param>
        /// <param name="filenamePart">The filename part.</param>
        /// <param name="getFileSystemEntries">Delegate used by GetLongPathName to enumerate the file system.</param>
        internal static void SplitFileSpec
        (
            string filespec,
            out string fixedDirectoryPart,
            out string wildcardDirectoryPart,
            out string filenamePart,
            GetFileSystemEntries getFileSystemEntries
        )
        {
            PreprocessFileSpecForSplitting
            (
                filespec,
                out fixedDirectoryPart,
                out wildcardDirectoryPart,
                out filenamePart
            );

            /*
             * Handle the special case in which filenamePart is '**'.
             * In this case, filenamePart becomes '*.*' and the '**' is appended
             * to the end of the wildcardDirectory part.
             * This is so that later regular expression matching can accurately
             * pull out the different parts (fixed, wildcard, filename) of given
             * file specs.
             */
            if (recursiveDirectoryMatch == filenamePart)
            {
                wildcardDirectoryPart += recursiveDirectoryMatch;
                wildcardDirectoryPart += s_directorySeparator;
                filenamePart = "*.*";
            }

            fixedDirectoryPart = FileMatcher.GetLongPathName(fixedDirectoryPart, getFileSystemEntries);
        }

        /// <summary>
        /// Do most of the grunt work of splitting the filespec into parts.
        /// Does not handle post-processing common to the different matching
        /// paths.
        /// </summary>
        /// <param name="filespec">The filespec to be decomposed.</param>
        /// <param name="fixedDirectoryPart">Receives the fixed directory part.</param>
        /// <param name="wildcardDirectoryPart">The wildcard directory part.</param>
        /// <param name="filenamePart">The filename part.</param>
        private static void PreprocessFileSpecForSplitting
        (
            string filespec,
            out string fixedDirectoryPart,
            out string wildcardDirectoryPart,
            out string filenamePart
        )
        {
            filespec = FileUtilities.FixFilePath(filespec);
            int indexOfLastDirectorySeparator = filespec.LastIndexOfAny(directorySeparatorCharacters);
            if (-1 == indexOfLastDirectorySeparator)
            {
                /*
                 * No dir separator found. This is either this form,
                 *
                 *      Source.cs
                 *      *.cs
                 *
                 *  or this form,
                 *
                 *     **
                 */
                fixedDirectoryPart = String.Empty;
                wildcardDirectoryPart = String.Empty;
                filenamePart = filespec;
                return;
            }

            int indexOfFirstWildcard = filespec.IndexOfAny(s_wildcardCharacters);
            if
            (
                -1 == indexOfFirstWildcard
                || indexOfFirstWildcard > indexOfLastDirectorySeparator
            )
            {
                /*
                 * There is at least one dir separator, but either there is no wild card or the
                 * wildcard is after the dir separator.
                 *
                 * The form is one of these:
                 *
                 *      dir1\Source.cs
                 *      dir1\*.cs
                 *
                 * Where the trailing spec is meant to be a filename. Or,
                 *
                 *      dir1\**
                 *
                 * Where the trailing spec is meant to be any file recursively.
                 */

                // We know the fixed directory part now.
                fixedDirectoryPart = filespec.Substring(0, indexOfLastDirectorySeparator + 1);
                wildcardDirectoryPart = String.Empty;
                filenamePart = filespec.Substring(indexOfLastDirectorySeparator + 1);
                return;
            }

            /*
             * Find the separator right before the first wildcard.
             */
            string filespecLeftOfWildcard = filespec.Substring(0, indexOfFirstWildcard);
            int indexOfSeparatorBeforeWildCard = filespecLeftOfWildcard.LastIndexOfAny(directorySeparatorCharacters);
            if (-1 == indexOfSeparatorBeforeWildCard)
            {
                /*
                 * There is no separator before the wildcard, so the form is like this:
                 *
                 *      dir?\Source.cs
                 *
                 * or this,
                 *
                 *      dir?\**
                 */
                fixedDirectoryPart = String.Empty;
                wildcardDirectoryPart = filespec.Substring(0, indexOfLastDirectorySeparator + 1);
                filenamePart = filespec.Substring(indexOfLastDirectorySeparator + 1);
                return;
            }

            /*
             * There is at least one wildcard and one dir separator, split parts out.
             */
            fixedDirectoryPart = filespec.Substring(0, indexOfSeparatorBeforeWildCard + 1);
            wildcardDirectoryPart = filespec.Substring(indexOfSeparatorBeforeWildCard + 1, indexOfLastDirectorySeparator - indexOfSeparatorBeforeWildCard);
            filenamePart = filespec.Substring(indexOfLastDirectorySeparator + 1);
        }

        /// <summary>
        /// Removes the leading ".\" from all of the paths in the array.
/// </summary>
        /// <param name="paths">Paths to remove .\ from; modified in place.</param>
        private static void RemoveInitialDotSlash
        (
            string[] paths
        )
        {
            for (int i = 0; i < paths.Length; i++)
            {
                if (paths[i].StartsWith(s_thisDirectory, StringComparison.Ordinal))
                {
                    // Strip exactly the prefix that was matched ("." plus the separator).
                    paths[i] = paths[i].Substring(s_thisDirectory.Length);
                }
            }
        }

        /// <summary>
        /// Checks if the char is a DirectorySeparatorChar or a AltDirectorySeparatorChar
        /// </summary>
        /// <param name="c">The character to test.</param>
        /// <returns>True if the character equals either of the two Path separator characters.</returns>
        internal static bool IsDirectorySeparator(char c)
        {
            return (c == Path.DirectorySeparatorChar || c == Path.AltDirectorySeparatorChar);
        }

        /// <summary>
        /// Removes the project directory prefix from each path, converting it back to a relative path.
        /// A path is only shortened when it starts with the project directory (ordinal comparison)
        /// and the prefix ends on a directory boundary; other paths are left as they are.
        /// </summary>
        /// <param name="paths">Paths to remove the project directory from; modified in place.</param>
        /// <param name="projectDirectory">The prefix to strip. When null or empty there is nothing to strip and the paths are left untouched.</param>
        internal static void RemoveProjectDirectory
        (
            string[] paths,
            string projectDirectory
        )
        {
            // Indexing the last character of an empty (or null) project directory would throw;
            // with no prefix to remove, the paths are already in their final form.
            if (projectDirectory == null || projectDirectory.Length == 0)
            {
                return;
            }

            bool directoryLastCharIsSeparator = IsDirectorySeparator(projectDirectory[projectDirectory.Length - 1]);
            for (int i = 0; i < paths.Length; i++)
            {
                if (paths[i].StartsWith(projectDirectory, StringComparison.Ordinal))
                {
                    // If the project directory did not end in a slash we need to check to see if the next char in the path is a slash
                    if (!directoryLastCharIsSeparator)
                    {
                        // If the next char after the project directory is not a slash, skip this path
                        if (paths[i].Length <= projectDirectory.Length || !IsDirectorySeparator(paths[i][projectDirectory.Length]))
                        {
                            continue;
                        }

                        // Drop the prefix together with the separator that follows it.
                        paths[i] = paths[i].Substring(projectDirectory.Length + 1);
                    }
                    else
                    {
                        paths[i] = paths[i].Substring(projectDirectory.Length);
                    }
                }
            }
        }

        /// <summary>
        /// Result of examining one directory level during the recursive search.
        /// </summary>
        struct RecursiveStepResult
        {
            /// <summary>Files found at this level; left null when files were not considered.</summary>
            public string[] Files;

            /// <summary>Subdirectories to descend into; left null when no recursion is needed.</summary>
            public string[] Subdirs;

            /// <summary>The wildcard directory portion that remains for the subdirectories.</summary>
            public string RemainingWildcardDirectory;
        }

        /// <summary>
        /// Immutable description of what a search is looking for.
        /// </summary>
        class FilesSearchData
        {
            public FilesSearchData(
                string filespec,                // can be null
                int extensionLengthToEnforce,   // only relevant when filespec is not null
                Regex regexFileMatch,           // can be null
                bool needsRecursion
                )
            {
                Filespec = filespec;
                ExtensionLengthToEnforce = extensionLengthToEnforce;
                RegexFileMatch = regexFileMatch;
                NeedsRecursion = needsRecursion;
            }

            /// <summary>
            /// The filespec.
            /// </summary>
            public string Filespec { get; }

            /// <summary>
            /// Extension length that matching files must have; only relevant when Filespec is not null.
            /// </summary>
            public int ExtensionLengthToEnforce { get; }

            /// <summary>
            /// Wild-card matching.
            /// </summary>
            public Regex RegexFileMatch { get; }

            /// <summary>
            /// If true, then recursion is required.
            /// </summary>
            public bool NeedsRecursion { get; }
        }

        /// <summary>
        /// The position of a search within the directory tree.
        /// </summary>
        struct RecursionState
        {
            /// <summary>
            /// The directory to search in
            /// </summary>
            public string BaseDirectory;

            /// <summary>
            /// The remaining, wildcard part of the directory.
            /// </summary>
            public string RemainingWildcardDirectory;

            /// <summary>
            /// Data about a search that does not change as the search recursively traverses directories
            /// </summary>
            public FilesSearchData SearchData;
        }

        /// <summary>
        /// Get all files that match either the file-spec or the regular expression.
        /// </summary>
        /// <param name="listOfFiles">List of files that gets populated.</param>
        /// <param name="recursionState">Information about the search</param>
        /// <param name="projectDirectory"></param>
        /// <param name="stripProjectDirectory"></param>
        /// <param name="getFileSystemEntries">Delegate.</param>
        /// <param name="searchesToExclude">Patterns to exclude from the results</param>
        /// <param name="searchesToExcludeInSubdirs">exclude patterns that might activate farther down the directory tree.
/// Keys assume paths are normalized with forward slashes and no trailing slashes</param>
        private static void GetFilesRecursive
        (
            IList<string> listOfFiles,
            RecursionState recursionState,
            string projectDirectory,
            bool stripProjectDirectory,
            GetFileSystemEntries getFileSystemEntries,
            IList<RecursionState> searchesToExclude,
            Dictionary<string, List<RecursionState>> searchesToExcludeInSubdirs
        )
        {
            FilesSearchData includeData = recursionState.SearchData;

            ErrorUtilities.VerifyThrow(includeData.Filespec == null || includeData.RegexFileMatch == null,
                "File-spec overrides the regular expression -- pass null for file-spec if you want to use the regular expression.");
            ErrorUtilities.VerifyThrow(includeData.Filespec != null || includeData.RegexFileMatch != null,
                "Need either a file-spec or a regular expression to match files.");
            ErrorUtilities.VerifyThrow(recursionState.RemainingWildcardDirectory != null, "Expected non-null remaning wildcard directory.");

            bool haveExcludes = searchesToExclude != null;

            // If one of the exclude searches is guaranteed to cover everything this search could
            // return, every result would be thrown away, so stop before touching the disk.
            if (haveExcludes)
            {
                foreach (RecursionState excluded in searchesToExclude)
                {
                    // Every exclude search is expected to share the include search's base directory.
                    Debug.Assert(FileUtilities.PathsEqual(excluded.BaseDirectory, recursionState.BaseDirectory), "Expected exclude search base directory to match include search base directory");

                    FilesSearchData excludeData = excluded.SearchData;

                    // Only filespec-based (not regex-based) excludes can be reasoned about here.
                    if (excludeData.Filespec == null)
                    {
                        continue;
                    }

                    // The exclude must cover the same remaining wildcard directories.
                    if (!FileUtilities.PathsEqual(excluded.RemainingWildcardDirectory, recursionState.RemainingWildcardDirectory))
                    {
                        continue;
                    }

                    bool excludesEveryFileName = excludeData.Filespec == "*" || excludeData.Filespec == "*.*";
                    bool excludesSamePattern =
                        excludeData.Filespec == includeData.Filespec &&
                        excludeData.ExtensionLengthToEnforce == includeData.ExtensionLengthToEnforce;

                    if (excludesEveryFileName || excludesSamePattern)
                    {
                        // Nothing found by this search would survive the exclusion.
                        return;
                    }
                }
            }

            RecursiveStepResult includeStep = GetFilesRecursiveStep(
                recursionState,
                projectDirectory,
                stripProjectDirectory,
                getFileSystemEntries);

            // Run the same single step for every exclude search so the results can be compared.
            RecursiveStepResult[] excludeSteps = null;
            if (haveExcludes)
            {
                excludeSteps = new RecursiveStepResult[searchesToExclude.Count];
                for (int e = 0; e < excludeSteps.Length; e++)
                {
                    excludeSteps[e] = GetFilesRecursiveStep(
                        searchesToExclude[e],
                        projectDirectory,
                        stripProjectDirectory,
                        getFileSystemEntries);
                }
            }

            if (includeStep.Files != null)
            {
                HashSet<string> excludedFiles = null;
                if (excludeSteps != null)
                {
                    excludedFiles = new HashSet<string>();
                    foreach (RecursiveStepResult excludeStep in excludeSteps)
                    {
                        if (excludeStep.Files != null)
                        {
                            excludedFiles.UnionWith(excludeStep.Files);
                        }
                    }
                }

                foreach (string candidate in includeStep.Files)
                {
                    bool isExcluded = excludedFiles != null && excludedFiles.Contains(candidate);
                    if (!isExcluded)
                    {
                        listOfFiles.Add(candidate);
                    }
                }
            }

            if (includeStep.Subdirs == null)
            {
                return;
            }

            foreach (string subdir in includeStep.Subdirs)
            {
                // RecursionState is a struct, so this assignment makes a copy.
                RecursionState childState = recursionState;
                childState.BaseDirectory = subdir;
                childState.RemainingWildcardDirectory = includeStep.RemainingWildcardDirectory;

                // Carry forward only those excludes that also descend into this subdirectory.
                List<RecursionState> childExcludes = null;
                if (excludeSteps != null)
                {
                    childExcludes = new List<RecursionState>();
                    for (int e = 0; e < excludeSteps.Length; e++)
                    {
                        RecursiveStepResult excludeStep = excludeSteps[e];
                        if (excludeStep.Subdirs == null)
                        {
                            continue;
                        }

                        if (excludeStep.Subdirs.Any(excludedDir => FileUtilities.PathsEqual(excludedDir, subdir)))
                        {
                            RecursionState childExclude = searchesToExclude[e];
                            childExclude.BaseDirectory = subdir;
                            childExclude.RemainingWildcardDirectory = excludeStep.RemainingWildcardDirectory;
                            childExcludes.Add(childExclude);
                        }
                    }
                }

                if (searchesToExcludeInSubdirs != null)
                {
                    // The normalization fixes https://github.com/Microsoft/msbuild/issues/917
                    // and is a partial fix for https://github.com/Microsoft/msbuild/issues/724
                    List<RecursionState> activatedHere;
                    if (searchesToExcludeInSubdirs.TryGetValue(subdir.NormalizeForPathComparison(), out activatedHere))
                    {
                        // This subdirectory is the base directory of these exclusions, so from
                        // here on they take part as ordinary exclude searches.
                        if (childExcludes == null)
                        {
                            childExcludes = new List<RecursionState>();
                        }

                        childExcludes.AddRange(activatedHere);
                    }
                }

                // NOTE(review): the comment that used to sit here said the project directory should
                // never be stripped from the leaves, yet stripProjectDirectory is forwarded unchanged
                // -- confirm which is intended.
                GetFilesRecursive(
                    listOfFiles,
                    childState,
                    projectDirectory,
                    stripProjectDirectory,
                    getFileSystemEntries,
                    childExcludes,
                    searchesToExcludeInSubdirs);
            }
        }

        /// <summary>
        /// Performs one level of the recursive search: collects the matching files of the base
        /// directory (when files are to be considered at this level) and determines which
        /// subdirectories to visit next together with the wildcard directory that remains for them.
        /// </summary>
        /// <param name="recursionState">Where the search currently is and what it looks for.</param>
        /// <param name="projectDirectory">Forwarded to the delegate when enumerating files.</param>
        /// <param name="stripProjectDirectory">Forwarded to the delegate when enumerating files.</param>
        /// <param name="getFileSystemEntries">Delegate used to enumerate the file system.</param>
        /// <returns>The files, subdirectories and remaining wildcard directory for this level.</returns>
        private static RecursiveStepResult GetFilesRecursiveStep
        (
            RecursionState recursionState,
            string projectDirectory,
            bool stripProjectDirectory,
            GetFileSystemEntries getFileSystemEntries
        )
        {
            RecursiveStepResult result = new RecursiveStepResult();
            FilesSearchData search = recursionState.SearchData;
            string remainingWildcard = recursionState.RemainingWildcardDirectory;

            /*
             * Get the matching files.
             *
             * Files are only considered when the wildcard directory elements are used up, or
             * when the remainder begins with "**" (everything below is matched recursively).
             */
            bool considerFiles =
                remainingWildcard.Length == 0 ||
                remainingWildcard.IndexOf(recursiveDirectoryMatch, StringComparison.Ordinal) == 0;

            if (considerFiles)
            {
                string[] candidates = getFileSystemEntries(FileSystemEntity.Files, recursionState.BaseDirectory,
                    search.Filespec, projectDirectory, stripProjectDirectory);

                bool mustInspectEachFile = search.Filespec == null || search.ExtensionLengthToEnforce != 0;
                if (!mustInspectEachFile)
                {
                    result.Files = candidates;
                }
                else
                {
                    List<string> kept = new List<string>();
                    foreach (string candidate in candidates)
                    {
                        bool keep;
                        if (search.Filespec == null)
                        {
                            // No file-spec: the regular expression decides.
                            // PERF NOTE: Regex.IsMatch() is expensive, so it is only used on this path.
                            keep = search.RegexFileMatch.IsMatch(candidate);
                        }
                        else
                        {
                            // A file-spec with an enforced extension length: drop files whose
                            // extension does not have the desired length.
                            keep = Path.GetExtension(candidate).Length == search.ExtensionLengthToEnforce;
                        }

                        if (keep)
                        {
                            kept.Add(candidate);
                        }
                    }

                    result.Files = kept.ToArray();
                }
            }

            /*
             * Recurse into subdirectories.
             */
            if (search.NeedsRecursion && remainingWildcard.Length > 0)
            {
                // Find the next directory piece.
                string pattern = null;

                if (!IsRecursiveDirectoryMatch(remainingWildcard))
                {
                    int indexOfNextSlash = remainingWildcard.IndexOfAny(directorySeparatorCharacters);
                    ErrorUtilities.VerifyThrow(indexOfNextSlash != -1, "Slash should be guaranteed.");

                    pattern = remainingWildcard.Substring(0, indexOfNextSlash);

                    if (pattern == recursiveDirectoryMatch)
                    {
                        // The leading piece is "**": there is no choice but to enumerate everything.
                        pattern = null;
                        remainingWildcard = recursiveDirectoryMatch;
                    }
                    else
                    {
                        // Peel off the leftmost directory piece. For example, given
                        //
                        //      ?emp\foo\**\bar
                        //
                        // '?emp' becomes the pattern and
                        //
                        //      foo\**\bar
                        //
                        // is what remains. This is a performance optimization: enumerate only
                        // the directories that can match instead of all of them.
                        remainingWildcard = remainingWildcard.Substring(indexOfNextSlash + 1);
                    }
                }

                result.RemainingWildcardDirectory = remainingWildcard;
                result.Subdirs = getFileSystemEntries(FileSystemEntity.Directories, recursionState.BaseDirectory, pattern, null, false);
            }

            return result;
        }

        /// <summary>
        /// Given a file spec, create a regular expression that will match that
        /// file spec.
        ///
        /// PERF WARNING: this method is called in performance-critical
        /// scenarios, so keep it fast and cheap
        /// </summary>
        /// <param name="fixedDirectoryPart">The fixed directory part.</param>
        /// <param name="wildcardDirectoryPart">The wildcard directory part.</param>
        /// <param name="filenamePart">The filename part.</param>
        /// <param name="isLegalFileSpec">Receives whether this pattern is legal or not.</param>
        /// <returns>The regular expression string, or an empty string when the file spec is not legal.</returns>
        private static string RegularExpressionFromFileSpec
        (
            string fixedDirectoryPart,
            string wildcardDirectoryPart,
            string filenamePart,
            out bool isLegalFileSpec
        )
        {
            isLegalFileSpec = true;

            /*
             * Tags of the form <:tag:> carry special information while the expression is being
             * built. The incoming parts must therefore not contain "<:" or ":>" themselves;
             * a spec that does is rejected so it cannot interfere with the tag processing.
             */
            bool usesReservedTagMarkers =
                fixedDirectoryPart.Contains("<:") || fixedDirectoryPart.Contains(":>") ||
                wildcardDirectoryPart.Contains("<:") || wildcardDirectoryPart.Contains(":>") ||
                filenamePart.Contains("<:") || filenamePart.Contains(":>");
            if (usesReservedTagMarkers)
            {
                isLegalFileSpec = false;
                return String.Empty;
            }

            /*
             * A ".." after a wildcard is not legal.
             */
            if (wildcardDirectoryPart.Contains(dotdot))
            {
                isLegalFileSpec = false;
                return String.Empty;
            }

            /*
             * Trailing dots in file names have to be treated specially.
             * We want:
             *
             *     *. to match foo
             *
             * but 'foo' doesn't have a trailing '.' so the wildcards are turned into their
             * "but dot" variants (so that 'foo.txt' is not matched) and the dot is dropped.
             */
            if (filenamePart.EndsWith(".", StringComparison.Ordinal))
            {
                string tagged = filenamePart
                    .Replace("*", "<:anythingbutdot:>")
                    .Replace("?", "<:anysinglecharacterbutdot:>");
                filenamePart = tagged.Substring(0, tagged.Length - 1);
            }

            /*
             * Assemble the starting expression. Tags mark where the fixed directory, the wildcard
             * directory and the file name begin and end, as well as the beginning and end of the
             * line, so later steps can tell patterns apart by their position.
             */
            StringBuilder expression = new StringBuilder();
            expression.Append("<:bol:>");
            expression.Append("<:fixeddir:>").Append(fixedDirectoryPart).Append("<:endfixeddir:>");
            expression.Append("<:wildcarddir:>").Append(wildcardDirectoryPart).Append("<:endwildcarddir:>");
            expression.Append("<:filename:>").Append(filenamePart).Append("<:endfilename:>");
            expression.Append("<:eol:>");

            /*
             * Call out the directory separators.
             */
            for (int s = 0; s < directorySeparatorStrings.Length; s++)
            {
                expression.Replace(directorySeparatorStrings[s], "<:dirseparator:>");
            }

            /*
             * Capture the leading \\ in UNC paths, so that the doubled slash isn't
             * reduced in a later step.
             */
            expression.Replace("<:fixeddir:><:dirseparator:><:dirseparator:>", "<:fixeddir:><:uncslashslash:>");

            /*
             * Iteratively apply these reductions until the expression stops shrinking:
             *
             *  (1) <:dirseparator:>.<:dirseparator:> -> <:dirseparator:>
             *          dir1\.\dir2 == dir1\dir2
             *
             *  (2) <:dirseparator:><:dirseparator:> -> <:dirseparator:>
             *          f:\dir1\\dir2 == f:\dir1\dir2
             *      The leading \\ of a UNC path is exempt; it was already turned into
             *      <:uncslashslash:> above.
             *
             *  (3) <:fixeddir:>.<:dirseparator:>.<:dirseparator:> -> <:fixeddir:>.<:dirseparator:>
             *          .\.\dir1\file.txt == .\dir1\file.txt
             *
             *  (4) <:dirseparator:>.<:endfilename:> -> <:endfilename:>
             *          dir1\dir2\. == dir1\dir2
             *
             *  (5) <:filename:>.<:endfilename:> -> <:filename:><:endfilename:>
             *          a file name consisting of a single "." is reduced to an empty file name
             */
            int lengthBeforePass;
            do
            {
                lengthBeforePass = expression.Length;

                // Every replacement below is shorter than what it replaces, so a pass can only
                // shrink the expression or leave it unchanged.
                expression
                    .Replace("<:dirseparator:>.<:dirseparator:>", "<:dirseparator:>")
                    .Replace("<:dirseparator:><:dirseparator:>", "<:dirseparator:>")
                    .Replace("<:fixeddir:>.<:dirseparator:>.<:dirseparator:>", "<:fixeddir:>.<:dirseparator:>")
                    .Replace("<:dirseparator:>.<:endfilename:>", "<:endfilename:>")
                    .Replace("<:filename:>.<:endfilename:>", "<:filename:><:endfilename:>");

                ErrorUtilities.VerifyThrow(expression.Length <= lengthBeforePass,
                    "Expression reductions cannot increase the length of the expression.");
            } while (expression.Length < lengthBeforePass);

            /*
             * Collapse **\** into **.
             */
            const string doubledRecursion = recursiveDirectoryMatch + "<:dirseparator:>" + recursiveDirectoryMatch;
            do
            {
                lengthBeforePass = expression.Length;
                expression.Replace(doubledRecursion, recursiveDirectoryMatch);

                ErrorUtilities.VerifyThrow(expression.Length <= lengthBeforePass,
                    "Expression reductions cannot increase the length of the expression.");
            } while (expression.Length < lengthBeforePass);

            /*
             * Call out legal recursion operators:
             *
             *        fixed-directory + **\
             *        \**\
             *        **\**
             */
            const string recursionBetweenSeparators = "<:dirseparator:>" + recursiveDirectoryMatch + "<:dirseparator:>";
            const string recursionAtWildcardStart = "<:wildcarddir:>" + recursiveDirectoryMatch + "<:dirseparator:>";
            do
            {
                lengthBeforePass = expression.Length;
                expression
                    .Replace(recursionBetweenSeparators, "<:middledirs:>")
                    .Replace(recursionAtWildcardStart, "<:wildcarddir:><:leftdirs:>");

                ErrorUtilities.VerifyThrow(expression.Length <= lengthBeforePass,
                    "Expression reductions cannot increase the length of the expression.");
            } while (expression.Length < lengthBeforePass);

            /*
             * By definition, "**" must appear alone between directory slashes. Strip whatever
             * "**" is left; if that removed anything, the filespec is not valid.
             */
            int lengthBeforeStrip = expression.Length;
            expression.Replace(recursiveDirectoryMatch, null);
            if (expression.Length < lengthBeforeStrip)
            {
                isLegalFileSpec = false;
                return String.Empty;
            }

            /*
             * Remaining call-outs not involving "**"
             */
            expression
                .Replace("*.*", "<:anynonseparator:>")
                .Replace("*", "<:anynonseparator:>")
                .Replace("?", "<:singlecharacter:>");

            /*
             * Escape the characters that are special in regular expressions.
             * The backslash has to be handled first, otherwise the backslashes introduced
             * by the other escapes would be doubled as well.
             */
            expression
                .Replace("\\", "\\\\")
                .Replace("$", "\\$")
                .Replace("(", "\\(")
                .Replace(")", "\\)")
                .Replace("*", "\\*")
                .Replace("+", "\\+")
                .Replace(".", "\\.")
                .Replace("[", "\\[")
                .Replace("?", "\\?")
                .Replace("^", "\\^")
                .Replace("{", "\\{")
                .Replace("|", "\\|");

            /*
             * Finally, swap the call-outs for their regex equivalents.
             *
             * NOTE(review): <:anysinglecharacterbutdot:> expands to "[^\.]." which is two regex
             * atoms (a non-dot followed by any character) -- confirm that is intended.
             */
            expression
                .Replace("<:middledirs:>", "((/)|(\\\\)|(/.*/)|(/.*\\\\)|(\\\\.*\\\\)|(\\\\.*/))")
                .Replace("<:leftdirs:>", "((.*/)|(.*\\\\)|())")
                .Replace("<:rightdirs:>", ".*")
                .Replace("<:anything:>", ".*")
                .Replace("<:anythingbutdot:>", "[^\\.]*")
                .Replace("<:anysinglecharacterbutdot:>", "[^\\.].")
                .Replace("<:anynonseparator:>", "[^/\\\\]*")
                .Replace("<:singlecharacter:>", ".")
                .Replace("<:dirseparator:>", "[/\\\\]+")
                .Replace("<:uncslashslash:>", @"\\\\")
                .Replace("<:bol:>", "^")
                .Replace("<:eol:>", "$")
                .Replace("<:fixeddir:>", "(?<FIXEDDIR>")
                .Replace("<:endfixeddir:>", ")")
                .Replace("<:wildcarddir:>", "(?<WILDCARDDIR>")
                .Replace("<:endwildcarddir:>", ")")
                .Replace("<:filename:>", "(?<FILENAME>")
                .Replace("<:endfilename:>", ")");

            return expression.ToString();
        }

        /// <summary>
        /// Given a filespec, get the information needed for file matching.
/// </summary>
        /// <remarks>
        /// Delegates the analysis to GetFileSpecInfo, discards the directory and filename parts it reports,
        /// and compiles the resulting expression string using <see cref="DefaultRegexOptions"/>.
        /// </remarks>
        /// <param name="filespec">The filespec to analyze.</param>
        /// <param name="regexFileMatch">Receives the compiled regular expression, or null when the filespec is not legal.</param>
        /// <param name="needsRecursion">Receives the flag that is true if recursion is required.</param>
        /// <param name="isLegalFileSpec">Receives the flag that is true if the filespec is legal.</param>
        /// <param name="getFileSystemEntries">Delegate passed through to GetFileSpecInfo.</param>
        internal static void GetFileSpecInfoWithRegexObject
        (
            string filespec,
            out Regex regexFileMatch,
            out bool needsRecursion,
            out bool isLegalFileSpec,
            GetFileSystemEntries getFileSystemEntries
        )
        {
            // Only the expression string and the two flags matter here; the three parts are thrown away.
            string unusedFixedDirectory;
            string unusedWildcardDirectory;
            string unusedFilename;
            string expressionText;

            GetFileSpecInfo(
                filespec,
                out unusedFixedDirectory,
                out unusedWildcardDirectory,
                out unusedFilename,
                out expressionText,
                out needsRecursion,
                out isLegalFileSpec,
                getFileSystemEntries);

            // No regular expression is built for an illegal filespec.
            if (isLegalFileSpec)
            {
                regexFileMatch = new Regex(expressionText, DefaultRegexOptions);
            }
            else
            {
                regexFileMatch = null;
            }
        }

        /// <summary>
        /// Hook that is handed the three parts of a filespec and returns the parts to use instead, as a tuple of
        /// (fixed directory part, wildcard directory part, filename part).
        /// </summary>
        internal delegate Tuple<string, string, string> FixupParts(
            string fixedDirectoryPart,
            string recursiveDirectoryPart,
            string filenamePart);

        /// <summary>
        /// Given a filespec, parse it and construct the regular expression string.
/// </summary>
        /// <remarks>
        /// Every out parameter is given a default before any work is done, so callers receive defined values
        /// even when the filespec is rejected early.
        /// </remarks>
        /// <param name="filespec">The filespec to take apart.</param>
        /// <param name="fixedDirectoryPart">Receives the fixed directory part.</param>
        /// <param name="wildcardDirectoryPart">Receives the wildcard directory part.</param>
        /// <param name="filenamePart">Receives the filename part.</param>
        /// <param name="matchFileExpression">Receives the regular expression string; null when the raw filespec fails validation.</param>
        /// <param name="needsRecursion">Receives true when the filespec is legal and its wildcard directory part is non-empty.</param>
        /// <param name="isLegalFileSpec">Receives the flag that is true if the filespec is legal.</param>
        /// <param name="getFileSystemEntries">Delegate passed through to SplitFileSpec.</param>
        /// <param name="fixupParts">Optional hook that may substitute the three parts before the expression is built.</param>
        internal static void GetFileSpecInfo
        (
            string filespec,
            out string fixedDirectoryPart,
            out string wildcardDirectoryPart,
            out string filenamePart,
            out string matchFileExpression,
            out bool needsRecursion,
            out bool isLegalFileSpec,
            GetFileSystemEntries getFileSystemEntries,
            FixupParts fixupParts = null
        )
        {
            needsRecursion = false;
            matchFileExpression = null;
            filenamePart = String.Empty;
            wildcardDirectoryPart = String.Empty;
            fixedDirectoryPart = String.Empty;

            // Cheap textual validation first; nothing else is attempted for a filespec that fails it.
            isLegalFileSpec = RawFileSpecIsValid(filespec);
            if (!isLegalFileSpec)
            {
                return;
            }

            // Break the filespec up into its constituent parts: fixed, wildcard and filename.
            SplitFileSpec(filespec, out fixedDirectoryPart, out wildcardDirectoryPart, out filenamePart, getFileSystemEntries);

            // Let the optional hook replace the parts.
            if (fixupParts != null)
            {
                // TODO: switch to named tuples once they are available.
                Tuple<string, string, string> adjustedParts = fixupParts(fixedDirectoryPart, wildcardDirectoryPart, filenamePart);

                fixedDirectoryPart = adjustedParts.Item1;
                wildcardDirectoryPart = adjustedParts.Item2;
                filenamePart = adjustedParts.Item3;
            }

            // Build the regular expression for matching files; this call also reports whether the filespec is legal.
            matchFileExpression = RegularExpressionFromFileSpec(fixedDirectoryPart, wildcardDirectoryPart, filenamePart, out isLegalFileSpec);

            // Recursion is required only for a legal filespec that has a wildcard directory part.
            needsRecursion = isLegalFileSpec && wildcardDirectoryPart.Length != 0;
        }

        /// <summary>
        /// Runs the up-front textual checks on a raw filespec.
        /// </summary>
        /// <param name="filespec">The filespec to check.</param>
        /// <returns>
        /// False when the filespec contains an invalid path character, contains "...", or has its rightmost ':'
        /// at any index other than 1; true otherwise.
        /// </returns>
        internal static bool RawFileSpecIsValid(string filespec)
        {
            // Illegal path characters are never allowed.
            bool hasInvalidCharacter = filespec.IndexOfAny(s_invalidPathChars) >= 0;
            if (hasInvalidCharacter)
            {
                return false;
            }

            // Any path with "..." in it is explicitly illegal.
            bool hasTripleDot = filespec.IndexOf("...", StringComparison.Ordinal) >= 0;
            if (hasTripleDot)
            {
                return false;
            }

            // The rightmost ':' may only sit at the second character. Among other things this rejects
            // inputs such as http://www.website.com
            int lastColonIndex = filespec.LastIndexOf(":", StringComparison.Ordinal);

            return lastColonIndex == -1 || lastColonIndex == 1;
        }

        /// <summary>
        /// The results of a match between a filespec and a file name.
        /// </summary>
        internal sealed class Result
        {
            /// <summary>
            /// Default constructor; all members keep their declared initial values.
            /// </summary>
            internal Result()
            {
            }

            // Flags (all start out false).
            internal bool isLegalFileSpec;
            internal bool isMatch;
            internal bool isFileSpecRecursive;

            // Parts captured from a successful match (all start out empty).
            internal string fixedDirectoryPart = String.Empty;
            internal string wildcardDirectoryPart = String.Empty;
            internal string filenamePart = String.Empty;
        }

        /// <summary>
        /// Given a pattern (filespec) and a candidate filename (fileToMatch)
        /// return matching information.
/// </summary>
        /// <remarks>
        /// The candidate is first run through GetLongPathName. The match itself is only attempted when the
        /// filespec turns out to be legal; otherwise the result keeps its initial values.
        /// </remarks>
        /// <param name="filespec">The filespec (pattern).</param>
        /// <param name="fileToMatch">The candidate to match against.</param>
        /// <returns>The populated result object.</returns>
        internal static Result FileMatch
        (
            string filespec,
            string fileToMatch
        )
        {
            var outcome = new Result();

            string candidate = GetLongPathName(fileToMatch, s_defaultGetFileSystemEntries);

            Regex filespecRegex;
            GetFileSpecInfoWithRegexObject(
                filespec,
                out filespecRegex,
                out outcome.isFileSpecRecursive,
                out outcome.isLegalFileSpec,
                s_defaultGetFileSystemEntries);

            // Nothing to match against when the filespec is illegal.
            if (!outcome.isLegalFileSpec)
            {
                return outcome;
            }

            GetRegexMatchInfo(
                candidate,
                filespecRegex,
                out outcome.isMatch,
                out outcome.fixedDirectoryPart,
                out outcome.wildcardDirectoryPart,
                out outcome.filenamePart);

            return outcome;
        }

        /// <summary>
        /// Runs a filespec regular expression against a candidate and reports the named captures.
        /// </summary>
        /// <param name="fileToMatch">The candidate to match against.</param>
        /// <param name="fileSpecRegex">The regular expression built from the filespec.</param>
        /// <param name="isMatch">Receives true when the regular expression matched.</param>
        /// <param name="fixedDirectoryPart">Receives the FIXEDDIR capture, or an empty string when there is no match.</param>
        /// <param name="wildcardDirectoryPart">Receives the WILDCARDDIR capture, or an empty string when there is no match.</param>
        /// <param name="filenamePart">Receives the FILENAME capture, or an empty string when there is no match.</param>
        internal static void GetRegexMatchInfo(
            string fileToMatch,
            Regex fileSpecRegex,
            out bool isMatch,
            out string fixedDirectoryPart,
            out string wildcardDirectoryPart,
            out string filenamePart)
        {
            Match attempt = fileSpecRegex.Match(fileToMatch);
            isMatch = attempt.Success;

            if (!isMatch)
            {
                // No match: every part is reported as empty.
                fixedDirectoryPart = String.Empty;
                wildcardDirectoryPart = String.Empty;
                filenamePart = String.Empty;
                return;
            }

            GroupCollection captures = attempt.Groups;
            fixedDirectoryPart = captures["FIXEDDIR"].Value;
            wildcardDirectoryPart = captures["WILDCARDDIR"].Value;
            filenamePart = captures["FILENAME"].Value;
        }

        /// <summary>
        /// Given a filespec, find the files that match.
        /// Will never throw IO exceptions: if there is no match, returns the input verbatim.
/// </summary>
        /// <remarks>
        /// Convenience overload that forwards to the five-argument GetFiles using the default file system
        /// enumeration and directory-existence delegates.
        /// </remarks>
        /// <param name="projectDirectoryUnescaped">The project directory.</param>
        /// <param name="filespecUnescaped">Get files that match the given file spec.</param>
        /// <param name="excludeSpecsUnescaped">Exclude files that match these file specs; may be null.</param>
        /// <returns>The array of files.</returns>
        internal static string[] GetFiles
        (
            string projectDirectoryUnescaped,
            string filespecUnescaped,
            IEnumerable<string> excludeSpecsUnescaped = null
        )
        {
            return GetFiles(projectDirectoryUnescaped, filespecUnescaped, excludeSpecsUnescaped, s_defaultGetFileSystemEntries, s_defaultDirectoryExists);
        }

        /// <summary>
        /// What the caller of GetFileSearchData should do next.
        /// </summary>
        private enum SearchAction
        {
            /// <summary>The search state was prepared; run the file search.</summary>
            RunSearch,

            /// <summary>The filespec is not legal; it is to be returned as-is rather than searched for.</summary>
            ReturnFileSpec,

            /// <summary>Nothing can match (the paths could not be combined, or the fixed directory does not exist).</summary>
            ReturnEmptyList,
        }

        /// <summary>
        /// Analyzes a filespec and prepares the state required to search for the files it describes.
        /// </summary>
        /// <param name="projectDirectoryUnescaped">The project directory; null when not evaluating from inside the engine.</param>
        /// <param name="filespecUnescaped">The filespec to analyze.</param>
        /// <param name="getFileSystemEntries">Delegate passed through to GetFileSpecInfo.</param>
        /// <param name="directoryExists">Delegate used to check that the fixed directory part exists.</param>
        /// <param name="stripProjectDirectory">
        /// Receives true when combining with the project directory changed the fixed directory part
        /// (or when the project directory was used in its place).
        /// </param>
        /// <param name="result">Receives the search state; only fully populated when RunSearch is returned.</param>
        /// <returns>The action the caller should take.</returns>
        private static SearchAction GetFileSearchData(string projectDirectoryUnescaped, string filespecUnescaped,
            GetFileSystemEntries getFileSystemEntries, DirectoryExists directoryExists, out bool stripProjectDirectory,
            out RecursionState result)
        {
            stripProjectDirectory = false;
            result = new RecursionState();

            string fixedDirectoryPart;
            string wildcardDirectoryPart;
            string filenamePart;
            string matchFileExpression;
            bool needsRecursion;
            bool isLegalFileSpec;
            GetFileSpecInfo
            (
                filespecUnescaped,
                out fixedDirectoryPart,
                out wildcardDirectoryPart,
                out filenamePart,
                out matchFileExpression,
                out needsRecursion,
                out isLegalFileSpec,
                getFileSystemEntries
            );

            /*
             * If the filespec is invalid, then just return now.
             */
            if (!isLegalFileSpec)
            {
                return SearchAction.ReturnFileSpec;
            }

            // The projectDirectory is not null only if we are running the evaluation from
            // inside the engine (i.e. not from a task)
            if (projectDirectoryUnescaped != null)
            {
                if (fixedDirectoryPart != null)
                {
                    string oldFixedDirectoryPart = fixedDirectoryPart;
                    try
                    {
                        fixedDirectoryPart = Path.Combine(projectDirectoryUnescaped, fixedDirectoryPart);
                    }
                    catch (ArgumentException)
                    {
                        // The two paths cannot be combined, so nothing can match.
                        return SearchAction.ReturnEmptyList;
                    }

                    // If combining changed the path, the project directory was prepended and has to be
                    // stripped from the results again.
                    stripProjectDirectory = !String.Equals(fixedDirectoryPart, oldFixedDirectoryPart, StringComparison.OrdinalIgnoreCase);
                }
                else
                {
                    fixedDirectoryPart = projectDirectoryUnescaped;
                    stripProjectDirectory = true;
                }
            }

            /*
             * If the fixed directory part doesn't exist, then this means no files should be
             * returned.
             */
            if (fixedDirectoryPart.Length > 0 && !directoryExists(fixedDirectoryPart))
            {
                return SearchAction.ReturnEmptyList;
            }

            // determine if we need to use the regular expression to match the files
            // PERF NOTE: Constructing a Regex object is expensive, so we avoid it whenever possible
            bool matchWithRegex =
                // if we have a directory specification that uses wildcards, and
                (wildcardDirectoryPart.Length > 0) &&
                // the specification is not a simple "**"
                !IsRecursiveDirectoryMatch(wildcardDirectoryPart);
            // then we need to use the regular expression

            // if we're not using the regular expression, get the file pattern extension
            string extensionPart = matchWithRegex
                ? null
                : Path.GetExtension(filenamePart);

            // check if the file pattern would cause Windows to match more loosely on the extension
            // NOTE: Windows matches loosely in two cases (in the absence of the * wildcard in the extension):
            // 1) if the extension ends with the ? wildcard, it matches files with shorter extensions also e.g. "file.tx?" would
            //    match both "file.txt" and "file.tx"
            // 2) if the extension is three characters, and the filename contains the * wildcard, it matches files with longer
            //    extensions that start with the same three characters e.g. "*.htm" would match both "file.htm" and "file.html"
            bool needToEnforceExtensionLength =
                (extensionPart != null) &&
                (extensionPart.IndexOf('*') == -1) &&
                (extensionPart.EndsWith("?", StringComparison.Ordinal) ||
                 ((extensionPart.Length == (3 + 1 /* +1 for the period */)) &&
                  (filenamePart.IndexOf('*') != -1)));

            var searchData = new FilesSearchData(
                // if using the regular expression, ignore the file pattern
                (matchWithRegex ? null : filenamePart),
                (needToEnforceExtensionLength ? extensionPart.Length : 0),
                // if using the file pattern, ignore the regular expression.
                // Use the shared DefaultRegexOptions so this regex is always built with the same options
                // as the one created by GetFileSpecInfoWithRegexObject.
                (matchWithRegex ? new Regex(matchFileExpression, DefaultRegexOptions) : null),
                needsRecursion);

            result.SearchData = searchData;
            result.BaseDirectory = fixedDirectoryPart;
            result.RemainingWildcardDirectory = wildcardDirectoryPart;

            return SearchAction.RunSearch;
        }

        /// <summary>
        /// Returns the filespec as a single-element array, unless one of the exclude specs rules it out.
        /// </summary>
        /// <param name="filespecUnescaped">The filespec to return.</param>
        /// <param name="excludeSpecsUnescaped">The exclude specs to test the filespec against; may be null.</param>
        /// <returns>
        /// An empty array when an exclude spec is path-equal to the filespec or legally matches it;
        /// otherwise an array holding only the filespec.
        /// </returns>
        private static string[] CreateArrayWithSingleItemIfNotExcluded(string filespecUnescaped, IEnumerable<string> excludeSpecsUnescaped)
        {
            if (excludeSpecsUnescaped != null)
            {
                foreach (string excludeSpec in excludeSpecsUnescaped)
                {
                    // Try a path equality check first to:
                    // - avoid the expensive regex
                    // - maintain legacy behaviour where an illegal filespec is treated as a normal string
                    if (FileUtilities.PathsEqual(filespecUnescaped, excludeSpec))
                    {
                        return new string[0];
                    }

                    var match = FileMatch(excludeSpec, filespecUnescaped);

                    if (match.isLegalFileSpec && match.isMatch)
                    {
                        return new string[0];
                    }
                }
            }

            return new[] { filespecUnescaped };
        }

        /// <summary>
        /// Given a filespec, find the files that match.
        /// Will never throw IO exceptions: if there is no match, returns the input verbatim.
/// </summary>
        /// <remarks>
        /// A filespec without wildcards, or one that is not legal, is returned as a single-element array
        /// (or an empty array when an exclude spec rules it out). IO-related exceptions raised while searching
        /// are treated the same way.
        /// </remarks>
        /// <param name="projectDirectoryUnescaped">The project directory.</param>
        /// <param name="filespecUnescaped">Get files that match the given file spec.</param>
        /// <param name="excludeSpecsUnescaped">Exclude files that match these file specs; may be null.</param>
        /// <param name="getFileSystemEntries">Delegate used to enumerate the file system.</param>
        /// <param name="directoryExists">Determine whether a directory exists.</param>
        /// <returns>The array of files.</returns>
        /// <exception cref="NotSupportedException">GetFileSearchData returned an action this method does not know.</exception>
        internal static string[] GetFiles
        (
            string projectDirectoryUnescaped,
            string filespecUnescaped,
            IEnumerable<string> excludeSpecsUnescaped,
            GetFileSystemEntries getFileSystemEntries,
            DirectoryExists directoryExists
        )
        {
            // For performance. Short-circuit iff there is no wildcard.
            // Perf Note: Doing a [Last]IndexOfAny(...) is much faster than compiling a
            // regular expression that does the same thing, regardless of whether
            // filespec contains one of the characters.
            // Choose LastIndexOfAny instead of IndexOfAny because it seems more likely
            // that wildcards will tend to be towards the right side.
            if (!HasWildcards(filespecUnescaped))
            {
                return CreateArrayWithSingleItemIfNotExcluded(filespecUnescaped, excludeSpecsUnescaped);
            }

            // UNDONE (perf): Short circuit the complex processing when we only have a path and a wildcarded filename

            /*
             * Analyze the file spec and get the information we need to do the matching.
             */
            bool stripProjectDirectory;
            RecursionState state;
            var action = GetFileSearchData(projectDirectoryUnescaped, filespecUnescaped, getFileSystemEntries, directoryExists,
                out stripProjectDirectory, out state);

            if (action == SearchAction.ReturnEmptyList)
            {
                return new string[0];
            }
            else if (action == SearchAction.ReturnFileSpec)
            {
                return CreateArrayWithSingleItemIfNotExcluded(filespecUnescaped, excludeSpecsUnescaped);
            }
            else if (action != SearchAction.RunSearch)
            {
                // This means the enum value wasn't valid (or a new one was added without updating code correctly)
                throw new NotSupportedException(action.ToString());
            }

            List<RecursionState> searchesToExclude = null;

            // Exclude searches which will become active when the recursive search reaches their BaseDirectory.
            // The BaseDirectory of the exclude search is the key for this dictionary.
            Dictionary<string, List<RecursionState>> searchesToExcludeInSubdirs = null;

            HashSet<string> resultsToExclude = null;
            if (excludeSpecsUnescaped != null)
            {
                searchesToExclude = new List<RecursionState>();

                // The include base directory never changes while the excludes are processed, so normalize it once.
                var includeBaseDirectoryNormalized = state.BaseDirectory.NormalizeForPathComparison();

                foreach (string excludeSpec in excludeSpecsUnescaped)
                {
                    // This is ignored, we always use the include pattern's value for stripProjectDirectory
                    bool excludeStripProjectDirectory;

                    RecursionState excludeState;
                    var excludeAction = GetFileSearchData(projectDirectoryUnescaped, excludeSpec, getFileSystemEntries, directoryExists,
                        out excludeStripProjectDirectory, out excludeState);

                    if (excludeAction == SearchAction.ReturnFileSpec)
                    {
                        // An illegal exclude spec is treated as a literal result to filter out at the end.
                        if (resultsToExclude == null)
                        {
                            resultsToExclude = new HashSet<string>();
                        }
                        resultsToExclude.Add(excludeSpec);

                        continue;
                    }
                    else if (excludeAction == SearchAction.ReturnEmptyList)
                    {
                        // Nothing to do
                        continue;
                    }
                    else if (excludeAction != SearchAction.RunSearch)
                    {
                        // This means the enum value wasn't valid (or a new one was added without updating code correctly)
                        throw new NotSupportedException(excludeAction.ToString());
                    }

                    var excludeBaseDirectoryNormalized = excludeState.BaseDirectory.NormalizeForPathComparison();

                    if (excludeBaseDirectoryNormalized != includeBaseDirectoryNormalized)
                    {
                        // What to do if the BaseDirectory for the exclude search doesn't match the one for inclusion?
                        // - If paths don't match (one isn't a prefix of the other), then ignore the exclude search.  Examples:
                        //      - c:\Foo\ - c:\Bar\
                        //      - c:\Foo\Bar\ - C:\Foo\Baz\
                        //      - c:\Foo\ - c:\Foo2\

                        if (excludeBaseDirectoryNormalized.Length == includeBaseDirectoryNormalized.Length)
                        {
                            // Same length, but different paths.  Ignore this exclude search
                            continue;
                        }
                        else if (excludeBaseDirectoryNormalized.Length > includeBaseDirectoryNormalized.Length)
                        {
                            // Paths are not linguistic text: compare ordinally, in line with the equality check above.
                            if (!excludeBaseDirectoryNormalized.StartsWith(includeBaseDirectoryNormalized, StringComparison.Ordinal))
                            {
                                // Exclude path is longer, but doesn't start with include path.  So ignore it.
                                continue;
                            }

                            // - The exclude BaseDirectory is somewhere under the include BaseDirectory. So
                            //   keep the exclude search, but don't do any processing on it while recursing until the baseDirectory
                            //   in the recursion matches the exclude BaseDirectory.  Examples:
                            //      - Include - Exclude
                            //      - C:\git\msbuild\ - c:\git\msbuild\obj\
                            //      - C:\git\msbuild\ - c:\git\msbuild\src\Common\

                            if (searchesToExcludeInSubdirs == null)
                            {
                                searchesToExcludeInSubdirs = new Dictionary<string, List<RecursionState>>();
                            }
                            List<RecursionState> listForSubdir;
                            if (!searchesToExcludeInSubdirs.TryGetValue(excludeBaseDirectoryNormalized, out listForSubdir))
                            {
                                listForSubdir = new List<RecursionState>();

                                // The normalization fixes https://github.com/Microsoft/msbuild/issues/917
                                // and is a partial fix for https://github.com/Microsoft/msbuild/issues/724
                                searchesToExcludeInSubdirs[excludeBaseDirectoryNormalized] = listForSubdir;
                            }
                            listForSubdir.Add(excludeState);
                        }
                        else
                        {
                            // Exclude base directory length is less than include base directory length.
                            // This branch was chosen from the normalized lengths, so the prefix test has to use
                            // the normalized strings as well (and an ordinal comparison).
                            if (!includeBaseDirectoryNormalized.StartsWith(excludeBaseDirectoryNormalized, StringComparison.Ordinal))
                            {
                                // Include path is longer, but doesn't start with the exclude path.  So ignore exclude path
                                // (since it won't match anything under the include path)
                                continue;
                            }

                            // Now check the wildcard part
                            if (excludeState.RemainingWildcardDirectory.Length == 0)
                            {
                                // The wildcard part is empty, so ignore the exclude search, as it's looking for files non-recursively
                                // in a folder higher up than the include baseDirectory.
                                // Example: include="c:\git\msbuild\src\Framework\**\*.cs" exclude="c:\git\msbuild\*.cs"
                                continue;
                            }
                            else if (IsRecursiveDirectoryMatch(excludeState.RemainingWildcardDirectory))
                            {
                                // The wildcard part is exactly "**\", so the exclude pattern will apply to everything in the include
                                // pattern, so simply update the exclude's BaseDirectory to be the same as the include baseDirectory
                                // Example: include="c:\git\msbuild\src\Framework\**\*.*" exclude="c:\git\msbuild\**\*.bak"
                                excludeState.BaseDirectory = state.BaseDirectory;
                                searchesToExclude.Add(excludeState);
                            }
                            else
                            {
                                // The wildcard part is non-empty and not "**\", so we will need to match it with a Regex.  Fortunately
                                // these conditions mean that it needs to be matched with a Regex anyway, so here we will update the
                                // BaseDirectory to be the same as the exclude BaseDirectory, and change the wildcard part to be "**\"
                                // because we don't know where the different parts of the exclude wildcard part would be matched.
                                // Example: include="c:\git\msbuild\src\Framework\**\*.*" exclude="c:\git\msbuild\**\bin\**\*.*"
                                Debug.Assert(excludeState.SearchData.RegexFileMatch != null, "Expected Regex to be used for exclude file matching");
                                excludeState.BaseDirectory = state.BaseDirectory;
                                excludeState.RemainingWildcardDirectory = recursiveDirectoryMatch + s_directorySeparator;
                                searchesToExclude.Add(excludeState);
                            }
                        }
                    }
                    else
                    {
                        // Same base directory as the include: the exclude search applies from the start.
                        searchesToExclude.Add(excludeState);
                    }
                }
            }

            if (searchesToExclude != null && searchesToExclude.Count == 0)
            {
                searchesToExclude = null;
            }

            /*
             * Even though we return a string[] we work internally with an IList.
             * This is because it's cheaper to add items to an IList and this code
             * might potentially do a lot of that.
             */
            var listOfFiles = new List<string>();

            /*
             * Now get the files that match, starting at the lowest fixed directory.
             */
            try
            {
                GetFilesRecursive(
                    listOfFiles,
                    state,
                    projectDirectoryUnescaped,
                    stripProjectDirectory,
                    getFileSystemEntries,
                    searchesToExclude,
                    searchesToExcludeInSubdirs);
            }
            catch (Exception ex) when (ExceptionHandling.IsIoRelatedException(ex))
            {
                // Assume it's not meant to be a path
                return CreateArrayWithSingleItemIfNotExcluded(filespecUnescaped, excludeSpecsUnescaped);
            }

            /*
             * Build the return array.
             */
            var files = resultsToExclude != null
                ? listOfFiles.Where(f => !resultsToExclude.Contains(f)).ToArray()
                : listOfFiles.ToArray();

            return files;
        }

        /// <summary>
        /// Tells whether a wildcard directory part is exactly the recursive directory match ("**") once
        /// TrimTrailingSlashes has been applied to it.
        /// </summary>
        /// <param name="path">The wildcard directory part to test.</param>
        /// <returns>True when the trimmed path equals "**".</returns>
        private static bool IsRecursiveDirectoryMatch(string path) => path.TrimTrailingSlashes() == recursiveDirectoryMatch;
    }
}