private bool TryGetObservedFileAccesses()

in Public/Src/Engine/Processes/SandboxedProcessPipExecutor.cs [3790:4249]


        /// <summary>
        /// Groups the explicitly reported file accesses of <paramref name="result"/> by parsed path and derives from them:
        /// the declared file outputs that were never accessed, the write accesses that fall under shared opaque directories,
        /// the set of directories created by the pip, and the sorted list of observed accesses.
        /// </summary>
        /// <param name="result">Sandboxed process result; only its <c>ExplicitlyReportedFileAccesses</c> are read here.</param>
        /// <param name="allInputPathsUnderSharedOpaques">
        /// Paths that are exempt from the <c>IgnoreUndeclaredAccessesUnderSharedOpaques</c> filtering: an access under a shared opaque
        /// is only skipped by that filter when its path is not contained in this set.
        /// </param>
        /// <param name="unobservedOutputs">
        /// Paths of the pip file outputs for which <c>RequireOutputObservation</c> holds and no access was recorded.
        /// May be null when accesses were reported and no such output was found.
        /// </param>
        /// <param name="sharedDynamicDirectoryWriteAccesses">
        /// For each shared opaque root, the file artifacts created from the write accesses recorded under it. Paths that exist as files are
        /// added with <c>FileExistence.Required</c>, absent paths with <c>FileExistence.Temporary</c>; directories are not included.
        /// Empty when no accesses were reported or when the method returns false.
        /// </param>
        /// <param name="observedAccesses">
        /// The observed accesses that survived filtering, sorted by expanded path. Empty when no accesses were reported or when the method returns false.
        /// </param>
        /// <param name="createdDirectories">
        /// Paths with a write access for which <c>IsDirectoryEffectivelyCreated</c> holds. Only populated when the pip allows undeclared source reads.
        /// </param>
        /// <returns>
        /// False (after logging) when the existence of a written path under a shared opaque cannot be probed, or when an existence assertion
        /// under a shared opaque is not satisfied by the files found; true otherwise.
        /// </returns>
        private bool TryGetObservedFileAccesses(
            SandboxedProcessResult result,
            HashSet<AbsolutePath> allInputPathsUnderSharedOpaques,
            out List<AbsolutePath> unobservedOutputs,
            out IReadOnlyDictionary<AbsolutePath, IReadOnlyCollection<FileArtifactWithAttributes>> sharedDynamicDirectoryWriteAccesses,
            out SortedReadOnlyArray<ObservedFileAccess, ObservedFileAccessExpandedPathComparer> observedAccesses,
            out IReadOnlySet<AbsolutePath> createdDirectories)
        {
            // Allocated lazily below, only if an unobserved output is actually found.
            unobservedOutputs = null;

            // Fast path: nothing was explicitly reported. Every output that requires observation is unobserved,
            // and all the other results are empty.
            if (result.ExplicitlyReportedFileAccesses == null || result.ExplicitlyReportedFileAccesses.Count == 0)
            {
                unobservedOutputs = m_pip.FileOutputs.Where(f => RequireOutputObservation(f)).Select(f => f.Path).ToList();
                sharedDynamicDirectoryWriteAccesses = CollectionUtilities.EmptyDictionary<AbsolutePath, IReadOnlyCollection<FileArtifactWithAttributes>>();
                observedAccesses = SortedReadOnlyArray<ObservedFileAccess, ObservedFileAccessExpandedPathComparer>.FromSortedArrayUnsafe(
                        ReadOnlyArray<ObservedFileAccess>.Empty,
                        new ObservedFileAccessExpandedPathComparer(m_context.PathTable.ExpandedPathComparer));
                createdDirectories = CollectionUtilities.EmptySet<AbsolutePath>();

                return true;
            }

            // When true, written paths under shared opaques are probed at their redirected (container) location instead of the original one.
            bool sharedOutputDirectoriesAreRedirected = m_pip.NeedsToRunInContainer && m_pip.ContainerIsolationLevel.IsolateSharedOpaqueOutputDirectories();

            // Note that we are enumerating an unordered set to produce the array of observed paths.
            // As noted in SandboxedProcessPipExecutionResult, the caller must assume no particular order.
            // Since observed accesses contribute to a descriptor value (rather than a hashed key), this is fine; no normalization needed.
            // Since we're projecting many accesses into groups per path into just paths, we need a temporary dictionary.
            // TODO: Allocations ahoy!
            using (PooledObjectWrapper<Dictionary<AbsolutePath, CompactSet<ReportedFileAccess>>> accessesByPathWrapper = ProcessPools.ReportedFileAccessesByPathPool.GetInstance())
            {
                Dictionary<AbsolutePath, CompactSet<ReportedFileAccess>> accessesByPath = accessesByPathWrapper.Instance;

                // Remembers the (tool, path) pairs already excluded by the special tool rules, so that
                // GetSpecialCaseRulesForSpecialTools is not evaluated again for a repeated pair.
                var excludedToolsAndPaths = new HashSet<(AbsolutePath, AbsolutePath)>();

                foreach (ReportedFileAccess reported in result.ExplicitlyReportedFileAccesses)
                {
                    Contract.Assert(
                        reported.Status == FileAccessStatus.Allowed || reported.Method == FileAccessStatusMethod.FileExistenceBased,
                        "Explicitly reported accesses are defined to be successful or denied only based on file existence");

                    // Enumeration probes have a corresponding Enumeration access (also explicitly reported).
                    // Presently we are interested in capturing the existence of enumerations themselves rather than what was seen
                    // (and for NtQueryDirectoryFile, we can't always report the individual probes anyway).
                    if (reported.RequestedAccess == RequestedAccess.EnumerationProbe)
                    {
                        // If it is an incremental tool and the pip allows preserving outputs, then do not ignore because
                        // the tool may depend on the directory membership.
                        if (!IsIncrementalToolAccess(reported))
                        {
                            continue;
                        }
                    }

                    AbsolutePath parsedPath;

                    // We want an AbsolutePath for the full access. This may not be parse-able due to the accessed path
                    // being invalid, or a path format we do not understand. Note that TryParseAbsolutePath logs as appropriate
                    // in the latter case.
                    if (!reported.TryParseAbsolutePath(m_context, m_loggingContext, m_pip, out parsedPath))
                    {
                        continue;
                    }

                    bool shouldExclude = false;

                    // Remove special accesses see Bug: #121875.
                    // Some perform file accesses, which don't yet fall into any configurable file access manifest category.
                    // These special tools/cases should be allowlisted, but we already have customers deployed specs without
                    // using allowlists.
                    if (GetSpecialCaseRulesForCoverageAndSpecialDevices(parsedPath))
                    {
                        shouldExclude = true;
                    }
                    else
                    {
                        // The cache lookup comes first in the condition, so the rule check only runs for pairs not seen before.
                        if (AbsolutePath.TryCreate(m_context.PathTable, reported.Process.Path, out AbsolutePath processPath)
                            && (excludedToolsAndPaths.Contains((processPath, parsedPath))
                                || GetSpecialCaseRulesForSpecialTools(processPath, parsedPath)))
                        {
                            shouldExclude = true;
                            excludedToolsAndPaths.Add((processPath, parsedPath));
                        }
                    }

                    // An entry is always written for the path, even when the access itself is excluded (in which case the existing
                    // set is stored back unchanged, possibly empty). This keeps the path visible to the output check below.
                    // NOTE(review): entries left empty are presumably dropped by RemoveEmptyOrInjectableFileAccesses further down - confirm.
                    accessesByPath.TryGetValue(parsedPath, out CompactSet<ReportedFileAccess> existingAccessesToPath);
                    accessesByPath[parsedPath] = !shouldExclude ? existingAccessesToPath.Add(reported) : existingAccessesToPath;
                }

                // Declared file outputs: the ones without any recorded access (and that require observation) are collected as unobserved;
                // the ones with accesses are removed from the map so they never show up as observed accesses.
                foreach (var output in m_pip.FileOutputs)
                {
                    if (!accessesByPath.ContainsKey(output.Path))
                    {
                        if (RequireOutputObservation(output))
                        {
                            unobservedOutputs ??= new List<AbsolutePath>();
                            unobservedOutputs.Add(output.Path);
                        }
                    }
                    else
                    {
                        accessesByPath.Remove(output.Path);
                    }
                }

                using (PooledObjectWrapper<Dictionary<AbsolutePath, HashSet<AbsolutePath>>> dynamicWriteAccessWrapper = ProcessPools.DynamicWriteAccesses.GetInstance())
                using (PooledObjectWrapper<Dictionary<AbsolutePath, ObservedFileAccess>> accessesUnsortedWrapper = ProcessPools.AccessUnsorted.GetInstance())
                using (var excludedPathsWrapper = Pools.GetAbsolutePathSet())
                using (var maybeUnresolvedAbsentAccessessWrapper = Pools.GetAbsolutePathSet())
                using (var fileExistenceDenialsWrapper = Pools.GetAbsolutePathSet())
                using (var createdDirectoriesMutableWrapper = Pools.GetAbsolutePathSet())
                {
                    var fileExistenceDenials = fileExistenceDenialsWrapper.Instance;
                    var createdDirectoriesMutable = createdDirectoriesMutableWrapper.Instance;
                    var maybeUnresolvedAbsentAccesses = maybeUnresolvedAbsentAccessessWrapper.Instance;

                    // Initializes all shared directories in the pip with no accesses
                    var dynamicWriteAccesses = dynamicWriteAccessWrapper.Instance;
                    foreach (var sharedDirectory in m_sharedOpaqueDirectoryRoots.Keys)
                    {
                        dynamicWriteAccesses[sharedDirectory] = new HashSet<AbsolutePath>();
                    }

                    // Remove all the special file accesses that need removal.
                    RemoveEmptyOrInjectableFileAccesses(accessesByPath);

                    var accessesUnsorted = accessesUnsortedWrapper.Instance;
                    foreach (KeyValuePair<AbsolutePath, CompactSet<ReportedFileAccess>> entry in accessesByPath)
                    {
                        // Per-path state, aggregated over all the accesses reported for this path.
                        bool? isDirectoryLocation = null;
                        bool hasEnumeration = false;
                        bool isProbe = true;

                        // There is always at least one access for reported path by construction
                        // Since the set of accesses occur on the same path, the manifest path is
                        // the same for all of them. We only need to query one of them.
                        ReportedFileAccess firstAccess = entry.Value.First();

                        // Discard entries that have a single MacLookup report on a path that contains an intermediate directory symlink.
                        // Reason: observed accesses computed here should only contain fully expanded paths to avoid ambiguity;
                        //         on Mac, all access reports except for MacLookup report fully expanded paths, so only MacLookup paths need to be curated
                        if (entry.Value.Count == 1 &&
                            firstAccess.Operation == ReportedFileOperation.MacLookup &&
                            firstAccess.ManifestPath.IsValid &&
                            CheckIfPathContainsSymlinks(firstAccess.ManifestPath.GetParent(m_context.PathTable)))
                        {
                            Counters.IncrementCounter(SandboxedProcessCounters.DirectorySymlinkPathsDiscardedCount);
                            continue;
                        }

                        bool isPathCandidateToBeOwnedByASharedOpaque = false;
                        foreach (var access in entry.Value)
                        {
                            // Detours reports a directory probe with a trailing backslash.
                            // Note that the value is recomputed on every access: the trailing-backslash evidence only counts while no
                            // previous access left the value at false, whereas the directory attribute flag sets it to true regardless.
                            isDirectoryLocation =
                                // If the path is available and ends with a trailing backslash, we know that represents
                                // a directory
                                ((isDirectoryLocation == null || isDirectoryLocation.Value) &&
                                 access.Path != null && access.Path.EndsWith("\\", StringComparison.OrdinalIgnoreCase))
                                ||
                                // If FILE_ATTRIBUTE_DIRECTORY flag is present, that means detours understood the operation
                                // as happening on a directory.
                                // TODO: this flag is not properly propagated for all detours operations.
                                access.FlagsAndAttributes.HasFlag(FlagsAndAttributes.FILE_ATTRIBUTE_DIRECTORY);

                            // To treat the paths as file probes, all accesses to the path must be the probe access.
                            isProbe &= access.RequestedAccess == RequestedAccess.Probe;

                            // A probe issued by an incremental tool is not treated as a plain file probe.
                            if (access.RequestedAccess == RequestedAccess.Probe && IsIncrementalToolAccess(access))
                            {
                                isProbe = false;
                            }

                            // TODO: Remove this when WDG can grok this feature with no flag.
                            if (m_sandboxConfig.UnsafeSandboxConfiguration.ExistingDirectoryProbesAsEnumerations ||
                                access.RequestedAccess == RequestedAccess.Enumerate)
                            {
                                hasEnumeration = true;
                            }

                            // if the access is a write on a file (that is, not on a directory), then the path is a candidate to be part of a shared opaque
                            isPathCandidateToBeOwnedByASharedOpaque |=
                                access.RequestedAccess.HasFlag(RequestedAccess.Write) &&
                                !access.FlagsAndAttributes.HasFlag(FlagsAndAttributes.FILE_ATTRIBUTE_DIRECTORY) &&
                                !access.IsDirectoryCreationOrRemoval();

                            // Directory creations are only tracked for pips that allow undeclared source reads.
                            if (m_pip.AllowUndeclaredSourceReads &&
                                access.RequestedAccess.HasFlag(RequestedAccess.Write) &&
                                access.IsDirectoryEffectivelyCreated())
                            {
                                createdDirectoriesMutable.Add(entry.Key);
                            }

                            // If the access is a shared opaque candidate and it was denied based on file existence, keep track of it
                            // (the candidate flag is cumulative, so it may have been set by an earlier access to the same path)
                            if (isPathCandidateToBeOwnedByASharedOpaque && access.Method == FileAccessStatusMethod.FileExistenceBased && access.Status == FileAccessStatus.Denied)
                            {
                                fileExistenceDenials.Add(entry.Key);
                            }
                        }

                        // if the path is still a candidate to be part of a shared opaque, that means there was at least a write to that path. If the path is then
                        // in the cone of a shared opaque, then it is a dynamic write access
                        // NOTE(review): this flag is only set to true right before the 'continue' below, so the '== true' check further down
                        // appears to never hold and the second IsAccessUnderASharedOpaque call always decides - confirm whether that is intended.
                        bool? isAccessUnderASharedOpaque = null;
                        if (isPathCandidateToBeOwnedByASharedOpaque && IsAccessUnderASharedOpaque(
                                firstAccess,
                                dynamicWriteAccesses,
                                out AbsolutePath sharedDynamicDirectoryRoot))
                        {
                            dynamicWriteAccesses[sharedDynamicDirectoryRoot].Add(entry.Key);
                            isAccessUnderASharedOpaque = true;
                            // This is a known output, so don't store it
                            continue;
                        }
                        // if the candidate was discarded because it was not under a shared opaque, make sure the set of denials based on file existence is also kept in sync
                        else if (isPathCandidateToBeOwnedByASharedOpaque)
                        {
                            fileExistenceDenials.Remove(entry.Key);
                        }

                        // The following two lines need to be removed in order to report file accesses for
                        // undeclared files and sealed directories. But since this is a breaking change, we do
                        // it under an unsafe flag.
                        if (m_sandboxConfig.UnsafeSandboxConfiguration.IgnoreUndeclaredAccessesUnderSharedOpaques)
                        {
                            // If the access occurred under any of the pip shared opaque outputs, and the access is not happening on any known input paths (neither dynamic nor static)
                            // then we just skip reporting the access. Together with the above step, this means that no accesses under shared opaques that represent outputs are actually
                            // reported as observed accesses. This matches the same behavior that occurs on static outputs.
                            if (!allInputPathsUnderSharedOpaques.Contains(entry.Key) &&
                                (isAccessUnderASharedOpaque == true || IsAccessUnderASharedOpaque(firstAccess, dynamicWriteAccesses, out _)))
                            {
                                continue;
                            }
                        }

                        // Absent accesses may still contain reparse points. If we are fully resolving them, keep track of them for further processing
                        if (!hasEnumeration && EnableFullReparsePointResolving(m_configuration, m_pip) && entry.Value.All(fa => fa.Error == NativeIOConstants.ErrorPathNotFound))
                        {
                            maybeUnresolvedAbsentAccesses.Add(entry.Key);
                        }

                        // Translate the aggregated per-path state into observation flags.
                        ObservationFlags observationFlags = ObservationFlags.None;

                        if (isProbe)
                        {
                            observationFlags |= ObservationFlags.FileProbe;
                        }

                        if (isDirectoryLocation != null && isDirectoryLocation.Value)
                        {
                            observationFlags |= ObservationFlags.DirectoryLocation;
                        }

                        if (hasEnumeration)
                        {
                            observationFlags |= ObservationFlags.Enumeration;
                        }

                        accessesUnsorted.Add(entry.Key, new ObservedFileAccess(entry.Key, observationFlags, entry.Value));
                    }

                    // AccessesUnsorted might include various accesses to directories leading to the files inside of shared opaques,
                    // mainly CreateDirectory and ProbeDirectory. To make strong fingerprint computation more stable, we are excluding such
                    // accesses from the list that is passed into the ObservedInputProcessor (as a result, they will not be a part of the path set).
                    //
                    // Example, given this path: '\sod\dir1\dir2\file.txt', we will exclude accesses to dir1 and dir2 only.
                    var excludedPaths = excludedPathsWrapper.Instance;
                    foreach (var sod in dynamicWriteAccesses)
                    {
                        foreach (var file in sod.Value)
                        {
                            var pathElement = file.GetParent(m_context.PathTable);

                            // Walk upwards until the shared opaque root is reached (the root itself is not excluded). The walk also stops at
                            // the first directory that was already in the set (Add returns false), since its ancestors were handled before.
                            while (pathElement.IsValid && pathElement != sod.Key && excludedPaths.Add(pathElement))
                            {
                                pathElement = pathElement.GetParent(m_context.PathTable);
                            }
                        }
                    }

                    // Assigned here, before any of the failure returns below, so the out parameter is set on every exit path.
                    createdDirectories = createdDirectoriesMutable.ToReadOnlySet();

                    var mutableWriteAccesses = new Dictionary<AbsolutePath, IReadOnlyCollection<FileArtifactWithAttributes>>(dynamicWriteAccesses.Count);

                    // We know that all accesses here were write accesses, but we don't actually know if in the end the corresponding file
                    // still exists or whether the file was replaced with a directory afterwards. E.g.:
                    // * the tool could have created a file but removed it right after
                    // * the tool could have created a file but then removed it and created a directory
                    // We only care about the access if its final shape is not a directory
                    bool reparsePointProduced = false;

                    using (var existenceAssertionsWrapper = Pools.GetFileArtifactSet())
                    {
                        HashSet<FileArtifact> existenceToAssert = existenceAssertionsWrapper.Instance;

                        foreach (var kvp in dynamicWriteAccesses)
                        {
                            // Let's validate here the existence assertions for shared opaques
                            // Exclusive opaque content is unknown at this point, so it is validated at a later stage
                            // The set is shared across iterations: a previous iteration either emptied it or made the method return false.
                            Contract.Assert(existenceToAssert.Count == 0);
                            var assertions = m_pipGraphFileSystemView?.GetExistenceAssertionsUnderOpaqueDirectory(m_sharedOpaqueDirectoryRoots[kvp.Key]);
                            // This is null for some tests
                            if (assertions != null)
                            {
                                existenceToAssert.AddRange(assertions);
                            }

                            var fileWrites = new List<FileArtifactWithAttributes>(kvp.Value.Count);
                            mutableWriteAccesses[kvp.Key] = fileWrites;
                            foreach (AbsolutePath writeAccess in kvp.Value)
                            {
                                // The path to probe on disk: the redirected location when outputs are redirected, the written path otherwise.
                                string outputPath;
                                if (sharedOutputDirectoriesAreRedirected)
                                {
                                    outputPath = m_processInContainerManager.GetRedirectedOpaqueFile(writeAccess, kvp.Key, m_containerConfiguration).ToString(m_pathTable);
                                }
                                else
                                {
                                    outputPath = writeAccess.ToString(m_pathTable);
                                }

                                var maybeResult = FileUtilities.TryProbePathExistence(outputPath, followSymlink: false, out var isReparsePoint);
                                reparsePointProduced |= isReparsePoint;

                                // A failed probe is logged and fails the whole computation, with empty results.
                                if (!maybeResult.Succeeded)
                                {
                                    Tracing.Logger.Log.CannotProbeOutputUnderSharedOpaque(
                                        m_loggingContext,
                                        m_pip.GetDescription(m_context),
                                        writeAccess.ToString(m_pathTable),
                                        maybeResult.Failure.DescribeIncludingInnerFailures());

                                    sharedDynamicDirectoryWriteAccesses = CollectionUtilities.EmptyDictionary<AbsolutePath, IReadOnlyCollection<FileArtifactWithAttributes>>();
                                    observedAccesses = CollectionUtilities.EmptySortedReadOnlyArray<ObservedFileAccess, ObservedFileAccessExpandedPathComparer>(new ObservedFileAccessExpandedPathComparer(m_context.PathTable.ExpandedPathComparer));

                                    return false;
                                }

                                switch (maybeResult.Result)
                                {
                                    case PathExistence.ExistsAsDirectory:
                                        // Directories are not reported as explicit content, since we don't have the functionality today to persist them in the cache.
                                        continue;
                                    case PathExistence.ExistsAsFile:
                                        // If outputs are redirected, we don't want to store a tombstone file
                                        if (!sharedOutputDirectoriesAreRedirected || !FileUtilities.IsWciTombstoneFile(outputPath))
                                        {
                                            // If the written file was a denied write based on file existence, that means an undeclared file was overridden.
                                            // This file could be an allowed undeclared source or a file completely alien to the build, not mentioned at all.
                                            var artifact = FileArtifact.CreateOutputFile(writeAccess);
                                            fileWrites.Add(FileArtifactWithAttributes.Create(
                                                artifact,
                                                FileExistence.Required,
                                                isUndeclaredFileRewrite: fileExistenceDenials.Contains(writeAccess)));

                                            // We found an output, remove it from the set of assertions to verify
                                            existenceToAssert.Remove(artifact);
                                        }
                                        break;
                                    case PathExistence.Nonexistent:
                                        // The path was written but is no longer on disk: report it as a temporary file.
                                        fileWrites.Add(FileArtifactWithAttributes.Create(FileArtifact.CreateOutputFile(writeAccess), FileExistence.Temporary));
                                        break;
                                }
                            }

                            // There are some outputs that were asserted as belonging to the shared opaque that were not found
                            // (only the first missing one is logged)
                            if (existenceToAssert.Count != 0)
                            {
                                Tracing.Logger.Log.ExistenceAssertionUnderOutputDirectoryFailed(
                                    m_loggingContext,
                                    m_pip.GetDescription(m_context),
                                    existenceToAssert.First().Path.ToString(m_pathTable),
                                    kvp.Key.ToString(m_pathTable));

                                sharedDynamicDirectoryWriteAccesses = CollectionUtilities.EmptyDictionary<AbsolutePath, IReadOnlyCollection<FileArtifactWithAttributes>>();
                                observedAccesses = CollectionUtilities.EmptySortedReadOnlyArray<ObservedFileAccess, ObservedFileAccessExpandedPathComparer>(new ObservedFileAccessExpandedPathComparer(m_context.PathTable.ExpandedPathComparer));

                                return false;
                            }
                        }
                    }
                    sharedDynamicDirectoryWriteAccesses = mutableWriteAccesses;

                    // Consider the scenario where path/dir/file gets probed but at probing time the path is absent. Afterwards, a dir junction path/dir gets created, pointing
                    // to path/target, and then path/target/file is created. Since path/dir/file was absent at probing time, detours doesn't resolve it because there is nothing
                    // to resolve. However, the creation of the dir junction and file makes path/dir/file existing but unresolved. However, path/dir/file won't be there on cache lookup, the probe will
                    // come back as absent and therefore we get a consistent cache miss.
                    // Let's try to make sure then that unresolved absent probes don't end up in the observed path set. Here we address the case when reparse points are produced by the same pip. Cross-pip
                    // scenarios are not addressed here.

                    if (EnableFullReparsePointResolving(m_configuration, m_pip) && reparsePointProduced)
                    {
                        foreach(AbsolutePath absentAccess in maybeUnresolvedAbsentAccesses)
                        {
                            // If the access is still absent, there is nothing to resolve
                            // (the 'is var' pattern always matches and only binds the probe result to a local; a failed probe is treated like an absent path)
                            if (FileUtilities.TryProbePathExistence(absentAccess.ToString(m_pathTable), followSymlink: false) is var existence &&
                                (!existence.Succeeded || existence.Result == PathExistence.Nonexistent))
                            {
                                continue;
                            }

                            // If the resolved path is the same as the original one, the probe didn't contain reparse points
                            var resolvedPath = m_reparsePointResolver.ResolveIntermediateDirectoryReparsePoints(absentAccess);
                            if (resolvedPath == absentAccess)
                            {
                                continue;
                            }

                            // We have an access that was originally absent, now it is present, and contains unresolved reparse points. Let's exclude it from the
                            // accesses.
                            excludedPaths.Add(absentAccess);

                            // We only include a synthetic resolved one if the path is not an output of the pip (we never report accesses on outputs)
                            // It is not expected that a pip contains too many output directories, so going through each of them should be fine.
                            if (dynamicWriteAccesses.All(kvp => !kvp.Value.Contains(resolvedPath)))
                            {
                                m_fileAccessManifest.TryFindManifestPathFor(resolvedPath, out AbsolutePath manifestPath, out _);

                                // Generate equivalent accesses with the resolved path
                                foreach (ReportedFileAccess originalAccess in accessesByPath[absentAccess])
                                {
                                    // The explicit path string is passed as null when the resolved path is the manifest path itself.
                                    ReportedFileAccess syntheticAccess = originalAccess.CreateWithPathAndAttributes(resolvedPath == manifestPath ? null : resolvedPath.ToString(m_pathTable), manifestPath, originalAccess.FlagsAndAttributes);

                                    // Check if there is already an access with that path, and add to it in that case
                                    // (the existing entry keeps its observation flags; a brand new entry is flagged as a file probe)
                                    if (accessesUnsorted.TryGetValue(resolvedPath, out var observedFileAccess))
                                    {
                                        accessesUnsorted[resolvedPath] = new ObservedFileAccess(resolvedPath, observedFileAccess.ObservationFlags, observedFileAccess.Accesses.Add(syntheticAccess));
                                    }
                                    else
                                    {
                                        accessesUnsorted.Add(resolvedPath, new ObservedFileAccess(resolvedPath, ObservationFlags.FileProbe, new CompactSet<ReportedFileAccess>().Add(syntheticAccess)));
                                    }
                                }
                            }
                        }
                    }

                    // Apply the exclusions computed above and produce the final sorted result.
                    var filteredAccessesUnsorted = accessesUnsorted.Values.Where(shouldIncludeAccess);

                    observedAccesses = SortedReadOnlyArray<ObservedFileAccess, ObservedFileAccessExpandedPathComparer>.CloneAndSort(
                        filteredAccessesUnsorted,
                        new ObservedFileAccessExpandedPathComparer(m_context.PathTable.ExpandedPathComparer));

                    return true;

                    // Local predicate deciding whether an observed access makes it into the final result.
                    bool shouldIncludeAccess(ObservedFileAccess access)
                    {
                        // if not in the excludedPaths set --> include
                        if (!excludedPaths.Contains(access.Path))
                        {
                            return true;
                        }

                        // else, include IFF:
                        //   (1) access is a directory enumeration, AND
                        //   (2) the directory was not created by this pip
                        return
                            access.ObservationFlags.HasFlag(ObservationFlags.Enumeration)
                            && !access.Accesses.Any(rfa => rfa.IsDirectoryCreation());
                    }
                }
            }
        }