func WalkWithSymlinks()

in cmd/zc_traverser_local.go [204:389]


func WalkWithSymlinks(appCtx context.Context, fullPath string, walkFunc filepath.WalkFunc, symlinkHandling common.SymlinkHandlingType, errorChannel chan ErrorFileInfo) (err error) {

	// We want to re-queue symlinks up in their evaluated form because filepath.Walk doesn't evaluate them for us.
	// So, what is the plan of attack?
	// Because we can't create endless channels, we create an array instead and use it as a queue.
	// Furthermore, we use a map as a hashset to avoid re-walking any paths we already know.
	type walkItem struct {
		fullPath     string // We need the full, symlink-resolved path to walk against.
		relativeBase string // We also need the relative base path we found the symlink at.
	}

	fullPath, err = filepath.Abs(fullPath)
	if err != nil {
		return err
	}

	walkQueue := []walkItem{{fullPath: fullPath, relativeBase: ""}}

	// do NOT put fullPath: true into the map at this time, because we want to match the semantics of filepath.Walk, where the walkfunc is called for the root
	// When following symlinks, our current implementation tracks folders and files.  Which may consume GB's of RAM when there are 10s of millions of files.
	var seenPaths seenPathsRecorder = &nullSeenPathsRecorder{} // uses no RAM
	if symlinkHandling.Follow() {                              // only if we're following we need to worry about this
		seenPaths = &realSeenPathsRecorder{make(map[string]struct{})} // have to use the RAM if we are dealing with symlinks, to prevent cycles
	}

	for len(walkQueue) > 0 {
		queueItem := walkQueue[0]
		walkQueue = walkQueue[1:]
		// walk contents of this queueItem in parallel
		// (for simplicity of coding, we don't parallelize across multiple queueItems)
		parallel.Walk(appCtx, queueItem.fullPath, EnumerationParallelism, EnumerationParallelStatFiles, func(filePath string, fileInfo os.FileInfo, fileError error) error {
			if fileError != nil {
				WarnStdoutAndScanningLog(fmt.Sprintf("Accessing '%s' failed with error: %s", filePath, fileError.Error()))
				writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: fileError})
				return nil
			}
			computedRelativePath := strings.TrimPrefix(cleanLocalPath(filePath), cleanLocalPath(queueItem.fullPath))
			computedRelativePath = cleanLocalPath(common.GenerateFullPath(queueItem.relativeBase, computedRelativePath))
			computedRelativePath = strings.TrimPrefix(computedRelativePath, common.AZCOPY_PATH_SEPARATOR_STRING)

			if computedRelativePath == "." {
				computedRelativePath = ""
			}

			if fileInfo == nil {
				err := fmt.Errorf("fileInfo is nil for file %s", filePath)
				WarnStdoutAndScanningLog(err.Error())
				return nil
			}

			if fileInfo.Mode()&os.ModeSymlink != 0 {
				if symlinkHandling.Preserve() {
					// Handle it like it's not a symlink
					result, err := filepath.Abs(filePath)

					if err != nil {
						WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get absolute path of %s: %s", filePath, err))
						return nil
					}

					err = walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), fileInfo, fileError)
					// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
					skipped, err := getProcessingError(err)

					// If the file was skipped, don't record it.
					if !skipped {
						seenPaths.Record(common.ToExtendedPath(result))
					}

					return err
				}

				if symlinkHandling.None() {
					return nil // skip it
				}

				/*
				 * There is one case where symlink can point to outside of sharepoint(symlink is absolute path). In that case
				 * we need to throw error. Its very unlikely same file or folder present on the agent side.
				 * In that case it anywaythrow the error.
				 *
				 * TODO: Need to handle this case.
				 */
				result, err := UnfurlSymlinks(filePath)

				if err != nil {
					err = fmt.Errorf("failed to resolve symlink %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				result, err = filepath.Abs(result)
				if err != nil {
					err = fmt.Errorf("failed to get absolute path of symlink result %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				slPath, err := filepath.Abs(filePath)
				if err != nil {
					err = fmt.Errorf("failed to get absolute path of %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				rStat, err := os.Stat(result)
				if err != nil {
					err = fmt.Errorf("failed to get properties of symlink target at %s: %w", result, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				if rStat.IsDir() {
					if !seenPaths.HasSeen(result) {
						err := walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), symlinkTargetFileInfo{rStat, fileInfo.Name()}, fileError)
						// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
						skipped, err := getProcessingError(err)

						if !skipped { // Don't go any deeper (or record it) if we skipped it.
							seenPaths.Record(common.ToExtendedPath(result))
							seenPaths.Record(common.ToExtendedPath(slPath)) // Note we've seen the symlink as well. We shouldn't ever have issues if we _don't_ do this because we'll just catch it by symlink result
							walkQueue = append(walkQueue, walkItem{
								fullPath:     result,
								relativeBase: computedRelativePath,
							})
						}
						// enumerate the FOLDER now (since its presence in seenDirs will prevent its properties getting enumerated later)
						return err
					} else {
						WarnStdoutAndScanningLog(fmt.Sprintf("Ignored already linked directory pointed at %s (link at %s)", result, common.GenerateFullPath(fullPath, computedRelativePath)))
					}
				} else {
					// It's a symlink to a file and we handle cyclic symlinks.
					// (this does create the inconsistency that if there are two symlinks to the same file we will process it twice,
					// but if there are two symlinks to the same directory we will process it only once. Because only directories are
					// deduped to break cycles.  For now, we are living with the inconsistency. The alternative would be to "burn" more
					// RAM by putting filepaths into seenDirs too, but that could be a non-trivial amount of RAM in big directories trees).
					targetFi := symlinkTargetFileInfo{rStat, fileInfo.Name()}

					err := walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), targetFi, fileError)
					_, err = getProcessingError(err)
					return err
				}
				return nil
			} else {
				// not a symlink
				result, err := filepath.Abs(filePath)

				if err != nil {
					err = fmt.Errorf("failed to get absolute path of %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				if !seenPaths.HasSeen(result) {
					err := walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), fileInfo, fileError)
					// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
					skipped, err := getProcessingError(err)

					// If the file was skipped, don't record it.
					if !skipped {
						seenPaths.Record(common.ToExtendedPath(result))
					}

					return err
				} else {
					if fileInfo.IsDir() {
						// We can't output a warning here (and versions 10.3.x never did)
						// because we'll hit this for the directory that is the direct (root) target of any symlink, so any warning here would be a red herring.
						// In theory there might be cases when a warning here would be correct - but they are rare and too hard to identify in our code
					} else {
						WarnStdoutAndScanningLog(fmt.Sprintf("Ignored already seen file located at %s (found at %s)", filePath, common.GenerateFullPath(fullPath, computedRelativePath)))
					}
					return nil
				}
			}
		})
	}

	return
}