in cmd/zc_traverser_local.go [204:389]
// WalkWithSymlinks walks the tree rooted at fullPath (made absolute first) and
// calls walkFunc for every entry it decides to report. Unlike filepath.Walk it
// can follow symlinks, controlled by symlinkHandling:
//
//   - Preserve(): a symlink is handed to walkFunc as-is, like any other entry.
//   - None():     symlinks are skipped silently.
//   - otherwise:  the link is resolved with UnfurlSymlinks and stat'ed. A target
//     directory is reported to walkFunc at the link's location and queued so its
//     contents are walked under that location; a target file is reported at the
//     link's location using the target's stat result and the link's name.
//
// The path passed to walkFunc is always built from the original root plus the
// entry's relative path (including the relative location of any symlink that
// led to it), not from the resolved on-disk location.
//
// Per-entry failures (access errors, unresolvable links, failed stats) are
// logged via WarnStdoutAndScanningLog and, in most cases, pushed to
// errorChannel; they do not abort the walk. The only error this function itself
// returns is a failure to make fullPath absolute. An error returned by walkFunc
// (after filtering through getProcessingError) is returned from the callback to
// parallel.Walk and is not propagated to the caller of WalkWithSymlinks.
func WalkWithSymlinks(appCtx context.Context, fullPath string, walkFunc filepath.WalkFunc, symlinkHandling common.SymlinkHandlingType, errorChannel chan ErrorFileInfo) (err error) {
	// We want to re-queue symlinks up in their evaluated form because filepath.Walk doesn't evaluate them for us.
	// So, what is the plan of attack?
	// Because we can't create endless channels, we create an array instead and use it as a queue.
	// Furthermore, we use a map as a hashset to avoid re-walking any paths we already know.
	type walkItem struct {
		fullPath     string // We need the full, symlink-resolved path to walk against.
		relativeBase string // We also need the relative base path we found the symlink at.
	}

	fullPath, err = filepath.Abs(fullPath)
	if err != nil {
		return err
	}

	walkQueue := []walkItem{{fullPath: fullPath, relativeBase: ""}}

	// do NOT put fullPath: true into the map at this time, because we want to match the semantics of filepath.Walk, where the walkfunc is called for the root
	// When following symlinks, our current implementation tracks folders and files. Which may consume GB's of RAM when there are 10s of millions of files.
	var seenPaths seenPathsRecorder = &nullSeenPathsRecorder{} // uses no RAM
	if symlinkHandling.Follow() {                              // only if we're following we need to worry about this
		seenPaths = &realSeenPathsRecorder{make(map[string]struct{})} // have to use the RAM if we are dealing with symlinks, to prevent cycles
	}

	for len(walkQueue) > 0 {
		queueItem := walkQueue[0]
		walkQueue = walkQueue[1:]

		// walk contents of this queueItem in parallel
		// (for simplicity of coding, we don't parallelize across multiple queueItems)
		parallel.Walk(appCtx, queueItem.fullPath, EnumerationParallelism, EnumerationParallelStatFiles, func(filePath string, fileInfo os.FileInfo, fileError error) error {
			if fileError != nil {
				WarnStdoutAndScanningLog(fmt.Sprintf("Accessing '%s' failed with error: %s", filePath, fileError.Error()))
				writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: fileError})
				return nil
			}

			// Path of this entry relative to the original root: strip the root of the
			// item currently being walked, then re-attach the relative location at
			// which that item was discovered (empty for the original root).
			computedRelativePath := strings.TrimPrefix(cleanLocalPath(filePath), cleanLocalPath(queueItem.fullPath))
			computedRelativePath = cleanLocalPath(common.GenerateFullPath(queueItem.relativeBase, computedRelativePath))
			computedRelativePath = strings.TrimPrefix(computedRelativePath, common.AZCOPY_PATH_SEPARATOR_STRING)
			if computedRelativePath == "." {
				computedRelativePath = ""
			}

			if fileInfo == nil {
				err := fmt.Errorf("fileInfo is nil for file %s", filePath)
				WarnStdoutAndScanningLog(err.Error())
				return nil
			}

			if fileInfo.Mode()&os.ModeSymlink != 0 {
				if symlinkHandling.Preserve() {
					// Handle it like it's not a symlink
					result, err := filepath.Abs(filePath)
					if err != nil {
						WarnStdoutAndScanningLog(fmt.Sprintf("Failed to get absolute path of %s: %s", filePath, err))
						return nil
					}

					err = walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), fileInfo, fileError)
					// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
					skipped, err := getProcessingError(err)
					// If the file was skipped, don't record it.
					if !skipped {
						seenPaths.Record(common.ToExtendedPath(result))
					}
					return err
				}

				if symlinkHandling.None() {
					return nil // skip it
				}

				/*
				 * A symlink may point outside of the sharepoint (when its target is an absolute path).
				 * We ought to raise an error in that case. It is very unlikely that the same file or
				 * folder is present on the agent side, in which case an error is raised anyway.
				 *
				 * TODO: Need to handle this case.
				 */
				result, err := UnfurlSymlinks(filePath)
				if err != nil {
					err = fmt.Errorf("failed to resolve symlink %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				result, err = filepath.Abs(result)
				if err != nil {
					err = fmt.Errorf("failed to get absolute path of symlink result %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				slPath, err := filepath.Abs(filePath)
				if err != nil {
					err = fmt.Errorf("failed to get absolute path of %s: %w", filePath, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				rStat, err := os.Stat(result)
				if err != nil {
					err = fmt.Errorf("failed to get properties of symlink target at %s: %w", result, err)
					WarnStdoutAndScanningLog(err.Error())
					writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
					return nil
				}

				if rStat.IsDir() {
					// Look the target up under the same normalised key that Record stores,
					// otherwise the lookup cannot match whenever ToExtendedPath alters the path.
					targetKey := common.ToExtendedPath(result)
					if seenPaths.HasSeen(targetKey) {
						WarnStdoutAndScanningLog(fmt.Sprintf("Ignored already linked directory pointed at %s (link at %s)", result, common.GenerateFullPath(fullPath, computedRelativePath)))
						return nil
					}

					// enumerate the FOLDER now (since its presence in seenPaths will prevent its properties getting enumerated later)
					err := walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), symlinkTargetFileInfo{rStat, fileInfo.Name()}, fileError)
					// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
					skipped, err := getProcessingError(err)
					if !skipped { // Don't go any deeper (or record it) if we skipped it.
						seenPaths.Record(targetKey)
						seenPaths.Record(common.ToExtendedPath(slPath)) // Note we've seen the symlink as well. We shouldn't ever have issues if we _don't_ do this because we'll just catch it by symlink result
						walkQueue = append(walkQueue, walkItem{
							fullPath:     result,
							relativeBase: computedRelativePath,
						})
					}
					return err
				}

				// It's a symlink to a file and we handle cyclic symlinks.
				// (this does create the inconsistency that if there are two symlinks to the same file we will process it twice,
				// but if there are two symlinks to the same directory we will process it only once. Because only directories are
				// deduped to break cycles. For now, we are living with the inconsistency. The alternative would be to "burn" more
				// RAM by putting filepaths into seenPaths too, but that could be a non-trivial amount of RAM in big directories trees).
				targetFi := symlinkTargetFileInfo{rStat, fileInfo.Name()}
				err = walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), targetFi, fileError)
				_, err = getProcessingError(err)
				return err
			}

			// not a symlink
			result, err := filepath.Abs(filePath)
			if err != nil {
				err = fmt.Errorf("failed to get absolute path of %s: %w", filePath, err)
				WarnStdoutAndScanningLog(err.Error())
				writeToErrorChannel(errorChannel, ErrorFileInfo{FilePath: filePath, FileInfo: fileInfo, ErrorMsg: err})
				return nil
			}

			// Same key for lookup and for recording (see the directory-symlink case above).
			pathKey := common.ToExtendedPath(result)
			if seenPaths.HasSeen(pathKey) {
				// For directories we can't output a warning here (and versions 10.3.x never did)
				// because we'll hit this for the directory that is the direct (root) target of any symlink, so any warning here would be a red herring.
				// In theory there might be cases when a warning here would be correct - but they are rare and too hard to identify in our code
				if !fileInfo.IsDir() {
					WarnStdoutAndScanningLog(fmt.Sprintf("Ignored already seen file located at %s (found at %s)", filePath, common.GenerateFullPath(fullPath, computedRelativePath)))
				}
				return nil
			}

			err = walkFunc(common.GenerateFullPath(fullPath, computedRelativePath), fileInfo, fileError)
			// Since this doesn't directly manipulate the error, and only checks for a specific error, it's OK to use in a generic function.
			skipped, err := getProcessingError(err)
			// If the file was skipped, don't record it.
			if !skipped {
				seenPaths.Record(pathKey)
			}
			return err
		})
	}

	// The named result is only ever assigned by filepath.Abs above (callbacks use
	// their own local err), so it is necessarily nil here.
	return nil
}