agent/inventory/gatherers/file/dataProvider.go (129 lines of code) (raw):

package file import ( "encoding/json" "errors" "fmt" "io/ioutil" "os" "path/filepath" "strings" "github.com/aliyun/aliyun_assist_client/agent/log" "github.com/aliyun/aliyun_assist_client/agent/inventory/model" ) type filterObj struct { Path string Pattern []string Recursive bool DirScanLimit *int } type fileInfoObject struct { fi os.FileInfo path string } var getFullPath func(path string, mapping func(string) string) (string, error) // Limits to help keep file information under item size limit and prevent long scanning. // The Dir Limits can be configured through input parameters const FileCountLimit = 500 const FileCountLimitExceeded = "File Count Limit Exceeded" const DirScanLimit = 5000 const DirScanLimitExceeded = "Directory Scan Limit Exceeded" var DirScanLimitError = errors.New(DirScanLimitExceeded) var FileCountLimitError = errors.New(FileCountLimitExceeded) var readDirFunc = readDir var getFilesFunc = getFiles var existsPath = exists var filepathWalk = filepath.Walk var getMetaDataFunc = getMetaData // readDir is a wrapper on ioutil.ReadDir for easy testability func readDir(dirname string) ([]os.FileInfo, error) { return ioutil.ReadDir(dirname) } //removeDuplicates deduplicates the input array of model.FileData func removeDuplicatesFileData(elements []model.FileData) (result []model.FileData) { // Use map to record duplicates as we find them. encountered := map[model.FileData]bool{} for v := range elements { if !encountered[elements[v]] { // Record this element as an encountered element. encountered[elements[v]] = true // Append to result slice. result = append(result, elements[v]) } } // Return the new slice. return result } //removeDuplicatesString deduplicates array of strings func removeDuplicatesString(elements []string) (result []string) { encountered := map[string]bool{} for _, element := range elements { if !encountered[element] { encountered[element] = true result = append(result, element) } } return result } //exists check if the file path exists func exists(path string) (bool, error) { _, err := os.Stat(path) if err == nil { return true, nil } if os.IsNotExist(err) { return false, nil } return false, err } func getFiles(path string, pattern []string, recursive bool, fileLimit int, dirLimit int) (validFiles []string, err error) { var ex bool ex, err = existsPath(path) if err != nil { log.GetLogger().Error(err) return } if !ex { log.GetLogger().Error(fmt.Errorf("Path %v does not exist!", path)) return } dirScanCount := 0 if recursive { err = filepathWalk(path, func(fp string, fi os.FileInfo, err error) error { if err != nil { log.GetLogger().Error(err) return nil } if fi.IsDir() { dirScanCount++ if dirScanCount > dirLimit { log.GetLogger().Errorf("Scanned maximum allowed directories. Returning collected files") return DirScanLimitError } return nil } if fileMatchesAnyPattern(pattern, fi.Name()) { validFiles = append(validFiles, fp) if len(validFiles) > fileLimit { log.GetLogger().Errorf("Found more than limit of %d files", FileCountLimit) return FileCountLimitError } } return nil }) } else { files, readDirErr := readDirFunc(path) if readDirErr != nil { log.GetLogger().Error(readDirErr) err = readDirErr return } dirScanCount++ for _, fi := range files { if fi.IsDir() { continue } if fileMatchesAnyPattern(pattern, fi.Name()) { validFiles = append(validFiles, filepath.Join(path, fi.Name())) if len(validFiles) > fileLimit { log.GetLogger().Errorf("Found more than limit of %d files", FileCountLimit) err = FileCountLimitError return } } } } log.GetLogger().Debugf("DirScanned %d", dirScanCount) return } //getAllMeta processes the filter, gets paths of all filtered files, and get file info of all files func getAllMeta(config model.Config) (data []model.FileData, err error) { jsonBody := []byte(strings.Replace(config.Filters, `\`, `/`, -1)) //this is to convert the backslash in windows path to slash var filterList []filterObj if err = json.Unmarshal(jsonBody, &filterList); err != nil { log.GetLogger().Error(err) return } var fileList []string for _, filter := range filterList { var fullPath string var getPathErr error var dirScanLimit int if fullPath, getPathErr = getFullPath(filter.Path, os.Getenv); getPathErr != nil { log.GetLogger().Error(getPathErr) continue } fileLimit := FileCountLimit - len(fileList) if filter.DirScanLimit == nil { dirScanLimit = DirScanLimit } else { dirScanLimit = *filter.DirScanLimit } log.GetLogger().Debugf("Dir Scan Limit %d", dirScanLimit) foundFiles, getFilesErr := getFilesFunc(fullPath, filter.Pattern, filter.Recursive, fileLimit, dirScanLimit) // We should only break, if we get limit error, otherwise we should continue collecting other data if getFilesErr != nil { log.GetLogger().Error(getFilesErr) if getFilesErr == FileCountLimitError || getFilesErr == DirScanLimitError { return nil, getFilesErr } } fileList = append(fileList, foundFiles...) fileList = removeDuplicatesString(fileList) } if len(fileList) > 0 { data, err = getMetaDataFunc(fileList) } log.GetLogger().Debugf("Collected Files %d", len(data)) return } //fileMatchesAnyPattern returns true if file name matches any pattern specified func fileMatchesAnyPattern(pattern []string, fname string) bool { for _, item := range pattern { matched, matchErr := filepath.Match(item, fname) if matchErr != nil { log.GetLogger().Error(matchErr) continue } if matched { return true } } return false } //collectFileData returns a list of file information based on the given configuration func collectFileData(config model.Config) (data []model.FileData, err error) { getFullPath = expand data, err = getAllMeta(config) log.GetLogger().WithError(err).Debugf("collected %d file data", len(data)) return }