in lib/backend/hdfsbackend/client.go [173:276]
// List returns the names of all blobs stored under prefix by performing a
// concurrent recursive walk of the corresponding HDFS directory tree.
// Pagination is not supported; passing a pagination option is an error.
//
// Concurrency model: ListConcurrency lister goroutines consume directory
// paths from listJobs and publish each directory's listing (or error) on
// results. This function is the sole receiver on results. The pending
// counter records how many results are still owed; the walk terminates when
// it drops to zero. Discovered subdirectories are sent back onto listJobs
// from a separate goroutine, never from this one (see the deadlock note
// below).
//
// NOTE(review): assumes c.lister and c.sendAll select on the done channel so
// they unblock promptly after an early return here — confirm in their
// implementations.
func (c *Client) List(prefix string, opts ...backend.ListOption) (*backend.ListResult, error) {
	options := backend.DefaultListOptions()
	for _, opt := range opts {
		opt(options)
	}
	if options.Paginated {
		return nil, errors.New("pagination not supported")
	}
	root := path.Join(c.pather.BasePath(), prefix)
	// Unbuffered on purpose: listers block until this loop is ready to
	// receive, which is what keeps the pending invariant sound.
	listJobs := make(chan string)
	results := make(chan listResult)
	done := make(chan struct{})
	var wg sync.WaitGroup
	for i := 0; i < c.config.ListConcurrency; i++ {
		wg.Add(1)
		go func() {
			c.lister(done, listJobs, results)
			wg.Done()
		}()
	}
	// Closing done signals every lister/sender goroutine to exit, whether we
	// finish normally or bail out early on an error.
	defer func() {
		close(done)
		if c.config.testing {
			// Waiting might be delayed if an early error is encountered but
			// other goroutines are waiting on a long http timeout. Thus, we
			// only wait for each spawned goroutine to exit during testing to
			// assert that no goroutines leak.
			wg.Wait()
		}
	}()
	var files []string
	// Pending tracks the number of directories which are pending exploration.
	// Invariant: there will be a result received for every increment made to
	// pending.
	pending := 1
	listJobs <- root
	for pending > 0 {
		res := <-results
		pending--
		if res.err != nil {
			// A missing directory is treated as empty rather than fatal
			// (e.g. the root prefix may simply not exist yet).
			if httputil.IsNotFound(res.err) {
				continue
			}
			return nil, res.err
		}
		var dirs []string
		for _, fs := range res.list {
			p := path.Join(res.dir, fs.PathSuffix)
			// TODO(codyg): This is an ugly hack to avoid walking through non-tags
			// during Docker catalog. Ideally, only tags are located in the repositories
			// directory, however in WBU2 HDFS, there are blobs here as well. At some
			// point, we must migrate the data into a structure which cleanly divides
			// blobs and tags (like we do in S3).
			if _ignoreRegex.MatchString(p) {
				continue
			}
			// TODO(codyg): Another ugly hack to speed up catalog performance by stopping
			// early when we hit tags...
			if _stopRegex.MatchString(p) {
				p = path.Join(p, "tags/dummy/current/link")
				fs.Type = "FILE"
			}
			if fs.Type == "DIRECTORY" {
				// Flat directory structures are common, so accumulate directories and send
				// them to the listers in a single goroutine (as opposed to a goroutine per
				// directory).
				dirs = append(dirs, p)
			} else {
				// Paths that don't map to a blob name are logged and skipped
				// rather than failing the whole listing.
				name, err := c.pather.NameFromBlobPath(p)
				if err != nil {
					log.With("path", p).Errorf("Error converting blob path into name: %s", err)
					continue
				}
				files = append(files, name)
			}
		}
		if len(dirs) > 0 {
			// We cannot send list jobs and receive results in the same thread, else
			// deadlock will occur.
			wg.Add(1)
			go func() {
				c.sendAll(done, dirs, listJobs)
				wg.Done()
			}()
			// Incremented before the next receive, and this loop is the only
			// receiver on results, so the pending invariant holds.
			pending += len(dirs)
		}
	}
	return &backend.ListResult{
		Names: files,
	}, nil
}