in internal/vfs/zip/archive.go [112:171]
func (a *zipArchive) readArchive(url string) {
	defer close(a.done)

	// Read the archive with a timeout separate from openArchive's.
	ctx, cancel := context.WithTimeout(context.Background(), a.openTimeout)
	defer cancel()
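
	// readArchive runs in its own goroutine, so a fresh context.Background
	// (bounded only by openTimeout) presumably keeps an in-flight archive
	// load from being canceled when the request that triggered it ends.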

	a.resource, a.err = httprange.NewResource(ctx, url, a.fs.httpClient)
	if a.err != nil {
		log.WithFields(log.Fields{
			"archive_url": url,
		}).WithError(a.err).Infoln("read zip archive request failed")
		metrics.ZipOpened.WithLabelValues("error").Inc()
		return
	}

	var archive *zip.Reader

	// Read the zip central directory (all file headers) into memory using a
	// cached ranged reader; file contents themselves are not loaded here.
	a.reader = httprange.NewRangedReader(a.resource)
	a.reader.WithCachedReader(ctx, func() {
		archive, a.err = zip.NewReader(a.reader, a.resource.Size)
	})
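	// The cached reader serves zip.NewReader's many small ReadAt calls from
	// buffered ranges instead of issuing an HTTP range request for each one
	// (see the httprange package for the exact caching behavior).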

	if archive == nil || a.err != nil {
		log.WithFields(log.Fields{
			"archive_url": url,
		}).WithError(a.err).Infoln("loading zip archive files into memory failed")
		metrics.ZipOpened.WithLabelValues("error").Inc()
		return
	}

	// TODO: Improve preprocessing of zip archives https://gitlab.com/gitlab-org/gitlab-pages/-/issues/432
	for _, file := range archive.File {
		// Each Modified timestamp contains a pointer to a unique timezone
		// object. This wastes a lot of memory. By setting the timezone to UTC
		// on each timestamp, we allow the unique timezone objects to be
		// garbage-collected. Also see
		// https://gitlab.com/gitlab-org/gitlab-pages/-/issues/702.
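		// (time.Time stores a *time.Location; calling UTC() swaps it for the
		// shared UTC location, dropping the reference to the per-file zone.)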
		file.Modified = file.Modified.UTC()

		if file.Mode().IsDir() {
			a.directories[file.Name] = &file.FileHeader
		} else {
			a.files[file.Name] = file
		}
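
		// addPathDirectory (defined elsewhere in archive.go) registers the
		// entry's parent directories, so that lookups succeed even when the
		// archive carries no explicit directory entries.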
		a.addPathDirectory(file.Name)
	}

	// Each *zip.File keeps its own pointer to the zip.Reader, so entries in
	// a.files stay readable. The archive.File slice itself is no longer
	// needed, so nil it out to reduce memory consumption.
	archive.File = nil

	fileCount := float64(len(a.files))
	metrics.ZipOpened.WithLabelValues("ok").Inc()
	metrics.ZipOpenedEntriesCount.Add(fileCount)
	metrics.ZipArchiveEntriesCached.Add(fileCount)
}
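
// For reference, callers coordinate with this goroutine through a.done,
// which the defer at the top closes on every exit path. A minimal sketch of
// the waiting side (assumed shape; the real openArchive may differ):
//
//	go a.readArchive(url)
//	select {
//	case <-a.done:
//		return a.err
//	case <-ctx.Done():
//		return ctx.Err()
//	}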