func()

in internal/vfs/zip/archive.go [112:171]


// readArchive fetches the zip archive at url over HTTP and indexes its
// entries into a.files and a.directories. Any failure is recorded in a.err,
// the outcome is reported through the Zip metrics, and a.done is always
// closed so that goroutines blocked in openArchive are released.
func (a *zipArchive) readArchive(url string) {
	defer close(a.done)

	// readArchive with a timeout separate from openArchive's
	ctx, cancel := context.WithTimeout(context.Background(), a.openTimeout)
	defer cancel()

	a.resource, a.err = httprange.NewResource(ctx, url, a.fs.httpClient)
	if a.err != nil {
		log.WithFields(log.Fields{
			"archive_url": url,
		}).WithError(a.err).Infoln("read zip archive request failed")
		metrics.ZipOpened.WithLabelValues("error").Inc()
		return
	}

	// load all archive files into memory using a cached ranged reader
	a.reader = httprange.NewRangedReader(a.resource)

	var zipReader *zip.Reader
	a.reader.WithCachedReader(ctx, func() {
		zipReader, a.err = zip.NewReader(a.reader, a.resource.Size)
	})

	if zipReader == nil || a.err != nil {
		log.WithFields(log.Fields{
			"archive_url": url,
		}).WithError(a.err).Infoln("loading zip archive files into memory failed")
		metrics.ZipOpened.WithLabelValues("error").Inc()
		return
	}

	// TODO: Improve preprocessing of zip archives https://gitlab.com/gitlab-org/gitlab-pages/-/issues/432
	for _, entry := range zipReader.File {
		// Every Modified timestamp points at its own timezone object, which
		// wastes memory; normalizing each one to UTC lets those unique
		// timezone objects be garbage-collected. Also see
		// https://gitlab.com/gitlab-org/gitlab-pages/-/issues/702.
		entry.Modified = entry.Modified.UTC()

		if entry.Mode().IsDir() {
			a.directories[entry.Name] = &entry.FileHeader
		} else {
			a.files[entry.Name] = entry
		}

		a.addPathDirectory(entry.Name)
	}

	// Each entry retains a pointer to the zip reader, but the File slice
	// itself is never used again — drop it to reduce memory consumption.
	zipReader.File = nil

	metrics.ZipOpened.WithLabelValues("ok").Inc()
	entryCount := float64(len(a.files))
	metrics.ZipOpenedEntriesCount.Add(entryCount)
	metrics.ZipArchiveEntriesCached.Add(entryCount)
}