internal/extraction/extraction.go
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.
package extraction
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
"strings"
"github.com/elastic/eck-diagnostics/internal/archive"
"github.com/elastic/eck-diagnostics/internal/log"
)
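
// logger aliases the tool's shared logger from the internal log package.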
var logger = log.Logger
// RemoteSource describes a remote (i.e. in-cluster) source of diagnostic data: a Pod that has run the Elastic
// stack support-diagnostics tool and is waiting for the diagnostic data to be extracted.
type RemoteSource struct {
Namespace string // de-normalized for convenience
PodName string
Typ string
ResourceName string
PodOutputDir string
}
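
// For illustration only (hypothetical values): a RemoteSource for an Elasticsearch
// resource named "quickstart" in the "default" namespace might look like
//
//	src := RemoteSource{
//		Namespace:    "default",
//		PodName:      "quickstart-es-default-0",
//		Typ:          "elasticsearch",
//		ResourceName: "quickstart",
//		PodOutputDir: "/tmp/eck-diagnostics",
//	}
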
// sourceDirPrefix returns the directory prefix the stack support-diagnostics tool uses in the archive it creates.
func (j *RemoteSource) sourceDirPrefix() string {
prefix := "api-diagnostics"
switch j.Typ {
case "kibana", "logstash":
prefix = fmt.Sprintf("%s-%s", j.Typ, prefix)
case "agent":
prefix = ""
}
return prefix
}
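
// For example, given the switch above: Typ "elasticsearch" maps to "api-diagnostics", "kibana" to
// "kibana-api-diagnostics", "logstash" to "logstash-api-diagnostics", and "agent" to "" (an empty
// prefix, which every file name matches, so Agent archives bypass the prefix filtering in UntarIntoZip).
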
// outputDirPrefix returns the directory hierarchy we want to use in the archive created by this tool: the Namespace
// of the resource we are creating diagnostics for, followed by the type (for example elasticsearch or kibana) and the
// name of the resource.
func (j *RemoteSource) outputDirPrefix() string {
return archive.Path(j.Namespace, j.Typ, j.ResourceName)
}
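
// Assuming archive.Path joins its segments with forward slashes, the hypothetical RemoteSource shown
// above would have its files placed under "default/elasticsearch/quickstart" in the output archive.
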
// UntarIntoZip extracts the files transferred via tar from the Pod into the given ZipFile.
func UntarIntoZip(reader *io.PipeReader, job RemoteSource, file *archive.ZipFile, verbose bool) error {
tarReader := tar.NewReader(reader)
for {
header, err := tarReader.Next()
if err != nil {
if !errors.Is(err, io.EOF) {
return err
}
break
}
remoteFilename := header.Name
// strip the Pod-side output directory prefix so file names become relative to the diagnostics output
relOutputDir := fmt.Sprintf("%s/", strings.TrimPrefix(job.PodOutputDir, "/"))
relativeFilename := strings.TrimPrefix(remoteFilename, relOutputDir)
// stack diagnostics create output in a directory called api-diagnostics-{{.Timestamp}}
if !strings.HasPrefix(relativeFilename, job.sourceDirPrefix()) {
if verbose {
logger.Printf("Ignoring file %s in tar from %s diagnostics\n", header.Name, job.ResourceName)
}
continue
}
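// record in the manifest where this resource's diagnostic data ends up in the output archive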
manifest := archive.StackDiagnosticManifest{DiagType: job.Typ}
switch {
case strings.HasSuffix(relativeFilename, ".tar.gz"):
manifest.DiagPath = job.outputDirPrefix()
if err := RepackageTarGzip(tarReader, job.outputDirPrefix(), file); err != nil {
return err
}
case strings.HasSuffix(relativeFilename, ".zip"):
manifest.DiagPath = job.outputDirPrefix()
if err := RepackageZip(tarReader, job.outputDirPrefix(), file); err != nil {
return err
}
default:
path := archive.Path(job.Namespace, job.Typ, job.ResourceName, relativeFilename)
manifest.DiagPath = path
out, err := file.Create(path)
if err != nil {
return err
}
// accept decompression bomb for CLI as we control the src
if _, err := io.Copy(out, tarReader); err != nil { //nolint:gosec
return err
}
}
file.AddManifestEntry(manifest)
}
return nil
}
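
// A caller might wire the Pod side up roughly as follows (illustrative sketch only: streamTarFromPod is a
// hypothetical helper that runs tar in the Pod and copies its stdout, and zipFile/verbose come from the
// surrounding context; none of this is part of this package):
//
//	reader, writer := io.Pipe()
//	go func() {
//		// propagate any streaming error to the reading side, or io.EOF on success
//		writer.CloseWithError(streamTarFromPod(writer, src))
//	}()
//	err := UntarIntoZip(reader, src, zipFile, verbose)
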
// RepackageTarGzip repackages the *.tar.gz archives produced by the support diagnostics tool into the given ZipFile.
func RepackageTarGzip(in io.Reader, outputDirPrefix string, zipFile *archive.ZipFile) error {
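// `in` is the outer tar stream positioned at the embedded *.tar.gz entry, so the gzip reader consumes
// exactly that entry's bytes; unlike the zip case below, no in-memory copy of the whole archive is needed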
gzReader, err := gzip.NewReader(in)
if err != nil {
return err
}
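// the embedded archive wraps everything in a single timestamped top-level directory; remember it so it
// can be stripped from the output paths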
topLevelDir := ""
tarReader := tar.NewReader(gzReader)
for {
header, err := tarReader.Next()
if err != nil {
if !errors.Is(err, io.EOF) {
return err
}
break
}
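// directory entries are only used to discover the top-level directory; regular files are copied into the zip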
switch header.Typeflag {
case tar.TypeDir:
if topLevelDir == "" {
topLevelDir = header.Name
}
continue
case tar.TypeReg:
out, err := zipFile.Create(toOutputPath(header.Name, topLevelDir, outputDirPrefix))
if err != nil {
return err
}
// accept decompression bomb for CLI tool as we control the src
_, err = io.Copy(out, tarReader) //nolint:gosec
if err != nil {
return err
}
}
}
return nil
}
// RepackageZip repackages the *.zip file produced by the support diagnostics tool into the zip file produced by this tool.
func RepackageZip(in io.Reader, outputDirPrefix string, zipFile *archive.ZipFile) error {
// the zip format needs random access (its central directory sits at the end of the file), so the streamed
// bytes have to be read completely into memory before they can be opened as an archive
b := new(bytes.Buffer)
if _, err := b.ReadFrom(in); err != nil {
return err
}
zipReader, err := zip.NewReader(bytes.NewReader(b.Bytes()), int64(b.Len()))
if err != nil {
return err
}
// api-diagnostics creates a common top folder we don't need when repackaging
topLevelDir := ""
for _, f := range zipReader.File {
// skip directory entries (and any other zero-length entries); only files with content need repackaging
if f.UncompressedSize64 == 0 {
continue
}
// capture the top-level directory (tld) from the first entry
if topLevelDir == "" {
topLevelDir = archive.RootDir(f.Name)
}
out, err := zipFile.Create(toOutputPath(f.Name, topLevelDir, outputDirPrefix))
if err != nil {
return err
}
if err := copyFromZip(f, out); err != nil {
return err
}
}
return nil
}
// copyFromZip writes the contents of file f from a zip file into out.
func copyFromZip(f *zip.File, out io.Writer) error {
rc, err := f.Open()
if err != nil {
return err
}
defer rc.Close()
if _, err := io.Copy(out, rc); err != nil { //nolint:gosec
return err
}
return nil
}
// toOutputPath removes the path prefix topLevelDir from original and re-bases it in outputDirPrefix.
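// For example (hypothetical names): original "api-diagnostics-20210907-133527/logs/elasticsearch.log" with
// topLevelDir "api-diagnostics-20210907-133527/" and outputDirPrefix "default/elasticsearch/quickstart"
// becomes "default/elasticsearch/quickstart/logs/elasticsearch.log", assuming archive.Path joins its
// segments with forward slashes.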
func toOutputPath(original, topLevelDir, outputDirPrefix string) string {
// topLevelDir should always be a Unix-style path like /api-diagnostics-20210907-133527, so a simple prefix trim
// suffices and avoids the filepath.* functions, which would insert platform-specific path separators that are
// incompatible with the ZIP format.
return archive.Path(outputDirPrefix, strings.TrimPrefix(original, topLevelDir))
}