internal/pkg/diagnostics/diagnostics.go (519 lines of code) (raw):

// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

package diagnostics

import (
	"archive/zip"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"reflect"
	"runtime/pprof"
	"strings"
	"time"

	"github.com/elastic/elastic-agent/pkg/control/v2/client"
	"github.com/elastic/go-ucfg"

	"gopkg.in/yaml.v3"

	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
	"github.com/elastic/elastic-agent/internal/pkg/config"
	"github.com/elastic/elastic-agent/internal/pkg/release"
	"github.com/elastic/elastic-agent/pkg/component"
	"github.com/elastic/elastic-agent/version"
)

const (
	// ContentTypeDirectory should be used to indicate that a directory should be made in the resulting bundle.
	ContentTypeDirectory = "directory"
	// REDACTED is the placeholder that replaces sensitive values in diagnostics output.
	REDACTED = "<REDACTED>"

	// agentName is the prefix (followed by "-") used to recognise versioned agent
	// directories when collecting logs.
	agentName = "elastic-agent"
)

// DiagCPU* are constants that describe the CPU profile that is collected when the
// --cpu-profile flag is used with the diagnostics command, or the diagnostics
// action contains "CPU" in the additional_metrics list.
const (
	DiagCPUName = "cpuprofile"
	DiagCPUFilename = "cpu.pprof"
	DiagCPUDescription = "CPU profile"
	DiagCPUContentType = "application/octet-stream"
	// DiagCPUDuration is the sampling period associated with the CPU profile.
	DiagCPUDuration = 30 * time.Second
)

// Hook is a hook that gets used when diagnostic information is requested from the Elastic Agent.
type Hook struct {
	// Name identifies the hook.
	Name string
	// Filename is the file name associated with the hook's output.
	Filename string
	// Description is a human readable summary of what the hook collects.
	Description string
	// ContentType is the MIME type of the bytes returned by Hook
	// (for example "application/yaml" or "application/octet-stream").
	ContentType string
	// Hook produces the diagnostic content. It has no error return; the global
	// hooks in this file report failures by returning the error text as content.
	Hook func(ctx context.Context) []byte
}

// Hooks is a set of diagnostic hooks.
type Hooks []Hook

// GlobalHooks returns the global hooks that can be used at anytime with no other references.
func GlobalHooks() Hooks { return Hooks{ { Name: "version", Filename: "version.txt", Description: "version information", ContentType: "application/yaml", Hook: func(_ context.Context) []byte { v := release.Info() o, err := yaml.Marshal(v) if err != nil { return []byte(fmt.Sprintf("error: %q", err)) } return o }, }, { Name: "package version", Filename: "package.version", Description: "Package Version", ContentType: "text/plain", Hook: func(_ context.Context) []byte { pkgVersionPath, err := version.GetAgentPackageVersionFilePath() if err != nil { return []byte(fmt.Sprintf("error: %q", err)) } fileBytes, err := os.ReadFile(pkgVersionPath) if err != nil { return []byte(fmt.Sprintf("error: %q", err)) } return fileBytes }, }, { Name: "goroutine", Filename: "goroutine.pprof.gz", Description: "stack traces of all current goroutines", ContentType: "application/octet-stream", Hook: pprofDiag("goroutine", 0), }, { Name: "heap", Filename: "heap.pprof.gz", Description: "a sampling of memory allocations of live objects", ContentType: "application/octet-stream", Hook: pprofDiag("heap", 0), }, { Name: "allocs", Filename: "allocs.pprof.gz", Description: "a sampling of all past memory allocations", ContentType: "application/octet-stream", Hook: pprofDiag("allocs", 0), }, { Name: "threadcreate", Filename: "threadcreate.pprof.gz", Description: "stack traces that led to the creation of new OS threads", ContentType: "application/octet-stream", Hook: pprofDiag("threadcreate", 0), }, { Name: "block", Filename: "block.pprof.gz", Description: "stack traces that led to blocking on synchronization primitives", ContentType: "application/octet-stream", Hook: pprofDiag("block", 0), }, { Name: "mutex", Filename: "mutex.pprof.gz", Description: "stack traces of holders of contended mutexes", ContentType: "application/octet-stream", Hook: pprofDiag("mutex", 0), }, } } func pprofDiag(name string, debug int) func(context.Context) []byte { return func(_ context.Context) []byte { var w bytes.Buffer err 
:= pprof.Lookup(name).WriteTo(&w, debug) if err != nil { // error is returned as the content return []byte(fmt.Sprintf("failed to write pprof to bytes buffer: %s", err)) } return w.Bytes() } } // CreateCPUProfile will gather a CPU profile over a given time duration. func CreateCPUProfile(ctx context.Context, period time.Duration) ([]byte, error) { var writeBuf bytes.Buffer err := pprof.StartCPUProfile(&writeBuf) if err != nil { return nil, fmt.Errorf("error starting CPU profile: %w", err) } tc := time.After(period) select { case <-ctx.Done(): pprof.StopCPUProfile() return nil, ctx.Err() case <-tc: break } pprof.StopCPUProfile() return writeBuf.Bytes(), nil } // ZipArchive creates a zipped diagnostics bundle using the passed writer with the passed diagnostics and local logs. // If any error is encountered when writing the contents of the archive it is returned. func ZipArchive( errOut, w io.Writer, topPath string, agentDiag []client.DiagnosticFileResult, unitDiags []client.DiagnosticUnitResult, compDiags []client.DiagnosticComponentResult, excludeEvents bool) error { ts := time.Now().UTC() zw := zip.NewWriter(w) defer zw.Close() // Write agent diagnostics content for _, ad := range agentDiag { zf, err := zw.CreateHeader(&zip.FileHeader{ Name: ad.Filename, Method: zip.Deflate, Modified: ad.Generated, }) if err != nil { return fmt.Errorf("error creating header for agent diagnostics: %w", err) } err = writeRedacted(errOut, zf, ad.Filename, ad) if err != nil { return fmt.Errorf("error writing file for agent diagnostics: %w", err) } } // Handle unit diagnostics // structure each unit into its own component directory compDirs := make(map[string][]client.DiagnosticUnitResult) for _, ud := range unitDiags { compDir := strings.ReplaceAll(ud.ComponentID, "/", "-") compDirs[compDir] = append(compDirs[compDir], ud) } componentResults := map[string]client.DiagnosticComponentResult{} // handle component diagnostics for _, comp := range compDiags { compDir := 
strings.ReplaceAll(comp.ComponentID, "/", "-") componentResults[compDir] = comp } // write each units diagnostics into its own directory // layout becomes components/<component-id>/<unit-id>/<filename> _, err := zw.CreateHeader(&zip.FileHeader{ Name: "components/", Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("error creating .zip header for components/ directory: %w", err) } // iterate over components for dirName, units := range compDirs { _, err := zw.CreateHeader(&zip.FileHeader{ Name: fmt.Sprintf("components/%s/", dirName), Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("error creating .zip header for component directory: %w", err) } // create component diags if comp, ok := componentResults[dirName]; ok { // check for component-level errors if comp.Err != nil { err = writeErrorResult(zw, fmt.Sprintf("components/%s/error.txt", dirName), comp.Err.Error()) if err != nil { return fmt.Errorf("error while writing error result for component %s: %w", comp.ComponentID, err) } } else { for _, res := range comp.Results { filePath := fmt.Sprintf("components/%s/%s", dirName, res.Filename) resFileWriter, err := zw.CreateHeader(&zip.FileHeader{ Name: filePath, Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("error creating .zip header for %s: %w", res.Filename, err) } err = writeRedacted(errOut, resFileWriter, filePath, res) if err != nil { return fmt.Errorf("error writing %s in zip file: %w", res.Filename, err) } } } } // create unit diags for _, ud := range units { unitDir := strings.ReplaceAll(strings.TrimPrefix(ud.UnitID, ud.ComponentID+"-"), "/", "-") _, err := zw.CreateHeader(&zip.FileHeader{ Name: fmt.Sprintf("components/%s/%s/", dirName, unitDir), Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("error creating .zip header for unit directory: %w", err) } // check for unit-level errors if ud.Err != nil { err = writeErrorResult(zw, fmt.Sprintf("components/%s/%s/error.txt", 
dirName, unitDir), ud.Err.Error()) if err != nil { return fmt.Errorf("error while writing error result for unit %s: %w", ud.UnitID, err) } continue } for _, fr := range ud.Results { filePath := fmt.Sprintf("components/%s/%s/%s", dirName, unitDir, fr.Filename) w, err := zw.CreateHeader(&zip.FileHeader{ Name: filePath, Method: zip.Deflate, Modified: fr.Generated, }) if err != nil { return err } err = writeRedacted(errOut, w, filePath, fr) if err != nil { return err } } } } // Gather Logs: return zipLogs(zw, ts, topPath, excludeEvents) } func writeErrorResult(zw *zip.Writer, path string, errBody string) error { ts := time.Now().UTC() w, err := zw.CreateHeader(&zip.FileHeader{ Name: path, Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("error writing header for error.txt file for component: %w", err) } _, err = w.Write([]byte(fmt.Sprintf("%s\n", errBody))) if err != nil { return fmt.Errorf("error writing error.txt file for component: %w", err) } return nil } func writeRedacted(errOut, resultWriter io.Writer, fullFilePath string, fileResult client.DiagnosticFileResult) error { out := &fileResult.Content // Should we support json too? 
if fileResult.ContentType == "application/yaml" { var unmarshalled any err := yaml.Unmarshal(fileResult.Content, &unmarshalled) if err != nil { // Best effort, output a warning but still include the file fmt.Fprintf(errOut, "[WARNING] Could not redact %s due to unmarshalling error: %s\n", fullFilePath, err) } else { switch t := unmarshalled.(type) { // could be a plain string, we only redact if this is a proper map case map[string]any: t = Redact(t, errOut) redacted, err := yaml.Marshal(t) if err != nil { // Best effort, output a warning but still include the file fmt.Fprintf(errOut, "[WARNING] Could not redact %s due to marshalling error: %s\n", fullFilePath, err) } else { out = &redacted } default: } } } _, err := resultWriter.Write(*out) return err } // redactMap sensitive values from the underlying map // the whole generic function here is out of paranoia. Although extremely unlikely, // we have no way of guaranteeing we'll get a "normal" map[string]interface{}, // since the diagnostic interface is a bit of a free-for-all func redactMap[K comparable](errOut io.Writer, inputMap map[K]interface{}) map[K]interface{} { if inputMap == nil { return nil } for rootKey, rootValue := range inputMap { if rootValue != nil { switch cast := rootValue.(type) { case map[string]interface{}: rootValue = redactMap(errOut, cast) case map[interface{}]interface{}: rootValue = redactMap(errOut, cast) case map[int]interface{}: rootValue = redactMap(errOut, cast) case string: if keyString, ok := any(rootKey).(string); ok { if redactKey(keyString) { rootValue = REDACTED } } default: // in cases where we got some weird kind of map we couldn't parse, print a warning if reflect.TypeOf(rootValue).Kind() == reflect.Map { fmt.Fprintf(errOut, "[WARNING]: file may be partly redacted, could not cast value %v of type %T", rootKey, rootValue) } } } inputMap[rootKey] = rootValue } return inputMap } func redactKey(k string) bool { // "routekey" shouldn't be redacted. 
// Add any other exceptions here. if k == "routekey" { return false } k = strings.ToLower(k) return strings.Contains(k, "certificate") || strings.Contains(k, "passphrase") || strings.Contains(k, "password") || strings.Contains(k, "token") || strings.Contains(k, "key") || strings.Contains(k, "secret") } func zipLogs(zw *zip.Writer, ts time.Time, topPath string, excludeEvents bool) error { homePath := paths.HomeFrom(topPath) dataPath := paths.DataFrom(topPath) currentDir := filepath.Base(homePath) if !paths.IsVersionHome() { // running in a container with custom top path set // logs are directly under top path return zipLogsWithPath(homePath, currentDir, true, excludeEvents, zw, ts) } dataDir, err := os.Open(dataPath) if err != nil { return err } defer dataDir.Close() subdirs, err := dataDir.Readdirnames(0) if err != nil { return err } dirPrefix := fmt.Sprintf("%s-", agentName) for _, dir := range subdirs { if !strings.HasPrefix(dir, dirPrefix) { continue } collectServices := dir == currentDir path := filepath.Join(dataPath, dir) if err := zipLogsWithPath(path, dir, collectServices, excludeEvents, zw, ts); err != nil { return err } } return nil } // zipLogs walks paths.Logs() and copies the file structure into zw in "logs/" func zipLogsWithPath(pathsHome, commitName string, collectServices, excludeEvents bool, zw *zip.Writer, ts time.Time) error { _, err := zw.CreateHeader(&zip.FileHeader{ Name: "logs/", Method: zip.Deflate, Modified: ts, }) if err != nil { return err } if collectServices { if err := collectServiceComponentsLogs(zw); err != nil { return fmt.Errorf("failed to collect endpoint-security logs: %w", err) } } _, err = zw.CreateHeader(&zip.FileHeader{ Name: "logs/" + commitName + "/", Method: zip.Deflate, Modified: ts, }) if err != nil { return err } // using Data() + "/logs", for some reason default paths/Logs() is the home dir... 
logPath := filepath.Join(pathsHome, "logs") + string(filepath.Separator) return filepath.WalkDir(logPath, func(path string, d fs.DirEntry, fErr error) error { if errors.Is(fErr, fs.ErrNotExist) { return nil } if fErr != nil { return fmt.Errorf("unable to walk log dir: %w", fErr) } // name is the relative dir/file name replacing any filepath seperators with / // this will clean log names on windows machines and will nop on *nix name := filepath.ToSlash(strings.TrimPrefix(path, logPath)) if name == "" { return nil } // Skip events logs, if necessary // name can either be the folder name 'events' or the folder plus // the file name like 'events/elastic-agent-events-log.ndjson' // we need to skip both. if excludeEvents && strings.HasPrefix(name, "events") { return nil } name = filepath.Join(commitName, name) if d.IsDir() { _, err := zw.CreateHeader(&zip.FileHeader{ Name: "logs/" + filepath.ToSlash(name) + "/", Method: zip.Deflate, Modified: ts, }) if err != nil { return fmt.Errorf("unable to create log directory in archive: %w", err) } return nil } return saveLogs(name, path, zw) }) } func collectServiceComponentsLogs(zw *zip.Writer) error { platform, err := component.LoadPlatformDetail() if err != nil { return fmt.Errorf("failed to gather system information: %w", err) } specs, err := component.LoadRuntimeSpecs(paths.Components(), platform) if err != nil { return fmt.Errorf("failed to detect inputs and outputs: %w", err) } for _, spec := range specs.ServiceSpecs() { if spec.Spec.Service.Log == nil || spec.Spec.Service.Log.Path == "" { // no log path set in specification continue } logPath := filepath.Dir(spec.Spec.Service.Log.Path) + string(filepath.Separator) err = filepath.WalkDir(logPath, func(path string, d fs.DirEntry, fErr error) error { if fErr != nil { if errors.Is(fErr, fs.ErrNotExist) { return nil } return fmt.Errorf("unable to walk log directory %q for service input %s: %w", logPath, spec.InputType, fErr) } name := filepath.ToSlash(strings.TrimPrefix(path, 
logPath)) if name == "" { return nil } if d.IsDir() { return nil } return saveLogs("services/"+name, path, zw) }) if err != nil { return err } } return nil } func saveLogs(name string, logPath string, zw *zip.Writer) error { ts := time.Now().UTC() lf, err := os.Open(logPath) if err != nil { return fmt.Errorf("unable to open log file: %w", err) } defer lf.Close() if li, err := lf.Stat(); err == nil { ts = li.ModTime() } zf, err := zw.CreateHeader(&zip.FileHeader{ Name: "logs/" + filepath.ToSlash(name), Method: zip.Deflate, Modified: ts, }) if err != nil { return err } _, err = io.Copy(zf, lf) if err != nil { return err } return nil } // Redact redacts sensitive values from the passed mapStr. func Redact(mapStr map[string]any, errOut io.Writer) map[string]any { return redactMap(errOut, RedactSecretPaths(mapStr, errOut)) } // RedactSecretPaths will check the passed mapStr input for a secret_paths attribute. // If found it will replace the value for every key in the paths list with <REDACTED> and return the resulting map. // Any issues or errors will be written to the errOut writer. 
func RedactSecretPaths(mapStr map[string]any, errOut io.Writer) map[string]any { v, ok := mapStr["secret_paths"] if !ok { return mapStr } arr, ok := v.([]interface{}) if !ok { fmt.Fprintln(errOut, "No output redaction: secret_paths attribute is not a list.") return mapStr } cfg := ucfg.MustNewFrom(mapStr, ucfg.PathSep(".")) for _, v := range arr { key, ok := v.(string) if !ok { fmt.Fprintf(errOut, "No output redaction for %q: expected type string, is type %T.\n", v, v) continue } if ok, err := cfg.Has(key, -1, ucfg.PathSep(".")); err != nil { fmt.Fprintf(errOut, "Error redacting secret path %q: %v.\n", key, err) } else if ok { err := cfg.SetString(key, -1, REDACTED, ucfg.PathSep(".")) if err != nil { fmt.Fprintf(errOut, "No output redaction for %q: %v.\n", key, err) } } else { fmt.Fprintf(errOut, "Unable to find secret path %q for redaction.\n", key) } } result, err := config.MustNewConfigFrom(cfg).ToMapStr() if err != nil { return mapStr } return result }