metric/system/cgroup/cgv2/memory.go (215 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Elasticsearch B.V. licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package cgv2 import ( "bufio" "bytes" "errors" "fmt" "os" "path/filepath" "reflect" "strings" "github.com/elastic/elastic-agent-libs/opt" "github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup/cgcommon" ) // MemorySubsystem contains the metrics and limits from the "memory" subsystem. type MemorySubsystem struct { ID string `json:"id,omitempty"` // ID of the cgroup. Path string `json:"path,omitempty"` // Path to the cgroup relative to the cgroup subsystem's mountpoint. Mem MemoryData `json:"mem" struct:"mem"` // Memory usage by tasks in this cgroup. MemSwap MemoryData `json:"memsw" struct:"memsw"` // Memory plus swap usage by tasks in this cgroup. Stats MemoryStat `json:"stats" struct:"stats"` // A wide range of memory statistics. } // MemoryData contains basic metrics for the V2 controller type MemoryData struct { Events Events `json:"events" struct:"events"` Usage opt.Bytes `json:"usage" struct:"usage"` Low opt.Bytes `json:"low" struct:"low"` High opt.BytesOpt `json:"high,omitempty" struct:"high,omitempty"` Max opt.BytesOpt `json:"max,omitempty" struct:"max,omitempty"` } // Events contains the data from *.events in the memory controller type Events struct { Low opt.Uint `json:"low,omitempty" struct:"low,omitempty"` High uint64 `json:"high" struct:"high"` Max uint64 `json:"max" struct:"max"` OOM opt.Uint `json:"oom,omitempty" struct:"oom,omitempty"` OOMKill opt.Uint `json:"oom_kill,omitempty" struct:"oom_kill,omitempty"` Fail opt.Uint `json:"fail,omitempty" struct:"fail,omitempty"` } // MemoryStat holds detailed stats for the memory controller type MemoryStat struct { //Amount of memory used in anonymous mappings Anon opt.Bytes `json:"anon" struct:"anon" orig:"anon"` //Amount of memory used to cache filesystem data, including tmpfs and shared memory. File opt.Bytes `json:"file" struct:"file" orig:"file"` // Amount of memory allocated to kernel stacks. KernelStack opt.Bytes `json:"kernel_stack" struct:"kernel_stack" orig:"kernel_stack"` //Amount of memory allocated for page tables. Pagetables opt.Bytes `json:"page_tables" struct:"page_tables" orig:"pagetables"` // Amount of memory used for storing per-cpu kernel data structures. PerCPU opt.Bytes `json:"per_cpu" struct:"per_cpu" orig:"percpu"` // Amount of memory used in network transmission buffers Sock opt.Bytes `json:"sock" struct:"sock" orig:"sock"` // Amount of cached filesystem data that is swap-backed, such as tmpfs, shm segments, shared anonymous mmap()s Shmem opt.Bytes `json:"shmem" struct:"shmem" orig:"shmem"` // Amount of cached filesystem data mapped with mmap() FileMapped opt.Bytes `json:"file_mapped" struct:"file_mapped" orig:"file_mapped"` //Amount of cached filesystem data that was modified but not yet written back to disk FileDirty opt.Bytes `json:"file_dirty" struct:"file_dirty" orig:"file_dirty"` // Amount of cached filesystem data that was modified and is currently being written back to disk FileWriteback opt.Bytes `json:"file_writeback" struct:"file_writeback" orig:"file_writeback"` // Amount of swap cached in memory. The swapcache is accounted against both memory and swap usage. SwapCached opt.Bytes `json:"swap_cached" struct:"swap_cached" orig:"swapcached"` // Amount of memory used in anonymous mappings backed by transparent hugepages AnonTHP opt.Bytes `json:"anon_thp" struct:"anon_thp" orig:"anon_thp"` // Amount of cached filesystem data backed by transparent hugepages FileTHP opt.Bytes `json:"file_thp" struct:"file_thp" orig:"file_thp"` // Amount of shm, tmpfs, shared anonymous mmap()s backed by transparent hugepages ShmemTHP opt.Bytes `json:"shmem_thp" struct:"shmem_thp" orig:"shmem_thp"` // Anonymous and swap cache on inactive LRU list, including tmpfs (shmem), in bytes. InactiveAnon opt.Bytes `json:"inactive_anon" struct:"inactive_anon" orig:"inactive_anon"` // Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs (shmem), in bytes. ActiveAnon opt.Bytes `json:"active_anon" struct:"active_anon" orig:"active_anon"` // File-backed memory on inactive LRU list, in bytes. InactiveFile opt.Bytes `json:"inactive_file" struct:"inactive_file" orig:"inactive_file"` // File-backed memory on active LRU list, in bytes. ActiveFile opt.Bytes `json:"active_file" struct:"active_file" orig:"active_file"` // Memory that cannot be reclaimed, in bytes. Unevictable opt.Bytes `json:"unevictable" struct:"unevictable" orig:"unevictable"` // Part of "slab" that might be reclaimed, such as dentries and inodes. SlabReclaimable opt.Bytes `json:"slab_reclaimable" struct:"slab_reclaimable" orig:"slab_reclaimable"` // Part of "slab" that cannot be reclaimed on memory pressure. SlabUnreclaimable opt.Bytes `json:"slab_unreclaimable" struct:"slab_unreclaimable" orig:"slab_unreclaimable"` // Amount of memory used for storing in-kernel data structures. Slab opt.Bytes `json:"slab" struct:"slab" orig:"slab"` // Number of refaults of previously evicted anonymous pages. WorkingSetRefaultAnon uint64 `json:"workingset_refault_anon" struct:"workingset_refault_anon" orig:"workingset_refault_anon"` // Number of refaults of previously evicted file pages. WorkingSetRefaultFile uint64 `json:"workingset_refault_file" struct:"workingset_refault_file" orig:"workingset_refault_file"` // Number of refaulted anonymous pages that were immediately activated. WorkingSetActivateAnon uint64 `json:"workingset_activate_anon" struct:"workingset_activate_anon" orig:"workingset_activate_anon"` // Number of refaulted file pages that were immediately activated. WorkingSetActivateFile uint64 `json:"workingset_activate_file" struct:"workingset_activate_file" orig:"workingset_activate_file"` // Number of restored anonymous pages which have been detected as an active workingset before they got reclaimed. WorkingSetRestoreAnon uint64 `json:"workingset_restore_anon" struct:"workingset_restore_anon" orig:"workingset_restore_anon"` // Number of restored file pages which have been detected as an active workingset before they got reclaimed. WorkingSetRestoreFile uint64 `json:"workingset_restore_file" struct:"workingset_restore_file" orig:"workingset_restore_file"` // Number of times a shadow node has been reclaimed WorkingSetNodeReclaim uint64 `json:"workingset_node_reclaim" struct:"workingset_node_reclaim" orig:"workingset_nodereclaim"` //Total number of page faults incurred PageFaults uint64 `json:"page_faults" struct:"page_faults" orig:"pgfault"` // Number of times a task in the cgroup triggered a major page fault. MajorPageFaults uint64 `json:"major_page_faults" struct:"major_page_faults" orig:"pgmajfault"` // Amount of scanned pages (in an active LRU list) PageRefill uint64 `json:"page_refill" struct:"page_refill" orig:"pgrefill"` // Amount of scanned pages (in an inactive LRU list) PageScan uint64 `json:"page_scan" struct:"page_scan" orig:"pgscan"` // Amount of reclaimed pages PageSteal uint64 `json:"page_steal" struct:"page_steal" orig:"pgsteal"` //Amount of pages moved to the active LRU list PageActivate uint64 `json:"page_activate" struct:"page_activate" orig:"pgactivate"` // Amount of pages moved to the inactive LRU list PageDeactivate uint64 `json:"page_deactivate" struct:"page_deactivate" orig:"pgdeactivate"` // Amount of pages postponed to be freed under memory pressure PageLazyFree uint64 `json:"page_lazy_free" struct:"page_lazy_free" orig:"pglazyfree"` // Amount of reclaimed lazyfree pages PageLazyFreed uint64 `json:"page_lazy_freed" struct:"page_lazy_freed" orig:"pglazyfreed"` // Number of transparent hugepages which were allocated to satisfy a page fault. THPFaultAlloc uint64 `json:"thp_fault_alloc" struct:"thp_fault_alloc" orig:"thp_fault_alloc"` // Number of transparent hugepages which were allocated to allow collapsing an existing range of pages. THPCollapseAlloc uint64 `json:"htp_collapse_alloc" struct:"htp_collapse_alloc" orig:"thp_collapse_alloc"` } // Get fetches memory subsystem metrics for V2 cgroups func (mem *MemorySubsystem) Get(path string) error { var err error mem.Mem, err = memoryData(path, "memory") if err != nil { return fmt.Errorf("error reading memory stats: %w", err) } mem.MemSwap, err = memoryData(path, "memory.swap") if err != nil { return fmt.Errorf("error reading memory.swap stats: %w", err) } mem.Stats, err = fillStatStruct(path) if err != nil { return fmt.Errorf("error fetching memory.stat: %w", err) } return nil } // memoryData reads off the the auxiliary memory stats from the memory controller func memoryData(path, file string) (MemoryData, error) { // root cgroups won't have these files. // If .high doesn't exist, assume the rest don't either. _, err := os.Stat(filepath.Join(path, file+".high")) if errors.Is(err, os.ErrNotExist) { return MemoryData{}, nil } data := MemoryData{} // High and max can be set to "max", which means "off" lowMetric, err := cgcommon.ParseUintFromFile(filepath.Join(path, file+".low")) if err != nil { return data, fmt.Errorf("error reading %s.low file: %w", file, err) } highMetric, err := maxOrValue(path, file+".high") if err != nil { return data, fmt.Errorf("error parsing %s.high file: %w", file, err) } maxMetric, err := maxOrValue(path, file+".max") if err != nil { return data, fmt.Errorf("error parsing %s.max file: %w", file, err) } currentMetric, err := cgcommon.ParseUintFromFile(filepath.Join(path, file+".current")) if err != nil { return data, fmt.Errorf("error reading %s.current file: %w", file, err) } data.Low.Bytes = lowMetric data.High.Bytes = highMetric data.Max.Bytes = maxMetric data.Usage.Bytes = currentMetric data.Events, err = fetchEventsFile(path, file+".events") if err != nil { return data, fmt.Errorf("error fetching events file for %s: %w", file, err) } return data, nil } // fetch memory.events contents func fetchEventsFile(path, file string) (Events, error) { evt := Events{} toRead := filepath.Join(path, file) f, err := os.Open(toRead) if err != nil { return evt, fmt.Errorf("error reading %s: %w", toRead, err) } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { key, val, err := cgcommon.ParseCgroupParamKeyValue(sc.Text()) if err != nil { return evt, fmt.Errorf("error parsing key from events: %w", err) } switch key { case "low": evt.Low = opt.UintWith(val) case "high": evt.High = val case "max": evt.Max = val case "oom": evt.OOM = opt.UintWith(val) case "oom_kill": evt.OOMKill = opt.UintWith(val) case "fail": evt.Fail = opt.UintWith(val) } } return evt, nil } // Some values, such as mem.max and mem.high, can be set to "max," which disables the metric. func maxOrValue(path, file string) (opt.Uint, error) { var finalMetric opt.Uint highRaw, err := os.ReadFile(filepath.Join(path, file)) if err != nil { return finalMetric, fmt.Errorf("error reading %s.high file: %w", path, err) } if strings.TrimSpace(string(highRaw)) == "max" { finalMetric = opt.NewUintNone() } else { highUint, err := cgcommon.ParseUint(highRaw) if err != nil { return finalMetric, fmt.Errorf("error parsing raw high value: %v: %w", highRaw, err) } finalMetric = opt.UintWith(highUint) } return finalMetric, nil } // fillStatStruct iteratively fills out the MemoryStat struct // This works via reflection, and it's a tad ugly, but we also have a lot of fields to fill // Note that this assumes all the values in the struct are either `uint64`, `opt.Bytes` or `opt.BytesOpt` func fillStatStruct(path string) (MemoryStat, error) { statPath := filepath.Join(path, "memory.stat") raw, err := os.ReadFile(statPath) if err != nil { return MemoryStat{}, fmt.Errorf("error reading memory.stat: %w", err) } stats := MemoryStat{} refValues := reflect.ValueOf(&stats).Elem() refTypes := reflect.TypeOf(stats) sc := bufio.NewScanner(bytes.NewReader(raw)) for sc.Scan() { //break apart the lines parts := bytes.SplitN(sc.Bytes(), []byte(" "), 2) if len(parts) != 2 { continue } intVal, err := cgcommon.ParseUint(parts[1]) if err != nil { return stats, fmt.Errorf("error parsing value %v: %w", parts[1], err) } for i := 0; i < refValues.NumField(); i++ { idxVal := refValues.Field(i) idxType := refTypes.Field(i) tagStr := idxType.Tag.Get("orig") if tagStr == string(parts[0]) { if idxVal.CanSet() { if idxVal.Kind() == reflect.Uint64 { idxVal.SetUint(intVal) } else if idxType.Type == reflect.TypeOf(opt.Bytes{}) { byteVal := opt.Bytes{Bytes: intVal} byteRef := reflect.ValueOf(byteVal) idxVal.Set(byteRef) } else if idxType.Type == reflect.TypeOf(opt.BytesOpt{}) { byteVal := opt.BytesOpt{Bytes: opt.UintWith(intVal)} byteRef := reflect.ValueOf(byteVal) idxVal.Set(byteRef) } } } } } return stats, nil }