metric/system/process/process_windows.go (349 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package process
import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"syscall"
"unsafe"
xsyswindows "golang.org/x/sys/windows"
"github.com/elastic/elastic-agent-libs/opt"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
gowindows "github.com/elastic/go-windows"
"github.com/elastic/gosigar/sys/windows"
)
var (
ntQuerySystemInformation = ntdll.NewProc("NtQuerySystemInformation")
)
// FetchPids returns a map and array of pids
func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
pids, err := windows.EnumProcesses()
if err != nil {
return nil, nil, fmt.Errorf("EnumProcesses failed: %w", err)
}
procMap := make(ProcsMap, len(pids))
plist := make([]ProcState, 0, len(pids))
var wrappedErr error
// This is probably the only implementation that doesn't benefit from our
// little fillPid callback system. We'll need to iterate over everything
// manually.
for _, pid := range pids {
procMap, plist, err = procStats.pidIter(int(pid), procMap, plist)
wrappedErr = errors.Join(wrappedErr, err)
}
return procMap, plist, toNonFatal(wrappedErr)
}
// GetSelfPid is the darwin implementation; see the linux version in
// process_linux_common.go for more context.
func GetSelfPid(hostfs resolve.Resolver) (int, error) {
return os.Getpid(), nil
}
// GetInfoForPid returns basic info for the process
func GetInfoForPid(_ resolve.Resolver, pid int) (ProcState, error) {
var err error
var errs []error
state := ProcState{Pid: opt.IntWith(pid)}
if pid == 0 {
// we cannot open pid 0. Skip it and move forward.
// we will call getIdleMemory and getIdleProcessTime in FillPidMetrics()
state.Username = "NT AUTHORITY\\SYSTEM"
state.Name = "System Idle Process"
state.State = Running
return state, nil
}
name, err := getProcName(pid)
if err != nil {
errs = append(errs, fmt.Errorf("error fetching name: %w", err))
} else {
state.Name = name
}
// system/process doesn't need this here, but system/process_summary does.
status, err := getPidStatus(pid)
if err != nil {
errs = append(errs, fmt.Errorf("error fetching status: %w", err))
} else {
state.State = status
}
if err := errors.Join(errs...); err != nil {
return state, fmt.Errorf("could not get all information for PID %d: %w",
pid, err)
}
return state, nil
}
func FetchNumThreads(pid int) (int, error) {
targetProcessHandle, err := syscall.OpenProcess(
xsyswindows.PROCESS_QUERY_INFORMATION,
false,
uint32(pid))
if err != nil {
return 0, fmt.Errorf("OpenProcess failed for PID %d: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(targetProcessHandle)
}()
currentProcessHandle, err := syscall.GetCurrentProcess()
if err != nil {
return 0, fmt.Errorf("GetCurrentProcess failed for PID %d: %w", pid, err)
}
// The pseudo handle need not be closed when it is no longer
// needed, calling CloseHandle has no effect. Adding here to
// remind us to close any handles we open.
defer func() {
_ = syscall.CloseHandle(currentProcessHandle)
}()
var snapshotHandle syscall.Handle
err = PssCaptureSnapshot(targetProcessHandle, PSSCaptureThreads, 0, &snapshotHandle)
if err != nil {
return 0, fmt.Errorf("PssCaptureSnapshot failed for PID %d: %w", pid, err)
}
info := PssThreadInformation{}
buffSize := unsafe.Sizeof(info)
queryErr := PssQuerySnapshot(snapshotHandle, PssQueryThreadInformation, &info, uint32(buffSize))
freeErr := PssFreeSnapshot(currentProcessHandle, snapshotHandle)
if queryErr != nil || freeErr != nil {
//Join discards any nil errors
return 0, errors.Join(
fmt.Errorf("PssQuerySnapshot failed: %w", queryErr),
fmt.Errorf("PssFreeSnapshot failed: %w", freeErr))
}
return int(info.ThreadsCaptured), nil
}
// FillPidMetrics is the windows implementation
func FillPidMetrics(_ resolve.Resolver, pid int, state ProcState, _ func(string) bool) (ProcState, error) {
if pid == 0 {
// get metrics for idle process
return fillIdleProcess(state)
}
user, _ := getProcCredName(pid)
state.Username = user // we cannot access process token for system-owned protected processes
if ppid, err := getParentPid(pid); err == nil {
state.Ppid = opt.IntWith(ppid)
}
wss, size, err := procMem(pid)
if err != nil {
return state, fmt.Errorf("error fetching memory: %w", err)
}
state.Memory.Rss.Bytes = opt.UintWith(wss)
state.Memory.Size = opt.UintWith(size)
userTime, sysTime, startTime, err := getProcTimes(pid)
if err != nil {
return state, fmt.Errorf("error getting CPU times: %w", err)
}
state.CPU.System.Ticks = opt.UintWith(sysTime)
state.CPU.User.Ticks = opt.UintWith(userTime)
state.CPU.Total.Ticks = opt.UintWith(userTime + sysTime)
state.CPU.StartTime = unixTimeMsToTime(startTime)
return state, nil
}
// FillMetricsRequiringMoreAccess
// All calls that need more access rights than
// windows.PROCESS_QUERY_LIMITED_INFORMATION
func FillMetricsRequiringMoreAccess(pid int, state ProcState) (ProcState, error) {
argList, err := getProcArgs(pid)
if err != nil {
return state, fmt.Errorf("error fetching process args: %w", NonFatalErr{Err: err})
}
state.Args = argList
if numThreads, err := FetchNumThreads(pid); err != nil {
return state, fmt.Errorf("error fetching num threads: %w", NonFatalErr{Err: err})
} else {
state.NumThreads = opt.IntWith(numThreads)
}
return state, nil
}
func getProcArgs(pid int) ([]string, error) {
handle, err := syscall.OpenProcess(
windows.PROCESS_QUERY_LIMITED_INFORMATION|
windows.PROCESS_VM_READ,
false,
uint32(pid))
if err != nil {
return nil, fmt.Errorf("OpenProcess failed for PID %d: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
pbi, err := windows.NtQueryProcessBasicInformation(handle)
if err != nil {
return nil, fmt.Errorf("NtQueryProcessBasicInformation failed for PID %d: %w", pid, err)
}
userProcParams, err := windows.GetUserProcessParams(handle, pbi)
if err != nil {
return nil, fmt.Errorf("GetUserProcessParams failed for PID %d: %w", pid, err)
}
argsW, err := windows.ReadProcessUnicodeString(handle, &userProcParams.CommandLine)
if err != nil {
return nil, fmt.Errorf("ReadProcessUnicodeString failed for PID %d: %w", pid, err)
}
procList, err := windows.ByteSliceToStringSlice(argsW)
if err != nil {
return nil, fmt.Errorf("ByteSliceToStringSlice failed for PID %d: %w", pid, err)
}
return procList, nil
}
func getProcTimes(pid int) (uint64, uint64, uint64, error) {
handle, err := syscall.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
if err != nil {
return 0, 0, 0, fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
var cpu syscall.Rusage
if err := syscall.GetProcessTimes(handle, &cpu.CreationTime, &cpu.ExitTime, &cpu.KernelTime, &cpu.UserTime); err != nil {
return 0, 0, 0, fmt.Errorf("GetProcessTimes failed for pid=%v: %w", pid, err)
}
// Everything expects ticks, so we need to go some math.
return uint64(windows.FiletimeToDuration(&cpu.UserTime).Nanoseconds() / 1e6), uint64(windows.FiletimeToDuration(&cpu.KernelTime).Nanoseconds() / 1e6), uint64(cpu.CreationTime.Nanoseconds() / 1e6), nil
}
// procMem gets the memory usage for the given PID.
// The current implementation calls
// GetProcessMemoryInfo (https://learn.microsoft.com/en-us/windows/win32/api/psapi/nf-psapi-getprocessmemoryinfo)
// We only need `PROCESS_QUERY_LIMITED_INFORMATION` because we do not support
// Windows Server 2003 or Windows XP
func procMem(pid int) (uint64, uint64, error) {
handle, err := syscall.OpenProcess(
windows.PROCESS_QUERY_LIMITED_INFORMATION,
false,
uint32(pid))
if err != nil {
return 0, 0, fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
counters, err := windows.GetProcessMemoryInfo(handle)
if err != nil {
return 0, 0, fmt.Errorf("GetProcessMemoryInfo failed for pid=%v: %w", pid, err)
}
return uint64(counters.WorkingSetSize), uint64(counters.PrivateUsage), nil
}
// getProcName returns the process name associated with the PID.
func getProcName(pid int) (string, error) {
handle, err := syscall.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
if err != nil {
return "", fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
filename, err := windows.GetProcessImageFileName(handle)
//nolint:nilerr // safe to ignore this error
if err != nil {
if isNonFatal(err) {
// if we're able to open the handle but GetProcessImageFileName fails with access denied error,
// then the process doesn't have any executable associated with it.
return "", nil
}
return "", err
}
return filepath.Base(filename), nil
}
// getProcStatus returns the status of a process.
func getPidStatus(pid int) (PidState, error) {
handle, err := syscall.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
if err != nil {
return Unknown, fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
var exitCode uint32
err = syscall.GetExitCodeProcess(handle, &exitCode)
if err != nil {
return Unknown, fmt.Errorf("GetExitCodeProcess failed for pid=%v: %w", pid, err)
}
if exitCode == 259 { // still active
return Running, nil
}
return Sleeping, nil
}
// getParentPid returns the parent process ID of a process.
func getParentPid(pid int) (int, error) {
handle, err := syscall.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
if err != nil {
return 0, fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
procInfo, err := windows.NtQueryProcessBasicInformation(handle)
if err != nil {
return 0, fmt.Errorf("NtQueryProcessBasicInformation failed for pid=%v: %w", pid, err)
}
return int(procInfo.InheritedFromUniqueProcessID), nil
}
//nolint:unused // this is actually used while dereferencing the pointer, but results in lint failure.
type systemProcessInformation struct {
NextEntryOffset uint32
NumberOfThreads uint32
Reserved1 [48]byte
ImageName struct {
Length uint16
MaximumLength uint16
Buffer *uint16
}
BasePriority int32
UniqueProcessID xsyswindows.Handle
Reserved2 uintptr
HandleCount uint32
SessionID uint32
Reserved3 uintptr
PeakVirtualSize uint64
VirtualSize uint64
Reserved4 uint32
PeakWorkingSetSize uint64
WorkingSetSize uint64
Reserved5 uintptr
QuotaPagedPoolUsage uint64
Reserved6 uintptr
QuotaNonPagedPoolUsage uint64
PagefileUsage uint64
PeakPagefileUsage uint64
PrivatePageCount uint64
Reserved7 [6]int64
}
func getProcCredName(pid int) (string, error) {
handle, err := syscall.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
if err != nil {
return "", fmt.Errorf("OpenProcess failed for pid=%v: %w", pid, err)
}
defer func() {
_ = syscall.CloseHandle(handle)
}()
// Find process token via win32.
var token syscall.Token
err = syscall.OpenProcessToken(handle, syscall.TOKEN_QUERY, &token)
if err != nil {
return "", fmt.Errorf("OpenProcessToken failed for pid=%v: %w", pid, err)
}
// Close token to prevent handle leaks.
defer token.Close()
// Find the token user.
tokenUser, err := token.GetTokenUser()
if err != nil {
return "", fmt.Errorf("GetTokenInformation failed for pid=%v: %w", pid, err)
}
// Look up domain account by SID.
account, domain, _, err := tokenUser.User.Sid.LookupAccount("")
if err != nil {
sid, sidErr := tokenUser.User.Sid.String()
if sidErr != nil {
return "", fmt.Errorf("failed while looking up account name for pid=%v: %w", pid, err)
}
return "", fmt.Errorf("failed while looking up account name for SID=%v of pid=%v: %w", sid, pid, err)
}
return fmt.Sprintf(`%s\%s`, domain, account), nil
}
func getIdleProcessTime() (float64, float64, error) {
idle, kernel, user, err := gowindows.GetSystemTimes()
if err != nil {
return 0, 0, toNonFatal(err)
}
// Average by cpu because GetSystemTimes returns summation of across all cpus
numCpus := float64(runtime.NumCPU())
idleTime := float64(idle) / numCpus
kernelTime := float64(kernel) / numCpus
userTime := float64(user) / numCpus
// Calculate total CPU time, averaged by cpu
totalTime := idleTime + kernelTime + userTime
return totalTime, idleTime, nil
}
func getIdleProcessMemory(state ProcState) (ProcState, error) {
systemInfo := make([]byte, 1024*1024)
var returnLength uint32
_, _, err := ntQuerySystemInformation.Call(xsyswindows.SystemProcessInformation, uintptr(unsafe.Pointer(&systemInfo[0])), uintptr(len(systemInfo)), uintptr(unsafe.Pointer(&returnLength)))
// NtQuerySystemInformation returns "operation permitted successfully"(i.e. errorno 0) on success.
// Hence, we can ignore syscall.Errno(0).
if err != nil && !errors.Is(err, syscall.Errno(0)) {
return state, toNonFatal(err)
}
// Process the returned data
for offset := uintptr(0); offset < uintptr(returnLength); {
processInfo := (*systemProcessInformation)(unsafe.Pointer(&systemInfo[offset]))
if processInfo.UniqueProcessID == 0 { // PID 0 is System Idle Process
state.Memory.Rss.Bytes = opt.UintWith(processInfo.WorkingSetSize)
state.Memory.Size = opt.UintWith(processInfo.PrivatePageCount)
state.NumThreads = opt.IntWith(int(processInfo.NumberOfThreads))
break
}
offset += uintptr(processInfo.NextEntryOffset)
if processInfo.NextEntryOffset == 0 {
break
}
}
return state, nil
}
func fillIdleProcess(state ProcState) (ProcState, error) {
state, err := getIdleProcessMemory(state)
if err != nil {
return state, err
}
_, idle, err := getIdleProcessTime()
if err != nil {
return state, err
}
state.CPU.Total.Ticks = opt.UintWith(uint64(idle / 1e6))
state.CPU.Total.Value = opt.FloatWith(idle)
return state, nil
}