elastic-ebpf/GPL/Events/Helpers.h

// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause

/*
 * Copyright (C) 2021 Elasticsearch BV
 *
 * This software is dual-licensed under the BSD 2-Clause and GPL v2 licenses.
 * You may choose either one of them if you use this software.
 */

/* $OpenBSD: strncmp.c,v 1.11 2014/06/10 04:16:57 deraadt Exp $ */

/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef EBPF_EVENTPROBE_HELPERS_H
#define EBPF_EVENTPROBE_HELPERS_H

#include "EbpfEventProto.h"

const volatile int consumer_pid = 0;

#define MEMFD_STRING "memfd:"
#define TMPFS_STRING "tmpfs"
#define DEVSHM_STRING "/dev/shm"

#if BPF_DEBUG_TRACE == 0
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif

// Compiler barrier, used to prevent compile-time insns reordering and optimizations.
#define barrier() asm volatile("" ::: "memory")

#define DECL_FUNC_ARG(func, arg) const volatile int arg__##func##__##arg##__ = 0;
#define FUNC_ARG_READ(type, func, arg)                                      \
    ({                                                                      \
        type _ret;                                                          \
        bpf_core_read(&_ret, sizeof(_ret), ctx + arg__##func##__##arg##__); \
        _ret;                                                               \
    })

/*
 * Reads the specified argument from struct pt_regs without dereferencing it. Note that
 * we first have to read the value in struct pt_regs into a volatile temporary (_dst).
 * Without this, LLVM can generate code like the following, which will fail to verify:
 *
 * r3 = 8                # The register value we want to read is at offset 8 in the context
 * r2 = r1               # r1 = ctx pointer
 * r2 += r3              # Increment ctx ptr to register value we're interested in
 * r3 = *(u64 *)(r2 +0)  # Dereference it (fail)
 * dereference of modified ctx ptr R2 off=8 disallowed
 *
 * The verifier disallows dereferencing the context pointer when it's been
 * modified. This will often happen as an inlining optimization if dst is
 * immediately passed into a function. We instead want code like the following
 * to be generated:
 *
 * r2 = r1               # r1 = ctx pointer
 * r3 = *(u64 *)(r2 + 8) # Dereference it, putting the increment in the dereference insn
 * ...pass r3 to a function
 */
#define FUNC_ARG_READ_PTREGS(dst, func, arg)                                \
    ({                                                                      \
        int ret = 0;                                                        \
        volatile typeof(dst) _dst;                                          \
        switch (arg__##func##__##arg##__) {                                 \
        case 0:                                                             \
            _dst = (typeof(dst))PT_REGS_PARM1(ctx);                         \
            break;                                                          \
        case 1:                                                             \
            _dst = (typeof(dst))PT_REGS_PARM2(ctx);                         \
            break;                                                          \
        case 2:                                                             \
            _dst = (typeof(dst))PT_REGS_PARM3(ctx);                         \
            break;                                                          \
        case 3:                                                             \
            _dst = (typeof(dst))PT_REGS_PARM4(ctx);                         \
            break;                                                          \
        case 4:                                                             \
            _dst = (typeof(dst))PT_REGS_PARM5(ctx);                         \
            break;                                                          \
        default:                                                            \
            ret = -1;                                                       \
        };                                                                  \
        dst = _dst;                                                         \
        barrier();                                                          \
        ret;                                                                \
    })
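/*
 * Illustrative sketch, not part of the original header: a pt_regs-based kprobe
 * reading one pointer argument through FUNC_ARG_READ_PTREGS. The traced function
 * (vfs_unlink) and argument name (dentry) are hypothetical, and the relocation
 * variable declared by DECL_FUNC_ARG is presumed to be rewritten with the real
 * argument index by the userspace loader before the program is loaded.
 *
 * DECL_FUNC_ARG(vfs_unlink, dentry);
 *
 * SEC("kprobe/vfs_unlink")
 * int BPF_KPROBE(kprobe__vfs_unlink)
 * {
 *     struct dentry *de = NULL;
 *     if (FUNC_ARG_READ_PTREGS(de, vfs_unlink, dentry))
 *         return 0; // no argument index known for this kernel
 *     // ... read fields from `de` with BPF_CORE_READ() ...
 *     return 0;
 * }
 */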
// value is replaced later by `probe_fill_relos()`
#define DECL_FUNC_RET(func) const volatile int ret__##func##__ = 0;
#define FUNC_RET_READ(type, func)                                           \
    ({                                                                      \
        type _ret;                                                          \
        bpf_core_read(&_ret, sizeof(_ret), ctx + ret__##func##__);          \
        _ret;                                                               \
    })

#define DECL_FUNC_ARG_EXISTS(func, arg) const volatile bool exists__##func##__##arg##__ = false;
#define FUNC_ARG_EXISTS(func, arg) exists__##func##__##arg##__

#define DECL_FIELD_OFFSET(struct, field) const volatile int off__##struct##__##field##__ = 0;
#define FIELD_OFFSET(struct, field) off__##struct##__##field##__

// From linux/err.h
#define MAX_ERRNO 4095

// From include/linux/tty_driver.h
#define TTY_DRIVER_TYPE_PTY 0x0004
#define PTY_TYPE_MASTER 0x0001

// From include/uapi/asm-generic/termbits.h
#define ECHO 0x00008

static bool IS_ERR_OR_NULL(const void *ptr)
{
    return (!ptr) || (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

// Wrapper around bpf_probe_read_kernel_str that writes an empty string into dst upon a read failure
static long read_kernel_str_or_empty_str(void *dst, int size, const void *unsafe_ptr)
{
    long ret = bpf_probe_read_kernel_str(dst, size, unsafe_ptr);
    if (ret < 0) {
        ((char *)dst)[0] = '\0';
        return 1;
    }
    return ret;
}

static long ebpf_argv__fill(char *buf, size_t buf_size, const struct task_struct *task)
{
    unsigned long start, end, size;

    start = BPF_CORE_READ(task, mm, arg_start);
    end   = BPF_CORE_READ(task, mm, arg_end);

    if (end <= start) {
        buf[0] = '\0';
        return 1;
    }

    size = end - start;
    size = size > buf_size ? buf_size : size;

    bpf_probe_read_user(buf, size, (void *)start);

    // Prevent final arg from being unterminated if buf is too small for args
    buf[size - 1] = '\0';

    return size;
}

static long ebpf_env__fill(char *buf, size_t buf_size, const struct task_struct *task)
{
    unsigned long start, end, size;

    start = BPF_CORE_READ(task, mm, env_start);
    end   = BPF_CORE_READ(task, mm, env_end);

    if (end <= start) {
        buf[0] = '\0';
        return 1;
    }

    size = end - start;
    size = size > buf_size ? buf_size : size;

    bpf_probe_read_user(buf, size, (void *)start);

    // Prevent final env from being unterminated if buf is too small for envs
    buf[size - 1] = '\0';

    return size;
}
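/*
 * Illustrative sketch, not part of the original header: filling an argv buffer
 * for the current task. The buffer size and surrounding probe are hypothetical;
 * the result is a sequence of NUL-separated arguments that is always
 * NUL-terminated, even when truncated.
 *
 * char argv_buf[512];
 * const struct task_struct *task = (struct task_struct *)bpf_get_current_task();
 * long len = ebpf_argv__fill(argv_buf, sizeof(argv_buf), task);
 * // The first `len` bytes of argv_buf (at most sizeof(argv_buf)) are now valid.
 */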
static void ebpf_tty_dev__fill(struct ebpf_tty_dev *tty_dev, const struct tty_struct *tty)
{
    tty_dev->major = BPF_CORE_READ(tty, driver, major);
    tty_dev->minor = BPF_CORE_READ(tty, driver, minor_start);
    tty_dev->minor += BPF_CORE_READ(tty, index);

    struct winsize winsize     = BPF_CORE_READ(tty, winsize);
    struct ebpf_tty_winsize ws = {};
    ws.rows                    = winsize.ws_row;
    ws.cols                    = winsize.ws_col;
    tty_dev->winsize           = ws;

    struct ktermios termios   = BPF_CORE_READ(tty, termios);
    struct ebpf_tty_termios t = {};
    t.c_iflag                 = termios.c_iflag;
    t.c_oflag                 = termios.c_oflag;
    t.c_lflag                 = termios.c_lflag;
    t.c_cflag                 = termios.c_cflag;
    tty_dev->termios          = t;
}

static void ebpf_ctty__fill(struct ebpf_tty_dev *ctty, const struct task_struct *task)
{
    struct tty_struct *tty = BPF_CORE_READ(task, signal, tty);
    ebpf_tty_dev__fill(ctty, tty);
}

static void ebpf_pid_info__fill(struct ebpf_pid_info *pi, const struct task_struct *task)
{
    int e_pgid, e_sid;

    if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_PGID))
        e_pgid = bpf_core_enum_value(enum pid_type, PIDTYPE_PGID);
    else
        e_pgid = PIDTYPE_PGID;

    if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_SID))
        e_sid = bpf_core_enum_value(enum pid_type, PIDTYPE_SID);
    else
        e_sid = PIDTYPE_SID;

    pi->tid           = BPF_CORE_READ(task, pid);
    pi->tgid          = BPF_CORE_READ(task, tgid);
    pi->ppid          = BPF_CORE_READ(task, group_leader, real_parent, tgid);
    pi->pgid          = BPF_CORE_READ(task, group_leader, signal, pids[e_pgid], numbers[0].nr);
    pi->sid           = BPF_CORE_READ(task, group_leader, signal, pids[e_sid], numbers[0].nr);
    pi->start_time_ns = BPF_CORE_READ(task, group_leader, start_time);
}
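/*
 * Illustrative sketch, not part of the original header: composing the fill
 * helpers above when building a process event. The event struct and its field
 * names are hypothetical stand-ins for the layouts in EbpfEventProto.h.
 *
 * struct example_process_event {
 *     struct ebpf_pid_info pids;
 *     struct ebpf_tty_dev ctty;
 * };
 *
 * static void example_fill(struct example_process_event *ev, const struct task_struct *task)
 * {
 *     ebpf_pid_info__fill(&ev->pids, task);
 *     ebpf_ctty__fill(&ev->ctty, task);
 * }
 */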
static void ebpf_cred_info__fill(struct ebpf_cred_info *ci, const struct task_struct *task)
{
    ci->ruid = BPF_CORE_READ(task, cred, uid.val);
    ci->euid = BPF_CORE_READ(task, cred, euid.val);
    ci->suid = BPF_CORE_READ(task, cred, suid.val);
    ci->rgid = BPF_CORE_READ(task, cred, gid.val);
    ci->egid = BPF_CORE_READ(task, cred, egid.val);
    ci->sgid = BPF_CORE_READ(task, cred, sgid.val);

    // This check is to determine when the kernel_cap_t definition changed.
    //
    // Previously it was:
    // typedef struct kernel_cap_struct {
    //     __u32 cap[_KERNEL_CAPABILITY_U32S];
    // } kernel_cap_t;
    //
    // Currently it is:
    // typedef struct { u64 val; } kernel_cap_t;
    //
    // See https://github.com/torvalds/linux/commit/f122a08b197d076ccf136c73fae0146875812a88
    //
    if (bpf_core_field_exists(task->cred->cap_permitted.cap)) {
        kernel_cap_t dest;

        dest.cap[0]       = 0;
        dest.cap[1]       = 0;
        dest              = BPF_CORE_READ(task, cred, cap_permitted);
        ci->cap_permitted = (((u64)dest.cap[1]) << 32) + dest.cap[0];

        dest.cap[0]       = 0;
        dest.cap[1]       = 0;
        dest              = BPF_CORE_READ(task, cred, cap_effective);
        ci->cap_effective = (((u64)dest.cap[1]) << 32) + dest.cap[0];
    } else {
        const struct cred *cred = BPF_CORE_READ(task, cred);
        const void *cap         = NULL;
        struct new_kernel_cap_struct {
            u64 val;
        } dest;

        dest.val = 0;
        cap      = &cred->cap_permitted;
        bpf_core_read(&dest, sizeof(struct new_kernel_cap_struct), cap);
        ci->cap_permitted = dest.val;

        dest.val = 0;
        cap      = &cred->cap_effective;
        bpf_core_read(&dest, sizeof(struct new_kernel_cap_struct), cap);
        ci->cap_effective = dest.val;
    }
}

static void ebpf_comm__fill(char *comm, size_t len, const struct task_struct *task)
{
    read_kernel_str_or_empty_str(comm, len, BPF_CORE_READ(task, comm));
}

static void ebpf_ns__fill(struct ebpf_namespace_info *nsi, const struct task_struct *task)
{
    struct pid *pid;
    int pid_level;

    nsi->uts_inonum    = BPF_CORE_READ(task, nsproxy, uts_ns, ns.inum);
    nsi->ipc_inonum    = BPF_CORE_READ(task, nsproxy, ipc_ns, ns.inum);
    nsi->mnt_inonum    = BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
    nsi->net_inonum    = BPF_CORE_READ(task, nsproxy, net_ns, ns.inum);
    nsi->cgroup_inonum = BPF_CORE_READ(task, nsproxy, cgroup_ns, ns.inum);
    nsi->time_inonum   = BPF_CORE_READ(task, nsproxy, time_ns, ns.inum);

    pid = BPF_CORE_READ(task, thread_pid);
    if (pid == NULL) {
        nsi->pid_inonum = 0;
        return;
    }

    pid_level       = BPF_CORE_READ(pid, level);
    nsi->pid_inonum = BPF_CORE_READ(pid, numbers[pid_level].ns, ns.inum);
}

struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, struct ebpf_event_stats);
    __uint(max_entries, 1);
} ringbuf_stats SEC(".maps");

static long ebpf_ringbuf_write(void *ringbuf, void *data, u64 size, u64 flags)
{
    long r;
    struct ebpf_event_stats *ees;
    u32 zero = 0;

    r = bpf_ringbuf_output(ringbuf, data, size, flags);

    ees = bpf_map_lookup_elem(&ringbuf_stats, &zero);
    if (ees != NULL)
        r == 0 ? ees->sent++ : ees->lost++;

    return (r);
}
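/*
 * Illustrative sketch, not part of the original header: emitting an event
 * through ebpf_ringbuf_write() so sent/lost counts accumulate in the
 * ringbuf_stats per-CPU map (to be summed by the userspace reader). The
 * "ringbuf" map is hypothetical, and example_process_event reuses the
 * hypothetical struct from the sketch above.
 *
 * struct {
 *     __uint(type, BPF_MAP_TYPE_RINGBUF);
 *     __uint(max_entries, 4096 * 64);
 * } ringbuf SEC(".maps");
 *
 * static void example_emit(struct example_process_event *ev)
 * {
 *     // A successful output increments `sent`; a failure increments `lost`.
 *     ebpf_ringbuf_write(&ringbuf, ev, sizeof(*ev), 0);
 * }
 */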
static bool is_kernel_thread(const struct task_struct *task)
{
    // All kernel threads are children of kthreadd, which always has pid 2
    // except on some ancient kernels (2.4x)
    // https://unix.stackexchange.com/a/411175
    return BPF_CORE_READ(task, group_leader, real_parent, tgid) == 2;
}

static bool is_thread_group_leader(const struct task_struct *task)
{
    return BPF_CORE_READ(task, pid) == BPF_CORE_READ(task, tgid);
}

static bool is_consumer()
{
    int pid = bpf_get_current_pid_tgid() >> 32;
    return consumer_pid == pid;
}

static int strncmp(const char *s1, const char *s2, size_t n)
{
    if (n == 0)
        return (0);
    do {
        if (*s1 != *s2++)
            return (*(unsigned char *)s1 - *(unsigned char *)--s2);
        if (*s1++ == 0)
            break;
    } while (--n != 0);
    return (0);
}

// Compares the first 'len' characters of str1 and str2; returns 1 if they are equal.
// NOTE: no bounds check, assumes use under eBPF verifier
static int is_equal_prefix(const char *str1, const char *str2, int len)
{
    return !strncmp(str1, str2, len);
}

static u64 bpf_ktime_get_boot_ns_helper()
{
    if (bpf_core_enum_value_exists(enum bpf_func_id, BPF_FUNC_ktime_get_boot_ns))
        return bpf_ktime_get_boot_ns();
    else
        return 0;
}

#endif // EBPF_EVENTPROBE_HELPERS_H