// elastic-ebpf/GPL/Events/PathResolver.h
// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
/*
* Copyright (C) 2021 Elasticsearch BV
*
* This software is dual-licensed under the BSD 2-Clause and GPL v2 licenses.
* You may choose either one of them if you use this software.
*/
/*
* BPF dentry resolver
*
* This file contains code needed to construct a path as a string from Linux's
* struct path object.
*
* Constructing paths in a BPF probe is complicated due to the fact that paths
* are stored as the leaf of a struct dentry chain, with each dentry
* corresponding to one path component. This means, to construct the full path
* in string form, we have to walk the chain. Doing so completely for any
* arbitrarily long path is impossible as that would require an unbounded loop,
* so we make a best effort with a bounded loop, truncating particularly long
* paths.
*/
#ifndef EBPF_EVENTPROBE_PATHRESOLVER_H
#define EBPF_EVENTPROBE_PATHRESOLVER_H
#include "vmlinux.h"
#include "Helpers.h"
#define PATH_MAX 4096
#define PATH_MAX_INDEX_MASK 4095
// Maximum number of path components we'll grab before we give up and just
// prepend "./" to the path. Even though bumping this won't increase the number
// of instructions in the resolver (loops are not unrolled), it will increase
// the number of states that need to be explored by the verifier, which, if
// high enough, will bump up against BPF_COMPLEXITY_LIMIT_INSNS in the kernel
// (set to 1,000,000 as of 5.3).
#define PATH_RESOLVER_MAX_COMPONENTS 100
#define KERNFS_NODE_COMPONENT_MAX_LEN 250
// Map used as a scratch area by the path resolver to store intermediate state
// (dentry pointers).
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); // per-CPU, so no synchronization is needed
    __type(key, u32);                        // single slot, key is always 0
    __type(value, struct dentry *[PATH_RESOLVER_MAX_COMPONENTS]);
    __uint(max_entries, 1);                  // one scratch array per CPU
} path_resolver_dentry_scratch_map SEC(".maps");
// Resolve a struct path to a string. Returns the size of the constructed path
// string, including the null terminator.
// Resolve a struct path to a string. Returns the size of the constructed path
// string, including the null terminator.
//
// buf:  output buffer, must be at least PATH_MAX bytes; always left
//       NUL-terminated (set to "" on error, in which case 1 is returned).
// path: the struct path to stringify.
// task: task whose fs root is used to detect when the walk reaches "/".
//
// NOTE: the repeated `& PATH_MAX_INDEX_MASK` operations below are for the BPF
// verifier's benefit, not for correctness of the arithmetic — do not "simplify"
// them away.
static size_t
ebpf_resolve_path_to_string(char *buf, struct path *path, const struct task_struct *task)
{
    long size = 0;
    // Assume truncation until loop 1 actually reaches the global root.
    bool truncated = true;
    struct fs_struct *fs_struct = BPF_CORE_READ(task, fs);
    struct path root = BPF_CORE_READ(fs_struct, root);
    struct vfsmount *curr_vfsmount = BPF_CORE_READ(path, mnt);
    // All struct vfsmount's are stored in a struct mount. We need fields in
    // the struct mount to continue the dentry walk when we hit the root of a
    // mounted filesystem.
    struct mount *mnt = container_of(curr_vfsmount, struct mount, mnt);
    struct dentry *curr_dentry = BPF_CORE_READ(path, dentry);
    struct dentry **dentry_arr;
    // Ensure we make buf an empty string early up here so if we exit with any
    // sort of error, we won't leave garbage in it if it's uninitialized
    buf[0] = '\0';
    u32 zero = 0;
    if (!(dentry_arr = bpf_map_lookup_elem(&path_resolver_dentry_scratch_map, &zero))) {
        bpf_printk("Could not get path resolver scratch area");
        goto out_err;
    }
    // Loop 1, follow the dentry chain (up to a maximum of
    // PATH_RESOLVER_MAX_COMPONENTS) and store pointers to each dentry in
    // dentry_arr
    for (int i = 0; i < PATH_RESOLVER_MAX_COMPONENTS; i++) {
        if (curr_dentry == root.dentry && curr_vfsmount == root.mnt) {
            // We've reached the global root if both the current dentry and the
            // current vfsmount match those of the root struct path. Fill in
            // the rest of dentry_arr with NULLs so the next loop ignores the
            // remaining entries. (Once this condition is true, it stays true
            // for every remaining iteration.)
            truncated = false;
            dentry_arr[i] = NULL;
            continue;
        }
        struct dentry *parent = BPF_CORE_READ(curr_dentry, d_parent);
        if (curr_dentry == parent || curr_dentry == BPF_CORE_READ(curr_vfsmount, mnt_root)) {
            // We've hit the root of a mounted filesystem. The dentry walk must
            // be continued from mnt_mountpoint in the current struct mount.
            // Also update curr_vfsmount to point to the parent filesystem root.
            curr_dentry = (struct dentry *)BPF_CORE_READ(mnt, mnt_mountpoint);
            mnt = BPF_CORE_READ(mnt, mnt_parent);
            curr_vfsmount = (struct vfsmount *)&mnt->mnt;
            // We might be at another fs root here (in which case
            // curr_dentry->d_name will have "/", we need to go up another
            // level to get an actual component name), so fill the dentry
            // pointer array at this spot with NULL so it's ignored in the next
            // loop and continue to check the above condition again.
            dentry_arr[i] = NULL;
            continue;
        }
        // Normal case: record this path component and step up to its parent.
        dentry_arr[i] = curr_dentry;
        curr_dentry = parent;
    }
    if (truncated) {
        // Use a relative path eg. ./some/dir as a best effort if we have
        // more components than PATH_RESOLVER_MAX_COMPONENTS.
        buf[0] = '.';
        size = 1;
    }
    // Loop 2, walk the array of dentry pointers (in reverse order) and
    // copy the d_name component of each one into buf, separating with '/'
    for (int i = PATH_RESOLVER_MAX_COMPONENTS - 1; i >= 0; i--) {
        struct dentry *dentry = dentry_arr[i];
        if (dentry == NULL)
            continue;
        struct qstr component = BPF_CORE_READ(dentry, d_name);
        // +1 accounts for the '/' separator written before the component.
        if (size + component.len + 1 > PATH_MAX) {
            bpf_printk("path under construction is too long: %s", buf);
            goto out_err;
        }
        // Note that even though the value of size is guaranteed to be
        // less than PATH_MAX_INDEX_MASK here, we have to apply the bound again
        // before using it an index into an array as if it's spilled to the
        // stack by the compiler, the verifier bounds information will not be
        // retained after each bitwise and (this only carries over when stored
        // in a register).
        buf[size & PATH_MAX_INDEX_MASK] = '/';
        size = (size + 1) & PATH_MAX_INDEX_MASK;
        int ret = bpf_probe_read_kernel_str(buf + (size & PATH_MAX_INDEX_MASK),
                                            PATH_MAX > size ? PATH_MAX - size : 0,
                                            (void *)component.name);
        if (ret > 0) {
            // ret includes the NUL terminator; the next component (or the
            // final terminator) overwrites it, so advance by ret - 1.
            size += ((ret - 1) & PATH_MAX_INDEX_MASK);
        } else {
            bpf_printk("could not read d_name at %p, current path %s", component.name, buf);
            goto out_err;
        }
    }
    // Special case: root directory. If the path is "/", the above loop will
    // not have run and thus buf will be an empty string. We handle that case
    // here.
    if (buf[0] == '\0') {
        buf[0] = '/';
        buf[1] = '\0';
        size = 1;
    }
    return size + 1; // size does not include the \0
out_err:
    buf[0] = '\0';
    return 1;
}
// Per-CPU scratch area used by ebpf_resolve_kernfs_node_to_string to store
// the chain of kernfs_node pointers collected while walking towards the root.
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); // per-CPU, so no synchronization is needed
    __type(key, u32);                        // single slot, key is always 0
    __type(value, struct kernfs_node *[PATH_RESOLVER_MAX_COMPONENTS]);
    __uint(max_entries, 1);                  // one scratch array per CPU
} path_resolver_kernfs_node_scratch_map SEC(".maps");
// Resolve a struct kernfs_node to a string. Returns the size of the
// constructed path string, including the null terminator.
// Resolve a struct kernfs_node to a string. Returns the size of the
// constructed path string, including the null terminator.
//
// buf: output buffer, must be at least PATH_MAX bytes; always left
//      NUL-terminated (set to "" on error, in which case 1 is returned).
// kn:  leaf kernfs_node to resolve; may be NULL, in which case buf is "".
//
// The `& PATH_MAX_INDEX_MASK` operations are there to satisfy the BPF
// verifier's bounds tracking — do not remove them.
static size_t ebpf_resolve_kernfs_node_to_string(char *buf, struct kernfs_node *kn)
{
    size_t cur = 0;
    int depth = 0, zero = 0, read_len, name_len;
    char name[KERNFS_NODE_COMPONENT_MAX_LEN];
    // Make buf an empty string up front so an early error never leaves
    // uninitialized garbage in it.
    buf[0] = '\0';
    struct kernfs_node **kna = bpf_map_lookup_elem(&path_resolver_kernfs_node_scratch_map, &zero);
    if (!kna) {
        bpf_printk("could not get scratch area");
        goto out_err;
    }
    // Pass 1: walk parent pointers, stashing each node so pass 2 can emit
    // components in root-to-leaf order. Bounded for the verifier; anything
    // deeper than PATH_RESOLVER_MAX_COMPONENTS - 1 is silently truncated.
    while (depth < PATH_RESOLVER_MAX_COMPONENTS - 1) {
        if (!kn)
            break;
        kna[depth] = kn;
        kn = BPF_CORE_READ(kn, parent);
        depth++;
    }
    // Pass 2: pop the stashed nodes (deepest index = closest to root) and
    // append "/<name>" for each.
    while (depth > 0) {
        depth--;
        struct kernfs_node *curr = kna[depth];
        read_len = bpf_probe_read_kernel_str(&name, KERNFS_NODE_COMPONENT_MAX_LEN,
                                             (void *)BPF_CORE_READ(curr, name));
        if (read_len < 0) {
            bpf_printk("could not read kernfs_node name: %d", read_len);
            goto out_err;
        }
        // read_len includes the NUL terminator.
        name_len = read_len - 1;
        if (name_len == 0)
            continue; // skip empty components (e.g. the root node's "/")
        if (cur + name_len + 1 > PATH_MAX) {
            bpf_printk("path too long");
            goto out_err;
        }
        buf[cur & PATH_MAX_INDEX_MASK] = '/';
        cur = (cur + 1) & PATH_MAX_INDEX_MASK;
        // NOTE(review): when fewer than KERNFS_NODE_COMPONENT_MAX_LEN bytes
        // remain in buf, this passes size 0 and copies nothing even though
        // the length check above guarantees the component would fit — paths
        // approaching PATH_MAX may lose trailing components. Presumably a
        // verifier-bounds compromise; confirm before tightening.
        if (bpf_probe_read_kernel_str(
                buf + (cur & PATH_MAX_INDEX_MASK),
                PATH_MAX - cur > KERNFS_NODE_COMPONENT_MAX_LEN ? KERNFS_NODE_COMPONENT_MAX_LEN : 0,
                (void *)name) < 0)
            goto out_err;
        cur += name_len & PATH_MAX_INDEX_MASK;
    }
    return cur + 1; // cur does not include the \0
out_err:
    buf[0] = '\0';
    return 1;
}
static size_t ebpf_resolve_pids_ss_cgroup_path_to_string(char *buf, const struct task_struct *task)
{
/*
* Since pids_cgrp_id is an enum value, we need to get it at runtime as it
* can change kernel-to-kernel depending on the kconfig or possibly not be
* enabled at all.
*/
int cgrp_id;
if (bpf_core_enum_value_exists(enum cgroup_subsys_id, pids_cgrp_id)) {
cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id, pids_cgrp_id);
} else {
/* Pids cgroup is not enabled on this kernel */
buf[0] = '\0';
return 1;
}
struct kernfs_node *kn = BPF_CORE_READ(task, cgroups, subsys[cgrp_id], cgroup, kn);
return ebpf_resolve_kernfs_node_to_string(buf, kn);
}
#endif // EBPF_EVENTPROBE_PATHRESOLVER_H