GPL/Events/Network/Probe.bpf.c

// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause

/*
 * Copyright (C) 2021 Elasticsearch BV
 *
 * This software is dual-licensed under the BSD 2-Clause and GPL v2 licenses.
 * You may choose either one of them if you use this software.
 */

#include "vmlinux.h"

#include <bpf/bpf_core_read.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "Helpers.h"
#include "Network.h"
#include "State.h"
#include "Varlen.h"

DECL_FUNC_RET(inet_csk_accept);

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 131072);
    __type(key, struct sock *);
    __type(value, u32);
    __uint(map_flags, BPF_F_NO_PREALLOC);
} sk_to_tgid SEC(".maps");

static int inet_csk_accept__exit(struct sock *sk)
{
    if (!sk)
        goto out;
    if (ebpf_events_is_trusted_pid())
        goto out;

    struct ebpf_net_event *event = bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0);
    if (!event)
        goto out;

    if (ebpf_network_event__fill(event, sk)) {
        bpf_ringbuf_discard(event, 0);
        goto out;
    }

    // Record this socket so we can emit a close
    u32 tgid = event->pids.tgid;
    (void)bpf_map_update_elem(&sk_to_tgid, &sk, &tgid, BPF_ANY);

    event->hdr.type = EBPF_EVENT_NETWORK_CONNECTION_ACCEPTED;
    bpf_ringbuf_submit(event, 0);

out:
    return 0;
}

SEC("fexit/inet_csk_accept")
int BPF_PROG(fexit__inet_csk_accept)
{
    struct sock *ret = FUNC_RET_READ(___type(ret), inet_csk_accept);
    return inet_csk_accept__exit(ret);
}

SEC("kretprobe/inet_csk_accept")
int BPF_KRETPROBE(kretprobe__inet_csk_accept, struct sock *ret)
{
    return inet_csk_accept__exit(ret);
}

static int tcp_connect(struct sock *sk, int ret)
{
    if (ret)
        goto out;
    if (ebpf_events_is_trusted_pid())
        goto out;

    struct ebpf_net_event *event = bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0);
    if (!event)
        goto out;

    if (ebpf_network_event__fill(event, sk)) {
        bpf_ringbuf_discard(event, 0);
        goto out;
    }

    // Record this socket so we can emit a close
    u32 tgid = event->pids.tgid;
    (void)bpf_map_update_elem(&sk_to_tgid, &sk, &tgid, BPF_ANY);

    event->hdr.type = EBPF_EVENT_NETWORK_CONNECTION_ATTEMPTED;
    bpf_ringbuf_submit(event, 0);

out:
    return 0;
}

SEC("fexit/tcp_v4_connect")
int BPF_PROG(fexit__tcp_v4_connect, struct sock *sk, struct sockaddr *uaddr, int addr_len, int ret)
{
    return tcp_connect(sk, ret);
}

SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(kprobe__tcp_v4_connect, struct sock *sk)
{
    struct ebpf_events_state state = {};
    state.tcp_v4_connect.sk        = sk;

    if (ebpf_events_is_trusted_pid())
        return 0;

    ebpf_events_state__set(EBPF_EVENTS_STATE_TCP_V4_CONNECT, &state);
    return 0;
}

SEC("kretprobe/tcp_v4_connect")
int BPF_KRETPROBE(kretprobe__tcp_v4_connect, int ret)
{
    struct ebpf_events_state *state;

    state = ebpf_events_state__get(EBPF_EVENTS_STATE_TCP_V4_CONNECT);
    if (!state)
        return 0;

    return tcp_connect(state->tcp_v4_connect.sk, ret);
}

SEC("fexit/tcp_v6_connect")
int BPF_PROG(fexit__tcp_v6_connect, struct sock *sk, struct sockaddr *uaddr, int addr_len, int ret)
{
    return tcp_connect(sk, ret);
}

SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(kprobe__tcp_v6_connect, struct sock *sk)
{
    struct ebpf_events_state state = {};
    state.tcp_v6_connect.sk        = sk;

    if (ebpf_events_is_trusted_pid())
        return 0;

    ebpf_events_state__set(EBPF_EVENTS_STATE_TCP_V6_CONNECT, &state);
    return 0;
}

SEC("kretprobe/tcp_v6_connect")
int BPF_KRETPROBE(kretprobe__tcp_v6_connect, int ret)
{
    struct ebpf_events_state *state;

    state = ebpf_events_state__get(EBPF_EVENTS_STATE_TCP_V6_CONNECT);
    if (!state)
        return 0;

    return tcp_connect(state->tcp_v6_connect.sk, ret);
}
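
/*
 * Close events. As with the accept and connect hooks above, both a
 * BTF-based (fentry) and a kprobe variant are provided, so the loader can
 * attach whichever the running kernel supports. The close event carries the
 * bytes_sent / bytes_received counters read from struct tcp_sock via CO-RE.
 */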
static int tcp_close__enter(struct sock *sk)
{
    if (ebpf_events_is_trusted_pid())
        goto out;

    struct tcp_sock *tp = (struct tcp_sock *)sk;
    u64 bytes_sent      = BPF_CORE_READ(tp, bytes_sent);
    u64 bytes_received  = BPF_CORE_READ(tp, bytes_received);

    // Only process sockets we added, but since storage is limited, fall back to
    // looking at bytes if we're full
    if (bpf_map_delete_elem(&sk_to_tgid, &sk) != 0 && bytes_sent == 0 && bytes_received == 0)
        goto out;

    struct ebpf_net_event *event = bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0);
    if (!event)
        goto out;

    if (ebpf_network_event__fill(event, sk)) {
        bpf_ringbuf_discard(event, 0);
        goto out;
    }

    event->net.tcp.close.bytes_sent     = bytes_sent;
    event->net.tcp.close.bytes_received = bytes_received;

    event->hdr.type = EBPF_EVENT_NETWORK_CONNECTION_CLOSED;
    bpf_ringbuf_submit(event, 0);

out:
    return 0;
}

SEC("fentry/tcp_close")
int BPF_PROG(fentry__tcp_close, struct sock *sk, long timeout)
{
    return tcp_close__enter(sk);
}

SEC("kprobe/tcp_close")
int BPF_KPROBE(kprobe__tcp_close, struct sock *sk, long timeout)
{
    return tcp_close__enter(sk);
}
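
/*
 * DNS capture. The cgroup_skb programs below snoop UDP traffic to or from
 * port 53 and forward the raw packet to userspace as a variable-length
 * event field. The owning tgid is looked up in sk_to_tgid, keyed by the
 * socket pointer, which the cgroup/sock_create, cgroup/connect4 and
 * cgroup/{send,recv}msg4 hooks further down keep up to date.
 */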
#ifdef notyet
/*
 * XXX naive, only handles ROUTING and DEST, untested, ipv6 needs more work to
 * be enabled.
 */
int skb_peel_nexthdr(struct __sk_buff *skb, u8 wanted)
{
    struct ipv6hdr ip6;
    int off;
    u16 next;

    off = 0;
    if (bpf_skb_load_bytes(skb, off, &ip6, sizeof(ip6)))
        return (-1);
    off += sizeof(ip6);
    next = ip6.nexthdr;

    for (;;) {
        if (next == wanted)
            return (off);

        switch (next) {
        case NEXTHDR_ROUTING: /* FALLTHROUGH */
        case NEXTHDR_DEST:
            if (bpf_skb_load_bytes(skb, off, &next, sizeof(next)))
                return (-1);
            off += ((next >> 8) + 1) * 8;
            next = next & 0xff;
            continue;
        default:
            return (-1);
        }
    }

    return (-1); /* NOTREACHED */
}
#endif

int skb_in_or_egress(struct __sk_buff *skb, int ingress)
{
    struct udphdr udp;
    struct bpf_sock *sk;
    u32 *tgid, cap_len, zero = 0;
    u64 *sk_addr;
    struct ebpf_dns_event *event;
    struct ebpf_varlen_field *field;

    if (skb->family != AF_INET && skb->family != AF_INET6)
        goto ignore;
    if ((sk = skb->sk) == NULL)
        goto ignore;
    if ((sk = bpf_sk_fullsock(sk)) == NULL)
        goto ignore;
    if (sk->protocol != IPPROTO_UDP)
        goto ignore;

    if (sk->family == AF_INET) {
        struct iphdr ip;

        if (bpf_skb_load_bytes(skb, 0, &ip, sizeof(ip)))
            goto ignore;
        if (ip.protocol != IPPROTO_UDP)
            goto ignore;
        if (bpf_skb_load_bytes(skb, ip.ihl << 2, &udp, sizeof(udp)))
            goto ignore;
    } else {
        goto ignore;
    }
#ifdef notyet
    /* ipv6 needs further work */
    else if (sk->family == AF_INET6) {
        int t_off;

        t_off = skb_peel_nexthdr(skb, NEXTHDR_UDP);
        if (t_off == -1)
            goto ignore;
        if (bpf_skb_load_bytes(skb, t_off, &udp, sizeof(udp)))
            goto ignore;
    }
#endif

    if (bpf_ntohs(udp.dest) != 53 && bpf_ntohs(udp.source) != 53)
        goto ignore;

    /*
     * Needed for kernels prior to f79efcb0075a20633cbf9b47759f2c0d538f78d8
     * bpf: Permits pointers on stack for helper calls
     */
    sk_addr = bpf_map_lookup_elem(&scratch64, &zero);
    if (sk_addr == NULL)
        goto ignore;
    *sk_addr = (u64)sk;
    tgid     = bpf_map_lookup_elem(&sk_to_tgid, sk_addr);
    if (tgid == NULL)
        goto ignore;

    cap_len = skb->len;
    /*
     * verifier will complain, even with a skb->len
     * check at the beginning.
     */
    if (cap_len > MAX_DNS_PACKET)
        cap_len = MAX_DNS_PACKET;

    /*
     * Yes this code is weird, but it convinces old verifiers (5.10), don't
     * blame me, be sure to test 5.10 if you change it. The minimal packet
     * should be iphlen + udphlen + 12(dns header size). Old verifiers
     * (5.10) are very sensitive here and a non constant right expression
     * (since iphlen is not constant due to options) fails. Do what we can
     * and filter the remaining bad packets in userland, same applies to
     * ipv6. Also be careful with `if cap_len > 0`, as clang will compile it
     * to a JNZ, which doesn't adjust umin, causing the
     * bpf_skb_load_bytes() down below to think cap_len can be zero.
     */
    if (cap_len >= (sizeof(struct iphdr) + sizeof(udp) + 12)) {
        event = get_event_buffer();
        if (event == NULL)
            goto ignore;

        event->hdr.type    = EBPF_EVENT_NETWORK_DNS_PKT;
        event->hdr.ts      = bpf_ktime_get_ns();
        event->hdr.ts_boot = bpf_ktime_get_boot_ns_helper();
        event->tgid        = *tgid;
        event->cap_len     = cap_len;
        event->orig_len    = skb->len;
        event->direction   = ingress ? EBPF_NETWORK_DIR_INGRESS : EBPF_NETWORK_DIR_EGRESS;

        ebpf_vl_fields__init(&event->vl_fields);
        field = ebpf_vl_field__add(&event->vl_fields, EBPF_VL_FIELD_DNS_BODY);
        if (bpf_skb_load_bytes(skb, 0, field->data, cap_len))
            goto ignore;
        ebpf_vl_field__set_size(&event->vl_fields, field, cap_len);

        ebpf_ringbuf_write(&ringbuf, event, EVENT_SIZE(event), 0);
    }

ignore:
    return (1);
}

SEC("cgroup_skb/egress")
int skb_egress(struct __sk_buff *skb)
{
    return skb_in_or_egress(skb, 0);
}

SEC("cgroup_skb/ingress")
int skb_ingress(struct __sk_buff *skb)
{
    return skb_in_or_egress(skb, 1);
}

int sk_maybe_save_tgid(struct bpf_sock *sk)
{
    u32 tgid, zero = 0;
    u64 *sk_addr;

    if (sk->protocol != IPPROTO_UDP)
        return (1);

    tgid = bpf_get_current_pid_tgid() >> 32;

    /*
     * Needed for kernels prior to f79efcb0075a20633cbf9b47759f2c0d538f78d8
     * bpf: Permits pointers on stack for helper calls
     */
    sk_addr = bpf_map_lookup_elem(&scratch64, &zero);
    if (sk_addr == NULL)
        return (1);
    *sk_addr = (u64)sk;
    bpf_map_update_elem(&sk_to_tgid, sk_addr, &tgid, BPF_ANY);

    return (1);
}

/*
 * We save tgid again in send/recv/connect as the file descriptor might have
 * been passed to another process.
 */
SEC("cgroup/sendmsg4")
int sendmsg4(struct bpf_sock_addr *sa)
{
    return sk_maybe_save_tgid(sa->sk);
}

SEC("cgroup/recvmsg4")
int recvmsg4(struct bpf_sock_addr *sa)
{
    return sk_maybe_save_tgid(sa->sk);
}

SEC("cgroup/connect4")
int connect4(struct bpf_sock_addr *sa)
{
    return sk_maybe_save_tgid(sa->sk);
}

SEC("cgroup/sock_create")
int sock_create(struct bpf_sock *sk)
{
    return sk_maybe_save_tgid(sk);
}

SEC("cgroup/sock_release")
int sock_release(struct bpf_sock *sk)
{
    u32 zero = 0;
    u64 *sk_addr;

    if (sk->protocol != IPPROTO_UDP)
        return (1);

    /*
     * Needed for kernels prior to f79efcb0075a20633cbf9b47759f2c0d538f78d8
     * bpf: Permits pointers on stack for helper calls
     */
    sk_addr = bpf_map_lookup_elem(&scratch64, &zero);
    if (sk_addr == NULL)
        return (1);
    *sk_addr = (u64)sk;
    bpf_map_delete_elem(&sk_to_tgid, sk_addr);

    return (1);
}