linux-ebpf/ebpf_cgroup.c (192 lines of code) (raw):
// Copyright (c) Microsoft Corporation
// SPDX-License-Identifier: MIT
#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "socket.h"
struct
{
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, sock_addr_skip_process_entry);
__type(value, sock_addr_skip_process_entry);
__uint(max_entries, 10);
} skip_process_map SEC(".maps");
struct
{
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, destination_entry);
__type(value, destination_entry);
__uint(max_entries, 10);
} policy_map SEC(".maps");
struct
{
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, sock_addr_audit_key); // source port and protocol
__type(value, sock_addr_audit_entry); // audit entry
__uint(max_entries, 200); // some older kernel version cannot support over 200 entries.
} audit_map SEC(".maps");
struct
{
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, __u64); // socket cookie or pid-tgid
__type(value, sock_addr_local_entry); // audit local entry
__uint(max_entries, 200); // some older kernel version cannot support over 200 entries.
} local_map SEC(".maps");
/*
check the current pid in the skip_process map.
return 1 if found, otherwise return 0.
*/
static __always_inline int
check_skip_process_map_entry(__u32 pid)
{
sock_addr_skip_process_entry key = {0};
key.pid = pid;
// Find the entry in the skip_process map.
sock_addr_skip_process_entry *skip_entry = bpf_map_lookup_elem(&skip_process_map, &key);
return (skip_entry != NULL) ? 1 : 0;
}
/*
update audit map entry if not skip redirecting.
return 0 if the entry is updated, otherwise
return 1 if pid found in the skip_process_map.
*/
static __always_inline int
update_local_map_entry(struct bpf_sock_addr *ctx)
{
__u64 pid_tip = bpf_get_current_pid_tgid();
__u32 pid = (__u32)(pid_tip >> 32);
if (check_skip_process_map_entry(pid) == 1)
{
return 1;
}
sock_addr_local_entry entry = {0};
entry.process_id = pid;
__u32 uid = (__u32)(bpf_get_current_uid_gid() >> 32);
entry.logon_id = uid;
entry.is_root = (uid == 0) ? 1 : 0; // root uid is 0.
entry.destination_ipv4 = ctx->user_ip4; // we only support ipv4 so far.
entry.destination_port = ctx->user_port;
entry.protocol = ctx->protocol;
__u64 ret = bpf_map_update_elem(&local_map, &pid_tip, &entry, 0);
if (ret != 0)
{
bpf_printk("update_local_map_entry: Failed to update local map entry with results:%u.", ret);
}
else
{
bpf_printk("update_local_map_entry: Updated local map entry with key:%u.", pid_tip);
}
return 0;
}
static __always_inline int
authorize_v4(struct bpf_sock_addr *ctx)
{
destination_entry entry = {0};
entry.destination_ip.ipv4 = ctx->user_ip4;
entry.destination_port = ctx->user_port;
entry.protocol = ctx->protocol;
// Find the entry in the policy map.
destination_entry *policy = bpf_map_lookup_elem(&policy_map, &entry);
if (policy != NULL)
{
bpf_printk("authorize_v4: Found v4 proxy entry value: %u, %u", policy->destination_ip.ipv4, policy->destination_port);
// update to the audit map before changing the destination ip and port.
if (update_local_map_entry(ctx) == 1)
{
bpf_printk("authorize_v4: Found skip process entry, skip the redirection.");
return BPF_SOCK_ADDR_VERDICT_PROCEED;
}
// TODO: check if the local ip is set.
// __u32 local_ip;
// __u64 read = bpf_probe_read_kernel(&local_ip, sizeof(__u32), &ctx->msg_src_ip4);
// if (read == 0 && local_ip != 0)
// {
// // read the local ip from the msg_src_ip4 successfully and ip is set.
// ctx->user_ip4 = local_ip;
// bpf_printk("authorize_v4: Local/source ip is set, redirect to source ip:%u.", local_ip);
// }
// else
{
ctx->user_ip4 = policy->destination_ip.ipv4;
bpf_printk("authorize_v4: Local/source ip is not set, redirect to loopback ip.");
}
ctx->user_port = policy->destination_port;
}
return BPF_SOCK_ADDR_VERDICT_PROCEED;
}
SEC("cgroup/connect4")
int connect4(struct bpf_sock_addr *ctx)
{
__u64 cookie = bpf_get_socket_cookie(ctx);
return authorize_v4(ctx);
}
static __always_inline int
update_audit_map_entry_sk(__u32 local_port, sock_addr_local_entry *local_entry)
{
sock_addr_audit_key key = {0};
key.protocol = local_entry->protocol;
key.source_port = local_port;
sock_addr_audit_entry entry = {0};
entry.process_id = local_entry->process_id;
entry.logon_id = local_entry->logon_id;
entry.is_root = local_entry->is_root;
entry.destination_ipv4 = local_entry->destination_ipv4;
entry.destination_port = local_entry->destination_port;
__u64 ret = bpf_map_update_elem(&audit_map, &key, &entry, 0);
if (ret != 0)
{
bpf_printk("update_audit_map_entry_sk: Failed to update audit map entry with results:%u.", ret);
}
else
{
bpf_printk("update_audit_map_entry_sk: Updated audit map entry with local port:%u.", key.source_port);
}
return 0;
}
static __always_inline int
trace_v4(struct pt_regs *ctx, struct probe_sock *sk)
{
struct sock_common skc;
// bpf_probe_read_kernel helper function requires kernel version 5.5+
// hence have to use bpf_probe_read helper function instead.
long re = bpf_probe_read(&skc, sizeof(struct sock_common), &sk->__sk_common);
if (re != 0)
{
// 0 is success.
return 0;
}
if (skc.skc_family != AF_INET)
{
// Only support IPv4.
return 0;
}
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = (__u32)(pid_tgid >> 32);
if (check_skip_process_map_entry(pid) == 1)
{
bpf_printk("trace_v4: Found skip process entry %u, skip the trace.", pid);
return 0;
}
// Find the entry in the local map.
sock_addr_local_entry *local_entry = bpf_map_lookup_elem(&local_map, &pid_tgid);
if (local_entry != NULL)
{
update_audit_map_entry_sk(skc.skc_num, local_entry);
__u64 ret = bpf_map_delete_elem(&local_map, &pid_tgid);
if (ret != 0)
{
bpf_printk("trace_v4: Failed to delete local map entry with results:%u.", ret);
}
else
{
bpf_printk("trace_v4: Deleted local map entry with key:%u.", pid_tgid);
}
return 0;
}
destination_entry entry = {0};
entry.destination_ip.ipv4 = skc.skc_daddr;
entry.destination_port = skc.skc_dport;
entry.protocol = IPPROTO_TCP;
// Find the entry in the policy map.
destination_entry *policy = bpf_map_lookup_elem(&policy_map, &entry);
if (policy != NULL)
{
__u32 uid = (__u32)(bpf_get_current_uid_gid() >> 32);
sock_addr_audit_key key = {0};
key.protocol = IPPROTO_TCP;
key.source_port = skc.skc_num;
sock_addr_audit_entry entry = {0};
entry.process_id = pid;
entry.logon_id = uid;
entry.is_root = (uid == 0) ? 1 : 0; // root uid is 0.
entry.destination_ipv4 = skc.skc_daddr;
entry.destination_port = skc.skc_dport;
__u64 ret = bpf_map_update_elem(&audit_map, &key, &entry, 0);
if (ret != 0)
{
bpf_printk("trace_v4: Failed to update audit map entry with results:%u.", ret);
}
else
{
bpf_printk("trace_v4: Updated audit map entry with local port:%u.", key.source_port);
}
}
return 0;
}
SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(tcp_v4_connect, struct probe_sock *sk)
{
return trace_v4(ctx, sk);
}
char _license[] SEC("license") = "GPL";