// bpf/accesslog/common/connection.h (245 lines of code) (raw):
// Licensed to Apache Software Foundation (ASF) under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Apache Software Foundation (ASF) licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "api.h"
#include "socket.h"
#include "data_args.h"
#include "socket_opts.h"
#include "queue.h"
#include "socket_data.h"
// syscall:connect
// Value type of the `conecting_args` map: the arguments captured for a connect call.
// Within this file, submit_connection_when_not_exists() reads `fd`, `start_nacs`,
// `addr` and `remote`; `has_remote` and `sock` are not read here.
struct connect_args_t {
// socket fd the connect call was issued on
__u32 fd;
// NOTE(review): presumably a flag telling whether `remote` has been filled — confirm where it is written
__u32 has_remote;
// address argument of the call; forwarded to submit_new_connection() as `addr`
struct sockaddr* addr;
// NOTE(review): presumably the kernel sock behind the fd — not used in this file, confirm at the writer
struct sock *sock;
// start timestamp of the call; becomes `start_time` (nanoseconds) of the connect event
__u64 start_nacs;
// conntrack information; forwarded to submit_new_connection() as `conntrack`
struct connect_track_remote remote;
};
// Hash map (at most 10000 entries) whose values are `struct connect_args_t`.
// NOTE(review): the __u64 key is presumably the pid_tgid of the calling thread — confirm at the call sites.
// NOTE(review): the identifier is spelled `conecting_args`; it is left untouched because code
// outside this file may refer to the map by this name.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10000);
__type(key, __u64);
__type(value, struct connect_args_t);
} conecting_args SEC(".maps");
// syscall:accept
// Value type of the `accepting_args` map: the arguments captured for an accept call.
// None of these fields is read inside this file.
struct accept_args_t {
// address argument of the accept call
struct sockaddr* addr;
// NOTE(review): presumably the kernel socket created by the accept — confirm where it is filled
struct socket* socket;
// start timestamp of the call (presumably nanoseconds, like connect_args_t.start_nacs — TODO confirm)
__u64 start_nacs;
};
// Hash map (at most 10000 entries) whose values are `struct accept_args_t`.
// NOTE(review): the __u64 key is presumably the pid_tgid of the calling thread — confirm at the call sites.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10000);
__type(key, __u64);
__type(value, struct accept_args_t);
} accepting_args SEC(".maps");
// syscall:close
// Value type of the `closing_args` map: the arguments captured for a close call.
// None of these fields is read inside this file.
struct sock_close_args_t {
// fd passed to close; note it is a signed int here while the other structs use __u32
int fd;
// start timestamp of the call (presumably nanoseconds, like connect_args_t.start_nacs — TODO confirm)
__u64 start_nacs;
};
// Hash map (at most 10000 entries) whose values are `struct sock_close_args_t`.
// NOTE(review): the __u64 key is presumably the pid_tgid of the calling thread — confirm at the call sites.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10000);
__type(key, __u64);
__type(value, struct sock_close_args_t);
} closing_args SEC(".maps");
// Event sent to user space when a connection is established (connect or accept).
// Built and submitted by submit_new_connection() through socket_connection_event_queue.
// The field order and sizes define the layout consumed by user space; do not reorder.
struct socket_connect_event_t {
// current connection id, produced by gen_tgid_fd(): (pid << 32) | sockfd
__u64 conid;
// random value in one active connection
// a unique ID is generated from connection_id + random_id, which is convenient for histogram data storage.
// random_id is needed because a socket fd can be reused within the same pid,
// while the metrics data (histogram) is MAP data that user space only reads at intervals, so it may lag behind.
__u64 random_id;
// connect operation start nanosecond
__u64 start_time;
// connect operation finish nanosecond
__u64 end_time;
// current process id
__u32 pid;
// current socket fd
__u32 sockfd;
// the function that created this event (a SOCKET_OPTS_TYPE_* value, truncated to 8 bits)
__u8 func_name;
// the type of role in current connection (CONNECTION_ROLE_TYPE_*)
__u8 role;
// socket address family (e.g. AF_INET, AF_INET6 or AF_UNKNOWN)
__u8 socket_family;
// is connect success or not
__u8 success;
// explicit padding so that the following fields keep their offsets
__u32 __pad0;
// upstream
// remote IPv4 address, copied as read from the socket / sockaddr (filled for AF_INET)
__u32 remote_addr_v4;
// remote port, converted with bpf_ntohs() before being stored
__u32 remote_port;
// remote IPv6 address bytes (filled for AF_INET6)
__u8 remote_addr_v6[16];
// downstream
// local IPv4 address (filled from the kernel socket for AF_INET)
__u32 local_addr_v4;
// local port, taken from skc_num of the kernel socket
__u32 local_port;
// local IPv6 address bytes (filled from the kernel socket for AF_INET6)
__u8 local_addr_v6[16];
// conntrack upstream address and port; copied from the `conntrack` argument of
// submit_new_connection() when it is provided, otherwise 0
__u64 conntrack_upstream_ipl;
__u64 conntrack_upstream_iph;
__u32 conntrack_upstream_port;
};
// Queue used by submit_new_connection() to deliver socket_connect_event_t records
// (via rover_reserve_buf / rover_submit_buf). DATA_QUEUE is declared outside this file.
DATA_QUEUE(socket_connection_event_queue);
// active connection cached into the hashmap (active_connection_map)
// inserted by submit_new_connection() on success, deleted by submit_close_connection()
struct active_connection_t {
// random value in one active connection
// a unique ID is generated from connection_id + random_id, which is convenient for histogram data storage
__u64 random_id;
// process id
__u32 pid;
// socket fd under process
__u32 sockfd;
// the type of role in current connection (CONNECTION_ROLE_TYPE_*)
__u32 role;
// socket address family, same value that was put into the connect event
__u32 socket_family;
// for protocol analyze; left at 0 by submit_new_connection()
__u8 protocol;
// current connection is ssl; left at 0 by submit_new_connection()
__u8 ssl;
// skip data upload when the protocol break(such as HTTP2); left at 0 by submit_new_connection()
__u8 skip_data_upload;
// explicit padding bytes
__u8 pad0;
__u32 pad1;
};
// Hash map (at most 10000 entries) of the currently active connections.
// Key: connection id from gen_tgid_fd(tgid, fd); value: struct active_connection_t.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10000);
__type(key, __u64);
__type(value, struct active_connection_t);
} active_connection_map SEC(".maps");
// Pack a process id and a socket fd into a single 64-bit connection id:
// `tgid` fills the upper 32 bits and `sockfd` the lower 32 bits.
static __inline __u64 gen_tgid_fd(__u32 tgid, __u32 sockfd) {
    __u64 conid = tgid;
    conid <<= 32;
    conid |= sockfd;
    return conid;
}
// Event sent to user space when an active connection is closed.
// Built and submitted by notify_close_connection() through socket_close_event_queue.
struct socket_close_event_t {
// current connection id, produced by gen_tgid_fd(): (pid << 32) | sockfd
__u64 conid;
// random value in one active connection
// a unique ID is generated from connection_id + random_id, which is convenient for histogram data storage
__u64 random_id;
// close event execute time: start and end timestamps handed to notify_close_connection()
__u64 start_time;
__u64 end_time;
// process id
__u32 pid;
// socket fd under process
__u32 sockfd;
// close success: 1 or 0, derived from the `ret` argument of notify_close_connection()
__u32 success;
};
// Queue used by notify_close_connection() to deliver socket_close_event_t records
// (via rover_reserve_buf / rover_submit_buf). DATA_QUEUE is declared outside this file.
DATA_QUEUE(socket_close_event_queue);
// Tell whether a connection of the given address family should be traced.
// Only AF_INET, AF_INET6 and AF_UNKNOWN are accepted; every other family is rejected.
static __inline bool family_should_trace(const __u32 family) {
    if (family == AF_INET || family == AF_INET6) {
        return true;
    }
    return family == AF_UNKNOWN;
}
static __always_inline void submit_new_connection(void* ctx, bool success, __u32 func_name, __u32 tgid, __u32 fd, __u64 start_nacs,
struct sockaddr* addr, const struct socket* socket, struct connect_track_remote* conntrack, __u8 role) {
// send to the user-space the connection event
__u64 curr_nacs = bpf_ktime_get_ns();
struct socket_connect_event_t *event;
event = rover_reserve_buf(&socket_connection_event_queue, sizeof(*event));
if (event == NULL) {
return;
}
__u64 conid = gen_tgid_fd(tgid, fd);
__u64 random_id = bpf_get_prandom_u32();
event->conid = conid;
event->random_id = random_id;
event->start_time = start_nacs;
event->end_time = curr_nacs;
event->func_name = func_name;
if (func_name == SOCKET_OPTS_TYPE_CONNECT) {
role = CONNECTION_ROLE_TYPE_CLIENT;
} else if (func_name == SOCKET_OPTS_TYPE_ACCEPT) {
role = CONNECTION_ROLE_TYPE_SERVER;
}
event->role = role;
event->pid = tgid;
event->sockfd = fd;
// cleanup and fill the conntrack
event->conntrack_upstream_iph = 0;
event->conntrack_upstream_ipl = 0;
event->conntrack_upstream_port = 0;
if (conntrack != NULL) {
event->conntrack_upstream_iph = (__u64)conntrack->iph;
event->conntrack_upstream_ipl = (__u64)conntrack->ipl;
event->conntrack_upstream_port = conntrack->port;
}
event->success = success;
__u16 port;
__u8 socket_family;
event->local_port = 0;
event->remote_port = 0;
if (socket == NULL) {
struct task_struct* task_ptr = (struct task_struct*)bpf_get_current_task();
struct files_struct *files = _(task_ptr->files);
struct fdtable *fdtable = _(files->fdt);
struct file *fd_data;
struct file **fd_ptr;
bpf_probe_read_kernel(&fd_ptr, sizeof(fd_ptr), &fdtable->fd);
bpf_probe_read_kernel(&fd_data, sizeof(fd_data), &fd_ptr[fd]);
socket = _(fd_data->private_data);
}
if (socket != NULL) {
// only get from accept function(server side)
struct sock* s;
BPF_CORE_READ_INTO(&s, socket, sk);
short unsigned int skc_family;
BPF_CORE_READ_INTO(&skc_family, s, __sk_common.skc_family);
event->socket_family = skc_family;
socket_family = skc_family;
if (event->socket_family == AF_INET) {
BPF_CORE_READ_INTO(&port, s, __sk_common.skc_num);
event->local_port = port;
BPF_CORE_READ_INTO(&event->local_addr_v4, s, __sk_common.skc_rcv_saddr);
BPF_CORE_READ_INTO(&port, s, __sk_common.skc_dport);
event->remote_port = bpf_ntohs(port);
BPF_CORE_READ_INTO(&event->remote_addr_v4, s, __sk_common.skc_daddr);
} else if (event->socket_family == AF_INET6) {
BPF_CORE_READ_INTO(&port, s, __sk_common.skc_num);
event->local_port = port;
BPF_CORE_READ_INTO(&event->local_addr_v6, s, __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr8);
BPF_CORE_READ_INTO(&port, s, __sk_common.skc_dport);
event->remote_port = bpf_ntohs(port);
BPF_CORE_READ_INTO(&event->remote_addr_v6, s, __sk_common.skc_v6_daddr.in6_u.u6_addr8);
}
} else if (addr != NULL) {
event->socket_family = _(addr->sa_family);
socket_family = event->socket_family;
if (event->socket_family == AF_INET) {
struct sockaddr_in *daddr = (struct sockaddr_in *)addr;
bpf_probe_read(&event->remote_addr_v4, sizeof(event->remote_addr_v4), &daddr->sin_addr.s_addr);
bpf_probe_read(&port, sizeof(port), &daddr->sin_port);
event->remote_port = bpf_ntohs(port);
// cleanup the local address
event->local_addr_v4 = 0;
} else if (event->socket_family == AF_INET6) {
struct sockaddr_in6 *daddr = (struct sockaddr_in6 *)addr;
bpf_probe_read(&event->remote_addr_v6, sizeof(event->remote_addr_v6), &daddr->sin6_addr.s6_addr);
bpf_probe_read(&port, sizeof(port), &daddr->sin6_port);
event->remote_port = bpf_ntohs(port);
__builtin_memset(&event->local_addr_v6, 0, sizeof(event->local_addr_v6));
}
} else {
event->socket_family = AF_UNKNOWN;
socket_family = AF_UNKNOWN;
}
rover_submit_buf(ctx, &socket_connection_event_queue, event, sizeof(*event));
if (success == false) {
return;
}
// if connect success, then add the activate connection into the kernel
// active connection save
struct active_connection_t con = {};
con.random_id = random_id;
con.pid = tgid;
con.sockfd = fd;
con.role = role;
con.socket_family = socket_family;
bpf_map_update_elem(&active_connection_map, &conid, &con, 0);
}
// Return the active connection stored for (tgid, fd).
// When none is stored yet, submit_new_connection() is invoked as a successful
// operation without address, socket or conntrack information, and the map is
// looked up again. The result may still be NULL if that insertion did not happen.
static __inline struct active_connection_t* get_or_create_active_conn(void *ctx, __u32 tgid, __u32 fd, __u32 func_name, __u8 role) {
    __u64 conid = gen_tgid_fd(tgid, fd);
    struct active_connection_t *found = bpf_map_lookup_elem(&active_connection_map, &conid);
    if (found == NULL) {
        submit_new_connection(ctx, true, func_name, tgid, fd, 0, NULL, NULL, NULL, role);
        found = bpf_map_lookup_elem(&active_connection_map, &conid);
    }
    return found;
}
static __inline void submit_connection_when_not_exists(void *ctx, __u64 id, struct connect_args_t* connect_args, __u32 func_name, __u8 role) {
__u32 tgid = (__u32)(id >> 32);
__u32 fd = connect_args->fd;
__u64 conid = gen_tgid_fd(tgid, fd);
struct active_connection_t *conn = bpf_map_lookup_elem(&active_connection_map, &conid);
if (conn != NULL) {
return;
}
submit_new_connection(ctx, true, func_name, tgid, connect_args->fd, connect_args->start_nacs, connect_args->addr, NULL, &connect_args->remote, role);
}
// Clean up the per-connection socket-data state and tell user space that the
// connection was closed.
//
//   ctx        - program context, handed to rover_submit_buf()
//   conid      - connection id (key of the per-connection maps)
//   con        - the active connection being closed; must not be NULL
//   start_time - start timestamp of the close operation
//   end_time   - end timestamp of the close operation
//   ret        - return code of the close operation
//
// The entries in socket_data_last_id_map and socket_data_id_generate_map are
// removed even when no event buffer can be reserved.
static __inline void notify_close_connection(void* ctx, __u64 conid, struct active_connection_t* con, __u64 start_time, __u64 end_time, int ret) {
    bpf_map_delete_elem(&socket_data_last_id_map, &conid);
    bpf_map_delete_elem(&socket_data_id_generate_map, &conid);

    struct socket_close_event_t *close_event;
    close_event = rover_reserve_buf(&socket_close_event_queue, sizeof(*close_event));
    if (close_event == NULL) {
        return;
    }
    // the struct has padding after `success`; zero everything so no stale bytes
    // of the reserved buffer are copied to user space
    __builtin_memset(close_event, 0, sizeof(*close_event));

    close_event->conid = conid;
    close_event->random_id = con->random_id;
    close_event->start_time = start_time;
    close_event->end_time = end_time;
    close_event->pid = con->pid;
    close_event->sockfd = con->sockfd;
    // A close-style return code is 0 on success and negative on failure, so the
    // previous `ret > 0` test could never report a success.
    // NOTE(review): assumes callers pass the close(2) result — confirm at the call sites.
    close_event->success = ret >= 0 ? true : false;
    rover_submit_buf(ctx, &socket_close_event_queue, close_event, sizeof(*close_event));
}
// Handle the close of (tgid, fd): when the connection is tracked in
// active_connection_map, emit the close event and remove the entry.
// Untracked connections are ignored. The end timestamp is taken on entry.
static __inline void submit_close_connection(void* ctx, __u32 tgid, __u32 fd, __u64 start_nacs, int ret) {
    __u64 end_nacs = bpf_ktime_get_ns();
    __u64 conid = gen_tgid_fd(tgid, fd);
    struct active_connection_t* tracked = bpf_map_lookup_elem(&active_connection_map, &conid);
    if (tracked != NULL) {
        notify_close_connection(ctx, conid, tracked, start_nacs, end_nacs, ret);
        bpf_map_delete_elem(&active_connection_map, &conid);
    }
}