bpf/profiling/network/sock_stats.h (169 lines of code) (raw):

// Licensed to Apache Software Foundation (ASF) under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Apache Software Foundation (ASF) licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include "args.h" #define CONNECTION_ROLE_TYPE_UNKNOWN 0 #define CONNECTION_ROLE_TYPE_CLIENT 1 #define CONNECTION_ROLE_TYPE_SERVER 2 // active connection cached into the hashmap // if connection closed, then deleted struct active_connection_t { // random value in one active connection // generate a unique ID through connection_id + random_id, which is convenient for histogram data storage __u64 random_id; // process id __u32 pid; // socket fd under process __u32 sockfd; // the type of role in current connection __u32 role; // socket type __u32 socket_family; // remote address __u32 remote_addr_v4; __u8 remote_addr_v6[16]; __u32 remote_port; // local address __u32 local_addr_v4; __u8 local_addr_v6[16]; __u16 local_port; // basic stats(bytes, avg(exe_time/count)) __u64 write_bytes; __u64 write_count; __u64 write_exe_time; __u64 read_bytes; __u64 read_count; __u64 read_exe_time; // RTT when write __u64 write_rtt_count; __u64 write_rtt_time; void *last_recv_sk_buff; // for protocol analyze __u8 protocol; // connect event already send __u8 connect_event_send; // current connection is ssl __u8 ssl; __u8 fix1; __u32 fix2; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 10000); __type(key, __u64); __type(value, struct active_connection_t); } active_connection_map SEC(".maps"); static __inline __u64 gen_tgid_fd(__u32 tgid, __u32 sockfd) { return ((__u64)tgid << 32) | sockfd; } // notify to the user-space the connection connected(connect, accept) // only traced connection ipv4/v6 or unknown connection struct socket_connect_event_t { // current connection id __u64 conid; // random value in one active connection // generate a unique ID through connection_id + random_id, which is convenient for histogram data storage. // we need random_id becase socketfd would be multiplexed in the same pid, // but the metrics data(histogram) is MAP data, which can only be read by user-space with interval, so it would cause data delay. __u64 random_id; // the duration of connect or connect, if unknown role then return 0 __u64 exe_time; // is need to complete the address information __u32 need_complete_addr; // current process id __u32 pid; // current socket fd __u32 sockfd; // create from function name __u32 func_name; // the type of role in current connection __u32 role; // socket type __u32 socket_family; // upstream __u32 remote_addr_v4; __u8 remote_addr_v6[16]; __u32 remote_port; // downstream __u32 local_addr_v4; __u8 local_addr_v6[16]; __u16 local_port; }; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); } socket_connection_event_queue SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __type(key, __u32); __type(value, struct socket_connect_event_t); __uint(max_entries, 1); } socket_connect_event_per_cpu_map SEC(".maps"); static __inline struct socket_connect_event_t* create_socket_connect_event() { __u32 kZero = 0; return bpf_map_lookup_elem(&socket_connect_event_per_cpu_map, &kZero); } struct socket_close_event_t { // current connection id __u64 conid; // random value in one active connection // generate a unique ID through connection_id + random_id, which is convenient for histogram data storage __u64 random_id; // close event execute time __u64 exe_time; // process id __u32 pid; // socket fd under process __u32 sockfd; // the protocol type of the connection __u8 protocol; // the connection is ssl __u8 ssl; __u16 fix; // the type of role in current connection __u32 role; // socket type __u32 socket_family; // upstream __u32 remote_addr_v4; __u8 remote_addr_v6[16]; __u32 remote_port; // downstream __u32 local_addr_v4; __u8 local_addr_v6[16]; __u16 local_port; __u32 fix1; // basic stats(bytes, avg(exe_time/count)) __u64 write_bytes; __u64 write_count; __u64 write_exe_time; __u64 read_bytes; __u64 read_count; __u64 read_exe_time; // RTT when write __u32 write_rtt_count; __u32 write_rtt_time; }; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); } socket_close_event_queue SEC(".maps"); // histogram stats #define SOCKET_CONNECTION_STATS_HISTOGRAM_DATA_TYPE_RTT 1 // RTT #define SOCKET_CONNECTION_STATS_HISTOGRAM_DATA_TYPE_EXE_TIME 2 // Execute time struct socket_connection_histogram_key_t { // conid + random_id = unique id __u64 conid; __u64 random_id; // histogram bucket __u64 bucket; // ingress, egress __u8 data_direction; // RTT, Execute time, etc. __u8 data_type; }; struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 10000); __type(key, struct socket_connection_histogram_key_t); __type(value, __u32); } socket_connection_stats_histogram SEC(".maps"); #define HISTOGRAM_MILLI_TOTAL_BUCKETS 34 // 0ms, 0.01ms, 0.05ms, 0.1ms, 0.5ms, 1ms, 1.2ms, 1.5ms, 1.7ms, 2ms, 2.5ms, 3ms, 5ms, 7ms, 10ms, 13ms, 16ms, 20ms, 25ms, 30ms, 35ms, 40ms, 45ms, 50ms, 70ms, 100ms, 150ms, 200ms, 300ms, 500ms, 1s, 2s, 3s, 5s __u64 histogram_milli_buckets[HISTOGRAM_MILLI_TOTAL_BUCKETS] = {0, 10000, 50000, 100000, 500000, 1000000, 1200000, 1500000, 1700000, 2000000, 2500000, 3000000, 5000000, 7000000, 10000000, 13000000, 16000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 70000000, 100000000, 150000000, 200000000, 300000000, 500000000, 1000000000, 2000000000, 3000000000, 5000000000}; static __inline void add_to_socket_connection_stats_histogram(__u64 conid, __u64 random_id, __u32 direction, __u32 type, __u64 value) { // RTT from us to ns if (type == SOCKET_CONNECTION_STATS_HISTOGRAM_DATA_TYPE_RTT) { value *= 1000; } __u64 bucket = HISTOGRAM_MILLI_TOTAL_BUCKETS - 1; for(__u64 inx = 1; inx < HISTOGRAM_MILLI_TOTAL_BUCKETS - 1; inx = inx + 1) { if (histogram_milli_buckets[inx] > value) { bucket = inx - 1; break; } } struct socket_connection_histogram_key_t key = {}; key.conid = conid; key.random_id = random_id; key.data_direction = direction; key.data_type = type; key.bucket = bucket; __u32 *val; val = bpf_map_lookup_elem(&socket_connection_stats_histogram, &key); if (!val) { __u32 count = 0; bpf_map_update_elem(&socket_connection_stats_histogram, &key, &count, BPF_NOEXIST); val = bpf_map_lookup_elem(&socket_connection_stats_histogram, &key); if (!val) return; } (*val) += 1; } #define SOCKET_EXCEPTION_OPERATION_TYPE_RETRANSMIT 1 #define SOCKET_EXCEPTION_OPERATION_TYPE_DROP 2 struct socket_exception_operation_event_t { __u32 pid; // socket type __u32 socket_family; // remote __u32 remote_addr_v4; __u8 remote_addr_v6[16]; __u32 remote_port; // local __u32 local_addr_v4; __u8 local_addr_v6[16]; __u16 local_port; // operation type __u32 type; }; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); } socket_exception_operation_event_queue SEC(".maps"); // openssl read or write struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 10000); __type(key, __u64); __type(value, struct sock_data_args_t); } openssl_sock_data_args SEC(".maps");