non-GPL/HostIsolation/Lib/TcLoader.c (411 lines of code) (raw):

// SPDX-License-Identifier: Elastic-2.0 /* * Copyright 2021 Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under * one or more contributor license agreements. Licensed under the Elastic * License 2.0; you may not use this file except in compliance with the Elastic * License 2.0. */ // // Loader for tc eBPF programs // #include "TcLoader.h" #include "Common.h" #include <argp.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/socket.h> #include <sys/stat.h> #include <time.h> #include <unistd.h> /* linux definitions */ #define SOL_NETLINK 270 #define NETLINK_EXT_ACK 11 #define ETH_P_ALL 0x0003 /* Every packet */ #define TC_H_MAJ_MASK (0xFFFF0000U) #define TC_H_MIN_MASK (0x0000FFFFU) #define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) #define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) #define TC_H_MAKE(maj, min) (((maj)&TC_H_MAJ_MASK) | ((min)&TC_H_MIN_MASK)) #define TC_H_INGRESS (0xFFFFFFF1U) #define TC_H_CLSACT TC_H_INGRESS #define TC_H_MIN_EGRESS 0xFFF3U #define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) enum { TCA_BPF_UNSPEC, TCA_BPF_ACT, TCA_BPF_POLICE, TCA_BPF_CLASSID, TCA_BPF_OPS_LEN, TCA_BPF_OPS, TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, TCA_BPF_TAG, TCA_BPF_ID, __TCA_BPF_MAX, }; #define NLMSG_TAIL(nmsg) ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) static int attr_put(struct nlmsghdr *n, size_t max, int type, const void *buf, size_t attr_len) { size_t len = RTA_LENGTH(attr_len); struct rtattr *rta = NULL; int rv = -1; if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > max) { ebpf_log("attr_put error: message longer than %d\n", max); rv = -1; goto out; } rta = NLMSG_TAIL(n); rta->rta_len = len; rta->rta_type = type; if (attr_len) { memcpy(RTA_DATA(rta), buf, attr_len); } n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); rv = 0; out: return rv; } static int attr_put_32(struct nlmsghdr *n, size_t max, int type, __u32 data) { return attr_put(n, max, type, &data, sizeof(__u32)); } static int attr_put_str(struct nlmsghdr *n, size_t max, int type, const char *s) { return attr_put(n, max, type, s, strlen(s) + 1); } static void rtnetlink_close(struct rtnetlink_handle *r) { if (r->fd >= 0) { close(r->fd); r->fd = -1; } } static void rtnetlink_send_error(struct nlmsgerr *err) { ebpf_log("rtnetlink replied: %s\n", strerror(-err->error)); } static int rtnetlink_open(struct rtnetlink_handle *rth) { socklen_t address_len = 0; int sendbuf = 32 * 1024; int receivebuf = 1024 * 1024; int one = 1; int rv = -1; if (rth == NULL) { ebpf_log("error: rth is NULL\n"); rv = -1; goto out; } memset(rth, 0, sizeof(*rth)); rth->proto = NETLINK_ROUTE; rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (rth->fd < 0) { ebpf_log("cannot open netlink socket\n"); rv = -1; goto out; } if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF, &sendbuf, sizeof(sendbuf)) < 0) { ebpf_log("error setsockopt sendbuf\n"); rv = -1; goto out; } if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF, &receivebuf, sizeof(receivebuf)) < 0) { ebpf_log("error setsockopt receivebuf\n"); rv = -1; goto out; } if (setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one))) { ebpf_log("error setsockopt netlink\n"); rv = -1; goto out; } memset(&rth->local, 0, sizeof(rth->local)); rth->local.nl_family = AF_NETLINK; rth->local.nl_groups = 0; if (bind(rth->fd, (struct sockaddr *)&rth->local, sizeof(rth->local)) < 0) { ebpf_log("failed to bind netlink socket\n"); rv = -1; goto out; } address_len = sizeof(rth->local); if (getsockname(rth->fd, (struct sockaddr *)&rth->local, &address_len) < 0) { ebpf_log("error getsockname\n"); rv = -1; goto out; } if (address_len != sizeof(rth->local)) { ebpf_log("bad address length %d\n", address_len); rv = -1; goto out; } if (rth->local.nl_family != AF_NETLINK) { ebpf_log("bad address family %d\n", rth->local.nl_family); rv = -1; goto out; } rth->seq = time(NULL); rv = 0; out: return rv; } static int rtnetlink_recv(int fd, struct msghdr *msg, char **answer) { struct iovec *iov = NULL; char *buf = NULL; ssize_t len = 0; int rv = 0; if (!msg) { ebpf_log("rtnetlink_recv error: NULL parameter\n"); rv = -1; goto out; } iov = msg->msg_iov; iov->iov_base = NULL; iov->iov_len = 0; do { len = recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC); } while (len < 0 && (errno == EINTR || errno == EAGAIN)); if (len <= 0) { ebpf_log("netlink recv error \n"); rv = len; goto out; } if (len < 32768) { len = 32768; } buf = malloc(len); if (!buf) { ebpf_log("malloc error \n"); rv = -ENOMEM; goto out; } iov->iov_base = buf; iov->iov_len = len; do { len = recvmsg(fd, msg, 0); } while (len < 0 && (errno == EINTR || errno == EAGAIN)); if (len <= 0) { free(buf); ebpf_log("netlink recv error \n"); rv = len; goto out; } if (answer) { *answer = buf; } else { free(buf); } rv = len; out: return rv; } static int rtnetlink_send(struct rtnetlink_handle *rtnl, struct nlmsghdr *nlmsg) { struct iovec iov = {.iov_base = nlmsg, .iov_len = nlmsg->nlmsg_len}; struct iovec riov = {0}; struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), .msg_iov = &iov, .msg_iovlen = 1, }; unsigned int seq = 0; struct nlmsghdr *h = NULL; ssize_t recv_len = 0; char *buf = NULL; int rv = -1; if (!rtnl || !nlmsg) { ebpf_log("rtnetlink_send error: NULL parameter\n"); rv = -1; goto out; } h = iov.iov_base; h->nlmsg_seq = seq = ++rtnl->seq; /* request acknowledgement (NLMSG_ERROR packet) */ h->nlmsg_flags |= NLM_F_ACK; if (sendmsg(rtnl->fd, &msg, 0) < 0) { ebpf_log("failure talking to rtnetlink\n"); rv = -1; goto out; } /* switch to response iov */ memset(&riov, 0, sizeof(riov)); msg.msg_iov = &riov; msg.msg_iovlen = 1; recv_len = rtnetlink_recv(rtnl->fd, &msg, &buf); if (recv_len <= 0) { rv = -1; goto out; } if (msg.msg_namelen != sizeof(nladdr)) { ebpf_log("sender addr length == %d\n", msg.msg_namelen); rv = -1; goto out; } for (h = (struct nlmsghdr *)buf; recv_len >= (ssize_t)sizeof(*h);) { ssize_t len = h->nlmsg_len; ssize_t l = len - sizeof(*h); if (l < 0 || len > recv_len) { if (msg.msg_flags & MSG_TRUNC) { ebpf_log("truncated message\n"); rv = -1; goto out; } ebpf_log("bad message length: len=%d\n", len); rv = -1; goto out; } if (0 != nladdr.nl_pid || h->nlmsg_pid != rtnl->local.nl_pid || h->nlmsg_seq > seq || h->nlmsg_seq < seq - 1) { /* skip this message. */ recv_len -= NLMSG_ALIGN(len); h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); continue; } /* Parse acknowledgment packet */ if (h->nlmsg_type == NLMSG_ERROR) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); int error = err->error; if (l < (ssize_t)sizeof(struct nlmsgerr)) { ebpf_log("error truncated\n"); rv = -1; goto out; } if (error) { rtnetlink_send_error(err); } rv = error ? -1 : 0; goto out; } ebpf_log("bad netlink reply\n"); recv_len -= NLMSG_ALIGN(len); h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); } if (msg.msg_flags & MSG_TRUNC) { ebpf_log("message truncated\n"); rv = -1; goto out; } if (recv_len) { ebpf_log("uneven reply, remained: %d\n", recv_len); rv = -1; goto out; } out: if (buf) { free(buf); } return rv; } static int netlink_qdisc(int cmd, unsigned int flags, const char *ifname) { int rv = -1; struct rtnetlink_handle qdisc_rth = {.fd = -1}; struct netlink_msg qdisc_req = { .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), .n.nlmsg_flags = NLM_F_REQUEST | flags, .n.nlmsg_type = cmd, .t.tcm_family = AF_UNSPEC, }; if (!ifname) { ebpf_log("netlink_qdisc error: NULL parameter\n"); rv = -1; goto out; } if (rtnetlink_open(&qdisc_rth) < 0) { ebpf_log("failed to open netlink\n"); rv = -1; goto out; } qdisc_req.t.tcm_parent = TC_H_CLSACT; qdisc_req.t.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); attr_put(&qdisc_req.n, sizeof(qdisc_req), TCA_KIND, "clsact", strlen("clsact") + 1); qdisc_req.t.tcm_ifindex = if_nametoindex(ifname); if (0 == qdisc_req.t.tcm_ifindex) { ebpf_log("failed to find device %s\n", ifname); rv = -1; goto out; } /* talk to netlink */ if (rtnetlink_send(&qdisc_rth, &qdisc_req.n) < 0) { ebpf_log("error talking to the kernel (rtnetlink_send)\n"); rv = -1; goto out; } rv = 0; out: rtnetlink_close(&qdisc_rth); return rv; } int netlink_qdisc_add(const char *ifname) { return netlink_qdisc(RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE, ifname); } int netlink_qdisc_del(const char *ifname) { return netlink_qdisc(RTM_DELQDISC, 0, ifname); } int netlink_filter_add_begin(struct netlink_ctx *ctx, const char *ifname) { int rv = -1; __u32 protocol = 0; struct nlmsghdr *n = NULL; if (!ctx) { ebpf_log("netlink_filter_add_begin error: NULL parameter\n"); rv = -1; goto out; } /* Initialize context for filter add */ memset(ctx, 0, sizeof(*ctx)); ctx->filter_rth.fd = -1; ctx->msg.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); ctx->msg.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; ctx->msg.n.nlmsg_type = RTM_NEWTFILTER; ctx->msg.t.tcm_family = AF_UNSPEC; if (rtnetlink_open(&ctx->filter_rth) < 0) { ebpf_log("failed to open netlink\n"); rtnetlink_close(&ctx->filter_rth); rv = -1; goto out; } protocol = htons(ETH_P_ALL); ctx->msg.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); ctx->msg.t.tcm_info = TC_H_MAKE(0 << 16, protocol); attr_put(&ctx->msg.n, sizeof(ctx->msg), TCA_KIND, "bpf", strlen("bpf") + 1); ctx->msg.t.tcm_ifindex = if_nametoindex(ifname); if (0 == ctx->msg.t.tcm_ifindex) { ebpf_log("failed to find device %s\n", ifname); rtnetlink_close(&ctx->filter_rth); rv = -1; goto out; } n = &ctx->msg.n; ctx->tail = NLMSG_TAIL(n); attr_put(n, MAX_MSG, TCA_OPTIONS, NULL, 0); rv = 0; out: return rv; } int netlink_filter_add_end(int fd, struct netlink_ctx *ctx, const char *ebpf_obj_filename) { struct nlmsghdr *nl = NULL; char buf[128]; int rv = -1; int len = 0; if (!ctx || !ebpf_obj_filename) { ebpf_log("netlink_filter_add_end error: NULL parameter\n"); rv = -1; goto out; } nl = &ctx->msg.n; memset(buf, 0, sizeof(buf)); len = snprintf(buf, sizeof(buf), "%s:[.text]", ebpf_obj_filename); if (len < 0 || len >= (int)sizeof(buf)) { ebpf_log("netlink_filter_add_end error: filename too long\n"); rv = -1; goto out; } attr_put_32(nl, MAX_MSG, TCA_BPF_FD, fd); attr_put_str(nl, MAX_MSG, TCA_BPF_NAME, buf); attr_put_32(nl, MAX_MSG, TCA_BPF_FLAGS, TCA_BPF_FLAG_ACT_DIRECT); /* XXX MISSING NLMSG_ALIGN */ ctx->tail->rta_len = (((char *)nl) + nl->nlmsg_len) - (char *)ctx->tail; /* talk to netlink */ if (rtnetlink_send(&ctx->filter_rth, &ctx->msg.n) < 0) { ebpf_log("error talking to the kernel (rtnetlink_send)\n"); rv = -1; goto out; } rv = 0; out: if (ctx) { rtnetlink_close(&ctx->filter_rth); } return rv; }