NetlinkMessage NetlinkMessage::TC()

in katran/lib/Netlink.cpp [49:191]


NetlinkMessage NetlinkMessage::TC(
    unsigned seq,
    int cmd,
    unsigned flags,
    uint32_t priority,
    int prog_fd,
    unsigned ifindex,
    const std::string& bpf_name,
    int direction) {
  /**
    Builds a netlink request that manipulates a tc BPF classifier (filter)
    on the interface `ifindex`, and returns it as a NetlinkMessage whose
    buffer is trimmed to the exact length of the serialized message.

    Layout of the message, in the order it is written below:

      netlink header (struct nlmsghdr)
        - type   : `cmd` as passed by the caller
        - flags  : NLM_F_REQUEST | NLM_F_ACK | `flags`
        - seq    : `seq` as passed by the caller
      tc header (struct tcmsg)
        - family  : AF_UNSPEC
        - ifindex : `ifindex`
        - parent  : `direction`
        - tcm_info: `priority` in the upper 16 bits, protocol in the lower
                    16 bits. Protocol is htons(ETH_P_ALL) only when `cmd`
                    is RTM_NEWTFILTER and `flags` contains NLM_F_CREATE;
                    otherwise it is 0.
      TCA_KIND            : kBpfKind (defined elsewhere in this file; the
                            original notes say this is "bpf")
      TCA_OPTIONS (nest)
        TCA_BPF_FD        : `prog_fd`
        TCA_BPF_FLAGS     : TCA_BPF_FLAG_ACT_DIRECT ("direct action")
        TCA_BPF_NAME      : `bpf_name`, including its terminating NUL
        TCA_BPF_ACT (nest)
          TCA_BPF_PRIO_1 (nest)
            TCA_ACT_KIND  : kTcActKind (defined elsewhere in this file; the
                            original notes say this is "gact")
            TCA_ACT_OPTIONS (nest)
              TCA_GACT_PARMS : zeroed struct tc_gact with action=TC_ACT_OK

    Per the original author's notes, the gact default action is expected
    to be reached only when the BPF program returns TC_ACT_PIPE and no
    other filter follows it.

    NOTE(review): the libmnl put helpers used here do not bounds-check the
    destination; this relies on buf_ having been sized large enough before
    this function runs (presumably by the NetlinkMessage constructor --
    confirm).
  */

  NetlinkMessage msg;

  // --- netlink header -----------------------------------------------------
  struct nlmsghdr* hdr = mnl_nlmsg_put_header(msg.buf_.data());
  hdr->nlmsg_type = cmd;
  hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
  hdr->nlmsg_seq = seq;

  // --- tc header ----------------------------------------------------------
  // The protocol half of tcm_info is only populated when a filter is being
  // created; every other request leaves it at zero.
  const bool creatingFilter =
      (cmd == RTM_NEWTFILTER) && ((flags & NLM_F_CREATE) != 0);
  const uint32_t protocol = creatingFilter ? htons(ETH_P_ALL) : 0;

  auto* tcHdr = static_cast<struct tcmsg*>(
      mnl_nlmsg_put_extra_header(hdr, sizeof(struct tcmsg)));
  tcHdr->tcm_family = AF_UNSPEC;
  tcHdr->tcm_ifindex = ifindex;
  tcHdr->tcm_parent = direction;
  tcHdr->tcm_info = TC_H_MAKE(priority << 16, protocol);

  // --- attributes ---------------------------------------------------------
  // Nests are opened outermost-first and must be closed in the reverse
  // order, since each mnl_attr_nest_end() fixes up the length of its nest
  // from the current end of the message.
  mnl_attr_put(hdr, TCA_KIND, kBpfKind.size(), kBpfKind.data());

  struct nlattr* optionsNest = mnl_attr_nest_start(hdr, TCA_OPTIONS);

  const unsigned int directActionFlags = TCA_BPF_FLAG_ACT_DIRECT;
  mnl_attr_put_u32(hdr, ::TCA_BPF_FD, prog_fd);
  mnl_attr_put_u32(hdr, ::TCA_BPF_FLAGS, directActionFlags);
  // +1 so that the terminating NUL of the name is part of the payload.
  mnl_attr_put(hdr, ::TCA_BPF_NAME, bpf_name.size() + 1, bpf_name.c_str());

  struct nlattr* actNest = mnl_attr_nest_start(hdr, ::TCA_BPF_ACT);
  struct nlattr* prioNest = mnl_attr_nest_start(hdr, TCA_BPF_PRIO_1);
  mnl_attr_put(hdr, ::TCA_ACT_KIND, kTcActKind.size(), kTcActKind.data());

  struct nlattr* actOptionsNest = mnl_attr_nest_start(hdr, ::TCA_ACT_OPTIONS);
  struct tc_gact defaultAction;
  memset(&defaultAction, 0, sizeof(defaultAction));
  defaultAction.action = TC_ACT_OK;
  mnl_attr_put(
      hdr, ::TCA_GACT_PARMS, sizeof(defaultAction), &defaultAction);

  mnl_attr_nest_end(hdr, actOptionsNest);
  mnl_attr_nest_end(hdr, prioNest);
  mnl_attr_nest_end(hdr, actNest);
  mnl_attr_nest_end(hdr, optionsNest);

  // Drop the unused tail so the buffer holds exactly the serialized message.
  msg.buf_.resize(hdr->nlmsg_len);
  return msg;
}