int handle_egress()

in pkg/ebpf/c/tc.v6egress.bpf.c [136:290]


/*
 * handle_egress - per-packet policy decision for IPv6 traffic.
 *
 * Parses the Ethernet and IPv6 headers of @skb, extracts the L4 ports for
 * TCP/UDP/SCTP, and decides whether to pass or drop the packet based on the
 * pod state stored in egress_pod_state_map and the entries in
 * aws_conntrack_map. Verdicts on some paths are reported through the
 * policy_events ring buffer (verdict 0 on drop, 1 on allow).
 *
 * @skb: packet context; only skb->data and skb->data_end are read here.
 *
 * Returns BPF_OK for frames that are not IPv6, are truncated, are ICMPv6, or
 * are allowed; returns BPF_DROP when the pod state entry is missing, the pod
 * is in DEFAULT_DENY, or evaluateByLookUp() yields BPF_DROP.
 *
 * NOTE(review): presumably attached as a tc classifier on the egress path
 * (per the function and file name) — confirm against the loader.
 */
int handle_egress(struct __sk_buff *skb)
{
	
	struct keystruct trie_key;
	struct lpm_trie_val *trie_val;
	__u16 l4_src_port = 0;
	__u16 l4_dst_port = 0;
	struct conntrack_key flow_key;
	struct conntrack_value *flow_val;
	struct conntrack_key reverse_flow_key;
	struct conntrack_value *reverse_flow_val;
	struct data_t evt = {};
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;

	// Zero both keys up front so every byte not explicitly assigned below
	// is 0 when the keys are handed to the map helpers.
 	__builtin_memset(&flow_key, 0, sizeof(flow_key));
	__builtin_memset(&reverse_flow_key, 0, sizeof(reverse_flow_key));

	// Bounds-check the Ethernet header before reading it; truncated frames
	// are passed through untouched.
	struct ethhdr *ether = data;
	if (data + sizeof(*ether) > data_end) {
		return BPF_OK;
	}

	// 0xdd86 is ETH_P_IPV6 (0x86DD) with its two bytes swapped, i.e. the
	// value of htons(ETH_P_IPV6) on a little-endian host. h_proto is in
	// network byte order, so this literal assumes a little-endian target.
	if (ether->h_proto == 0xdd86) {
		data += sizeof(*ether);
		struct ipv6hdr *ip = data;
		// All three L4 views point at the byte right after the fixed IPv6
		// header; they are only dereferenced after the per-protocol bounds
		// check in the switch below. Extension headers are not walked.
		struct tcphdr *l4_tcp_hdr = data + sizeof(struct ipv6hdr);
		struct udphdr *l4_udp_hdr = data + sizeof(struct ipv6hdr);
		struct sctphdr *l4_sctp_hdr = data + sizeof(struct ipv6hdr);

		// Bounds-check the fixed IPv6 header before any field access.
		if (data + sizeof(*ip) > data_end) {
			return BPF_OK;
		}

		if (ip->version != 6) {
			return BPF_OK;
		}

		// Pass every ICMPv6 packet (next header 58) without policy
		// evaluation. This covers Neighbor Discovery, but the check is not
		// limited to ND message types.
        if (ip->nexthdr == 58) {
        	return BPF_OK;
        }
   
		// Extract the L4 ports. Each expression swaps the two bytes of the
		// 16-bit on-wire port value. Protocols other than TCP/UDP/SCTP fall
		// through the switch with both ports left at 0.
		switch (ip->nexthdr) {
			case IPPROTO_TCP:
				if (data + sizeof(*ip) + sizeof(*l4_tcp_hdr) > data_end) {
					return BPF_OK;
				}
				l4_src_port = (((((unsigned short)(l4_tcp_hdr->source) & 0xFF)) << 8) | (((unsigned short)(l4_tcp_hdr->source) & 0xFF00) >> 8));
				l4_dst_port = (((((unsigned short)(l4_tcp_hdr->dest) & 0xFF)) << 8) | (((unsigned short)(l4_tcp_hdr->dest) & 0xFF00) >> 8));
				break;
			case IPPROTO_UDP:
				if (data + sizeof(*ip) + sizeof(*l4_udp_hdr) > data_end) {
					return BPF_OK;
				}
				l4_src_port = (((((unsigned short)(l4_udp_hdr->source) & 0xFF)) << 8) | (((unsigned short)(l4_udp_hdr->source) & 0xFF00) >> 8));
				l4_dst_port = (((((unsigned short)(l4_udp_hdr->dest) & 0xFF)) << 8) | (((unsigned short)(l4_udp_hdr->dest) & 0xFF00) >> 8));
				break;
			case IPPROTO_SCTP:
				if (data + sizeof(*ip) + sizeof(*l4_sctp_hdr) > data_end) {
					return BPF_OK;
				}
				l4_src_port = (((((unsigned short)(l4_sctp_hdr->source) & 0xFF)) << 8) | (((unsigned short)(l4_sctp_hdr->source) & 0xFF00) >> 8));
				l4_dst_port = (((((unsigned short)(l4_sctp_hdr->dest) & 0xFF)) << 8) | (((unsigned short)(l4_sctp_hdr->dest) & 0xFF00) >> 8));
				break;
		}

		// Full-length (/128) key built from the destination address.
		trie_key.prefix_len = 128;
			
		// Copy the 16 destination-address bytes into the lookup key.
		for (int i=0; i<16; i++){
			trie_key.ip[i] = ip->daddr.in6_u.u6_addr8[i];
		}

		// Build the conntrack key for this flow. owner_addr is set to the
		// packet's source address.
		flow_key.saddr = ip->saddr;
		flow_key.daddr = ip->daddr;
		flow_key.src_port = l4_src_port;
		flow_key.dest_port = l4_dst_port;
		flow_key.protocol = ip->nexthdr;
		flow_key.owner_addr = ip->saddr;

		// Pre-fill the event record; only verdict is set later, per path.
		evt.src_ip = ip->saddr;
		evt.dest_ip = ip->daddr;	
		evt.src_port = flow_key.src_port;
		evt.dest_port = flow_key.dest_port;
		evt.protocol = flow_key.protocol;
		
		// The pod state is read from index 0 of egress_pod_state_map.
		__u32 key = 0; 
		struct pod_state *pst = bpf_map_lookup_elem(&egress_pod_state_map, &key);
		// There should always be an entry in pod_state_map. pst returned in above line should never be null.
		// If it is missing anyway, report verdict 0 and drop.
		if (pst == NULL) {
			evt.verdict = 0;
			bpf_ringbuf_output(&policy_events, &evt, sizeof(evt), 0);
			return BPF_DROP;
		}

		// DEFAULT_DENY drops before any conntrack lookup, so existing flows
		// and response packets are dropped too. Report verdict 0.
		if (pst->state == DEFAULT_DENY) {
			evt.verdict = 0;
			bpf_ringbuf_output(&policy_events, &evt, sizeof(evt), 0);
			return BPF_DROP;
		}

		// Check whether this exact flow already has a conntrack entry.
		flow_val = (struct conntrack_value *)bpf_map_lookup_elem(&aws_conntrack_map, &flow_key);
		if (flow_val != NULL) { 
			// Entry label and current pod state agree: allow without
			// re-evaluating.
			if (flow_val->val == CT_VAL_DEFAULT_ALLOW && pst->state == DEFAULT_ALLOW) {
				return BPF_OK;
			}
			if (flow_val->val == CT_VAL_POLICIES_APPLIED && pst->state == POLICIES_APPLIED) {
				return BPF_OK;
			}
			// Entry was recorded as "policies applied" but the pod is now in
			// "default allow": relabel the entry and allow.
			if (flow_val->val == CT_VAL_POLICIES_APPLIED && pst->state == DEFAULT_ALLOW) {
				flow_val->val = CT_VAL_DEFAULT_ALLOW;
				bpf_map_update_elem(&aws_conntrack_map, &flow_key, flow_val, 0); // 0 -> BPF_ANY
				return BPF_OK;
			}
			// Entry was recorded as "default allow" but the pod has flipped
			// to "policies applied": re-evaluate the flow. On BPF_DROP the
			// stale entry is removed and the packet dropped; any other result
			// allows the packet.
			// NOTE(review): evaluateByLookUp is defined outside this view;
			// whether it relabels the entry on allow is not visible here.
			if (flow_val->val == CT_VAL_DEFAULT_ALLOW && pst->state == POLICIES_APPLIED) {
				int ret = evaluateByLookUp(trie_key, flow_key, pst, evt, ip, l4_dst_port);
				if (ret == BPF_DROP) {
					bpf_map_delete_elem(&aws_conntrack_map, &flow_key);
					return BPF_DROP;
				} 
				return BPF_OK;
			}
			// Any other label/state combination falls through to the
			// reverse-flow check below.
		}

		// Build the key for the reverse direction: addresses and ports are
		// swapped, while owner_addr stays the packet's source address.
		reverse_flow_key.saddr = ip->daddr;
		reverse_flow_key.daddr = ip->saddr;
		reverse_flow_key.src_port = l4_dst_port;
		reverse_flow_key.dest_port = l4_src_port;
		reverse_flow_key.protocol = ip->nexthdr;
		reverse_flow_key.owner_addr = ip->saddr;
			
		// A reverse entry means this is a response packet for a tracked
		// flow: allow it.
		reverse_flow_val = (struct conntrack_value *)bpf_map_lookup_elem(&aws_conntrack_map, &reverse_flow_key);
		if (reverse_flow_val != NULL) { 
			return BPF_OK;
		}

		// New flow while the pod is in "default allow": record it with the
		// default-allow label, report verdict 1, and allow.
		if (pst->state == DEFAULT_ALLOW) {
			struct conntrack_value new_flow_val = {};
			new_flow_val.val = CT_VAL_DEFAULT_ALLOW;
			bpf_map_update_elem(&aws_conntrack_map, &flow_key, &new_flow_val, 0); // 0 - BPF_ANY
			evt.verdict = 1;
			bpf_ringbuf_output(&policy_events, &evt, sizeof(evt), 0);
			return BPF_OK;
		}

		// New flow in any remaining pod state: the verdict is whatever
		// evaluateByLookUp returns.
		return evaluateByLookUp(trie_key, flow_key, pst, evt, ip, l4_dst_port);
	}
	// Non-IPv6 frames are not handled by this program and pass through.
	return BPF_OK;
}