in platform/barefoot/bfn-modules/modules/bf_tun.c [1689:1968]
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
void *msg_control, struct iov_iter *from,
int noblock, bool more)
{
struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
struct sk_buff *skb;
size_t total_len = iov_iter_count(from);
size_t len = total_len, align = tun->align, linear;
struct virtio_net_hdr gso = { 0 };
struct tun_pcpu_stats *stats;
int good_linear;
int copylen;
bool zerocopy = false;
int err;
u32 rxhash = 0;
int skb_xdp = 1;
bool frags = tun_napi_frags_enabled(tfile);
if (!(tun->flags & IFF_NO_PI)) {
if (len < sizeof(pi))
return -EINVAL;
len -= sizeof(pi);
if (!copy_from_iter_full(&pi, sizeof(pi), from))
return -EFAULT;
}
if (tun->flags & IFF_VNET_HDR) {
int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
if (len < vnet_hdr_sz)
return -EINVAL;
len -= vnet_hdr_sz;
if (!copy_from_iter_full(&gso, sizeof(gso), from))
return -EFAULT;
if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
if (tun16_to_cpu(tun, gso.hdr_len) > len)
return -EINVAL;
iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
}
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
align += NET_IP_ALIGN;
if (unlikely(len < ETH_HLEN ||
(gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
return -EINVAL;
}
good_linear = SKB_MAX_HEAD(align);
if (msg_control) {
struct iov_iter i = *from;
/* There are 256 bytes to be copied in skb, so there is
* enough room for skb expand head in case it is used.
* The rest of the buffer is mapped from userspace.
*/
copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
if (copylen > good_linear)
copylen = good_linear;
linear = copylen;
iov_iter_advance(&i, copylen);
if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
zerocopy = true;
}
if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
/* For the packet that is not easy to be processed
* (e.g gso or jumbo packet), we will do it at after
* skb was created with generic XDP routine.
*/
skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
if (IS_ERR(skb)) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
return PTR_ERR(skb);
}
if (!skb)
return total_len;
} else {
if (!zerocopy) {
copylen = len;
if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
linear = good_linear;
else
linear = tun16_to_cpu(tun, gso.hdr_len);
}
if (frags) {
mutex_lock(&tfile->napi_mutex);
skb = tun_napi_alloc_frags(tfile, copylen, from);
/* tun_napi_alloc_frags() enforces a layout for the skb.
* If zerocopy is enabled, then this layout will be
* overwritten by zerocopy_sg_from_iter().
*/
zerocopy = false;
} else {
skb = tun_alloc_skb(tfile, align, copylen, linear,
noblock);
}
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EAGAIN)
this_cpu_inc(tun->pcpu_stats->rx_dropped);
if (frags)
mutex_unlock(&tfile->napi_mutex);
return PTR_ERR(skb);
}
if (zerocopy)
err = zerocopy_sg_from_iter(skb, from);
else
err = skb_copy_datagram_from_iter(skb, 0, from, len);
if (err) {
err = -EFAULT;
drop:
this_cpu_inc(tun->pcpu_stats->rx_dropped);
kfree_skb(skb);
if (frags) {
tfile->napi.skb = NULL;
mutex_unlock(&tfile->napi_mutex);
}
return err;
}
}
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
kfree_skb(skb);
if (frags) {
tfile->napi.skb = NULL;
mutex_unlock(&tfile->napi_mutex);
}
return -EINVAL;
}
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
if (tun->flags & IFF_NO_PI) {
u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
switch (ip_version) {
case 4:
pi.proto = htons(ETH_P_IP);
break;
case 6:
pi.proto = htons(ETH_P_IPV6);
break;
default:
this_cpu_inc(tun->pcpu_stats->rx_dropped);
kfree_skb(skb);
return -EINVAL;
}
}
skb_reset_mac_header(skb);
skb->protocol = pi.proto;
skb->dev = tun->dev;
break;
case IFF_TAP:
if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
err = -ENOMEM;
goto drop;
}
skb->protocol = eth_type_trans(skb, tun->dev);
break;
}
/* copy skb_ubuf_info for callback when skb has no error */
if (zerocopy) {
skb_shinfo(skb)->destructor_arg = msg_control;
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
} else if (msg_control) {
struct ubuf_info *uarg = msg_control;
uarg->callback(uarg, false);
}
skb_reset_network_header(skb);
skb_probe_transport_header(skb);
skb_record_rx_queue(skb, tfile->queue_index);
if (skb_xdp) {
struct bpf_prog *xdp_prog;
int ret;
local_bh_disable();
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
ret = do_xdp_generic(xdp_prog, skb);
if (ret != XDP_PASS) {
rcu_read_unlock();
local_bh_enable();
if (frags) {
tfile->napi.skb = NULL;
mutex_unlock(&tfile->napi_mutex);
}
return total_len;
}
}
rcu_read_unlock();
local_bh_enable();
}
/* Compute the costly rx hash only if needed for flow updates.
* We may get a very small possibility of OOO during switching, not
* worth to optimize.
*/
if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
!tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
rcu_read_lock();
if (unlikely(!(tun->dev->flags & IFF_UP))) {
err = -EIO;
rcu_read_unlock();
goto drop;
}
if (frags) {
u32 headlen;
/* Exercise flow dissector code path. */
skb_push(skb, ETH_HLEN);
headlen = eth_get_headlen(tun->dev, skb->data,
skb_headlen(skb));
if (unlikely(headlen > skb_headlen(skb))) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
napi_free_frags(&tfile->napi);
rcu_read_unlock();
mutex_unlock(&tfile->napi_mutex);
WARN_ON(1);
return -ENOMEM;
}
local_bh_disable();
napi_gro_frags(&tfile->napi);
local_bh_enable();
mutex_unlock(&tfile->napi_mutex);
} else if (tfile->napi_enabled) {
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
int queue_len;
spin_lock_bh(&queue->lock);
__skb_queue_tail(queue, skb);
queue_len = skb_queue_len(queue);
spin_unlock(&queue->lock);
if (!more || queue_len > NAPI_POLL_WEIGHT)
napi_schedule(&tfile->napi);
local_bh_enable();
} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
tun_rx_batched(tun, tfile, skb, more);
} else {
netif_rx_ni(skb);
}
rcu_read_unlock();
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
u64_stats_inc(&stats->rx_packets);
u64_stats_add(&stats->rx_bytes, len);
u64_stats_update_end(&stats->syncp);
put_cpu_ptr(stats);
if (rxhash)
tun_flow_update(tun, rxhash, tfile);
return total_len;
}