[Devel] [PATCH 29/47] net: tun: track dropped skb via kfree_skb_reason()

Nikolay Borisov nikolay.borisov at virtuozzo.com
Tue Jan 3 18:39:25 MSK 2023


From: Dongli Zhang <dongli.zhang at oracle.com>

The TUN can be used as vhost-net backend. E.g, the tun_net_xmit() is the
interface to forward the skb from TUN to vhost-net/virtio-net.

However, there are many "goto drop" in the TUN driver. Therefore, the
kfree_skb_reason() is involved at each "goto drop" to help userspace
ftrace/ebpf to track the reason for the loss of packets.

The below reasons are introduced:

- SKB_DROP_REASON_DEV_READY
- SKB_DROP_REASON_NOMEM
- SKB_DROP_REASON_HDR_TRUNC
- SKB_DROP_REASON_TAP_FILTER
- SKB_DROP_REASON_TAP_TXFILTER

Cc: Joao Martins <joao.m.martins at oracle.com>
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
Signed-off-by: David S. Miller <davem at davemloft.net>
Acked-by: Nikolay Borisov <nborisov at suse.com>
Signed-off-by: Nikolay Borisov <nikolay.borisov at virtuozzo.com>
---
 drivers/net/tun.c          | 38 ++++++++++++++++++++++++++++++--------
 include/linux/skbuff.h     | 18 ++++++++++++++++++
 include/trace/events/skb.h |  5 +++++
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 438f790ddc72..c3a459b4439b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1009,6 +1009,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	enum skb_drop_reason drop_reason;
 	int txq = skb->queue_mapping;
 	struct netdev_queue *queue;
 	struct tun_file *tfile;
@@ -1018,8 +1019,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	tfile = rcu_dereference(tun->tfiles[txq]);
 
 	/* Drop packet if interface is not attached */
-	if (!tfile)
+	if (!tfile) {
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
+	}
 
 	if (!rcu_dereference(tun->steering_prog))
 		tun_automq_xmit(tun, skb);
@@ -1029,19 +1032,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices. */
-	if (!check_filter(&tun->txflt, skb))
+	if (!check_filter(&tun->txflt, skb)) {
+		drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
 		goto drop;
+	}
 
 	if (tfile->socket.sk->sk_filter &&
-	    sk_filter(tfile->socket.sk, skb))
+	    sk_filter(tfile->socket.sk, skb)) {
+		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
 		goto drop;
+	}
 
 	len = run_ebpf_filter(tun, skb, len);
-	if (len == 0 || pskb_trim(skb, len))
+	if (len == 0) {
+		drop_reason = SKB_DROP_REASON_TAP_FILTER;
 		goto drop;
+	}
 
-	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+	if (pskb_trim(skb, len)) {
+		drop_reason = SKB_DROP_REASON_NOMEM;
 		goto drop;
+	}
+
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+		drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
+		goto drop;
+	}
 
 	skb_tx_timestamp(skb);
 
@@ -1052,8 +1068,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset_ct(skb);
 
-	if (ptr_ring_produce(&tfile->tx_ring, skb))
+	if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+		drop_reason = SKB_DROP_REASON_FULL_RING;
 		goto drop;
+	}
 
 	/* NETIF_F_LLTX requires to do our own update of trans_start */
 	queue = netdev_get_tx_queue(dev, txq);
@@ -1070,7 +1088,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 drop:
 	atomic_long_inc(&dev->tx_dropped);
 	skb_tx_error(skb);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 	rcu_read_unlock();
 	return NET_XMIT_DROP;
 }
@@ -1666,6 +1684,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	u32 rxhash = 0;
 	int skb_xdp = 1;
 	bool frags = tun_napi_frags_enabled(tfile);
+	enum skb_drop_reason drop_reason;
 
 	if (!(tun->flags & IFF_NO_PI)) {
 		if (len < sizeof(pi))
@@ -1769,9 +1788,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 		if (err) {
 			err = -EFAULT;
+			drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
 drop:
 			atomic_long_inc(&tun->dev->rx_dropped);
-			kfree_skb(skb);
+			kfree_skb_reason(skb, drop_reason);
 			if (frags) {
 				tfile->napi.skb = NULL;
 				mutex_unlock(&tfile->napi_mutex);
@@ -1818,6 +1838,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	case IFF_TAP:
 		if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
 			err = -ENOMEM;
+			drop_reason = SKB_DROP_REASON_HDR_TRUNC;
 			goto drop;
 		}
 		skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1871,6 +1892,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	if (unlikely(!(tun->dev->flags & IFF_UP))) {
 		err = -EIO;
 		rcu_read_unlock();
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
 	}
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c20966c41348..057c7b7606c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -422,7 +422,25 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_DEV_HDR,        /* device driver specific
 					 * header/metadata is invalid
 					 */
+	/* the device is not ready to xmit/recv due to any of its data
+	 * structure that is not up/ready/initialized, e.g., the IFF_UP is
+	 * not set, or driver specific tun->tfiles[txq] is not initialized
+	 */
+	SKB_DROP_REASON_DEV_READY,
 	SKB_DROP_REASON_FULL_RING,      /* ring buffer is full */
+	SKB_DROP_REASON_NOMEM,          /* error due to OOM */
+	SKB_DROP_REASON_HDR_TRUNC,      /* failed to trunc/extract the header
+					 * from networking data, e.g., failed
+					 * to pull the protocol header from
+					 * frags via pskb_may_pull()
+					 */
+	SKB_DROP_REASON_TAP_FILTER,     /* dropped by (ebpf) filter directly
+					 * attached to tun/tap, e.g., via
+					 * TUNSETFILTEREBPF
+					 */
+	SKB_DROP_REASON_TAP_TXFILTER,   /* dropped by tx filter implemented
+					 * at tun/tap, e.g., check_filter()
+					 */
 
 	SKB_DROP_REASON_MAX,
 };
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index bc9a69db4303..81d62df3f1ed 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -55,7 +55,12 @@
 	EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG)            \
 	EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT)    \
 	EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR)                    \
+	EM(SKB_DROP_REASON_DEV_READY, DEV_READY)		\
 	EM(SKB_DROP_REASON_FULL_RING, FULL_RING)                \
+	EM(SKB_DROP_REASON_NOMEM, NOMEM)                        \
+	EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC)                \
+	EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER)              \
+	EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER)          \
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
-- 
2.34.1



More information about the Devel mailing list