[Devel] [PATCH RH7 3/3] vznetstat: Skip local skb going from !IFF_LOOPBACK interface

Kirill Tkhai ktkhai at virtuozzo.com
Tue Oct 6 16:19:51 MSK 2020


A local packet may be sent not only via 127.0.0.1. Say, if we have eth0 with 10.94.86.184,
and both server and client use this address to communicate, the @out interface will be eth0,
while in reality the packets will be transmitted through loopback inside a single net ns.
We don't want vznetstat to mark such packets, because these marks conflict with ordinary
iptables rules.

Since venet_acct_in_ops is executed at the NF_INET_LOCAL_OUT stage, dst may be NULL (I assume
this based on ip_queue_xmit(), where skb_rtable() may be NULL before routing).
We keep both checks (out->flags and this new one). It looks like we should think about
making venet_acct_in_ops an NF_INET_POST_ROUTING hook and killing the out->flags check;
dst should be non-NULL there.

Lastly, I attach one of the paths by which we reach the hook (for reviewers and history):

[76498.851548]  [<ffffffffc0dfa0ef>] venet_acct_out_hook+0xef/0x150 [ip_vznetstat]
[76498.856342]  [<ffffffff922cac08>] nf_iterate+0x98/0xe0
[76498.860179]  [<ffffffff922cacf8>] nf_hook_slow+0xa8/0x110
[76498.864098]  [<ffffffff922d9532>] __ip_local_out_sk+0x102/0x110
[76498.868028]  [<ffffffff922d8b40>] ? ip_forward_options+0x1c0/0x1c0
[76498.872302]  [<ffffffff922d955b>] ip_local_out_sk+0x1b/0x40
[76498.876054]  [<ffffffff922d9914>] ip_queue_xmit+0x144/0x3c0
[76498.880126]  [<ffffffff922f4304>] tcp_transmit_skb+0x4e4/0x9e0
[76498.883983]  [<ffffffff922f498a>] tcp_write_xmit+0x18a/0xd40
[76498.888200]  [<ffffffff922f57ce>] __tcp_push_pending_frames+0x2e/0xc0
[76498.892368]  [<ffffffff922e367c>] tcp_push+0xec/0x120
[76498.896262]  [<ffffffff922e71e2>] tcp_sendmsg+0xd2/0xc60
[76498.900257]  [<ffffffff923be1c2>] ? __schedule+0x402/0x990
[76498.904251]  [<ffffffff92313a99>] inet_sendmsg+0x69/0xb0
[76498.907751]  [<ffffffff9226864d>] sock_aio_write+0x15d/0x180
[76498.911435]  [<ffffffff91ce0525>] ? try_to_wake_up+0x255/0x470
[76498.915473]  [<ffffffff91e70086>] do_sync_write+0x96/0xe0
[76498.919402]  [<ffffffff91e70c75>] vfs_write+0x1c5/0x1f0
[76498.922945]  [<ffffffff91e7193f>] SyS_write+0x7f/0xf0
[76498.926721]  [<ffffffff91c2cd88>] ? sys_rt_sigreturn+0xe8/0x100
[76498.930878]  [<ffffffff923cbf92>] system_call_fastpath+0x25/0x2a

https://jira.sw.ru/browse/PSBM-120713

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 kernel/ve/vznetstat/ip6_vznetstat.c |    4 ++++
 kernel/ve/vznetstat/ip_vznetstat.c  |    8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/kernel/ve/vznetstat/ip6_vznetstat.c b/kernel/ve/vznetstat/ip6_vznetstat.c
index af095ee53045..1617de3cf0ad 100644
--- a/kernel/ve/vznetstat/ip6_vznetstat.c
+++ b/kernel/ve/vznetstat/ip6_vznetstat.c
@@ -21,6 +21,7 @@
 #include <linux/if.h>
 #include <linux/netdevice.h>
 #include <linux/vznetstat.h>
+#include <net/dst.h>
 
 static unsigned int
 venet_acct_in_hook_v6(const struct nf_hook_ops *hook,
@@ -46,10 +47,13 @@ venet_acct_out_hook_v6(const struct nf_hook_ops *hook,
 		    const struct net_device *out,
 		    const struct nf_hook_state *state)
 {
+	struct dst_entry *dst = skb_dst(skb);
 	int res = NF_ACCEPT;
 
 	if (out->flags & IFF_LOOPBACK)
 		goto out;
+	if (dst && (dst->dev->flags & IFF_LOOPBACK))
+		goto out;
 
 	skb->protocol = __constant_htons(ETH_P_IPV6);
 	venet_acct_classify_add_outgoing(out->nd_net->owner_ve->stat, skb);
diff --git a/kernel/ve/vznetstat/ip_vznetstat.c b/kernel/ve/vznetstat/ip_vznetstat.c
index d96065768ab3..5ea978d6dd88 100644
--- a/kernel/ve/vznetstat/ip_vznetstat.c
+++ b/kernel/ve/vznetstat/ip_vznetstat.c
@@ -77,6 +77,7 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook,
 				        const struct net_device *out,
 				        const struct nf_hook_state *state)
 {
+	struct dst_entry *dst = skb_dst(skb);
 	int res;
 
 	res = NF_ACCEPT;
@@ -84,6 +85,13 @@ static unsigned int venet_acct_out_hook(const struct nf_hook_ops *hook,
 	/* Skip loopback dev */
 	if (out->flags & IFF_LOOPBACK)
 		goto out;
+	/*
+	 * @skb is routed to loopback. Say, your eth0 has address 10.94.86.184
+	 * and ip_hdr(skb)->saddr == ip_hdr(skb)->daddr == 10.94.86.184.
+	 * Then, @out is eth0, so the above check does not catch @skb.
+	 */
+	if (dst && (dst->dev->flags & IFF_LOOPBACK))
+		goto out;
 
 	/* Paranoia */
 	if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))




More information about the Devel mailing list