[Devel] [PATCH RHEL7 COMMIT] net/skbuff: Don't waste memory reserves
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Apr 8 19:20:35 MSK 2019
The commit is pushed to "branch-rh7-3.10.0-957.10.1.vz7.85.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.10.1.vz7.85.12
------>
commit a6b8d2a78d837db8e7a3d8d6c15767042afd5cb2
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Mon Apr 8 19:20:32 2019 +0300
net/skbuff: Don't waste memory reserves
We were observing network performance issues due to packets
being dropped by sk_filter_trim_cap() since the 'skb' was allocated
from pfmemalloc reserves:
/*
* If the skb was allocated from pfmemalloc reserves, only
* allow SOCK_MEMALLOC sockets to use it as this socket is
* helping free memory
*/
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
return -ENOMEM;
Memalloc sockets are used for stuff like swap over NBD or NFS
and only memalloc sockets can process memalloc skbs. Since we
don't have any memalloc sockets in our setups we shouldn't have
memalloc skbs either. It simply doesn't make any sense to waste
memory reserves on skb only to drop packets later.
It appears that __dev_alloc_pages() unconditionally uses
__GFP_MEMALLOC, so unless caller added __GFP_NOMEMALLOC, the
__dev_alloc_pages() may dive into memory reserves.
Later build_skb() or __skb_fill_page_desc() sets skb->pfmemalloc = 1
so this skb always dropped by sk_filter_trim_cap().
Instead of wasting memory reserves we should simply avoid using
them in case of absence memalloc sockets in the system. Do this
by adding __GFP_MEMALLOC only when such socket is present in the
system.
https://pmc.acronis.com/browse/VSTOR-21390
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/skbuff.h | 21 +++++++++++++++++++--
include/net/sock.h | 15 ---------------
2 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 58515be85153..28a112d06ece 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2535,6 +2535,21 @@ void napi_consume_skb(struct sk_buff *skb, int budget);
void __kfree_skb_flush(void);
void __kfree_skb_defer(struct sk_buff *skb);
+#ifdef CONFIG_NET
+extern struct static_key memalloc_socks;
+static inline int sk_memalloc_socks(void)
+{
+ return static_key_false(&memalloc_socks);
+}
+#else
+
+static inline int sk_memalloc_socks(void)
+{
+ return 0;
+}
+
+#endif
+
/**
* __dev_alloc_pages - allocate page for network Rx
* @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
@@ -2555,7 +2570,9 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
* 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
* code in gfp_to_alloc_flags that should be enforcing this.
*/
- gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
+ gfp_mask |= __GFP_COLD | __GFP_COMP;
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
}
@@ -2601,7 +2618,7 @@ static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
gfp_mask |= __GFP_COLD;
- if (!(gfp_mask & __GFP_NOMEMALLOC))
+ if (sk_memalloc_socks() && !(gfp_mask & __GFP_NOMEMALLOC))
gfp_mask |= __GFP_MEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
diff --git a/include/net/sock.h b/include/net/sock.h
index 1b86b37fe884..6ac59b86b3a2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -785,21 +785,6 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
return test_bit(flag, &sk->sk_flags);
}
-#ifdef CONFIG_NET
-extern struct static_key memalloc_socks;
-static inline int sk_memalloc_socks(void)
-{
- return static_key_false(&memalloc_socks);
-}
-#else
-
-static inline int sk_memalloc_socks(void)
-{
- return 0;
-}
-
-#endif
-
static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
{
return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
More information about the Devel
mailing list