[Devel] [PATCH RHEL COMMIT] ms/memcg: enable accounting for IP address and routing-related objects

Konstantin Khorenko khorenko at virtuozzo.com
Tue Sep 28 14:05:11 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 12401efc77032f1602eade58c2894458d7e02fa5
Author: Vasily Averin <vvs at virtuozzo.com>
Date:   Tue Sep 28 14:05:11 2021 +0300

    ms/memcg: enable accounting for IP address and routing-related objects
    
    A netadmin inside a container can use 'ip a a' and 'ip r a'
    to assign a large number of ipv4/ipv6 addresses and routing entries
    and force kernel to allocate megabytes of unaccounted memory
    for long-lived per-netdevice related kernel objects:
    'struct in_ifaddr', 'struct inet6_ifaddr', 'struct fib6_node',
    'struct rt6_info', 'struct fib_rules' and ip_fib caches.
    
    These objects can be manually removed, though usually they live
    in memory until their net namespace is destroyed.
    
    It makes sense to account for them to restrict the host's memory
    consumption from inside the memcg-limited container.
    
    One of such objects is the 'struct fib6_node' mostly allocated in
    net/ipv6/route.c::__ip6_ins_rt() inside the lock_bh()/unlock_bh() section:
    
     write_lock_bh(&table->tb6_lock);
     err = fib6_add(&table->tb6_root, rt, info, mxc);
     write_unlock_bh(&table->tb6_lock);
    
    In this case it is not enough to simply add SLAB_ACCOUNT to the corresponding
    kmem cache. The proper memory cgroup still cannot be found due to the
    incorrect 'in_interrupt()' check used in memcg_kmem_bypass().
    
    Obsoleted in_interrupt() does not describe real execution context properly.
    From include/linux/preempt.h:
    
     The following macros are deprecated and should not be used in new code:
     in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
    
    To verify the current execution context, the new macro should be used instead:
     in_task()      - We're in task context
    
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
    
    Signed-off-by: David S. Miller <davem at davemloft.net>
    (cherry picked from commit 6126891c6d4f6f4ef50323d2020635ee255a796e)
    https://jira.sw.ru/browse/PSBM-133990
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
 mm/memcontrol.c      | 2 +-
 net/core/fib_rules.c | 4 ++--
 net/ipv4/devinet.c   | 2 +-
 net/ipv4/fib_trie.c  | 4 ++--
 net/ipv6/addrconf.c  | 2 +-
 net/ipv6/ip6_fib.c   | 4 ++--
 net/ipv6/route.c     | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f231a39ea75..74a6dba5a023 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -969,7 +969,7 @@ static __always_inline bool memcg_kmem_bypass(void)
 		return false;
 
 	/* Memcg to charge can't be determined. */
-	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+	if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
 		return true;
 
 	return false;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a9f937975080..79df7cd9dbc1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 {
 	struct fib_rule *r;
 
-	r = kzalloc(ops->rule_size, GFP_KERNEL);
+	r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
 	if (r == NULL)
 		return -ENOMEM;
 
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto errout;
 	}
 
-	nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+	nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
 	if (!nlrule) {
 		err = -ENOMEM;
 		goto errout;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 73721a4448bd..d38124bd1b94 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
 
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
-	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
 }
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6611ed3cba97..9340905c668c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
 {
 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
 					  sizeof(struct fib_alias),
-					  0, SLAB_PANIC, NULL);
+					  0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 
 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
 					   LEAF_SIZE,
-					   0, SLAB_PANIC, NULL);
+					   0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 }
 
 struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index db00087c29e9..f710f2f6ffa0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1080,7 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
 			goto out;
 	}
 
-	ifa = kzalloc(sizeof(*ifa), gfp_flags);
+	ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
 	if (!ifa) {
 		err = -ENOBUFS;
 		goto out;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ef75c9b05f17..1bec5b22f80d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
 	int ret = -ENOMEM;
 
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
-					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   sizeof(struct fib6_node), 0,
+					   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
 					   NULL);
 	if (!fib6_node_kmem)
 		goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 772de1c240cd..6eb121ced589 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6646,7 +6646,7 @@ int __init ip6_route_init(void)
 	ret = -ENOMEM;
 	ip6_dst_ops_template.kmem_cachep =
 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
-				  SLAB_HWCACHE_ALIGN, NULL);
+				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
 	if (!ip6_dst_ops_template.kmem_cachep)
 		goto out;
 


More information about the Devel mailing list