[Devel] [PATCH RHEL7 COMMIT] ms/ipv6: remove null_entry before adding default route

Vasily Averin vvs at virtuozzo.com
Thu Dec 24 07:12:08 MSK 2020


The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.3
------>
commit 886fc44d512e4a06aca0b7dbc81f3d6f511acedd
Author: Wei Wang <weiwan at google.com>
Date:   Thu Dec 24 07:12:08 2020 +0300

    ms/ipv6: remove null_entry before adding default route
    
    In the current code, when creating a new fib6 table, tb6_root.leaf gets
    initialized to net->ipv6.ip6_null_entry.
    If a default route is being added with rt->rt6i_metric = 0xffffffff,
    fib6_add() will add this route after net->ipv6.ip6_null_entry. As
    null_entry is shared, it could cause problems.
    
    In order to fix it, set fn->leaf to NULL before calling
    fib6_add_rt2node() when trying to add the first default route.
    And reset fn->leaf to null_entry when adding fails or when deleting the
    last default route.
    
    syzkaller reported the following issue which is fixed by this commit:
    
    WARNING: suspicious RCU usage
    4.15.0-rc5+ #171 Not tainted
    other info that might help us debug this:
    
    rcu_scheduler_active = 2, debug_locks = 1
    4 locks held by swapper/0/0:
     #0:  ((&net->ipv6.ip6_fib_timer)){+.-.}, at: [<00000000d43f631b>] lockdep_
     #0:  ((&net->ipv6.ip6_fib_timer)){+.-.}, at: [<00000000d43f631b>] call_tim
     #1:  (&(&net->ipv6.fib6_gc_lock)->rlock){+.-.}, at: [<000000002ff9d65c>] s
     #1:  (&(&net->ipv6.fib6_gc_lock)->rlock){+.-.}, at: [<000000002ff9d65c>] f
     #2:  (rcu_read_lock){....}, at: [<0000000091db762d>] __fib6_clean_all+0x0/
     #3:  (&(&tb->tb6_lock)->rlock){+.-.}, at: [<000000009e503581>] spin_lock_b
     #3:  (&(&tb->tb6_lock)->rlock){+.-.}, at: [<000000009e503581>] __fib6_clea
    
    stack backtrace:
    CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc5+ #171
    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Goo
    Call Trace:
     <IRQ>
     __dump_stack lib/dump_stack.c:17 [inline]
     dump_stack+0x194/0x257 lib/dump_stack.c:53
     lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4585
     fib6_del+0xcaa/0x11b0 net/ipv6/ip6_fib.c:1701
     fib6_clean_node+0x3aa/0x4f0 net/ipv6/ip6_fib.c:1892
     fib6_walk_continue+0x46c/0x8a0 net/ipv6/ip6_fib.c:1815
     fib6_walk+0x91/0xf0 net/ipv6/ip6_fib.c:1863
     fib6_clean_tree+0x1e6/0x340 net/ipv6/ip6_fib.c:1933
     __fib6_clean_all+0x1f4/0x3a0 net/ipv6/ip6_fib.c:1949
     fib6_clean_all net/ipv6/ip6_fib.c:1960 [inline]
     fib6_run_gc+0x16b/0x3c0 net/ipv6/ip6_fib.c:2016
     fib6_gc_timer_cb+0x20/0x30 net/ipv6/ip6_fib.c:2033
     call_timer_fn+0x228/0x820 kernel/time/timer.c:1320
     expire_timers kernel/time/timer.c:1357 [inline]
     __run_timers+0x7ee/0xb70 kernel/time/timer.c:1660
     run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686
     __do_softirq+0x2d7/0xb85 kernel/softirq.c:285
     invoke_softirq kernel/softirq.c:365 [inline]
     irq_exit+0x1cc/0x200 kernel/softirq.c:405
     exiting_irq arch/x86/include/asm/apic.h:540 [inline]
     smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052
     apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:904
     </IRQ>
    
    Reported-by: syzbot <syzkaller at googlegroups.com>
    Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table")
    Signed-off-by: Wei Wang <weiwan at google.com>
    Acked-by: Martin KaFai Lau <kafai at fb.com>
    Signed-off-by: David S. Miller <davem at davemloft.net>
    (cherry-picked from commit 4512c43eac7e007d982e7ea45152ea6f3f4d1921)
    VvS: backported, removed rcu access to fn->leaf,
         added net argument to fib6_add_1()
    https://jira.sw.ru/browse/PSBM-123045
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
 net/ipv6/ip6_fib.c | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index fb25b0c..d47ee63 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -558,7 +558,8 @@ out:
  *	node.
  */
 
-static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
+static struct fib6_node * fib6_add_1(struct net *net,
+				     struct fib6_node *root, void *addr,
 				     int addrlen, int plen,
 				     int offset, int allow_create,
 				     int replace_required)
@@ -603,6 +604,10 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				rt6_release(fn->leaf);
 				fn->leaf = NULL;
+			/* remove null_entry in the root node */
+			} else if (fn->fn_flags & RTN_TL_ROOT &&
+				   fn->leaf == net->ipv6.ip6_null_entry) {
+				fn->leaf = NULL;
 			}
 
 			fn->fn_sernum = sernum;
@@ -1072,7 +1077,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	if (!allow_create && !replace_required)
 		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
 
-	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
+	fn = fib6_add_1(info->nl_net, 
+			root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
 			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
 			allow_create, replace_required);
 	if (IS_ERR(fn)) {
@@ -1112,7 +1118,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 			/* Now add the first leaf node to new subtree */
 
-			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+			sn = fib6_add_1(info->nl_net, sfn, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr), rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
 					allow_create, replace_required);
@@ -1131,7 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			sfn->parent = fn;
 			fn->subtree = sfn;
 		} else {
-			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+			sn = fib6_add_1(info->nl_net,
+					fn->subtree, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr), rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
 					allow_create, replace_required);
@@ -1185,13 +1192,16 @@ out:
 	return err;
 
 failure:
-	/* fn->leaf could be NULL if fn is an intermediate node and we
-	 * failed to add the new route to it in both subtree creation
-	 * failure and fib6_add_rt2node() failure case.
-	 * In both cases, fib6_repair_tree() should be called to fix
-	 * fn->leaf.
+	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+	 * 1. fn is an intermediate node and we failed to add the new
+	 * route to it in both subtree creation failure and fib6_add_rt2node()
+	 * failure case.
+	 * 2. fn is the root node in the table and we fail to add the first
+	 * default route to it.
 	 */
-	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+	if (fn &&
+	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+	     (fn->fn_flags & RTN_TL_ROOT && fn->leaf == NULL)))
 		fib6_repair_tree(info->nl_net, fn);
 	if (!(rt->dst.flags & DST_NOCACHE))
 		dst_free(&rt->dst);
@@ -1394,6 +1404,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 	struct fib6_walker *w;
 	int iter = 0;
 
+	/* Set fn->leaf to null_entry for root node. */
+	if (fn->fn_flags & RTN_TL_ROOT) {
+		fn->leaf = net->ipv6.ip6_null_entry;
+		return fn;
+	}
+
 	for (;;) {
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
@@ -1530,10 +1546,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 
 	rt->dst.rt6_next = NULL;
 
-	/* If it was last route, expunge its radix tree node */
+	/* If it was last route, call fib6_repair_tree() to:
+	 * 1. For root node, put back null_entry as how the table was created.
+	 * 2. For other nodes, expunge its radix tree node.
+	 */
 	if (!fn->leaf) {
-		fn->fn_flags &= ~RTN_RTINFO;
-		net->ipv6.rt6_stats->fib_route_nodes--;
+		if (!(fn->fn_flags & RTN_TL_ROOT)) {
+			fn->fn_flags &= ~RTN_RTINFO;
+			net->ipv6.rt6_stats->fib_route_nodes--;
+		}
 		fn = fib6_repair_tree(net, fn);
 	}
 


More information about the Devel mailing list