[Devel] [PATCH RHEL7 COMMIT] ms/epoll: fix race between ep_poll_callback(POLLFREE) and ep_free()/ep_remove()

Konstantin Khorenko khorenko at virtuozzo.com
Tue Nov 19 19:14:26 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-1062.4.2.vz7.116.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.4.2.vz7.116.3
------>
commit 2438f87db5ed07e09993409a5579439e89c0f0bc
Author: Oleg Nesterov <oleg at redhat.com>
Date:   Tue Nov 19 19:14:24 2019 +0300

    ms/epoll: fix race between ep_poll_callback(POLLFREE) and ep_free()/ep_remove()
    
    The race was introduced by me in commit 971316f0503a ("epoll:
    ep_unregister_pollwait() can use the freed pwq->whead").  I did not
    realize that nothing can protect eventpoll after ep_poll_callback() sets
    ->whead = NULL, only whead->lock can save us from the race with
    ep_free() or ep_remove().
    
    Move ->whead = NULL to the end of ep_poll_callback() and add the
    necessary barriers.
    
    TODO: cleanup the ewake/EPOLLEXCLUSIVE logic, it was confusing even
    before this patch.
    
    Hopefully this explains use-after-free reported by syzcaller:
    
            BUG: KASAN: use-after-free in debug_spin_lock_before
            ...
             _raw_spin_lock_irqsave+0x4a/0x60 kernel/locking/spinlock.c:159
             ep_poll_callback+0x29f/0xff0 fs/eventpoll.c:1148
    
    this is spin_lock(eventpoll->lock),
    
            ...
            Freed by task 17774:
            ...
             kfree+0xe8/0x2c0 mm/slub.c:3883
             ep_free+0x22c/0x2a0 fs/eventpoll.c:865
    
    Fixes: 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead")
    Reported-by: 范龙飞 <long7573 at 126.com>
    Cc: stable at vger.kernel.org
    Signed-off-by: Oleg Nesterov <oleg at redhat.com>
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-99557
    (cherry picked from commit 138e4ad67afd5c6c318b056b4d17c17f2c0ca5c0)
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 fs/eventpoll.c | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d0b1d07b0806..760459c510ec 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -523,8 +523,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
 	wait_queue_head_t *whead;
 
 	rcu_read_lock();
-	/* If it is cleared by POLLFREE, it should be rcu-safe */
-	whead = rcu_dereference(pwq->whead);
+	/*
+	 * If it is cleared by POLLFREE, it should be rcu-safe.
+	 * If we read NULL we need a barrier paired with
+	 * smp_store_release() in ep_poll_callback(), otherwise
+	 * we rely on whead->lock.
+	 */
+	whead = smp_load_acquire(&pwq->whead);
 	if (whead)
 		remove_wait_queue(whead, &pwq->wait);
 	rcu_read_unlock();
@@ -1058,17 +1063,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct eventpoll *ep = epi->ep;
 	int ewake = 0;
 
-	if ((unsigned long)key & POLLFREE) {
-		ep_pwq_from_wait(wait)->whead = NULL;
-		/*
-		 * whead = NULL above can race with ep_remove_wait_queue()
-		 * which can do another remove_wait_queue() after us, so we
-		 * can't use __remove_wait_queue(). whead->lock is held by
-		 * the caller.
-		 */
-		list_del_init(&wait->task_list);
-	}
-
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
@@ -1150,10 +1144,26 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	if (pwake)
 		ep_poll_safewake(&ep->poll_wait);
 
-	if (epi->event.events & EPOLLEXCLUSIVE)
-		return ewake;
+	if (!(epi->event.events & EPOLLEXCLUSIVE))
+		ewake = 1;
+
+	if ((unsigned long)key & POLLFREE) {
+		/*
+		 * If we race with ep_remove_wait_queue() it can miss
+		 * ->whead = NULL and do another remove_wait_queue() after
+		 * us, so we can't use __remove_wait_queue().
+		 */
+		list_del_init(&wait->task_list);
+		/*
+		 * ->whead != NULL protects us from the race with ep_free()
+		 * or ep_remove(), ep_remove_wait_queue() takes whead->lock
+		 * held by the caller. Once we nullify it, nothing protects
+		 * ep/epi or even wait.
+		 */
+		smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL);
+	}
 
-	return 1;
+	return ewake;
 }
 
 /*



More information about the Devel mailing list