[Devel] [PATCH RHEL7 COMMIT] tcache: Close race between tcache_invalidate_node() and tcache_attach_page()

Konstantin Khorenko khorenko at virtuozzo.com
Wed Jan 31 15:57:28 MSK 2018


rejected, will use v3 of the patch instead.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 01/31/2018 03:53 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-693.11.6.vz7.42.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-693.11.6.vz7.42.3
> ------>
> commit 5a87b5107ecc5f14aa4bcd1833b443ed4aa45b34
> Author: Kirill Tkhai <ktkhai at virtuozzo.com>
> Date:   Wed Jan 31 15:53:02 2018 +0300
>
>     tcache: Close race between tcache_invalidate_node() and tcache_attach_page()
>
>     tcache_attach_page()            tcache_invalidate_node()
>     ..                              __tcache_lookup_node()
>     ..                              __tcache_delete_node()
>     Check node->invalidated         ..
>     tcache_page_tree_insert()       ..
>     tcache_lru_add()                ..
>     ..                              tcache_invalidate_node_pages()
>     ..                                node->invalidated = true
>
>     Check nr_page to determ if there is a race and repeat
>     node pages iterations if so.
>
>     v2: Move invalidate assignment down in tcache_invalidate_node_tree().
>     synchronize_sched() to be sure all tcache_attach_page() see invalidated.
>
>     Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>     Acked-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> ---
>  mm/tcache.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/mm/tcache.c b/mm/tcache.c
> index a45af63fbd1b..1457664459bb 100644
> --- a/mm/tcache.c
> +++ b/mm/tcache.c
> @@ -676,6 +676,7 @@ static void tcache_invalidate_node(struct tcache_pool *pool,
>  	node = __tcache_lookup_node(&tree->root, key, &rb_link, &rb_parent);
>  	if (node) {
>  		tcache_hold_node(node);
> +		node->invalidated = true;
>  		__tcache_delete_node(&tree->root, node);
>  	}
>  	spin_unlock_irq(&tree->lock);
> @@ -703,6 +704,7 @@ tcache_invalidate_node_tree(struct tcache_node_tree *tree)
>  		WARN_ON(atomic_read(&node->kref.refcount) != 1);
>  		WARN_ON(node->nr_pages == 0);
>  		WARN_ON(node->invalidated);
> +		node->invalidated = true;
>
>  		tcache_hold_node(node);
>  		tcache_invalidate_node_pages(node);
> @@ -799,13 +801,16 @@ tcache_attach_page(struct tcache_node *node, pgoff_t index, struct page *page)
>  	int err = 0;
>
>  	tcache_init_page(page, node, index);
> -
> +	/*
> +	 * Disabling of irqs implies rcu_read_lock_sched().
> +	 * See tcache_invalidate_node_pages() for details.
> +	 */
>  	spin_lock_irqsave(&node->tree_lock, flags);
>  	err = tcache_page_tree_insert(node, index, page);
>  	spin_unlock(&node->tree_lock);
>  	if (!err)
>  		tcache_lru_add(node->pool, page);
> -	local_irq_restore(flags);
> +	local_irq_restore(flags); /* Implies rcu_read_lock_sched() */
>  	return err;
>  }
>
> @@ -911,17 +916,16 @@ static unsigned tcache_lookup(struct page **pages, struct tcache_node *node,
>  static noinline_for_stack void
>  tcache_invalidate_node_pages(struct tcache_node *node)
>  {
> +	bool repeat, synchronize_sched_once = true;
>  	pgoff_t indices[TCACHE_PAGEVEC_SIZE];
>  	struct page *pages[TCACHE_PAGEVEC_SIZE];
>  	pgoff_t index = 0;
>  	unsigned nr_pages;
> -	bool repeat;
>  	int i;
>
>  	/*
>  	 * First forbid new page insertions - see tcache_page_tree_replace.
>  	 */
> -	node->invalidated = true;
>  again:
>  	repeat = false;
>  	while ((nr_pages = tcache_lookup(pages, node, index,
> @@ -940,6 +944,7 @@ tcache_invalidate_node_pages(struct tcache_node *node)
>  				local_irq_enable();
>  				tcache_put_page(page);
>  			} else {
> +				/* Race with page_ref_freeze() */
>  				local_irq_enable();
>  				repeat = true;
>  			}
> @@ -948,7 +953,18 @@ tcache_invalidate_node_pages(struct tcache_node *node)
>  		index++;
>  	}
>
> +	if (!repeat && synchronize_sched_once) {
> +		/* Race with tcache_attach_page() */
> +		spin_lock_irq(&node->tree_lock);
> +		repeat = (node->nr_pages != 0);
> +		spin_unlock_irq(&node->tree_lock);
> +		if (repeat) {
> +			synchronize_sched_once = false;
> +			synchronize_sched();
> +		}
> +	}
>  	if (repeat) {
> +		schedule_timeout_interruptible(1);
>  		index = 0;
>  		goto again;
>  	}
> .
>


More information about the Devel mailing list