[Devel] [PATCH RHEL7 COMMIT] tcache: Add tcache_pool_nodeinfo::lock

Konstantin Khorenko khorenko at virtuozzo.com
Thu Aug 31 18:18:18 MSK 2017


The commit is pushed to "branch-rh7-3.10.0-514.26.1.vz7.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.26.1.vz7.35.5
------>
commit 13afaf53ede5cb733a5dba3319bcffea95fe9f48
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Thu Aug 31 18:18:18 2017 +0300

    tcache: Add tcache_pool_nodeinfo::lock
    
    Currently, all LRU lists are protected by tcache_nodeinfo::lock, of which
    there is only one per NUMA node, shared by all containers. It is taken
    every time any container adds a page to an LRU list. This makes it
    a "big tcache lock", which does not scale well.
    
    The patch introduces a new lock to protect the fields of struct
    tcache_pool_nodeinfo, in particular its LRU list. The LRU lists of
    filesystems (of containers) are independent of each other, so using
    separate locks allows tcache to scale better.
    
    This patch only introduces the lock; at the moment the lock order is:
    tcache_nodeinfo::lock -> tcache_pool_nodeinfo::lock.
    Subsequent patches will gradually make it possible to reverse this order.
    
    Note that tcache_pool_nodeinfo::nr_pages and tcache_nodeinfo::nr_pages
    are now updated under different locks.
    
    v3: Add spin_lock_init() for lockdep
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Acked-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 mm/tcache.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/mm/tcache.c b/mm/tcache.c
index 9f296dc..ab70af2 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -66,6 +66,7 @@ struct tcache_pool_nodeinfo {
 	/* increased on every LRU add/del, reset once it gets big enough;
 	 * used for rate limiting rebalancing of reclaim_tree */
 	unsigned long			events;
+	spinlock_t			lock;
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -255,6 +256,7 @@ static void tcache_lru_add(struct tcache_pool *pool, struct page *page)
 	struct tcache_pool_nodeinfo *pni = &pool->nodeinfo[nid];
 
 	spin_lock(&ni->lock);
+	spin_lock(&pni->lock);
 
 	ni->nr_pages++;
 	pni->nr_pages++;
@@ -271,6 +273,7 @@ static void tcache_lru_add(struct tcache_pool *pool, struct page *page)
 	if (unlikely(RB_EMPTY_NODE(&pni->reclaim_node)))
 		__tcache_insert_reclaim_node(ni, pni);
 
+	spin_unlock(&pni->lock);
 	spin_unlock(&ni->lock);
 }
 
@@ -293,6 +296,7 @@ static void tcache_lru_del(struct tcache_pool *pool, struct page *page,
 	struct tcache_pool_nodeinfo *pni = &pool->nodeinfo[nid];
 
 	spin_lock(&ni->lock);
+	spin_lock(&pni->lock);
 
 	/* Raced with reclaimer? */
 	if (unlikely(list_empty(&page->lru)))
@@ -306,6 +310,7 @@ static void tcache_lru_del(struct tcache_pool *pool, struct page *page,
 
 	__tcache_check_events(ni, pni);
 out:
+	spin_unlock(&pni->lock);
 	spin_unlock(&ni->lock);
 }
 
@@ -342,6 +347,7 @@ static int tcache_create_pool(void)
 		pni->pool = pool;
 		RB_CLEAR_NODE(&pni->reclaim_node);
 		INIT_LIST_HEAD(&pni->lru);
+		spin_lock_init(&pni->lock);
 	}
 
 	idr_preload(GFP_KERNEL);
@@ -1039,6 +1045,7 @@ tcache_lru_isolate(int nid, struct page **pages, int nr_to_isolate)
 	if (!tcache_grab_pool(pni->pool))
 		goto again;
 
+	spin_lock(&pni->lock);
 	nr = __tcache_lru_isolate(pni, pages, nr_to_isolate);
 	ni->nr_pages -= nr;
 	nr_isolated += nr;
@@ -1047,6 +1054,7 @@ tcache_lru_isolate(int nid, struct page **pages, int nr_to_isolate)
 	if (!list_empty(&pni->lru))
 		__tcache_insert_reclaim_node(ni, pni);
 
+	spin_unlock(&pni->lock);
 	tcache_put_pool(pni->pool);
 out:
 	spin_unlock_irq(&ni->lock);
@@ -1091,14 +1099,17 @@ tcache_try_to_reclaim_page(struct tcache_pool *pool, int nid)
 
 	local_irq_save(flags);
 
-	spin_lock(&ni->lock);
+	spin_lock(&pni->lock);
 	ret = __tcache_lru_isolate(pni, &page, 1);
-	ni->nr_pages -= ret;
-	spin_unlock(&ni->lock);
+	spin_unlock(&pni->lock);
 
 	if (!ret)
 		goto out;
 
+	spin_lock(&ni->lock);
+	ni->nr_pages -= ret;
+	spin_unlock(&ni->lock);
+
 	if (!__tcache_reclaim_page(page))
 		page = NULL;
 	else


More information about the Devel mailing list