[Devel] [PATCH RHEL7 COMMIT] tswap: shrink tswap page if swapcache page is uptodate

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jun 22 02:38:23 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.15
------>
commit 437db87482802fc5981fb7ea75e7bc041896eeec
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon Jun 22 13:38:23 2015 +0400

    tswap: shrink tswap page if swapcache page is uptodate
    
    Patchset description:
    
    Patches 1 and 2 fix memory corruption caused by tswap:
      https://jira.sw.ru/browse/PSBM-34269
    Patch 5 fixes pinning a page in tswap.
    Patch 7 fixes indefinitely long stalls during tswap reclaim.
    Patches 3, 4, and 6 do cleanup.
    
    Vladimir Davydov (6):
      tswap: enable exclusive gets
      tswap: do not allocate a page on store if there is already one
      tswap: introduce tswap_delete_page helper
      tswap: shrink tswap page if swapcache page is uptodate
      tswap: use global lock to protect lru lists
      tswap: do not writeback pages on reclaim
    
    Weijie Yang (1):
      mm: frontswap: invalidate expired data on a dup-store failure
    
    ###############################################################
    This patch description:
    
    On tswap shrink we attempt to move a tswap page back to swapcache. If
    there is already a swapcache page at the given offset, we leave the
    tswap page in frontswap while removing it from LRU, because we assume
    that it will be loaded in a moment from read_swap_cache_async.
    
    However, this is not always true. The reclaimer could have failed to free
    a swapcache page after successfully storing it to frontswap. In this
    case, the swapcache page will be uptodate and there is no need to keep
    the backing page in frontswap. Quite the contrary, if we keep such a
    page in frontswap, it will dangle there until the swapcache page it
    backs is reclaimed, which may take indefinitely long.
    
    This patch detects such situations and makes the shrinker invalidate
    the tswap page in that case.
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 mm/tswap.c | 79 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 53 insertions(+), 26 deletions(-)

diff --git a/mm/tswap.c b/mm/tswap.c
index 1c19499..e4b27df 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -86,16 +86,17 @@ static int tswap_insert_page(swp_entry_t entry, struct page *page)
 	return err;
 }
 
-static struct page *tswap_delete_page(swp_entry_t entry)
+static struct page *tswap_delete_page(swp_entry_t entry, struct page *expected)
 {
 	struct page *page;
 
 	spin_lock(&tswap_lock);
-	page = radix_tree_delete(&tswap_page_tree, entry.val);
+	page = radix_tree_delete_item(&tswap_page_tree, entry.val, expected);
 	if (page)
 		tswap_nr_pages--;
 	spin_unlock(&tswap_lock);
 	if (page) {
+		BUG_ON(expected && page != expected);
 		BUG_ON(page_private(page) != entry.val);
 		tswap_lru_del(page);
 	}
@@ -118,6 +119,8 @@ static int tswap_writeback_page(struct page *page)
 		.sync_mode = WB_SYNC_NONE,
 	};
 
+	BUG_ON(!PageLocked(page));
+
 	entry.val = page_private(page);
 	swapper_space = swap_address_space(entry);
 retry:
@@ -125,11 +128,38 @@ retry:
 	found_page = find_get_page(swapper_space, entry.val);
 	if (found_page) {
 		/*
-		 * There is already a swap cache page at the given offset.
-		 * Hence, if the current page has not been loaded yet, it will
-		 * be in a moment (see read_swap_cache_async), so there is no
-		 * need to put it back to the lru list.
+		 * There is already a swap cache page at the given offset. If
+		 * the page is uptodate, we can safely free the frontswap page,
+		 * marking the swapcache page dirty. Otherwise, the frontswap
+		 * page is about to be loaded and cannot be released.
 		 */
+		err = -EBUSY;
+		if (!trylock_page(found_page)) {
+			put_page(found_page);
+			goto out;
+		}
+		/* recheck that the page is still in the swap cache */
+		if (!PageSwapCache(found_page) ||
+		    page_private(found_page) != entry.val) {
+			unlock_page(found_page);
+			put_page(found_page);
+			goto retry;
+		}
+		if (PageUptodate(found_page)) {
+			/*
+			 * Since we are holding the swap cache page lock, no
+			 * frontswap callbacks are allowed now. However, the
+			 * frontswap page could have been invalidated before we
+			 * took the lock, in which case we have nothing to do.
+			 */
+			err = -ENOENT;
+			if (tswap_delete_page(entry, page)) {
+				SetPageDirty(found_page);
+				put_page(page);
+				err = 0;
+			}
+		}
+		unlock_page(found_page);
 		put_page(found_page);
 		goto out;
 	}
@@ -155,28 +185,19 @@ retry:
 		 * prepared swap cache */
 		goto out_free_swapcache;
 
-	__set_page_locked(page);
 	SetPageSwapBacked(page);
 	err = __add_to_swap_cache(page, entry);
 	if (err) {
 		ClearPageSwapBacked(page);
-		__clear_page_locked(page);
 		/* __add_to_swap_cache clears page->private on failure */
 		set_page_private(page, entry.val);
-		/* putting the page back to the lru list before freeing swap
-		 * cache blocks others reclaiming threads from interfering */
-		tswap_lru_add(page);
 		/* __add_to_swap_cache does not return -EEXIST, so we can
 		 * safely clear SWAP_HAS_CACHE flag */
 		goto out_free_swapcache;
 	}
 
 	/* the page is now in the swap cache, remove it from tswap */
-	spin_lock(&tswap_lock);
-	BUG_ON(!radix_tree_delete_item(&tswap_page_tree, entry.val, page));
-	tswap_nr_pages--;
-	spin_unlock(&tswap_lock);
-
+	BUG_ON(!tswap_delete_page(entry, page));
 	put_page(page);
 
 	lru_cache_add_anon(page);
@@ -185,14 +206,14 @@ retry:
 	/* move it to the tail of the inactive list after end_writeback */
 	SetPageReclaim(page);
 
-	/* start writeback */
+	/* start writeback; unlocks the page */
 	__swap_writepage(page, &wbc, end_swap_bio_write);
-
-	goto out;
+	return 0;
 
 out_free_swapcache:
 	swapcache_free(entry, NULL);
 out:
+	unlock_page(page);
 	return err;
 }
 
@@ -203,19 +224,25 @@ static unsigned long tswap_shrink_scan(struct shrinker *shrink,
 	unsigned long nr_reclaimed = 0;
 
 	spin_lock(&lru->lock);
-	while (lru->nr_items > 0 && sc->nr_to_scan > 0) {
+	while (sc->nr_to_scan-- > 0) {
 		struct page *page;
+
+		if (!lru->nr_items)
+			break;
 		
 		page = list_first_entry(&lru->list, struct page, lru);
+		/* lock the page to avoid interference with
+		 * other reclaiming threads */
+		if (!trylock_page(page)) {
+			list_move_tail(&page->lru, &lru->list);
+			cond_resched_lock(&lru->lock);
+			continue;
+		}
 		get_page(page);
-
-		list_del_init(&page->lru);
-		lru->nr_items--;
 		spin_unlock(&lru->lock);
 
 		if (tswap_writeback_page(page) == 0)
 			nr_reclaimed++;
-		sc->nr_to_scan--;
 
 		put_page(page);
 
@@ -280,7 +307,7 @@ static int tswap_frontswap_load(unsigned type, pgoff_t offset,
 {
 	struct page *cache_page;
 
-	cache_page = tswap_delete_page(swp_entry(type, offset));
+	cache_page = tswap_delete_page(swp_entry(type, offset), NULL);
 	if (!cache_page)
 		return -1;
 
@@ -293,7 +320,7 @@ static void tswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
 {
 	struct page *cache_page;
 
-	cache_page = tswap_delete_page(swp_entry(type, offset));
+	cache_page = tswap_delete_page(swp_entry(type, offset), NULL);
 	if (cache_page)
 		put_page(cache_page);
 }



More information about the Devel mailing list