[Devel] [PATCH rh7 1/3] tswap: make store put-put-get coherent

Vladimir Davydov vdavydov at parallels.com
Fri Jun 19 03:41:03 PDT 2015


A frontswap backend must be put-put-get coherent, meaning that if two
successive puts store AAA and BBB at the same offset, the following get
must never return AAA. Tswap conforms to this rule, but only if the
second put does not fail. If it does, the get will return the value
stored by the first put, resulting in user memory corruption. This can
actually happen if the reclaimer fails to free a swapcache page after
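successfully storing it to frontswap and the next store fails.

Fix this by making a failed store delete whatever page tswap already
holds at the same offset, so that a later get can never return stale
data.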

https://jira.sw.ru/browse/PSBM-34269

Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 mm/tswap.c | 101 +++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 36 deletions(-)

diff --git a/mm/tswap.c b/mm/tswap.c
index 6f8707367508..222cf5b251aa 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -66,6 +66,8 @@ static unsigned long tswap_shrink_count(struct shrinker *shrink,
 	return tswap_lru_node[sc->nid].nr_items;
 }
 
+static struct page *tswap_delete(swp_entry_t entry, struct page *expected);
+
 static int tswap_writeback_page(struct page *page)
 {
 	struct address_space *swapper_space;
@@ -134,11 +136,7 @@ retry:
 	}
 
 	/* the page is now in the swap cache, remove it from tswap */
-	spin_lock(&tswap_lock);
-	BUG_ON(!radix_tree_delete_item(&tswap_page_tree, entry.val, page));
-	tswap_nr_pages--;
-	spin_unlock(&tswap_lock);
-
+	BUG_ON(tswap_delete(entry, page) != page);
 	put_page(page);
 
 	lru_cache_add_anon(page);
@@ -204,49 +202,88 @@ static void tswap_frontswap_init(unsigned type)
 	 */
 }
 
-static int tswap_frontswap_store(unsigned type, pgoff_t offset,
-				 struct page *page)
+static struct page *__tswap_replace(swp_entry_t entry, struct page *page)
 {
-	swp_entry_t entry = swp_entry(type, offset);
-	struct page *cache_page, *old_cache_page = NULL;
+	struct page *old_page;
 	void **pslot;
 	int err = 0;
 
-	if (!tswap_active)
-		return -1;
-
-	cache_page = alloc_page(__GFP_HIGHMEM | __GFP_NORETRY | __GFP_NOWARN);
-	if (!cache_page)
-		return -1;
-
-	copy_highpage(cache_page, page);
-	set_page_private(cache_page, entry.val);
+	set_page_private(page, entry.val);
 
-	spin_lock(&tswap_lock);
 	pslot = radix_tree_lookup_slot(&tswap_page_tree, entry.val);
 	if (pslot) {
-		old_cache_page = radix_tree_deref_slot_protected(pslot,
-								 &tswap_lock);
-		radix_tree_replace_slot(pslot, cache_page);
+		old_page = radix_tree_deref_slot_protected(pslot,
+							   &tswap_lock);
+		if (old_page)
+			BUG_ON(page_private(old_page) != entry.val);
+		radix_tree_replace_slot(pslot, page);
 	} else {
+		old_page = NULL;
 		err = radix_tree_insert(&tswap_page_tree,
-					entry.val, cache_page);
+					entry.val, page);
 		BUG_ON(err == -EEXIST);
 		if (!err)
 			tswap_nr_pages++;
 	}
+	return err ? ERR_PTR(err) : old_page;
+}
+
+static struct page *__tswap_delete(swp_entry_t entry, struct page *expected)
+{
+	struct page *page;
+
+	page = radix_tree_delete_item(&tswap_page_tree, entry.val, expected);
+	if (page) {
+		BUG_ON(page_private(page) != entry.val);
+		tswap_nr_pages--;
+	}
+	return page;
+}
+
+static struct page *tswap_delete(swp_entry_t entry, struct page *expected)
+{
+	struct page *page;
+
+	spin_lock(&tswap_lock);
+	page = __tswap_delete(entry, expected);
+	spin_unlock(&tswap_lock);
+	return page;
+}
+
+static int tswap_frontswap_store(unsigned type, pgoff_t offset,
+				 struct page *page)
+{
+	swp_entry_t entry = swp_entry(type, offset);
+	struct page *cache_page, *old_cache_page;
+
+	if (tswap_active)
+		cache_page = alloc_page(__GFP_HIGHMEM | __GFP_NORETRY |
+					__GFP_NOWARN);
+	else
+		cache_page = NULL;
+	if (cache_page)
+		copy_highpage(cache_page, page);
+
+	spin_lock(&tswap_lock);
+	if (cache_page)
+		old_cache_page = __tswap_replace(entry, cache_page);
+	else
+		old_cache_page = __tswap_delete(entry, NULL);
 	spin_unlock(&tswap_lock);
 
-	if (err) {
-		put_page(cache_page);
+	if (IS_ERR(old_cache_page)) {
+		if (cache_page)
+			put_page(cache_page);
 		return -1;
 	}
-
-	tswap_lru_add(cache_page);
 	if (old_cache_page) {
 		tswap_lru_del(old_cache_page);
 		put_page(old_cache_page);
 	}
+
+	if (!cache_page)
+		return -1;
+	tswap_lru_add(cache_page);
 	return 0;
 }
 
@@ -256,22 +293,14 @@ static int tswap_frontswap_load(unsigned type, pgoff_t offset,
 	swp_entry_t entry = swp_entry(type, offset);
 	struct page *cache_page;
 
-	spin_lock(&tswap_lock);
-	cache_page = radix_tree_delete(&tswap_page_tree, entry.val);
-	if (cache_page)
-		tswap_nr_pages--;
-	spin_unlock(&tswap_lock);
-
+	cache_page = tswap_delete(entry, NULL);
 	if (!cache_page)
 		return -1;
 
-	BUG_ON(page_private(cache_page) != entry.val);
 	tswap_lru_del(cache_page);
-
 	if (page)
 		copy_highpage(page, cache_page);
 	put_page(cache_page);
-
 	return 0;
 }
 
-- 
2.1.4




More information about the Devel mailing list