[Devel] [PATCH rh7] tswap: fix panic on store if page exists

Vladimir Davydov vdavydov at parallels.com
Tue Jun 9 09:56:25 PDT 2015


frontswap_store can be called for a page even if there is already one
cached in frontswap at the same offset. This can happen, e.g., if vmscan
fails to free a swap cache page after writing it back. In that case
radix_tree_insert returns -EEXIST and we hit the BUG_ON in
tswap_frontswap_store:

  kernel BUG at mm/tswap.c:224!
  invalid opcode: 0000 [#1] SMP
  CPU: 1 PID: 8381 Comm: systemd-journal ve: 206 Tainted: G        W   --------------   3.10.0 #34 port-timerfd
  task: ffff88023d2a8d00 ti: ffff8801f93ea000 task.ti: ffff8801f93ea000
  RIP: 0010:[<ffffffff811b6f30>]  [<ffffffff811b6f30>] tswap_frontswap_store+0x110/0x120
  RSP: 0018:ffff8801f93eb590  EFLAGS: 00010282
  RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000000
  RDX: ffffea0007f35140 RSI: 0000000000000000 RDI: ffff8801f93eb548
  RBP: ffff8801f93eb5b0 R08: ffff8801b1c14f18 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffffea0002657400
  R13: 0000000000000fa8 R14: ffffea00053a5500 R15: 0000000000000000
  FS:  00007ffff7fec840(0000) GS:ffff880246e40000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007ffff7ff7000 CR3: 00000001f9927000 CR4: 00000000000006e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
  Stack:
   ffff88023c8529c0 ffffea00053a5500 0000000000000fa8 ffffffff81938500
   ffff8801f93eb5f0 ffffffff811894cb 0000000181186b7c ffffea00053a5500
   ffff8801f93eb6e0 ffff8801f93eb7d8 ffffea00053a5500 ffffffff81934860
  Call Trace:
   [<ffffffff811894cb>] __frontswap_store+0x7b/0x100
   [<ffffffff81184fc3>] swap_writepage+0x23/0x70
   [<ffffffff81157b03>] shrink_page_list+0x833/0xae0
   [<ffffffff81158403>] shrink_inactive_list+0x1c3/0x530
   [<ffffffff81158ec5>] shrink_lruvec+0x395/0x6d0
   [<ffffffff81155601>] ? shrink_slab+0x241/0x410
   [<ffffffff811592ef>] shrink_zone+0xef/0x2b0
   [<ffffffff81159878>] do_try_to_free_pages+0x198/0x530
   [<ffffffff81159e26>] try_to_free_mem_cgroup_pages+0xb6/0x140
   [<ffffffff811adcfd>] __mem_cgroup_try_charge+0x1dd/0xc90
   [<ffffffff811ab0dc>] ? __memcg_kmem_get_cache+0x4c/0x130
   [<ffffffff811aefb9>] mem_cgroup_charge_common+0x59/0xc0
   [<ffffffff811b01f6>] mem_cgroup_newpage_charge+0x26/0x30
   [<ffffffff811736eb>] handle_mm_fault+0xa3b/0xd90
   [<ffffffff811dc48e>] ? seq_open+0xfe/0x170
   [<ffffffff8108960a>] ? __mutex_init+0x2a/0x50
   [<ffffffff811dc40e>] ? seq_open+0x7e/0x170
   [<ffffffff811dc591>] ? single_open+0x61/0xb0
   [<ffffffff815cf3be>] __do_page_fault+0x15e/0x530
   [<ffffffff811d8f24>] ? mntput+0x24/0x40
   [<ffffffff811c4871>] ? terminate_walk+0x51/0x60
   [<ffffffff811c8b6b>] ? do_last.isra.62+0x11b/0xff0
   [<ffffffff812ab4fb>] ? string.isra.5+0x3b/0xf0
   [<ffffffff812aca31>] ? vsnprintf+0x201/0x6a0
   [<ffffffff815cf7aa>] do_page_fault+0x1a/0x70
   [<ffffffff815cba08>] page_fault+0x28/0x30
   [<ffffffff812ad751>] ? copy_user_generic_unrolled+0x41/0xc0
   [<ffffffff811dd59b>] ? seq_read+0x29b/0x3b0
   [<ffffffff811b956c>] vfs_read+0x9c/0x170
   [<ffffffff811ba098>] SyS_read+0x58/0xb0
   [<ffffffff815d42d9>] system_call_fastpath+0x16/0x1b

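To fix this, replace the old cached page if one already exists at the
given offset, and handle the remaining radix_tree_insert errors properly
in tswap_frontswap_store by dropping the new cache page and failing the
store.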

Reported-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 mm/tswap.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/mm/tswap.c b/mm/tswap.c
index 4b792cd20710..6f8707367508 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -208,7 +208,9 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
 				 struct page *page)
 {
 	swp_entry_t entry = swp_entry(type, offset);
-	struct page *cache_page;
+	struct page *cache_page, *old_cache_page = NULL;
+	void **pslot;
+	int err = 0;
 
 	if (!tswap_active)
 		return -1;
@@ -221,12 +223,30 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
 	set_page_private(cache_page, entry.val);
 
 	spin_lock(&tswap_lock);
-	BUG_ON(radix_tree_insert(&tswap_page_tree, entry.val, cache_page));
-	tswap_nr_pages++;
+	pslot = radix_tree_lookup_slot(&tswap_page_tree, entry.val);
+	if (pslot) {
+		old_cache_page = radix_tree_deref_slot_protected(pslot,
+								 &tswap_lock);
+		radix_tree_replace_slot(pslot, cache_page);
+	} else {
+		err = radix_tree_insert(&tswap_page_tree,
+					entry.val, cache_page);
+		BUG_ON(err == -EEXIST);
+		if (!err)
+			tswap_nr_pages++;
+	}
 	spin_unlock(&tswap_lock);
 
-	tswap_lru_add(cache_page);
+	if (err) {
+		put_page(cache_page);
+		return -1;
+	}
 
+	tswap_lru_add(cache_page);
+	if (old_cache_page) {
+		tswap_lru_del(old_cache_page);
+		put_page(old_cache_page);
+	}
 	return 0;
 }
 
-- 
2.1.4




More information about the Devel mailing list