[Devel] [PATCH RHEL7 COMMIT] tswap: fix panic on store if page exists

Konstantin Khorenko khorenko at virtuozzo.com
Wed Jun 17 01:44:37 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.14
------>
commit a5950dde76a23bbe0d60f9eb0f19c56917ccbc08
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Wed Jun 17 12:44:37 2015 +0400

    tswap: fix panic on store if page exists
    
    frontswap_store can be called on a page even if there is already one
    cached in frontswap at the same offset. This can happen e.g. if vmscan
    fails to free a swap cache page after writing it back. In that case
    radix_tree_insert returns -EEXIST and we hit the BUG_ON around it:
    
      kernel BUG at mm/tswap.c:224!
      invalid opcode: 0000 [#1] SMP
      CPU: 1 PID: 8381 Comm: systemd-journal ve: 206 Tainted: G        W   --------------   3.10.0 #34 port-timerfd
      task: ffff88023d2a8d00 ti: ffff8801f93ea000 task.ti: ffff8801f93ea000
      RIP: 0010:[<ffffffff811b6f30>]  [<ffffffff811b6f30>] tswap_frontswap_store+0x110/0x120
      RSP: 0018:ffff8801f93eb590  EFLAGS: 00010282
      RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000000
      RDX: ffffea0007f35140 RSI: 0000000000000000 RDI: ffff8801f93eb548
      RBP: ffff8801f93eb5b0 R08: ffff8801b1c14f18 R09: 0000000000000000
      R10: 0000000000000000 R11: 0000000000000000 R12: ffffea0002657400
      R13: 0000000000000fa8 R14: ffffea00053a5500 R15: 0000000000000000
      FS:  00007ffff7fec840(0000) GS:ffff880246e40000(0000) knlGS:0000000000000000
      CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      CR2: 00007ffff7ff7000 CR3: 00000001f9927000 CR4: 00000000000006e0
      DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
      DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
      Stack:
       ffff88023c8529c0 ffffea00053a5500 0000000000000fa8 ffffffff81938500
       ffff8801f93eb5f0 ffffffff811894cb 0000000181186b7c ffffea00053a5500
       ffff8801f93eb6e0 ffff8801f93eb7d8 ffffea00053a5500 ffffffff81934860
      Call Trace:
       [<ffffffff811894cb>] __frontswap_store+0x7b/0x100
       [<ffffffff81184fc3>] swap_writepage+0x23/0x70
       [<ffffffff81157b03>] shrink_page_list+0x833/0xae0
       [<ffffffff81158403>] shrink_inactive_list+0x1c3/0x530
       [<ffffffff81158ec5>] shrink_lruvec+0x395/0x6d0
       [<ffffffff81155601>] ? shrink_slab+0x241/0x410
       [<ffffffff811592ef>] shrink_zone+0xef/0x2b0
       [<ffffffff81159878>] do_try_to_free_pages+0x198/0x530
       [<ffffffff81159e26>] try_to_free_mem_cgroup_pages+0xb6/0x140
       [<ffffffff811adcfd>] __mem_cgroup_try_charge+0x1dd/0xc90
       [<ffffffff811ab0dc>] ? __memcg_kmem_get_cache+0x4c/0x130
       [<ffffffff811aefb9>] mem_cgroup_charge_common+0x59/0xc0
       [<ffffffff811b01f6>] mem_cgroup_newpage_charge+0x26/0x30
       [<ffffffff811736eb>] handle_mm_fault+0xa3b/0xd90
       [<ffffffff811dc48e>] ? seq_open+0xfe/0x170
       [<ffffffff8108960a>] ? __mutex_init+0x2a/0x50
       [<ffffffff811dc40e>] ? seq_open+0x7e/0x170
       [<ffffffff811dc591>] ? single_open+0x61/0xb0
       [<ffffffff815cf3be>] __do_page_fault+0x15e/0x530
       [<ffffffff811d8f24>] ? mntput+0x24/0x40
       [<ffffffff811c4871>] ? terminate_walk+0x51/0x60
       [<ffffffff811c8b6b>] ? do_last.isra.62+0x11b/0xff0
       [<ffffffff812ab4fb>] ? string.isra.5+0x3b/0xf0
       [<ffffffff812aca31>] ? vsnprintf+0x201/0x6a0
       [<ffffffff815cf7aa>] do_page_fault+0x1a/0x70
       [<ffffffff815cba08>] page_fault+0x28/0x30
       [<ffffffff812ad751>] ? copy_user_generic_unrolled+0x41/0xc0
       [<ffffffff811dd59b>] ? seq_read+0x29b/0x3b0
       [<ffffffff811b956c>] vfs_read+0x9c/0x170
       [<ffffffff811ba098>] SyS_read+0x58/0xb0
       [<ffffffff815d42d9>] system_call_fastpath+0x16/0x1b
    
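    Therefore, we should handle radix_tree_insert errors properly in
    tswap_frontswap_store: replace the already cached page if there is
    one, and fail the store if the insertion fails for any other reason.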
    
    Reported-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 mm/tswap.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/mm/tswap.c b/mm/tswap.c
index 4b792cd..6f87073 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -208,7 +208,9 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
 				 struct page *page)
 {
 	swp_entry_t entry = swp_entry(type, offset);
-	struct page *cache_page;
+	struct page *cache_page, *old_cache_page = NULL;
+	void **pslot;
+	int err = 0;
 
 	if (!tswap_active)
 		return -1;
@@ -221,12 +223,30 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
 	set_page_private(cache_page, entry.val);
 
 	spin_lock(&tswap_lock);
-	BUG_ON(radix_tree_insert(&tswap_page_tree, entry.val, cache_page));
-	tswap_nr_pages++;
+	pslot = radix_tree_lookup_slot(&tswap_page_tree, entry.val);
+	if (pslot) {
+		old_cache_page = radix_tree_deref_slot_protected(pslot,
+								 &tswap_lock);
+		radix_tree_replace_slot(pslot, cache_page);
+	} else {
+		err = radix_tree_insert(&tswap_page_tree,
+					entry.val, cache_page);
+		BUG_ON(err == -EEXIST);
+		if (!err)
+			tswap_nr_pages++;
+	}
 	spin_unlock(&tswap_lock);
 
-	tswap_lru_add(cache_page);
+	if (err) {
+		put_page(cache_page);
+		return -1;
+	}
 
+	tswap_lru_add(cache_page);
+	if (old_cache_page) {
+		tswap_lru_del(old_cache_page);
+		put_page(old_cache_page);
+	}
 	return 0;
 }
 



More information about the Devel mailing list