[Devel] [PATCH rh7] tswap: fix panic on store if page exists
Vladimir Davydov
vdavydov at parallels.com
Tue Jun 9 09:56:25 PDT 2015
frontswap_store can be called on a page even if there is already one
cached in frontswap at the same offset. This can happen e.g. if vmscan
fails to free a swap cache page after writing it back, in which case we
will get a bug:
kernel BUG at mm/tswap.c:224!
invalid opcode: 0000 [#1] SMP
CPU: 1 PID: 8381 Comm: systemd-journal ve: 206 Tainted: G W -------------- 3.10.0 #34 port-timerfd
task: ffff88023d2a8d00 ti: ffff8801f93ea000 task.ti: ffff8801f93ea000
RIP: 0010:[<ffffffff811b6f30>] [<ffffffff811b6f30>] tswap_frontswap_store+0x110/0x120
RSP: 0018:ffff8801f93eb590 EFLAGS: 00010282
RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000000
RDX: ffffea0007f35140 RSI: 0000000000000000 RDI: ffff8801f93eb548
RBP: ffff8801f93eb5b0 R08: ffff8801b1c14f18 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffea0002657400
R13: 0000000000000fa8 R14: ffffea00053a5500 R15: 0000000000000000
FS: 00007ffff7fec840(0000) GS:ffff880246e40000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffff7ff7000 CR3: 00000001f9927000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffff88023c8529c0 ffffea00053a5500 0000000000000fa8 ffffffff81938500
ffff8801f93eb5f0 ffffffff811894cb 0000000181186b7c ffffea00053a5500
ffff8801f93eb6e0 ffff8801f93eb7d8 ffffea00053a5500 ffffffff81934860
Call Trace:
[<ffffffff811894cb>] __frontswap_store+0x7b/0x100
[<ffffffff81184fc3>] swap_writepage+0x23/0x70
[<ffffffff81157b03>] shrink_page_list+0x833/0xae0
[<ffffffff81158403>] shrink_inactive_list+0x1c3/0x530
[<ffffffff81158ec5>] shrink_lruvec+0x395/0x6d0
[<ffffffff81155601>] ? shrink_slab+0x241/0x410
[<ffffffff811592ef>] shrink_zone+0xef/0x2b0
[<ffffffff81159878>] do_try_to_free_pages+0x198/0x530
[<ffffffff81159e26>] try_to_free_mem_cgroup_pages+0xb6/0x140
[<ffffffff811adcfd>] __mem_cgroup_try_charge+0x1dd/0xc90
[<ffffffff811ab0dc>] ? __memcg_kmem_get_cache+0x4c/0x130
[<ffffffff811aefb9>] mem_cgroup_charge_common+0x59/0xc0
[<ffffffff811b01f6>] mem_cgroup_newpage_charge+0x26/0x30
[<ffffffff811736eb>] handle_mm_fault+0xa3b/0xd90
[<ffffffff811dc48e>] ? seq_open+0xfe/0x170
[<ffffffff8108960a>] ? __mutex_init+0x2a/0x50
[<ffffffff811dc40e>] ? seq_open+0x7e/0x170
[<ffffffff811dc591>] ? single_open+0x61/0xb0
[<ffffffff815cf3be>] __do_page_fault+0x15e/0x530
[<ffffffff811d8f24>] ? mntput+0x24/0x40
[<ffffffff811c4871>] ? terminate_walk+0x51/0x60
[<ffffffff811c8b6b>] ? do_last.isra.62+0x11b/0xff0
[<ffffffff812ab4fb>] ? string.isra.5+0x3b/0xf0
[<ffffffff812aca31>] ? vsnprintf+0x201/0x6a0
[<ffffffff815cf7aa>] do_page_fault+0x1a/0x70
[<ffffffff815cba08>] page_fault+0x28/0x30
[<ffffffff812ad751>] ? copy_user_generic_unrolled+0x41/0xc0
[<ffffffff811dd59b>] ? seq_read+0x29b/0x3b0
[<ffffffff811b956c>] vfs_read+0x9c/0x170
[<ffffffff811ba098>] SyS_read+0x58/0xb0
[<ffffffff815d42d9>] system_call_fastpath+0x16/0x1b
That said, we should handle radix_tree_insert errors properly in
tswap_frontswap_store.
Reported-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
mm/tswap.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/mm/tswap.c b/mm/tswap.c
index 4b792cd20710..6f8707367508 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -208,7 +208,9 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
struct page *page)
{
swp_entry_t entry = swp_entry(type, offset);
- struct page *cache_page;
+ struct page *cache_page, *old_cache_page = NULL;
+ void **pslot;
+ int err = 0;
if (!tswap_active)
return -1;
@@ -221,12 +223,30 @@ static int tswap_frontswap_store(unsigned type, pgoff_t offset,
set_page_private(cache_page, entry.val);
spin_lock(&tswap_lock);
- BUG_ON(radix_tree_insert(&tswap_page_tree, entry.val, cache_page));
- tswap_nr_pages++;
+ pslot = radix_tree_lookup_slot(&tswap_page_tree, entry.val);
+ if (pslot) {
+ old_cache_page = radix_tree_deref_slot_protected(pslot,
+ &tswap_lock);
+ radix_tree_replace_slot(pslot, cache_page);
+ } else {
+ err = radix_tree_insert(&tswap_page_tree,
+ entry.val, cache_page);
+ BUG_ON(err == -EEXIST);
+ if (!err)
+ tswap_nr_pages++;
+ }
spin_unlock(&tswap_lock);
- tswap_lru_add(cache_page);
+ if (err) {
+ put_page(cache_page);
+ return -1;
+ }
+ tswap_lru_add(cache_page);
+ if (old_cache_page) {
+ tswap_lru_del(old_cache_page);
+ put_page(old_cache_page);
+ }
return 0;
}
--
2.1.4
More information about the Devel
mailing list