[Devel] [PATCH RHEL7 COMMIT] ext4: Do read_trylock on fastmap
Vasily Averin
vvs at virtuozzo.com
Tue Jul 21 07:44:58 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.10.1.vz7.162.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.10.1.vz7.162.10
------>
commit dbcf2f65b881599749d9d130fb11e9eb8716e795
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Tue Jul 21 07:44:58 2020 +0300
ext4: Do read_trylock on fastmap
@svarog reported a crash on kaio over ext4. The investigation shows that,
on the fastmap path, we take i_es_lock in ext4_es_lookup_extent() while
plo->lock is already held, i.e. in the reverse of the normal lock order:
#4 [ffff9c3e6ca83928] __read_lock_failed at ffffffffac1c2f9e
#5 [ffff9c3e6ca83930] _raw_read_lock at ffffffffac5bf044
#6 [ffff9c3e6ca83940] ext4_es_lookup_extent at ffffffffc046c96a [ext4]
#7 [ffff9c3e6ca83970] ext4_map_blocks at ffffffffc0425a6d [ext4]
#8 [ffff9c3e6ca839f8] __ext4_overwrite_io at ffffffffc041ee37 [ext4]
#9 [ffff9c3e6ca83a20] ext4_fastmap at ffffffffc041eece [ext4]
#10 [ffff9c3e6ca83a60] kaio_fastmap at ffffffffc01497a1 [pio_kaio]
#11 [ffff9c3e6ca83ad0] ploop_make_request at ffffffffc034cb7c [ploop]
Thus, the code path that takes the locks in the normal order cannot make progress:
#13 [ffff9c54bfa03b08] native_queued_spin_lock_slowpath at ffffffffabf29b40
#14 [ffff9c54bfa03b10] queued_spin_lock_slowpath at ffffffffac5b0734
#15 [ffff9c54bfa03b20] _raw_spin_lock_irqsave at ffffffffac5bf387
#16 [ffff9c54bfa03b38] ploop_complete_io_state at ffffffffc0347543 [ploop]
#17 [ffff9c54bfa03b68] kaio_complete_io_request at ffffffffc01496c8 [pio_kaio]
#18 [ffff9c54bfa03b98] kaio_rw_kreq_complete at ffffffffc0149faa [pio_kaio]
#19 [ffff9c54bfa03be0] aio_complete at ffffffffac0c87be
#20 [ffff9c54bfa03c48] dio_complete at ffffffffac0b4a50
#21 [ffff9c54bfa03c88] dio_bio_end_aio at ffffffffac0b4c87
#22 [ffff9c54bfa03cb8] bio_endio at ffffffffac0b1acc
#23 [ffff9c54bfa03ce8] dec_pending at ffffffffc003d148 [dm_mod]
#24 [ffff9c54bfa03d40] clone_endio at ffffffffc003e361 [dm_mod]
#25 [ffff9c54bfa03d78] bio_endio at ffffffffac0b1acc
#26 [ffff9c54bfa03da8] blk_update_request at ffffffffac17d0e0
#27 [ffff9c54bfa03de8] blk_mq_end_request at ffffffffac18749a
#28 [ffff9c54bfa03e08] nvme_complete_rq at ffffffffc01a9cfc [nvme_core]
#29 [ffff9c54bfa03e18] nvme_pci_complete_rq at ffffffffc01b9e70 [nvme]
#30 [ffff9c54bfa03e40] __blk_mq_complete_request at ffffffffac187666
#31 [ffff9c54bfa03e68] blk_mq_complete_request at ffffffffac187717
#32 [ffff9c54bfa03e78] nvme_irq at ffffffffc01ba0b2 [nvme]
#33 [ffff9c54bfa03eb0] __handle_irq_event_percpu at ffffffffabf63ee4
#34 [ffff9c54bfa03ef8] handle_irq_event_percpu at ffffffffabf64092
#35 [ffff9c54bfa03f28] handle_irq_event at ffffffffabf6411c
#36 [ffff9c54bfa03f50] handle_edge_irq at ffffffffabf66f3f
#37 [ffff9c54bfa03f70] handle_irq at ffffffffabe2f524
#38 [ffff9c54bfa03fb8] do_IRQ at ffffffffac5ce96d
---
fs/ext4/ext4.h | 4 ++--
fs/ext4/extents_status.c | 7 ++++++-
fs/ext4/file.c | 2 +-
fs/ext4/inode.c | 4 ++--
4 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 193ef8903a36e..3cb410c5925cc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -578,8 +578,8 @@ enum {
* allows jbd2 to avoid submitting data before commit. */
#define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400
- /* Search in extent tree only */
-#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY 0x8000
+ /* Search in extent tree only and do not block */
+#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK 0x8000
/*
* The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 472103bdd9886..543e1eb69eb8a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -798,7 +798,12 @@ int __ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
es_debug("lookup extent in block %u\n", lblk);
tree = &EXT4_I(inode)->i_es_tree;
- read_lock(&EXT4_I(inode)->i_es_lock);
+
+ if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK) {
+ if (!read_trylock(&EXT4_I(inode)->i_es_lock))
+ return 0;
+ } else
+ read_lock(&EXT4_I(inode)->i_es_lock);
/* find extent in cache firstly */
es->es_lblk = es->es_len = es->es_pblk = 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2ee774a54ad4b..edaf966c9a2f9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,7 +137,7 @@ static int ext4_fastmap(struct inode *inode, sector_t lblk_sec,
return -ENOENT;
found = __ext4_overwrite_io(inode, lblk_sec << 9, len, &map,
- EXT4_GET_BLOCKS_EXTENT_TREE_ONLY);
+ EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK);
if (!found)
return -ENOENT;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4c1ab7c9a7c6..a0f1e91d65d77 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -497,7 +497,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
return -EIO;
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ if (__ext4_es_lookup_extent(inode, map->m_lblk, &es, flags)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -524,7 +524,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
goto found;
}
- if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY)
+ if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK)
return -ENOENT;
/*
More information about the Devel
mailing list