[Devel] [PATCH RH7 2/3] ext4: Do read_trylock on fastmap

Kirill Tkhai ktkhai at virtuozzo.com
Mon Jul 20 14:03:44 MSK 2020


@svarog reported a crash on kaio over ext4. The investigation shows
that on the fastmap path we take i_es_lock in ext4_es_lookup_extent()
while plo->lock is already held, i.e. in the reverse lock order:

 #4 [ffff9c3e6ca83928] __read_lock_failed at ffffffffac1c2f9e
 #5 [ffff9c3e6ca83930] _raw_read_lock at ffffffffac5bf044
 #6 [ffff9c3e6ca83940] ext4_es_lookup_extent at ffffffffc046c96a [ext4]
 #7 [ffff9c3e6ca83970] ext4_map_blocks at ffffffffc0425a6d [ext4]
 #8 [ffff9c3e6ca839f8] __ext4_overwrite_io at ffffffffc041ee37 [ext4]
 #9 [ffff9c3e6ca83a20] ext4_fastmap at ffffffffc041eece [ext4]
#10 [ffff9c3e6ca83a60] kaio_fastmap at ffffffffc01497a1 [pio_kaio]
#11 [ffff9c3e6ca83ad0] ploop_make_request at ffffffffc034cb7c [ploop]

Thus, the path that takes the locks in the normal order (i_es_lock
first, then plo->lock) fails to make progress:

#13 [ffff9c54bfa03b08] native_queued_spin_lock_slowpath at ffffffffabf29b40
#14 [ffff9c54bfa03b10] queued_spin_lock_slowpath at ffffffffac5b0734
#15 [ffff9c54bfa03b20] _raw_spin_lock_irqsave at ffffffffac5bf387
#16 [ffff9c54bfa03b38] ploop_complete_io_state at ffffffffc0347543 [ploop]
#17 [ffff9c54bfa03b68] kaio_complete_io_request at ffffffffc01496c8 [pio_kaio]
#18 [ffff9c54bfa03b98] kaio_rw_kreq_complete at ffffffffc0149faa [pio_kaio]
#19 [ffff9c54bfa03be0] aio_complete at ffffffffac0c87be
#20 [ffff9c54bfa03c48] dio_complete at ffffffffac0b4a50
#21 [ffff9c54bfa03c88] dio_bio_end_aio at ffffffffac0b4c87
#22 [ffff9c54bfa03cb8] bio_endio at ffffffffac0b1acc
#23 [ffff9c54bfa03ce8] dec_pending at ffffffffc003d148 [dm_mod]
#24 [ffff9c54bfa03d40] clone_endio at ffffffffc003e361 [dm_mod]
#25 [ffff9c54bfa03d78] bio_endio at ffffffffac0b1acc
#26 [ffff9c54bfa03da8] blk_update_request at ffffffffac17d0e0
#27 [ffff9c54bfa03de8] blk_mq_end_request at ffffffffac18749a
#28 [ffff9c54bfa03e08] nvme_complete_rq at ffffffffc01a9cfc [nvme_core]
#29 [ffff9c54bfa03e18] nvme_pci_complete_rq at ffffffffc01b9e70 [nvme]
#30 [ffff9c54bfa03e40] __blk_mq_complete_request at ffffffffac187666
#31 [ffff9c54bfa03e68] blk_mq_complete_request at ffffffffac187717
#32 [ffff9c54bfa03e78] nvme_irq at ffffffffc01ba0b2 [nvme]
#33 [ffff9c54bfa03eb0] __handle_irq_event_percpu at ffffffffabf63ee4
#34 [ffff9c54bfa03ef8] handle_irq_event_percpu at ffffffffabf64092
#35 [ffff9c54bfa03f28] handle_irq_event at ffffffffabf6411c
#36 [ffff9c54bfa03f50] handle_edge_irq at ffffffffabf66f3f
#37 [ffff9c54bfa03f70] handle_irq at ffffffffabe2f524
#38 [ffff9c54bfa03fb8] do_IRQ at ffffffffac5ce96d
--- <IRQ stack> ---
#40 [ffff9c3d9e58b530] __es_insert_extent at ffffffffc046b994 [ext4]
#41 [ffff9c3d9e58b580] ext4_es_insert_extent at ffffffffc046c75b [ext4]
#42 [ffff9c3d9e58b5f0] ext4_map_blocks at ffffffffc0425afb [ext4]
#43 [ffff9c3d9e58b678] _ext4_get_block at ffffffffc04262ff [ext4]
#44 [ffff9c3d9e58b6e0] ext4_get_block at ffffffffc0426356 [ext4]

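To fix that, this patch replaces read_lock() with read_trylock()
in __ext4_es_lookup_extent() when the caller passes
EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK. If the lock cannot be
taken, the lookup returns 0 (nothing found) instead of spinning.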

Also rename the define to EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK
to underline that it's non-blocking.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 fs/ext4/ext4.h           |    4 ++--
 fs/ext4/extents_status.c |    7 ++++++-
 fs/ext4/file.c           |    2 +-
 fs/ext4/inode.c          |    4 ++--
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 193ef8903a36..3cb410c5925c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -578,8 +578,8 @@ enum {
 	 * allows jbd2 to avoid submitting data before commit. */
 #define EXT4_GET_BLOCKS_IO_SUBMIT		0x0400
 
-	/* Search in extent tree only */
-#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY	0x8000
+	/* Search in extent tree only and do not block */
+#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK	0x8000
 
 /*
  * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 472103bdd988..543e1eb69eb8 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -798,7 +798,12 @@ int __ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 	es_debug("lookup extent in block %u\n", lblk);
 
 	tree = &EXT4_I(inode)->i_es_tree;
-	read_lock(&EXT4_I(inode)->i_es_lock);
+
+	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK) {
+		if (!read_trylock(&EXT4_I(inode)->i_es_lock))
+			return 0;
+	} else
+		read_lock(&EXT4_I(inode)->i_es_lock);
 
 	/* find extent in cache firstly */
 	es->es_lblk = es->es_len = es->es_pblk = 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2ee774a54ad4..edaf966c9a2f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,7 +137,7 @@ static int ext4_fastmap(struct inode *inode, sector_t lblk_sec,
 		return -ENOENT;
 
 	found = __ext4_overwrite_io(inode, lblk_sec << 9, len, &map,
-				    EXT4_GET_BLOCKS_EXTENT_TREE_ONLY);
+				    EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK);
 	if (!found)
 		return -ENOENT;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4c1ab7c9a7c..a0f1e91d65d7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -497,7 +497,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		return -EIO;
 
 	/* Lookup extent status tree firstly */
-	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+	if (__ext4_es_lookup_extent(inode, map->m_lblk, &es, flags)) {
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -524,7 +524,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		goto found;
 	}
 
-	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY)
+	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK)
 		return -ENOENT;
 
 	/*




More information about the Devel mailing list