[Devel] [PATCH RHEL7 COMMIT] ext4: Do read_trylock on fastmap

Vasily Averin vvs at virtuozzo.com
Tue Jul 21 07:44:58 MSK 2020


The commit is pushed to "branch-rh7-3.10.0-1127.10.1.vz7.162.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.10.1.vz7.162.10
------>
commit dbcf2f65b881599749d9d130fb11e9eb8716e795
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Tue Jul 21 07:44:58 2020 +0300

    ext4: Do read_trylock on fastmap
    
    @svarog reported a crash in kaio over ext4. Investigation shows that,
    on the fastmap path, we take i_es_lock in ext4_es_lookup_extent() while
    plo->lock is already held, i.e. in the reverse of the normal lock order:
    
     #4 [ffff9c3e6ca83928] __read_lock_failed at ffffffffac1c2f9e
     #5 [ffff9c3e6ca83930] _raw_read_lock at ffffffffac5bf044
     #6 [ffff9c3e6ca83940] ext4_es_lookup_extent at ffffffffc046c96a [ext4]
     #7 [ffff9c3e6ca83970] ext4_map_blocks at ffffffffc0425a6d [ext4]
     #8 [ffff9c3e6ca839f8] __ext4_overwrite_io at ffffffffc041ee37 [ext4]
     #9 [ffff9c3e6ca83a20] ext4_fastmap at ffffffffc041eece [ext4]
    #10 [ffff9c3e6ca83a60] kaio_fastmap at ffffffffc01497a1 [pio_kaio]
    #11 [ffff9c3e6ca83ad0] ploop_make_request at ffffffffc034cb7c [ploop]
    
    As a result, the path that takes the locks in the normal order can no
    longer make progress:
    
    #13 [ffff9c54bfa03b08] native_queued_spin_lock_slowpath at ffffffffabf29b40
    #14 [ffff9c54bfa03b10] queued_spin_lock_slowpath at ffffffffac5b0734
    #15 [ffff9c54bfa03b20] _raw_spin_lock_irqsave at ffffffffac5bf387
    #16 [ffff9c54bfa03b38] ploop_complete_io_state at ffffffffc0347543 [ploop]
    #17 [ffff9c54bfa03b68] kaio_complete_io_request at ffffffffc01496c8 [pio_kaio]
    #18 [ffff9c54bfa03b98] kaio_rw_kreq_complete at ffffffffc0149faa [pio_kaio]
    #19 [ffff9c54bfa03be0] aio_complete at ffffffffac0c87be
    #20 [ffff9c54bfa03c48] dio_complete at ffffffffac0b4a50
    #21 [ffff9c54bfa03c88] dio_bio_end_aio at ffffffffac0b4c87
    #22 [ffff9c54bfa03cb8] bio_endio at ffffffffac0b1acc
    #23 [ffff9c54bfa03ce8] dec_pending at ffffffffc003d148 [dm_mod]
    #24 [ffff9c54bfa03d40] clone_endio at ffffffffc003e361 [dm_mod]
    #25 [ffff9c54bfa03d78] bio_endio at ffffffffac0b1acc
    #26 [ffff9c54bfa03da8] blk_update_request at ffffffffac17d0e0
    #27 [ffff9c54bfa03de8] blk_mq_end_request at ffffffffac18749a
    #28 [ffff9c54bfa03e08] nvme_complete_rq at ffffffffc01a9cfc [nvme_core]
    #29 [ffff9c54bfa03e18] nvme_pci_complete_rq at ffffffffc01b9e70 [nvme]
    #30 [ffff9c54bfa03e40] __blk_mq_complete_request at ffffffffac187666
    #31 [ffff9c54bfa03e68] blk_mq_complete_request at ffffffffac187717
    #32 [ffff9c54bfa03e78] nvme_irq at ffffffffc01ba0b2 [nvme]
    #33 [ffff9c54bfa03eb0] __handle_irq_event_percpu at ffffffffabf63ee4
    #34 [ffff9c54bfa03ef8] handle_irq_event_percpu at ffffffffabf64092
    #35 [ffff9c54bfa03f28] handle_irq_event at ffffffffabf6411c
    #36 [ffff9c54bfa03f50] handle_edge_irq at ffffffffabf66f3f
    #37 [ffff9c54bfa03f70] handle_irq at ffffffffabe2f524
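    #38 [ffff9c54bfa03fb8] do_IRQ at ffffffffac5ce96d
    
    Fix this by passing the lookup flags down to __ext4_es_lookup_extent()
    and using read_trylock() on i_es_lock when the fastmap flag (renamed to
    EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK) is set. If the lock cannot be
    taken, the lookup is treated as a miss, so the fastmap path bails out
    with -ENOENT instead of spinning on the lock.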
---
 fs/ext4/ext4.h           | 4 ++--
 fs/ext4/extents_status.c | 7 ++++++-
 fs/ext4/file.c           | 2 +-
 fs/ext4/inode.c          | 4 ++--
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 193ef8903a36e..3cb410c5925cc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -578,8 +578,8 @@ enum {
 	 * allows jbd2 to avoid submitting data before commit. */
 #define EXT4_GET_BLOCKS_IO_SUBMIT		0x0400
 
-	/* Search in extent tree only */
-#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY	0x8000
+	/* Search in extent tree only and do not block */
+#define EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK	0x8000
 
 /*
  * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 472103bdd9886..543e1eb69eb8a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -798,7 +798,12 @@ int __ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 	es_debug("lookup extent in block %u\n", lblk);
 
 	tree = &EXT4_I(inode)->i_es_tree;
-	read_lock(&EXT4_I(inode)->i_es_lock);
+
+	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK) {
+		if (!read_trylock(&EXT4_I(inode)->i_es_lock))
+			return 0;
+	} else
+		read_lock(&EXT4_I(inode)->i_es_lock);
 
 	/* find extent in cache firstly */
 	es->es_lblk = es->es_len = es->es_pblk = 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2ee774a54ad4b..edaf966c9a2f9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,7 +137,7 @@ static int ext4_fastmap(struct inode *inode, sector_t lblk_sec,
 		return -ENOENT;
 
 	found = __ext4_overwrite_io(inode, lblk_sec << 9, len, &map,
-				    EXT4_GET_BLOCKS_EXTENT_TREE_ONLY);
+				    EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK);
 	if (!found)
 		return -ENOENT;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4c1ab7c9a7c6..a0f1e91d65d77 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -497,7 +497,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		return -EIO;
 
 	/* Lookup extent status tree firstly */
-	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+	if (__ext4_es_lookup_extent(inode, map->m_lblk, &es, flags)) {
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -524,7 +524,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		goto found;
 	}
 
-	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY)
+	if (flags & EXT4_GET_BLOCKS_EXTENT_TREE_ONLY_NONBLOCK)
 		return -ENOENT;
 
 	/*


More information about the Devel mailing list