[Devel] [PATCH rh7 1/3] ms/ext4: move handling of list of shrinkable inodes into extent status code

Konstantin Khorenko khorenko at virtuozzo.com
Fri Apr 13 13:08:48 MSK 2018


From: Jan Kara <jack at suse.cz>

Currently callers adding extents to extent status tree were responsible
for adding the inode to the list of inodes with freeable extents. This
is error prone and puts list handling in unnecessarily many places.

Just add inode to the list automatically when the first non-delay extent
is added to the tree and remove inode from the list when the last
non-delay extent is removed.

Signed-off-by: Jan Kara <jack at suse.cz>
Signed-off-by: Theodore Ts'o <tytso at mit.edu>
(cherry picked from commit b0dea4c1651f3cdb6d17604fa473e72cb74cdc6b)

https://jira.sw.ru/browse/PSBM-83335

We do face a situation where all (32) CPUs on a node contend on sbi->s_es_lock
while shrinking extents on a single superblock, and
shrinking extents goes very slowly (180 sec on average!).

crash> struct ext4_sb_info 0xffff882fcb7ca800 -p

  s_es_nr_inode = 3173832,
  s_es_stats = {
    es_stats_shrunk = 70,
    es_stats_cache_hits = 35182748,
    es_stats_cache_misses = 2622931,
    es_stats_scan_time = 182642303461,
    es_stats_max_scan_time = 276290979674,

This patch should help a bit because it decreases sbi->s_es_nr_inode right
in __es_shrink() as a side effect, thus CPUs which come later to __es_shrink()
will loop for fewer cycles.

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 fs/ext4/extents.c        |  2 --
 fs/ext4/extents_status.c | 10 ++++++----
 fs/ext4/extents_status.h |  2 --
 fs/ext4/inode.c          |  2 --
 fs/ext4/ioctl.c          |  2 --
 fs/ext4/super.c          |  1 -
 6 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a8675aea44ad..ccbb952482e8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4658,7 +4658,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
 				       err ? err : allocated);
-	ext4_es_list_add(inode);
 	return err ? err : allocated;
 }
 
@@ -5248,7 +5247,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		error = ext4_fill_fiemap_extents(inode, start_blk,
 						 len_blks, fieinfo);
 	}
-	ext4_es_list_add(inode);
 	return error;
 }
 
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d632a3e43994..77f44d382aa5 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -297,7 +297,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
-void ext4_es_list_add(struct inode *inode)
+static void ext4_es_list_add(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -313,7 +313,7 @@ void ext4_es_list_add(struct inode *inode)
 	spin_unlock(&sbi->s_es_lock);
 }
 
-void ext4_es_list_del(struct inode *inode)
+static void ext4_es_list_del(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -343,7 +343,8 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_shk_nr++;
+		if (!EXT4_I(inode)->i_es_shk_nr++)
+			ext4_es_list_add(inode);
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
 					s_es_stats.es_stats_shk_cnt);
 	}
@@ -362,7 +363,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
 		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
-		EXT4_I(inode)->i_es_shk_nr--;
+		if (!--EXT4_I(inode)->i_es_shk_nr)
+			ext4_es_list_del(inode);
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
 					s_es_stats.es_stats_shk_cnt);
 	}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 0e6a33e81e5f..b0b78b95f481 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -150,7 +150,5 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
 
 extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
-extern void ext4_es_list_add(struct inode *inode);
-extern void ext4_es_list_del(struct inode *inode);
 
 #endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1853dccc88c7..e633c707b119 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -500,7 +500,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
-		ext4_es_list_add(inode);
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -1519,7 +1518,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, iblock, &es)) {
-		ext4_es_list_add(inode);
 		if (ext4_es_is_hole(&es)) {
 			retval = 0;
 			down_read(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f81c7105e046..affd0a4fb440 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -79,8 +79,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
 	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
 	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
 	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
-	ext4_es_list_del(inode1);
-	ext4_es_list_del(inode2);
 
 	isize = i_size_read(inode1);
 	i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32709698fb23..a8aae06a9336 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1109,7 +1109,6 @@ void ext4_clear_inode(struct inode *inode)
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
-	ext4_es_list_del(inode);
 	if (EXT4_I(inode)->jinode) {
 		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 					       EXT4_I(inode)->jinode);
-- 
2.15.1



More information about the Devel mailing list