[Devel] [PATCH vz7 v2] ext4: replace ext4_kvmalloc() with kvmalloc()

Oleg Babin obabin at virtuozzo.com
Fri May 11 18:37:20 MSK 2018


ext4_kvmalloc() is used to allocate the table of group descriptor
blocks. It is called in GFP_NOFS context, which is not vmalloc()
compatible, so it cannot be directly replaced with kvmalloc().

In order to use kvmalloc() with GFP_KERNEL flag the memory allocation
is moved from add_new_gdb() / add_new_gdb_meta_bg() functions to the
beginning of the resize process before any journaling is started and
any FS locks are taken.

After this we do not need the ext4_kvmalloc() / ext4_kvzalloc() /
ext4_kvfree() helpers any more, so remove them.

v2: replace the incorrect 'return err' with a 'goto' to the cleanup code.

https://jira.sw.ru/browse/PSBM-83044
Signed-off-by: Oleg Babin <obabin at virtuozzo.com>
---
 fs/ext4/ext4.h    |   3 --
 fs/ext4/mballoc.c |   6 +--
 fs/ext4/resize.c  | 112 +++++++++++++++++++++++++++++++++++-------------------
 fs/ext4/super.c   |  39 +++----------------
 4 files changed, 80 insertions(+), 80 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 00903ac..ceb69e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2304,9 +2304,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
 /* super.c */
 extern int ext4_calculate_overhead(struct super_block *sb);
 extern void ext4_superblock_csum_set(struct super_block *sb);
-extern void *ext4_kvmalloc(size_t size, gfp_t flags);
-extern void *ext4_kvzalloc(size_t size, gfp_t flags);
-extern void ext4_kvfree(void *ptr);
 extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b3ddecb..d9778e4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
 	if (sbi->s_group_info) {
 		memcpy(new_groupinfo, sbi->s_group_info,
 		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
-		ext4_kvfree(sbi->s_group_info);
+		kvfree(sbi->s_group_info);
 	}
 	sbi->s_group_info = new_groupinfo;
 	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
@@ -2495,7 +2495,7 @@ err_freebuddy:
 		kfree(sbi->s_group_info[i]);
 	iput(sbi->s_buddy_cache);
 err_freesgi:
-	ext4_kvfree(sbi->s_group_info);
+	kvfree(sbi->s_group_info);
 	return -ENOMEM;
 }
 
@@ -2710,7 +2710,7 @@ int ext4_mb_release(struct super_block *sb)
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
 			kfree(sbi->s_group_info[i]);
-		ext4_kvfree(sbi->s_group_info);
+		kvfree(sbi->s_group_info);
 	}
 	kfree(sbi->s_mb_offsets);
 	kfree(sbi->s_mb_maxs);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 218fd6f..ec2f5c3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -79,6 +79,11 @@ static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
 	return overhead;
 }
 
+static unsigned long num_desc_blocks(struct super_block *sb, ext4_group_t groups)
+{
+	return DIV_ROUND_UP(groups, EXT4_DESC_PER_BLOCK(sb));
+}
+
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
 
@@ -738,6 +743,49 @@ static int verify_reserved_gdb(struct super_block *sb,
 }
 
 /*
+ * Allocate the top-level s_group_desc array for the specified number
+ * of groups. As the memory is allocated before the journaling is started
+ * we can safely use kvmalloc() with GFP_KERNEL flag here.
+ */
+static int ext4_alloc_group_desc_bh_array(struct super_block *sb,
+					  ext4_group_t n_groups_count)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned long o_gdb_count = sbi->s_gdb_count;
+	unsigned long n_gdb_count = num_desc_blocks(sb, n_groups_count);
+	struct buffer_head **o_group_desc, **n_group_desc;
+
+	if (n_gdb_count <= o_gdb_count)
+		return 0;
+
+	o_group_desc = sbi->s_group_desc;
+	n_group_desc = kvmalloc(n_gdb_count * sizeof(*n_group_desc),
+				GFP_KERNEL);
+	if (!n_group_desc) {
+		ext4_warning(sb, "not enough memory for %lu groups", n_gdb_count);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Keep the existing buffer heads and zero the new tail. The tail is
+	 * addressed in elements: pointer arithmetic already scales by
+	 * sizeof(*n_group_desc), so the offset must not be multiplied again.
+	 */
+	memcpy(n_group_desc, o_group_desc, o_gdb_count * sizeof(*n_group_desc));
+	memset(n_group_desc + o_gdb_count, 0,
+	       (n_gdb_count - o_gdb_count) * sizeof(*n_group_desc));
+	sbi->s_group_desc = n_group_desc;
+
+	/*
+	 * s_gdb_count stays unchanged here: reserve_backup_gdb() may still use
+	 * the old value; add_new_gdb() / add_new_gdb_meta_bg() bump it later.
+	 */
+	kvfree(o_group_desc);
+
+	return 0;
+}
+
+/*
  * Called when we need to bring a reserved group descriptor table block into
  * use from the resize inode.  The primary copy of the new GDT block currently
  * is an indirect block (under the double indirect block in the resize inode).
@@ -757,7 +805,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
 	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
-	struct buffer_head **o_group_desc, **n_group_desc;
 	struct buffer_head *dind;
 	struct buffer_head *gdb_bh;
 	int gdbackups;
@@ -815,16 +862,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (unlikely(err))
 		goto exit_dind;
 
-	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
-				     sizeof(struct buffer_head *),
-				     GFP_NOFS);
-	if (!n_group_desc) {
-		err = -ENOMEM;
-		ext4_warning(sb, "not enough memory for %lu groups",
-			     gdb_num + 1);
-		goto exit_inode;
-	}
-
 	/*
 	 * Finally, we have all of the possible failures behind us...
 	 *
@@ -850,13 +887,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	}
 	brelse(dind);
 
-	o_group_desc = EXT4_SB(sb)->s_group_desc;
-	memcpy(n_group_desc, o_group_desc,
-	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
-	n_group_desc[gdb_num] = gdb_bh;
-	EXT4_SB(sb)->s_group_desc = n_group_desc;
+	/*
+	 * The s_group_desc array was reallocated at the beginning of the
+	 * resize process but the s_gdb_count field had not been updated
+	 * then because we use its old value in the reserve_backup_gdb().
+	 */
+	BUG_ON(gdb_num != EXT4_SB(sb)->s_gdb_count);
+	EXT4_SB(sb)->s_group_desc[gdb_num] = gdb_bh;
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 	err = ext4_handle_dirty_super(handle, sb);
@@ -866,7 +904,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	return err;
 
 exit_inode:
-	ext4_kvfree(n_group_desc);
 	brelse(iloc.bh);
 exit_dind:
 	brelse(dind);
@@ -884,7 +921,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 			       handle_t *handle, ext4_group_t group) {
 	ext4_fsblk_t gdblock;
 	struct buffer_head *gdb_bh;
-	struct buffer_head **o_group_desc, **n_group_desc;
 	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
 	int err;
 
@@ -893,23 +929,16 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	gdb_bh = sb_bread(sb, gdblock);
 	if (!gdb_bh)
 		return -EIO;
-	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
-				     sizeof(struct buffer_head *),
-				     GFP_NOFS);
-	if (!n_group_desc) {
-		err = -ENOMEM;
-		ext4_warning(sb, "not enough memory for %lu groups",
-			     gdb_num + 1);
-		return err;
-	}
 
-	o_group_desc = EXT4_SB(sb)->s_group_desc;
-	memcpy(n_group_desc, o_group_desc,
-	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
-	n_group_desc[gdb_num] = gdb_bh;
-	EXT4_SB(sb)->s_group_desc = n_group_desc;
+	/*
+	 * The s_group_desc array was reallocated at the beginning of the
+	 * resize process but the s_gdb_count field had not been updated
+	 * then because we use its old value in the reserve_backup_gdb().
+	 */
+	BUG_ON(gdb_num != EXT4_SB(sb)->s_gdb_count);
+	EXT4_SB(sb)->s_group_desc[gdb_num] = gdb_bh;
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree(o_group_desc);
+
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
@@ -1623,6 +1652,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	if (err)
 		goto out;
 
+	err = ext4_alloc_group_desc_bh_array(sb, input->group + 1);
+	if (err)
+		goto out;
+
 	err = ext4_alloc_flex_bg_array(sb, input->group + 1);
 	if (err)
 		goto out;
@@ -1772,11 +1805,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 } /* ext4_group_extend */
 
 
-static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
-{
-	return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
-}
-
 /*
  * Release the resize inode and drop the resize_inode feature if there
  * are no more reserved gdt blocks, and then convert the file system
@@ -1973,9 +2001,13 @@ retry:
 	if (ext4_blocks_count(es) == n_blocks_count)
 		goto out;
 
+	err = ext4_alloc_group_desc_bh_array(sb, n_group + 1);
+	if (err)
+		goto out;
+
 	err = ext4_alloc_flex_bg_array(sb, n_group + 1);
 	if (err)
-		return err;
+		goto out;
 
 	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
 	if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f644bf0..3fbcf86 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -157,35 +157,6 @@ void ext4_superblock_csum_set(struct super_block *sb)
 	es->s_checksum = ext4_superblock_csum(sb, es);
 }
 
-void *ext4_kvmalloc(size_t size, gfp_t flags)
-{
-	void *ret;
-
-	ret = kmalloc(size, flags);
-	if (!ret)
-		ret = __vmalloc(size, flags, PAGE_KERNEL);
-	return ret;
-}
-
-void *ext4_kvzalloc(size_t size, gfp_t flags)
-{
-	void *ret;
-
-	ret = kzalloc(size, flags);
-	if (!ret)
-		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
-	return ret;
-}
-
-void ext4_kvfree(void *ptr)
-{
-	if (is_vmalloc_addr(ptr))
-		vfree(ptr);
-	else
-		kfree(ptr);
-
-}
-
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
 {
@@ -947,8 +918,8 @@ static void ext4_put_super(struct super_block *sb)
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
-	ext4_kvfree(sbi->s_group_desc);
-	ext4_kvfree(sbi->s_flex_groups);
+	kvfree(sbi->s_group_desc);
+	kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2191,7 +2162,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
 		memcpy(new_groups, sbi->s_flex_groups,
 		       (sbi->s_flex_groups_allocated *
 			sizeof(struct flex_groups)));
-		ext4_kvfree(sbi->s_flex_groups);
+		kvfree(sbi->s_flex_groups);
 	}
 	sbi->s_flex_groups = new_groups;
 	sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
@@ -4674,7 +4645,7 @@ failed_mount7:
 failed_mount6:
 	ext4_mb_release(sb);
 	if (sbi->s_flex_groups)
-		ext4_kvfree(sbi->s_flex_groups);
+		kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -4706,7 +4677,7 @@ failed_mount3:
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
-	ext4_kvfree(sbi->s_group_desc);
+	kvfree(sbi->s_group_desc);
 failed_mount:
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
-- 
1.8.3.1



More information about the Devel mailing list