[Devel] [PATCH RHEL8 COMMIT] ext4: replace ext4_kvmalloc() with kvmalloc()

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jun 7 16:37:17 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.35
------>
commit 5e0235ca2ae8966a84393eabcc62a93df97cb819
Author: Oleg Babin <obabin at virtuozzo.com>
Date:   Mon Jun 7 16:37:17 2021 +0300

    ext4: replace ext4_kvmalloc() with kvmalloc()
    
    ext4_kvmalloc() is used to allocate the table of group descriptor
    blocks. It is called in GFP_NOFS context which is not vmalloc()
    compatible so it cannot be directly replaced with kvmalloc().
    
    In order to use kvmalloc() with GFP_KERNEL flag the memory allocation
    is moved from add_new_gdb() / add_new_gdb_meta_bg() functions to the
    beginning of the resize process before any journaling is started and
    any FS locks are taken.
    
    After this we do not need ext4_kvmalloc/ext4_kvfree functions any more.
    
    https://jira.sw.ru/browse/PSBM-83044
    
    Signed-off-by: Oleg Babin <obabin at virtuozzo.com>
    Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
    
    +++
    ext4: fix out of bounds access in ext4_alloc_group_desc_bh_array()
    
    https://jira.sw.ru/browse/PSBM-87413
    
    Fixes: d695abe ("ext4: replace ext4_kvmalloc() with kvmalloc()")
    Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
    
    (cherry-picked from vz7 commit cfd1ff8794a4 ("ext4: replace ext4_kvmalloc() with
    kvmalloc()"))
    
    https://jira.sw.ru/browse/PSBM-127849
    Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
 fs/ext4/ext4.h   |  4 ++--
 fs/ext4/resize.c | 51 ++++++++++-----------------------------------------
 fs/ext4/super.c  | 29 +++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e2efa056bfb1..231f1ac0f32b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2632,10 +2632,10 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
 extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
 extern int ext4_calculate_overhead(struct super_block *sb);
 extern void ext4_superblock_csum_set(struct super_block *sb);
-extern void *ext4_kvmalloc(size_t size, gfp_t flags);
-extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
+extern int ext4_alloc_group_desc_bh_array(struct super_block *sb,
+                                         ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16]);
 extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f178af1dffe0..4819b4769aad 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -792,7 +792,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
 	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
-	struct buffer_head **o_group_desc, **n_group_desc = NULL;
 	struct buffer_head *dind = NULL;
 	struct buffer_head *gdb_bh = NULL;
 	int gdbackups;
@@ -851,16 +850,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (unlikely(err))
 		goto errout;
 
-	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
-				     sizeof(struct buffer_head *),
-				     GFP_NOFS);
-	if (!n_group_desc) {
-		err = -ENOMEM;
-		ext4_warning(sb, "not enough memory for %lu groups",
-			     gdb_num + 1);
-		goto errout;
-	}
-
 	/*
 	 * Finally, we have all of the possible failures behind us...
 	 *
@@ -888,15 +877,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	}
 	brelse(dind);
 
-	rcu_read_lock();
-	o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
-	memcpy(n_group_desc, o_group_desc,
-	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
-	rcu_read_unlock();
-	n_group_desc[gdb_num] = gdb_bh;
-	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
+	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc[gdb_num], gdb_bh);
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree_array_rcu(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 	err = ext4_handle_dirty_super(handle, sb);
@@ -904,7 +886,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		ext4_std_error(sb, err);
 	return err;
 errout:
-	kvfree(n_group_desc);
 	brelse(iloc.bh);
 	brelse(dind);
 	brelse(gdb_bh);
@@ -920,7 +901,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 			       handle_t *handle, ext4_group_t group) {
 	ext4_fsblk_t gdblock;
 	struct buffer_head *gdb_bh;
-	struct buffer_head **o_group_desc, **n_group_desc;
 	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
 	int err;
 
@@ -929,35 +909,16 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	gdb_bh = ext4_sb_bread(sb, gdblock, 0);
 	if (IS_ERR(gdb_bh))
 		return PTR_ERR(gdb_bh);
-	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
-				     sizeof(struct buffer_head *),
-				     GFP_NOFS);
-	if (!n_group_desc) {
-		brelse(gdb_bh);
-		err = -ENOMEM;
-		ext4_warning(sb, "not enough memory for %lu groups",
-			     gdb_num + 1);
-		return err;
-	}
-
-	rcu_read_lock();
-	o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
-	memcpy(n_group_desc, o_group_desc,
-	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
-	rcu_read_unlock();
-	n_group_desc[gdb_num] = gdb_bh;
 
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (err) {
-		kvfree(n_group_desc);
 		brelse(gdb_bh);
 		return err;
 	}
 
-	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
+	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc[gdb_num], gdb_bh);
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree_array_rcu(o_group_desc);
 	return err;
 }
 
@@ -1677,6 +1638,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	if (err)
 		goto out;
 
+	err = ext4_alloc_group_desc_bh_array(sb, input->group + 1);
+	if (err)
+		goto out;
+
 	err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
 	if (err)
 		goto out;
@@ -2049,6 +2014,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
 	if (err)
 		goto out;
 
+	err = ext4_alloc_group_desc_bh_array(sb, n_group + 1);
+	if (err)
+		goto out;
+
 	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
 	if (err)
 		goto out;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d03b997b77b3..fe52b83fabd8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2545,6 +2545,35 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
 	return 0;
 }
 
+/*
+ * Grow the top-level s_group_desc array so it can hold the group descriptor
+ * blocks of @ngroup groups. Runs before journaling starts and before any FS
+ * locks are taken, so kvmalloc with GFP_KERNEL is safe. Returns 0 or -ENOMEM.
+ */
+int ext4_alloc_group_desc_bh_array(struct super_block *sb, ext4_group_t ngroup)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned long num_desc = DIV_ROUND_UP(ngroup, EXT4_DESC_PER_BLOCK(sb));
+	struct buffer_head **o_group_desc, **n_group_desc;
+
+	if (num_desc <= sbi->s_gdb_count)
+		return 0;
+	n_group_desc = kvmalloc_array(num_desc, sizeof(*n_group_desc),
+				      GFP_KERNEL);
+	if (!n_group_desc) {
+		ext4_warning(sb, "not enough memory for %u groups", ngroup);
+		return -ENOMEM;
+	}
+	rcu_read_lock();
+	o_group_desc = rcu_dereference(sbi->s_group_desc);
+	memcpy(n_group_desc, o_group_desc,
+	       sbi->s_gdb_count * sizeof(*n_group_desc));
+	rcu_read_unlock();
+	rcu_assign_pointer(sbi->s_group_desc, n_group_desc);
+	ext4_kvfree_array_rcu(o_group_desc);	/* RCU readers may still use it */
+	return 0;
+}
+
 static int ext4_fill_flex_info(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);


More information about the Devel mailing list