[Devel] [RH6 PATCH] [MS] ext4: collapse a single extent tree block into the inode if possible

Dmitry Monakhov dmonakhov at openvz.org
Sun May 15 23:41:28 PDT 2016


Backport of commit ecb94f5fdf4b72547fca022421a9dca1672bddd4.
This patch is required for a sane defragmentation procedure.
https://jira.sw.ru/browse/PSBM-46563
#ORIG_MSG:
[PATCH] ext4: collapse a single extent tree block into the inode if possible

If an inode has more than 4 extents, but then later some of the
extents are merged together, we can optimize the file system by moving
the extents up into the inode, and discarding the extent tree block.
This is important, because if there are a large number of inodes with
external extent tree blocks whose contents could fit in the
inode, this can significantly increase the fsck time of the file
system.

Google-Bug-Id: 6801242

Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 85c4d4e..5eba717 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1668,10 +1668,54 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 }
 
 /*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+				     struct inode *inode,
+				     struct ext4_ext_path *path)
+{
+	size_t s;
+	unsigned max_root = ext4_ext_space_root(inode, 0);
+	ext4_fsblk_t blk;
+
+	if ((path[0].p_depth != 1) ||
+	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+		return;
+
+	/*
+	 * We need to modify the block allocation bitmap and the block
+	 * group descriptor to release the extent tree block.  If we
+	 * can't get the journal credits, give up.
+	 */
+	if (ext4_journal_extend(handle, 2))
+		return;
+
+	/*
+	 * Copy the extent data up to the inode
+	 */
+	blk = ext4_idx_pblock(path[0].p_idx);
+	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+		sizeof(struct ext4_extent_idx);
+	s += sizeof(struct ext4_extent_header);
+
+	memcpy(path[0].p_hdr, path[1].p_hdr, s);
+	path[0].p_depth = 0;
+	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+	path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+	brelse(path[1].p_bh);
+	ext4_free_blocks(handle, inode, blk, 1, EXT4_FREE_BLOCKS_METADATA);
+}
+
+/*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static int ext4_ext_try_to_merge(handle_t *handle,
+				  struct inode *inode,
 				  struct ext4_ext_path *path,
 				  struct ext4_extent *ex) {
 	struct ext4_extent_header *eh;
@@ -1687,8 +1731,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
 		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
 
 	if (!merge_done)
-		ret = ext4_ext_try_to_merge_right(inode, path, ex);
+		ret =  ext4_ext_try_to_merge_right(inode, path, ex);
 
+	ext4_ext_try_to_merge_up(handle, inode, path);
 	return ret;
 }
 
@@ -1897,7 +1942,7 @@ has_space:
 merge:
 	/* try to merge extents to the right */
 	if (!(flag & EXT4_GET_BLOCKS_DIO))
-		ext4_ext_try_to_merge(inode, path, nearex);
+		ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 	/* try to merge extents to the left */
 
@@ -1906,7 +1951,7 @@ merge:
 	if (err)
 		goto cleanup;
 
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
 	if (npath) {
@@ -2878,9 +2923,9 @@ static int ext4_split_extent_at(handle_t *handle,
 			ext4_ext_mark_initialized(ex);
 
 		if (!(flags & EXT4_GET_BLOCKS_DIO))
-			ext4_ext_try_to_merge(inode, path, ex);
+			ext4_ext_try_to_merge(handle, inode, path, ex);
 
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -2894,7 +2939,7 @@ static int ext4_split_extent_at(handle_t *handle,
 	 * path may lead to new leaf, not to original leaf any more
 	 * after ext4_ext_insert_extent() returns,
 	 */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 	if (err)
 		goto fix_extent_len;
 
@@ -2912,8 +2957,8 @@ static int ext4_split_extent_at(handle_t *handle,
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
 		ex->ee_len = cpu_to_le16(ee_len);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	} else if (err)
 		goto fix_extent_len;
@@ -2924,7 +2969,7 @@ out:
 
 fix_extent_len:
 	ex->ee_len = orig_ex.ee_len;
-	ext4_ext_dirty(handle, inode, path + depth);
+	ext4_ext_dirty(handle, inode, path + path->p_depth);
 	return err;
 }
 
@@ -3193,7 +3238,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * To merge left, pass "ex2 - 1" to try_to_merge(),
 		 * since it merges towards right _only_.
 		 */
-		ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
+		ret = ext4_ext_try_to_merge(handle, inode, path, ex2 - 1);
 		if (ret) {
 			err = ext4_ext_correct_indexes(handle, inode, path);
 			if (err)
@@ -3208,15 +3253,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 * i.e. ex2 == ex and ex3 == NULL.
 	 */
 	if (!ex3) {
-		ret = ext4_ext_try_to_merge(inode, path, ex2);
+		ret = ext4_ext_try_to_merge(handle, inode, path, ex2);
 		if (ret) {
 			err = ext4_ext_correct_indexes(handle, inode, path);
 			if (err)
 				goto out;
+			depth = ext_depth(inode);
 		}
 	}
 	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
@@ -3228,7 +3274,7 @@ insert:
 		ex->ee_block = orig_ex.ee_block;
 		ex->ee_len   = orig_ex.ee_len;
 		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-		ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_dirty(handle, inode, path + path->p_depth);
 		/* zero out the first half */
 		return allocated;
 	} else if (err)
@@ -3524,10 +3570,10 @@ static int ext4_convert_unwritten_extents_dio(handle_t *handle,
 	/* note: ext4_ext_correct_indexes() isn't needed here because
 	 * borders are not changed
 	 */
-	ext4_ext_try_to_merge(inode, path, ex);
+	ext4_ext_try_to_merge(handle, inode, path, ex);
 
 	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
 	ext4_ext_show_leaf(inode, path);
 	return err;
@@ -4493,8 +4539,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
 		if (ext4_ext_is_uninitialized(&tmp_ex))
 			ext4_ext_mark_uninitialized(ex1);
 
-		ext4_ext_try_to_merge(inode2, path2, ex2);
-		ext4_ext_try_to_merge(inode1, path1, ex1);
+		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
 		*erp = ext4_ext_dirty(handle, inode2, path2 +
 				      path2->p_depth);
 		if (*erp)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 1701c9d..c9887b8 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -759,7 +759,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 				   subclass, NULL, _RET_IP_, NULL);
 }
 
-EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
+EXPORT_SYMBOL(mutex_lock_interruptible_nested);
 
 static inline int
 ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)


More information about the Devel mailing list