[Devel] [PATCH RHEL7 COMMIT] ext4: update defragmentation codebase
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Jun 22 03:39:27 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.15
------>
commit 62750cddf994cc4942a22f1bcb85f1d4fc7ec66b
Author: Dmitry Monakhov <dmonakhov at openvz.org>
Date: Mon Jun 22 14:39:27 2015 +0400
ext4: update defragmentation codebase
Currently defragmentation code is fataly broken PCLIN-31215
In fact defrag engine is isolated from other fs-stuff, so it
is reasonable to backport it from current mainstream kernel:
commit fcf6b1b729bcd23f2b49a84fb33ffbb44712ee6a
Author: Dmitry Monakhov <dmonakhov at openvz.org>
Date: Sat Aug 30 23:52:19 2014 -0400
Starting from that point one can use directory compacting feature:
https://github.com/dmonakhov/e2fsprogs/blob/e4defrag2/misc/e4defrag2.c
Test cases: ext4/302 ext4/303 ext4/304 generic/323 ext4/307 ext4/308 generic/018
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
fs/ext4/ext4.h | 6 +-
fs/ext4/extents.c | 224 +++++++++-
fs/ext4/move_extent.c | 1106 +++++++------------------------------------------
3 files changed, 359 insertions(+), 977 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e53ceed..699dd11 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2677,6 +2677,11 @@ extern int ext4_find_delalloc_range(struct inode *inode,
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
+extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
+ struct inode *inode2, ext4_lblk_t lblk1,
+ ext4_lblk_t lblk2, ext4_lblk_t count,
+ int mark_unwritten,int *err);
/* move_extent.c */
@@ -2687,7 +2692,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
-
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b2cebab..a2fc301 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -290,6 +290,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
return size;
}
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path **ppath, ext4_lblk_t lblk,
+ int nofail)
+{
+ struct ext4_ext_path *path = *ppath;
+ int unwritten = ext4_ext_is_uninitialized(path[path->p_depth].p_ext);
+
+ return ext4_split_extent_at(handle, inode, path, lblk, unwritten ?
+ EXT4_EXT_MARK_UNINIT1|EXT4_EXT_MARK_UNINIT2 : 0,
+ EXT4_GET_BLOCKS_PRE_IO |
+ (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+}
+
/*
* Calculate the number of metadata blocks needed
* to allocate @blocks
@@ -1454,7 +1468,7 @@ found_extent:
* allocated block. Thus, index entries have to be consistent
* with leaves.
*/
-static ext4_lblk_t
+ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
int depth;
@@ -2721,22 +2735,14 @@ again:
*/
if (end >= ee_block &&
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
- int split_flag = 0;
-
- if (ext4_ext_is_uninitialized(ex))
- split_flag = EXT4_EXT_MARK_UNINIT1 |
- EXT4_EXT_MARK_UNINIT2;
-
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
- err = ext4_split_extent_at(handle, inode, path,
- end + 1, split_flag,
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_METADATA_NOFAIL);
+ err = ext4_force_split_extent_at(handle, inode, &path,
+ end + 1, 1);
if (err < 0)
goto out;
@@ -4739,3 +4745,199 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return error;
}
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ *
+ * @inode1: First inode
+ * @inode2: Second inode
+ * @lblk1: Start block for first inode
+ * @lblk2: Start block for second inode
+ * @count: Number of blocks to swap
+ * @mark_unwritten: Mark second inode's extents as unwritten after swap
+ * @erp: Pointer to save error value
+ *
+ * This helper routine does exactly what is promise "swap extents". All other
+ * stuff such as page-cache locking consistency, bh mapping consistency or
+ * extent's data copying must be performed by caller.
+ * Locking:
+ * i_mutex is held for both inodes
+ * i_data_sem is locked for write for both inodes
+ * Assumptions:
+ * All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+ struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+ ext4_lblk_t count, int unwritten, int *erp)
+{
+ struct ext4_ext_path *path1 = NULL;
+ struct ext4_ext_path *path2 = NULL;
+ int replaced_count = 0;
+
+ BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+ BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+ BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+ BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+
+ *erp = ext4_es_remove_extent(inode1, lblk1, count);
+ if (unlikely(*erp))
+ return 0;
+ *erp = ext4_es_remove_extent(inode2, lblk2, count);
+ if (unlikely(*erp))
+ return 0;
+
+ while (count) {
+ struct ext4_extent *ex1, *ex2, tmp_ex;
+ ext4_lblk_t e1_blk, e2_blk;
+ int e1_len, e2_len, len;
+ int split = 0;
+
+ path1 = ext4_ext_find_extent(inode1, lblk1, NULL);
+ if (unlikely(IS_ERR(path1))) {
+ *erp = PTR_ERR(path1);
+ path1 = NULL;
+ finish:
+ count = 0;
+ goto repeat;
+ }
+ path2 = ext4_ext_find_extent(inode2, lblk2, NULL);
+ if (unlikely(IS_ERR(path2))) {
+ *erp = PTR_ERR(path2);
+ path2 = NULL;
+ goto finish;
+ }
+ ex1 = path1[path1->p_depth].p_ext;
+ ex2 = path2[path2->p_depth].p_ext;
+ /* Do we have somthing to swap ? */
+ if (unlikely(!ex2 || !ex1))
+ goto finish;
+
+ e1_blk = le32_to_cpu(ex1->ee_block);
+ e2_blk = le32_to_cpu(ex2->ee_block);
+ e1_len = ext4_ext_get_actual_len(ex1);
+ e2_len = ext4_ext_get_actual_len(ex2);
+
+ /* Hole handling */
+ if (!in_range(lblk1, e1_blk, e1_len) ||
+ !in_range(lblk2, e2_blk, e2_len)) {
+ ext4_lblk_t next1, next2;
+
+ /* if hole after extent, then go to next extent */
+ next1 = ext4_ext_next_allocated_block(path1);
+ next2 = ext4_ext_next_allocated_block(path2);
+ /* If hole before extent, then shift to that extent */
+ if (e1_blk > lblk1)
+ next1 = e1_blk;
+ if (e2_blk > lblk2)
+ next2 = e1_blk;
+ /* Do we have something to swap */
+ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+ goto finish;
+ /* Move to the rightest boundary */
+ len = next1 - lblk1;
+ if (len < next2 - lblk2)
+ len = next2 - lblk2;
+ if (len > count)
+ len = count;
+ lblk1 += len;
+ lblk2 += len;
+ count -= len;
+ goto repeat;
+ }
+
+ /* Prepare left boundary */
+ if (e1_blk < lblk1) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode1,
+ &path1, lblk1, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ if (e2_blk < lblk2) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode2,
+ &path2, lblk2, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ /* ext4_split_extent_at() may result in leaf extent split,
+ * path must to be revalidated. */
+ if (split)
+ goto repeat;
+
+ /* Prepare right boundary */
+ len = count;
+ if (len > e1_blk + e1_len - lblk1)
+ len = e1_blk + e1_len - lblk1;
+ if (len > e2_blk + e2_len - lblk2)
+ len = e2_blk + e2_len - lblk2;
+
+ if (len != e1_len) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode1,
+ &path1, lblk1 + len, 0);
+ if (unlikely(*erp))
+ goto finish;
+ }
+ if (len != e2_len) {
+ split = 1;
+ *erp = ext4_force_split_extent_at(handle, inode2,
+ &path2, lblk2 + len, 0);
+ if (*erp)
+ goto finish;
+ }
+ /* ext4_split_extent_at() may result in leaf extent split,
+ * path must to be revalidated. */
+ if (split)
+ goto repeat;
+
+ BUG_ON(e2_len != e1_len);
+ *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+ *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+
+ /* Both extents are fully inside boundaries. Swap it now */
+ tmp_ex = *ex1;
+ ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+ ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+ ex1->ee_len = cpu_to_le16(e2_len);
+ ex2->ee_len = cpu_to_le16(e1_len);
+ if (unwritten)
+ ext4_ext_mark_uninitialized(ex2);
+ if (ext4_ext_is_uninitialized(&tmp_ex))
+ ext4_ext_mark_uninitialized(ex1);
+
+ ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+ ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+ *erp = ext4_ext_dirty(handle, inode2, path2 +
+ path2->p_depth);
+ if (unlikely(*erp))
+ goto finish;
+ *erp = ext4_ext_dirty(handle, inode1, path1 +
+ path1->p_depth);
+ /*
+ * Looks scarry ah..? second inode already points to new blocks,
+ * and it was successfully dirtied. But luckily error may happen
+ * only due to journal error, so full transaction will be
+ * aborted anyway.
+ */
+ if (unlikely(*erp))
+ goto finish;
+ lblk1 += len;
+ lblk2 += len;
+ replaced_count += len;
+ count -= len;
+
+ repeat:
+ ext4_ext_drop_refs(path1);
+ kfree(path1);
+ ext4_ext_drop_refs(path2);
+ kfree(path2);
+ path1 = path2 = NULL;
+ }
+ return replaced_count;
+}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 986a838..a3d0191 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -27,120 +27,26 @@
* @lblock: logical block number to find an extent path
* @path: pointer to an extent path pointer (for output)
*
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
+ * ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
*/
static inline int
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
- struct ext4_ext_path **orig_path)
+ struct ext4_ext_path **ppath)
{
- int ret = 0;
struct ext4_ext_path *path;
- path = ext4_ext_find_extent(inode, lblock, *orig_path);
+ path = ext4_ext_find_extent(inode, lblock, *ppath);
if (IS_ERR(path))
- ret = PTR_ERR(path);
- else if (path[ext_depth(inode)].p_ext == NULL)
- ret = -ENODATA;
- else
- *orig_path = path;
-
- return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src: an extent for getting initialize status
- * @dest: an extent to be set the status
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
- if (ext4_ext_is_uninitialized(src))
- ext4_ext_mark_uninitialized(dest);
- else
- dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode: inode which is searched
- * @path: this will obtain data for the next extent
- * @extent: pointer to the next extent we have just gotten
- *
- * Search the next extent in the array of ext4_ext_path structure (@path)
- * and set it to ext4_extent structure (@extent). In addition, the member of
- * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-static int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
- struct ext4_extent **extent)
-{
- struct ext4_extent_header *eh;
- int ppos, leaf_ppos = path->p_depth;
-
- ppos = leaf_ppos;
- if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
- /* leaf block */
- *extent = ++path[ppos].p_ext;
- path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
- return 0;
- }
-
- while (--ppos >= 0) {
- if (EXT_LAST_INDEX(path[ppos].p_hdr) >
- path[ppos].p_idx) {
- int cur_ppos = ppos;
-
- /* index block */
- path[ppos].p_idx++;
- path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
- if (path[ppos+1].p_bh)
- brelse(path[ppos+1].p_bh);
- path[ppos+1].p_bh =
- sb_bread(inode->i_sb, path[ppos].p_block);
- if (!path[ppos+1].p_bh)
- return -EIO;
- path[ppos+1].p_hdr =
- ext_block_hdr(path[ppos+1].p_bh);
-
- /* Halfway index block */
- while (++cur_ppos < leaf_ppos) {
- path[cur_ppos].p_idx =
- EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
- path[cur_ppos].p_block =
- ext4_idx_pblock(path[cur_ppos].p_idx);
- if (path[cur_ppos+1].p_bh)
- brelse(path[cur_ppos+1].p_bh);
- path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
- path[cur_ppos].p_block);
- if (!path[cur_ppos+1].p_bh)
- return -EIO;
- path[cur_ppos+1].p_hdr =
- ext_block_hdr(path[cur_ppos+1].p_bh);
- }
-
- path[leaf_ppos].p_ext = *extent = NULL;
-
- eh = path[leaf_ppos].p_hdr;
- if (le16_to_cpu(eh->eh_entries) == 0)
- /* empty leaf is found */
- return -ENODATA;
-
- /* leaf block */
- path[leaf_ppos].p_ext = *extent =
- EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
- path[leaf_ppos].p_block =
- ext4_ext_pblock(path[leaf_ppos].p_ext);
- return 0;
- }
+ return PTR_ERR(path);
+ if (path[ext_depth(inode)].p_ext == NULL) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ *ppath = NULL;
+ return -ENODATA;
}
- /* We found the last extent */
- return 1;
+ *ppath = path;
+ return 0;
}
/**
@@ -178,429 +84,19 @@ ext4_double_up_write_data_sem(struct inode *orig_inode,
}
/**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
- struct ext4_extent *o_start, struct ext4_extent *o_end,
- struct ext4_extent *start_ext, struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_ext_path *orig_path = NULL;
- ext4_lblk_t eblock = 0;
- int new_flag = 0;
- int end_flag = 0;
- int err = 0;
-
- if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
- if (o_start == o_end) {
-
- /* start_ext new_ext end_ext
- * donor |---------|-----------|--------|
- * orig |------------------------------|
- */
- end_flag = 1;
- } else {
-
- /* start_ext new_ext end_ext
- * donor |---------|----------|---------|
- * orig |---------------|--------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
- }
-
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (start_ext->ee_len && new_ext->ee_len &&
- !end_ext->ee_len && o_start == o_end) {
-
- /* start_ext new_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_start->ee_len = start_ext->ee_len;
- eblock = le32_to_cpu(start_ext->ee_block);
- new_flag = 1;
-
- } else if (!start_ext->ee_len && new_ext->ee_len &&
- end_ext->ee_len && o_start == o_end) {
-
- /* new_ext end_ext
- * donor |--------------|---------------|
- * orig |------------------------------|
- */
- o_end->ee_block = end_ext->ee_block;
- o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
- /*
- * Set 0 to the extent block if new_ext was
- * the first block.
- */
- if (new_ext->ee_block)
- eblock = le32_to_cpu(new_ext->ee_block);
-
- new_flag = 1;
- } else {
- ext4_debug("ext4 move extent: Unexpected insert case\n");
- return -EIO;
- }
-
- if (new_flag) {
- err = get_ext_path(orig_inode, eblock, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, new_ext, 0))
- goto out;
- }
-
- if (end_flag) {
- err = get_ext_path(orig_inode,
- le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
- if (err)
- goto out;
-
- if (ext4_ext_insert_extent(handle, orig_inode,
- orig_path, end_ext, 0))
- goto out;
- }
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
-
- return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start: first original extent to be moved
- * @o_end: last original extent to be moved
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- * @eh: extent header of target leaf block
- * @range_to_move: used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext,
- struct ext4_extent_header *eh,
- int range_to_move)
-{
- int i = 0;
- unsigned long len;
-
- /* Move the existing extents */
- if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
- len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
- (unsigned long)(o_end + 1);
- memmove(o_end + 1 + range_to_move, o_end + 1, len);
- }
-
- /* Insert start entry */
- if (start_ext->ee_len)
- o_start[i++].ee_len = start_ext->ee_len;
-
- /* Insert new entry */
- if (new_ext->ee_len) {
- o_start[i] = *new_ext;
- ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
- }
-
- /* Insert end entry */
- if (end_ext->ee_len)
- o_start[i] = *end_ext;
-
- /* Increment the total entries counter on the extent block */
- le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @o_start: first original extent to be changed
- * @o_end: last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext: middle of new extent to be inserted
- * @end_ext: last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path,
- struct ext4_extent *o_start,
- struct ext4_extent *o_end,
- struct ext4_extent *start_ext,
- struct ext4_extent *new_ext,
- struct ext4_extent *end_ext)
-{
- struct ext4_extent_header *eh;
- unsigned long need_slots, slots_range;
- int range_to_move, depth, ret;
-
- /*
- * The extents need to be inserted
- * start_extent + new_extent + end_extent.
- */
- need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
- (new_ext->ee_len ? 1 : 0);
-
- /* The number of slots between start and end */
- slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
- / sizeof(struct ext4_extent);
-
- /* Range to move the end of extent */
- range_to_move = need_slots - slots_range;
- depth = orig_path->p_depth;
- orig_path += depth;
- eh = orig_path->p_hdr;
-
- if (depth) {
- /* Register to journal */
- ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
- if (ret)
- return ret;
- }
-
- /* Expansion */
- if (range_to_move > 0 &&
- (range_to_move > le16_to_cpu(eh->eh_max)
- - le16_to_cpu(eh->eh_entries))) {
-
- ret = mext_insert_across_blocks(handle, orig_inode, o_start,
- o_end, start_ext, new_ext, end_ext);
- if (ret < 0)
- return ret;
- } else
- mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
- end_ext, eh, range_to_move);
-
- return ext4_ext_dirty(handle, orig_inode, orig_path);
-}
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @orig_path: path indicates first extent to be changed
- * @dext: donor extent
- * @from: start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the extent
- * in the leaf block into three extents. The one is located to be inserted
- * extents, and the others are located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
- struct ext4_ext_path *orig_path, struct ext4_extent *dext,
- ext4_lblk_t *from)
-{
- struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
- struct ext4_extent new_ext, start_ext, end_ext;
- ext4_lblk_t new_ext_end;
- int oext_alen, new_ext_alen, end_ext_alen;
- int depth = ext_depth(orig_inode);
- int ret;
-
- start_ext.ee_block = end_ext.ee_block = 0;
- o_start = o_end = oext = orig_path[depth].p_ext;
- oext_alen = ext4_ext_get_actual_len(oext);
- start_ext.ee_len = end_ext.ee_len = 0;
-
- new_ext.ee_block = cpu_to_le32(*from);
- ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
- new_ext.ee_len = dext->ee_len;
- new_ext_alen = ext4_ext_get_actual_len(&new_ext);
- new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
- /*
- * Case: original extent is first
- * oext |--------|
- * new_ext |--|
- * start_ext |--|
- */
- if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
- le32_to_cpu(new_ext.ee_block) <
- le32_to_cpu(oext->ee_block) + oext_alen) {
- start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
- le32_to_cpu(oext->ee_block));
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(oext, &start_ext);
- } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
- prev_ext = oext - 1;
- /*
- * We can merge new_ext into previous extent,
- * if these are contiguous and same extent type.
- */
- if (ext4_can_extents_be_merged(orig_inode, prev_ext,
- &new_ext)) {
- o_start = prev_ext;
- start_ext.ee_len = cpu_to_le16(
- ext4_ext_get_actual_len(prev_ext) +
- new_ext_alen);
- start_ext.ee_block = oext->ee_block;
- copy_extent_status(prev_ext, &start_ext);
- new_ext.ee_len = 0;
- }
- }
-
- /*
- * Case: new_ext_end must be less than oext
- * oext |-----------|
- * new_ext |-------|
- */
- if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
- EXT4_ERROR_INODE(orig_inode,
- "new_ext_end(%u) should be less than or equal to "
- "oext->ee_block(%u) + oext_alen(%d) - 1",
- new_ext_end, le32_to_cpu(oext->ee_block),
- oext_alen);
- ret = -EIO;
- goto out;
- }
-
- /*
- * Case: new_ext is smaller than original extent
- * oext |---------------|
- * new_ext |-----------|
- * end_ext |---|
- */
- if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
- new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
- end_ext.ee_len =
- cpu_to_le16(le32_to_cpu(oext->ee_block) +
- oext_alen - 1 - new_ext_end);
- copy_extent_status(oext, &end_ext);
- end_ext_alen = ext4_ext_get_actual_len(&end_ext);
- ext4_ext_store_pblock(&end_ext,
- (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
- end_ext.ee_block =
- cpu_to_le32(le32_to_cpu(o_end->ee_block) +
- oext_alen - end_ext_alen);
- }
-
- ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
- o_end, &start_ext, &new_ext, &end_ext);
-out:
- return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext: the extent that will belong to the original inode
- * @tmp_oext: the extent that will belong to the donor inode
- * @orig_off: block offset of original inode
- * @donor_off: block offset of donor inode
- * @max_count: the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
- struct ext4_extent *tmp_oext,
- ext4_lblk_t orig_off, ext4_lblk_t donor_off,
- ext4_lblk_t max_count)
-{
- ext4_lblk_t diff, orig_diff;
- struct ext4_extent dext_old, oext_old;
-
- BUG_ON(orig_off != donor_off);
-
- /* original and donor extents have to cover the same block offset */
- if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
- le32_to_cpu(tmp_oext->ee_block) +
- ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
- return -ENODATA;
-
- if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
- le32_to_cpu(tmp_dext->ee_block) +
- ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
- return -ENODATA;
-
- dext_old = *tmp_dext;
- oext_old = *tmp_oext;
-
- /* When tmp_dext is too large, pick up the target range. */
- diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
-
- ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
- le32_add_cpu(&tmp_dext->ee_block, diff);
- le16_add_cpu(&tmp_dext->ee_len, -diff);
-
- if (max_count < ext4_ext_get_actual_len(tmp_dext))
- tmp_dext->ee_len = cpu_to_le16(max_count);
-
- orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
- ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
-
- /* Adjust extent length if donor extent is larger than orig */
- if (ext4_ext_get_actual_len(tmp_dext) >
- ext4_ext_get_actual_len(tmp_oext) - orig_diff)
- tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
- orig_diff);
-
- tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
-
- copy_extent_status(&oext_old, tmp_dext);
- copy_extent_status(&dext_old, tmp_oext);
-
- return 0;
-}
-
-/**
* mext_check_coverage - Check that all extents in range has the same type
*
* @inode: inode in question
* @from: block offset of inode
* @count: block count to be checked
- * @uninit: extents expected to be uninitialized
+ * @unwritten: extents expected to be unwritten
* @err: pointer to save error value
*
* Return 1 if all extents in range has expected type, and zero otherwise.
*/
static int
mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
- int uninit, int *err)
+ int unwritten, int *err)
{
struct ext4_ext_path *path = NULL;
struct ext4_extent *ext;
@@ -611,178 +107,32 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
if (*err)
goto out;
ext = path[ext_depth(inode)].p_ext;
- if (uninit != ext4_ext_is_uninitialized(ext))
+ if (unwritten != ext4_ext_is_uninitialized(ext))
goto out;
from += ext4_ext_get_actual_len(ext);
ext4_ext_drop_refs(path);
}
ret = 1;
out:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
return ret;
}
/**
- * mext_replace_branches - Replace original extents with new extents
- *
- * @handle: journal handle
- * @orig_inode: original inode
- * @donor_inode: donor inode
- * @from: block offset of orig_inode
- * @count: block count to be replaced
- * @err: pointer to save return value
- *
- * Replace original inode extents and donor inode extents page by page.
- * We implement this replacement in the following three steps:
- * 1. Save the block information of original and donor inodes into
- * dummy extents.
- * 2. Change the block information of original inode to point at the
- * donor inode blocks.
- * 3. Change the block information of donor inode to point at the saved
- * original inode blocks in the dummy extents.
- *
- * Return replaced block count.
- */
-static int
-mext_replace_branches(handle_t *handle, struct inode *orig_inode,
- struct inode *donor_inode, ext4_lblk_t from,
- ext4_lblk_t count, int *err)
-{
- struct ext4_ext_path *orig_path = NULL;
- struct ext4_ext_path *donor_path = NULL;
- struct ext4_extent *oext, *dext;
- struct ext4_extent tmp_dext, tmp_oext;
- ext4_lblk_t orig_off = from, donor_off = from;
- int depth;
- int replaced_count = 0;
- int dext_alen;
-
- *err = ext4_es_remove_extent(orig_inode, from, count);
- if (*err)
- goto out;
-
- *err = ext4_es_remove_extent(donor_inode, from, count);
- if (*err)
- goto out;
-
- /* Get the original extent for the block "orig_off" */
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
-
- /* Get the donor extent for the head */
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- if (unlikely(!dext))
- goto missing_donor_extent;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count);
- if (*err)
- goto out;
-
- /* Loop for the donor extents */
- while (1) {
- /* The extent for donor must be found. */
- if (unlikely(!dext)) {
- missing_donor_extent:
- EXT4_ERROR_INODE(donor_inode,
- "The extent for donor must be found");
- *err = -EIO;
- goto out;
- } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
- EXT4_ERROR_INODE(donor_inode,
- "Donor offset(%u) and the first block of donor "
- "extent(%u) should be equal",
- donor_off,
- le32_to_cpu(tmp_dext.ee_block));
- *err = -EIO;
- goto out;
- }
-
- /* Set donor extent to orig extent */
- *err = mext_leaf_block(handle, orig_inode,
- orig_path, &tmp_dext, &orig_off);
- if (*err)
- goto out;
-
- /* Set orig extent to donor extent */
- *err = mext_leaf_block(handle, donor_inode,
- donor_path, &tmp_oext, &donor_off);
- if (*err)
- goto out;
-
- dext_alen = ext4_ext_get_actual_len(&tmp_dext);
- replaced_count += dext_alen;
- donor_off += dext_alen;
- orig_off += dext_alen;
-
- BUG_ON(replaced_count > count);
- /* Already moved the expected blocks */
- if (replaced_count >= count)
- break;
-
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- *err = get_ext_path(orig_inode, orig_off, &orig_path);
- if (*err)
- goto out;
- depth = ext_depth(orig_inode);
- oext = orig_path[depth].p_ext;
- tmp_oext = *oext;
-
- if (donor_path)
- ext4_ext_drop_refs(donor_path);
- *err = get_ext_path(donor_inode, donor_off, &donor_path);
- if (*err)
- goto out;
- depth = ext_depth(donor_inode);
- dext = donor_path[depth].p_ext;
- tmp_dext = *dext;
-
- *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
- donor_off, count - replaced_count);
- if (*err)
- goto out;
- }
-
-out:
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (donor_path) {
- ext4_ext_drop_refs(donor_path);
- kfree(donor_path);
- }
-
- return replaced_count;
-}
-
-/**
* mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
- * @index: page index
+ * @index1: page index
+ * @index2: page index
* @page: result page vector
*
* Grab two locked pages for inode's by inode order
*/
static int
mext_page_double_lock(struct inode *inode1, struct inode *inode2,
- pgoff_t index, struct page *page[2])
+ pgoff_t index1, pgoff_t index2, struct page *page[2])
{
struct address_space *mapping[2];
unsigned fl = AOP_FLAG_NOFS;
@@ -792,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
mapping[0] = inode1->i_mapping;
mapping[1] = inode2->i_mapping;
} else {
+ pgoff_t tmp = index1;
+ index1 = index2;
+ index2 = tmp;
mapping[0] = inode2->i_mapping;
mapping[1] = inode1->i_mapping;
}
- page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
+ page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
if (!page[0])
return -ENOMEM;
- page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
+ page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
if (!page[1]) {
unlock_page(page[0]);
page_cache_release(page[0]);
@@ -861,8 +214,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
}
if (!buffer_mapped(bh)) {
zero_user(page, block_start, blocksize);
- if (!err)
- set_buffer_uptodate(bh);
+ set_buffer_uptodate(bh);
continue;
}
}
@@ -893,33 +245,34 @@ out:
* @o_filp: file structure of original file
* @donor_inode: donor inode
* @orig_page_offset: page index on original file
+ * @donor_page_offset: page index on donor file
* @data_offset_in_page: block index where data swapping starts
* @block_len_in_page: the number of blocks to be swapped
- * @uninit: orig extent is uninitialized or not
+ * @unwritten: orig extent is unwritten or not
* @err: pointer to save return value
*
* Save the data in original inode blocks and replace original inode extents
- * with donor inode extents by calling mext_replace_branches().
+ * with donor inode extents by calling ext4_swap_extents().
* Finally, write out the saved data in new original inode blocks. Return
* replaced block count.
*/
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
- pgoff_t orig_page_offset, int data_offset_in_page,
- int block_len_in_page, int uninit, int *err)
+ pgoff_t orig_page_offset, pgoff_t donor_page_offset,
+ int data_offset_in_page,
+ int block_len_in_page, int unwritten, int *err)
{
struct inode *orig_inode = file_inode(o_filp);
struct page *pagep[2] = {NULL, NULL};
handle_t *handle;
- ext4_lblk_t orig_blk_offset;
- long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+ ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
- unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
int err2, jblocks, retries = 0;
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ struct super_block *sb = orig_inode->i_sb;
/*
* It needs twice the amount of ordinary journal buffers because
@@ -934,13 +287,11 @@ again:
return 0;
}
- if (segment_eq(get_fs(), KERNEL_DS))
- w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
- offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+ donor_blk_offset = donor_page_offset * blocks_per_page +
+ data_offset_in_page;
/* Calculate data_size */
if ((orig_blk_offset + block_len_in_page - 1) ==
@@ -962,31 +313,31 @@ again:
replaced_size = data_size;
*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
- pagep);
+ donor_page_offset, pagep);
if (unlikely(*err < 0))
goto stop_journal;
/*
- * If orig extent was uninitialized it can become initialized
+ * If orig extent was unwritten it can become initialized
* at any time after i_data_sem was dropped, in order to
* serialize with delalloc we have recheck extent while we
* hold page's lock, if it is still the case data copy is not
* necessary, just swap data blocks between orig and donor.
*/
- if (uninit) {
+ if (unwritten) {
ext4_double_down_write_data_sem(orig_inode, donor_inode);
/* If any of extents in range became initialized we have to
* fallback to data copying */
- uninit = mext_check_coverage(orig_inode, orig_blk_offset,
- block_len_in_page, 1, err);
+ unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
+ block_len_in_page, 1, err);
if (*err)
goto drop_data_sem;
- uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
- block_len_in_page, 1, err);
+ unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
+ block_len_in_page, 1, err);
if (*err)
goto drop_data_sem;
- if (!uninit) {
+ if (!unwritten) {
ext4_double_up_write_data_sem(orig_inode, donor_inode);
goto data_copy;
}
@@ -997,9 +348,10 @@ again:
*err = -EBUSY;
goto drop_data_sem;
}
- replaced_count = mext_replace_branches(handle, orig_inode,
- donor_inode, orig_blk_offset,
- block_len_in_page, err);
+ replaced_count = ext4_swap_extents(handle, orig_inode,
+ donor_inode, orig_blk_offset,
+ donor_blk_offset,
+ block_len_in_page, 1, err);
drop_data_sem:
ext4_double_up_write_data_sem(orig_inode, donor_inode);
goto unlock_pages;
@@ -1016,10 +368,11 @@ data_copy:
*err = -EBUSY;
goto unlock_pages;
}
-
- replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
- orig_blk_offset,
- block_len_in_page, err);
+ ext4_double_down_write_data_sem(orig_inode, donor_inode);
+ replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
+ orig_blk_offset, donor_blk_offset,
+ block_len_in_page, 1, err);
+ ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (*err) {
if (replaced_count) {
block_len_in_page = replaced_count;
@@ -1049,10 +402,13 @@ unlock_pages:
page_cache_release(pagep[1]);
stop_journal:
ext4_journal_stop(handle);
+ if (*err == -ENOSPC &&
+ ext4_should_retry_alloc(sb, &retries))
+ goto again;
/* Buffer was busy because probably is pinned to journal transaction,
* force transaction commit may help to free it. */
- if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
- &retries))
+ if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
+ jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
goto again;
return replaced_count;
@@ -1063,9 +419,9 @@ repair_branches:
* Try to swap extents to it's original places
*/
ext4_double_down_write_data_sem(orig_inode, donor_inode);
- replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
- orig_blk_offset,
- block_len_in_page, &err2);
+ replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
+ orig_blk_offset, donor_blk_offset,
+ block_len_in_page, 0, &err2);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
if (replaced_count != block_len_in_page) {
EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1095,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode,
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len)
{
- ext4_lblk_t orig_blocks, donor_blocks;
+ __u64 orig_eof, donor_eof;
unsigned int blkbits = orig_inode->i_blkbits;
unsigned int blocksize = 1 << blkbits;
+ orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
+ donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
+
+
if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
ext4_debug("ext4 move extent: suid or sgid is set"
" to donor file [ino:orig %lu, donor %lu]\n",
@@ -1114,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode,
ext4_debug("ext4 move extent: The argument files should "
"not be swapfile [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
+ return -EBUSY;
}
/* Ext4 move extent supports only extent based file */
@@ -1134,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode,
}
/* Start offset should be same */
- if (orig_start != donor_start) {
+ if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
+ (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
ext4_debug("ext4 move extent: orig and donor's start "
- "offset are not same [ino:orig %lu, donor %lu]\n",
+ "offset are not alligned [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if ((orig_start >= EXT_MAX_BLOCKS) ||
+ (donor_start >= EXT_MAX_BLOCKS) ||
(*len > EXT_MAX_BLOCKS) ||
+ (donor_start + *len >= EXT_MAX_BLOCKS) ||
(orig_start + *len >= EXT_MAX_BLOCKS)) {
ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
-
- if (orig_inode->i_size > donor_inode->i_size) {
- donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
- /* TODO: eliminate this artificial restriction */
- if (orig_start >= donor_blocks) {
- ext4_debug("ext4 move extent: orig start offset "
- "[%llu] should be less than donor file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, donor_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- /* TODO: eliminate this artificial restriction */
- if (orig_start + *len > donor_blocks) {
- ext4_debug("ext4 move extent: End offset [%llu] should "
- "be less than donor file blocks [%u]."
- "So adjust length from %llu to %llu "
- "[ino:orig %lu, donor %lu]\n",
- orig_start + *len, donor_blocks,
- *len, donor_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = donor_blocks - orig_start;
- }
- } else {
- orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
- if (orig_start >= orig_blocks) {
- ext4_debug("ext4 move extent: start offset [%llu] "
- "should be less than original file blocks "
- "[%u] [ino:orig %lu, donor %lu]\n",
- orig_start, orig_blocks,
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
- if (orig_start + *len > orig_blocks) {
- ext4_debug("ext4 move extent: Adjust length "
- "from %llu to %llu. Because it should be "
- "less than original file blocks "
- "[ino:orig %lu, donor %lu]\n",
- *len, orig_blocks - orig_start,
- orig_inode->i_ino, donor_inode->i_ino);
- *len = orig_blocks - orig_start;
- }
- }
-
+ if (orig_eof < orig_start + *len - 1)
+ *len = orig_eof - orig_start;
+ if (donor_eof < donor_start + *len - 1)
+ *len = donor_eof - donor_start;
if (!*len) {
ext4_debug("ext4 move extent: len should not be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1210,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode,
*
* @o_filp: file structure of the original file
* @d_filp: file structure of the donor file
- * @orig_start: start offset in block for orig
- * @donor_start: start offset in block for donor
+ * @orig_blk: start offset in block for orig
+ * @donor_blk: start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* This function returns 0 and moved block length is set in moved_len
* if succeed, otherwise returns error value.
*
- * Note: ext4_move_extents() proceeds the following order.
- * 1:ext4_move_extents() calculates the last block number of moving extent
- * function by the start block number (orig_start) and the number of blocks
- * to be moved (len) specified as arguments.
- * If the {orig, donor}_start points a hole, the extent's start offset
- * pointed by ext_cur (current extent), holecheck_path, orig_path are set
- * after hole behind.
- * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
- * or the ext_cur exceeds the block_end which is last logical block number.
- * 3:To get the length of continues area, call mext_next_extent()
- * specified with the ext_cur (initial value is holecheck_path) re-cursive,
- * until find un-continuous extent, the start logical block number exceeds
- * the block_end or the extent points to the last extent.
- * 4:Exchange the original inode data with donor inode data
- * from orig_page_offset to seq_end_page.
- * The start indexes of data are specified as arguments.
- * That of the original inode is orig_page_offset,
- * and the donor inode is also orig_page_offset
- * (To easily handle blocksize != pagesize case, the offset for the
- * donor inode is block unit).
- * 5:Update holecheck_path and orig_path to points a next proceeding extent,
- * then returns to step 2.
- * 6:Release holecheck_path, orig_path and set the len to moved_len
- * which shows the number of moved blocks.
- * The moved_len is useful for the command to calculate the file offset
- * for starting next move extent ioctl.
- * 7:Return 0 on success, or a negative error value on failure.
*/
int
-ext4_move_extents(struct file *o_filp, struct file *d_filp,
- __u64 orig_start, __u64 donor_start, __u64 len,
- __u64 *moved_len)
+ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
+ __u64 donor_blk, __u64 len, __u64 *moved_len)
{
struct inode *orig_inode = file_inode(o_filp);
struct inode *donor_inode = file_inode(d_filp);
- struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
- struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
- ext4_lblk_t block_start = orig_start;
- ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
- ext4_lblk_t rest_blocks;
- pgoff_t orig_page_offset = 0, seq_end_page;
- int ret, depth, last_extent = 0;
+ struct ext4_ext_path *path = NULL;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
- int data_offset_in_page;
- int block_len_in_page;
- int uninit;
+ ext4_lblk_t o_end, o_start = orig_blk;
+ ext4_lblk_t d_start = donor_blk;
+ int ret;
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
@@ -1305,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
- ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
- donor_start, &len);
- if (ret)
- goto out;
-
- file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
- block_end = block_start + len - 1;
- if (file_end < block_end)
- len -= block_end - file_end;
-
- ret = get_ext_path(orig_inode, block_start, &orig_path);
- if (ret)
- goto out;
-
- /* Get path structure to check the hole */
- ret = get_ext_path(orig_inode, block_start, &holecheck_path);
+ ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
+ donor_blk, &len);
if (ret)
goto out;
+ o_end = o_start + len;
- depth = ext_depth(orig_inode);
- ext_cur = holecheck_path[depth].p_ext;
+ while (o_start < o_end) {
+ struct ext4_extent *ex;
+ ext4_lblk_t cur_blk, next_blk;
+ pgoff_t orig_page_index, donor_page_index;
+ int offset_in_page;
+ int unwritten, cur_len;
- /*
- * Get proper starting location of block replacement if block_start was
- * within the hole.
- */
- if (le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
- /*
- * The hole exists between extents or the tail of
- * original file.
- */
- last_extent = mext_next_extent(orig_inode,
- holecheck_path, &ext_cur);
- if (last_extent < 0) {
- ret = last_extent;
- goto out;
- }
- last_extent = mext_next_extent(orig_inode, orig_path,
- &ext_dummy);
- if (last_extent < 0) {
- ret = last_extent;
+ ret = get_ext_path(orig_inode, o_start, &path);
+ if (ret)
goto out;
- }
- seq_start = le32_to_cpu(ext_cur->ee_block);
- } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
- /* The hole exists at the beginning of original file. */
- seq_start = le32_to_cpu(ext_cur->ee_block);
- else
- seq_start = block_start;
-
- /* No blocks within the specified range. */
- if (le32_to_cpu(ext_cur->ee_block) > block_end) {
- ext4_debug("ext4 move extent: The specified range of file "
- "may be the hole\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* Adjust start blocks */
- add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
- ext4_ext_get_actual_len(ext_cur), block_end + 1) -
- max(le32_to_cpu(ext_cur->ee_block), block_start);
-
- while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
- seq_blocks += add_blocks;
-
- /* Adjust tail blocks */
- if (seq_start + seq_blocks - 1 > block_end)
- seq_blocks = block_end - seq_start + 1;
-
- ext_prev = ext_cur;
- last_extent = mext_next_extent(orig_inode, holecheck_path,
- &ext_cur);
- if (last_extent < 0) {
- ret = last_extent;
- break;
- }
- add_blocks = ext4_ext_get_actual_len(ext_cur);
-
- /*
- * Extend the length of contiguous block (seq_blocks)
- * if extents are contiguous.
- */
- if (ext4_can_extents_be_merged(orig_inode,
- ext_prev, ext_cur) &&
- block_end >= le32_to_cpu(ext_cur->ee_block) &&
- !last_extent)
+ ex = path[path->p_depth].p_ext;
+ next_blk = ext4_ext_next_allocated_block(path);
+ cur_blk = le32_to_cpu(ex->ee_block);
+ cur_len = ext4_ext_get_actual_len(ex);
+ /* Check hole before the start pos */
+ if (cur_blk + cur_len - 1 < o_start) {
+ if (next_blk == EXT_MAX_BLOCKS) {
+ o_start = o_end;
+ ret = -ENODATA;
+ goto out;
+ }
+ d_start += next_blk - o_start;
+ o_start = next_blk;
continue;
-
- /* Is original extent is uninitialized */
- uninit = ext4_ext_is_uninitialized(ext_prev);
-
- data_offset_in_page = seq_start % blocks_per_page;
-
- /*
- * Calculate data blocks count that should be swapped
- * at the first page.
- */
- if (data_offset_in_page + seq_blocks > blocks_per_page) {
- /* Swapped blocks are across pages */
- block_len_in_page =
- blocks_per_page - data_offset_in_page;
- } else {
- /* Swapped blocks are in a page */
- block_len_in_page = seq_blocks;
+ /* Check hole after the start pos */
+ } else if (cur_blk > o_start) {
+ /* Skip hole */
+ d_start += cur_blk - o_start;
+ o_start = cur_blk;
+ /* Extent inside requested range ?*/
+ if (cur_blk >= o_end)
+ goto out;
+ } else { /* in_range(o_start, o_blk, o_len) */
+ cur_len += cur_blk - o_start;
}
-
- orig_page_offset = seq_start >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_end_page = (seq_start + seq_blocks - 1) >>
- (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
- seq_start = le32_to_cpu(ext_cur->ee_block);
- rest_blocks = seq_blocks;
-
+ unwritten = ext4_ext_is_uninitialized(ex);
+ if (o_end - o_start < cur_len)
+ cur_len = o_end - o_start;
+
+ orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+ orig_inode->i_blkbits);
+ donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+ donor_inode->i_blkbits);
+ offset_in_page = o_start % blocks_per_page;
+ if (cur_len > blocks_per_page- offset_in_page)
+ cur_len = blocks_per_page - offset_in_page;
/*
* Up semaphore to avoid following problems:
* a. transaction deadlock among ext4_journal_start,
@@ -1428,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
* in move_extent_per_page
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
-
- while (orig_page_offset <= seq_end_page) {
-
- /* Swap original branches with new branches */
- block_len_in_page = move_extent_per_page(
- o_filp, donor_inode,
- orig_page_offset,
- data_offset_in_page,
- block_len_in_page, uninit,
- &ret);
-
- /* Count how many blocks we have exchanged */
- *moved_len += block_len_in_page;
- if (ret < 0)
- break;
- if (*moved_len > len) {
- EXT4_ERROR_INODE(orig_inode,
- "We replaced blocks too much! "
- "sum of replaced: %llu requested: %llu",
- *moved_len, len);
- ret = -EIO;
- break;
- }
-
- orig_page_offset++;
- data_offset_in_page = 0;
- rest_blocks -= block_len_in_page;
- if (rest_blocks > blocks_per_page)
- block_len_in_page = blocks_per_page;
- else
- block_len_in_page = rest_blocks;
- }
-
+ /* Swap original branches with new branches */
+ move_extent_per_page(o_filp, donor_inode,
+ orig_page_index, donor_page_index,
+ offset_in_page, cur_len,
+ unwritten, &ret);
ext4_double_down_write_data_sem(orig_inode, donor_inode);
if (ret < 0)
break;
-
- /* Decrease buffer counter */
- if (holecheck_path)
- ext4_ext_drop_refs(holecheck_path);
- ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
- if (ret)
- break;
- depth = holecheck_path->p_depth;
-
- /* Decrease buffer counter */
- if (orig_path)
- ext4_ext_drop_refs(orig_path);
- ret = get_ext_path(orig_inode, seq_start, &orig_path);
- if (ret)
- break;
-
- ext_cur = holecheck_path[depth].p_ext;
- add_blocks = ext4_ext_get_actual_len(ext_cur);
- seq_blocks = 0;
-
+ o_start += cur_len;
+ d_start += cur_len;
}
+ *moved_len = o_start - orig_blk;
+ if (*moved_len > len)
+ *moved_len = len;
+
out:
if (*moved_len) {
ext4_discard_preallocations(orig_inode);
ext4_discard_preallocations(donor_inode);
}
- if (orig_path) {
- ext4_ext_drop_refs(orig_path);
- kfree(orig_path);
- }
- if (holecheck_path) {
- ext4_ext_drop_refs(holecheck_path);
- kfree(holecheck_path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
ext4_double_up_write_data_sem(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
More information about the Devel
mailing list