[Devel] [PATCH 6/6] e4defrag2: fix collapse inode index tree issue
Dmitry Monakhov
dmonakhov at openvz.org
Mon May 16 04:53:36 PDT 2016
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
misc/e4defrag2.c | 68 +++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 55 insertions(+), 13 deletions(-)
diff --git a/misc/e4defrag2.c b/misc/e4defrag2.c
index 7aab2b4..d351965 100644
--- a/misc/e4defrag2.c
+++ b/misc/e4defrag2.c
@@ -242,6 +242,7 @@ struct fmap_extent_cache
{
unsigned fec_size; /* map array size */
unsigned fec_extents; /* number of valid entries */
+ struct fmap_extent *fec_xattr;
struct fmap_extent fec_map[];
};
@@ -252,6 +253,9 @@ struct fmap_extent_stat
unsigned group; /* Number of groups, counter is speculative */
unsigned local_ex; /* Number of extents from the same group as inode */
unsigned local_sz; /* Total len of local extents */
+ unsigned nr_idx; /* Number of index blocks */
+ __u64 xattr; /* xattr phys block */
+
};
/* Used space and integral inode usage stats */
@@ -750,9 +754,10 @@ static int __get_inode_fiemap(struct defrag_context *dfx, int fd,
(*fec)->fec_size = DEFAULT_FMAP_CACHE_SZ;
(*fec)->fec_extents = 0;
}
- if (fest)
+ if (fest) {
memset(fest, 0 , sizeof(*fest));
-
+ fest->nr_idx = st->st_blocks >> (blksz_log - 9);
+ }
ext_buf = fiemap_buf->fm_extents;
memset(fiemap_buf, 0, fie_buf_size);
fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
@@ -791,6 +796,12 @@ static int __get_inode_fiemap(struct defrag_context *dfx, int fd,
fest->group++;
prev_blk_grp = blk_grp;
}
+ /* We are working on a live fs, so a race is possible */
+ if (fest->nr_idx < len) {
+ ret = -1;
+ goto out;
+ }
+ fest->nr_idx -= len;
}
if ((*fec)->fec_extents && lblk == lblk_last && pblk == pblk_last) {
@@ -834,12 +845,36 @@ static int __get_inode_fiemap(struct defrag_context *dfx, int fd,
*/
} while (fiemap_buf->fm_mapped_extents == EXTENT_MAX_COUNT &&
!(ext_buf[EXTENT_MAX_COUNT-1].fe_flags & FIEMAP_EXTENT_LAST));
+
+ /* get xattr block */
+ fiemap_buf->fm_flags |= FIEMAP_FLAG_XATTR;
+ fiemap_buf->fm_start = 0;
+ memset(ext_buf, 0, ext_buf_size);
+ ret = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
+ if (ret < 0 || fiemap_buf->fm_mapped_extents == 0) {
+ if (debug_flag & DBG_FIEMAP) {
+ fprintf(stderr, "%s: Can't get xattr info for"
+ " inode:%ld ret:%d mapped:%d\n",
+ __func__, st->st_ino, ret,
+ fiemap_buf->fm_mapped_extents);
+ }
+ goto out;
+ }
+ if (!(ext_buf[0].fe_flags & FIEMAP_EXTENT_DATA_INLINE)) {
+ fest->xattr = ext_buf[i].fe_physical >> blksz_log;
+ if (fest->nr_idx)
+ ret = -1;
+
+ fest->nr_idx--;
+ }
out:
/////////////FIXME:DEBUG
- if (debug_flag & DBG_FIEMAP && fest)
- printf("%s fmap stat ino:%ld hole:%d frag:%d local_ex:%d local_sz:%d group:%d\n",
+ if ((debug_flag & DBG_FIEMAP) && fest)
+ printf("%s fmap stat ino:%ld hole:%d frag:%d local_ex:%d "
+ "local_sz:%d group:%d nr_idx:%u xattr:%lld ret:%d\n",
__func__, st->st_ino, fest->hole, fest->frag,
- fest->local_ex, fest->local_sz, fest->group);
+ fest->local_ex, fest->local_sz, fest->group, fest->nr_idx,
+ fest->xattr, ret);
free(fiemap_buf);
@@ -1134,7 +1169,6 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd,
ret = do_iaf_defrag_one(dfx, dirfd, name, stat, fec, &fest);
if (!ret)
goto out;
-
}
if (stat->st_mtime < older_than)
@@ -1916,7 +1950,7 @@ static int prepare_donor(struct defrag_context *dfx, dgrp_t group,
printf("%s grp:%u donor_fd:%d blocks:%llu frag:%u\n",
__func__, group, donor->fd, blocks, max_frag);
}
- assert(blocks);
+ assert(blocks && max_frag);
/* First try to reuse existing donor if available */
if (donor->fd != -1) {
@@ -1954,23 +1988,28 @@ static int check_iaf(struct defrag_context *dfx, struct stat64 *stat,
__u64 eof_lblk;
//// FIXME free_space_average should be tunable
__u64 free_space_average = 64;
+ __u32 meta_blocks;
int ret = 1;
if (!S_ISREG(stat->st_mode))
ret = 0;
- if (fec->fec_extents < 2)
- ret = 0;
+ if (fec->fec_extents < 2) {
+ /*
+ * Older kernels cannot collapse the tree depth to zero for flat inodes.
+ * Work around that by relocating the inode once again.
+ */
+ if (fest->nr_idx == 0)
+ ret = 0;
+ }
if (fest->hole)
ret = 0;
-
eof_lblk = fec->fec_map[fec->fec_extents -1].lblk +
fec->fec_map[fec->fec_extents -1].len;
if (eof_lblk / fest->frag > free_space_average)
ret = 0;
-
if (debug_flag & DBG_RT)
printf("%s ino:%ld frag:%d eof_blk:%lld free_space_aver:%d ret:%d\n",
__FUNCTION__, stat->st_ino, eof_lblk, fest->frag,
@@ -2055,7 +2094,7 @@ static int do_iaf_defrag_one(struct defrag_context *dfx, int dirfd, const char *
__u64 eof_lblk = fec->fec_map[fec->fec_extents -1].lblk +
fec->fec_map[fec->fec_extents -1].len;
- assert(fest->frag >= 2);
+ assert(fest->frag >= 2 || fest->nr_idx);
ret = 0;
/* Need to reopen file for RW */
@@ -2088,6 +2127,8 @@ static int do_iaf_defrag_one(struct defrag_context *dfx, int dirfd, const char *
* FIXME: This should be tunable
*/
force_local = eof_lblk < 4;
+ if (!fest->local_ex)
+ force_local = 0;
if (debug_flag & (DBG_SCAN|DBG_IAF)) {
int i;
@@ -2100,7 +2141,8 @@ static int do_iaf_defrag_one(struct defrag_context *dfx, int dirfd, const char *
fec->fec_map[i].pblk);
}
- ret = prepare_donor(dfx, ino_grp, &donor, eof_lblk, force_local, fest->frag / 2);
+ ret = prepare_donor(dfx, ino_grp, &donor, eof_lblk, force_local,
+ (fest->nr_idx + fest->frag) / 2);
if (ret) {
if (debug_flag & (DBG_SCAN|DBG_IAF))
fprintf(stderr, "%s: group:%u Can not allocate donor"
--
1.7.1
More information about the Devel
mailing list