[Devel] [PATCH 2/6] e4defrag2: [TP case] force defrag for very low populated clusters
Dmitry Monakhov
dmonakhov at openvz.org
Mon May 16 04:53:32 PDT 2016
If a cluster has only a small number of blocks in use, it is reasonable to
relocate those blocks regardless of the inode's quality and free the whole cluster.
https://jira.sw.ru/browse/PSBM-46563
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
misc/e4defrag2.c | 54 +++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/misc/e4defrag2.c b/misc/e4defrag2.c
index 797a342..9206c89 100644
--- a/misc/e4defrag2.c
+++ b/misc/e4defrag2.c
@@ -279,6 +279,7 @@ enum spext_flags
SP_FL_DIRLOCAL = 0x20,
SP_FL_CSUM = 0x40,
SP_FL_FMAP = 0x80,
+ SP_FL_TP_RELOC = 0x100,
};
struct rb_fhandle
@@ -383,6 +384,7 @@ struct defrag_context
unsigned cluster_size;
unsigned ief_reloc_cluster;
unsigned weight_scale;
+ unsigned tp_weight_scale;
unsigned extents_quality;
};
@@ -1098,6 +1100,7 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd,
int is_old = 0;
int is_rdonly = 0;
__u64 ief_blocks = 0;
+ __u64 tp_blocks = 0;
__u32 ino_flags = 0;
__u64 size_blk = dfx_sz2b(dfx, stat->st_size);
__u64 used_blk = dfx_sz2b(dfx, stat->st_blocks << 9);
@@ -1158,13 +1161,16 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd,
}
if (se->flags & SP_FL_IEF_RELOC)
ief_blocks += fec->fec_map[i].len;
+ if (se->flags & SP_FL_TP_RELOC)
+ tp_blocks += fec->fec_map[i].len;
+
fmap_csum_ext(fec->fec_map + i, &csum);
}
if (fest.local_ex == fec->fec_extents)
ino_flags |= SP_FL_LOCAL;
- if (ief_blocks) {
+ if (ief_blocks || tp_blocks) {
/*
* Even if some extents belong to IEF cluster, it is not a good
* idea to relocate the whole file. From other point of view,
@@ -1182,6 +1188,13 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd,
"size_blk:%lld used_blk:%lld\n",
__func__, stat->st_ino, ief_blocks,
size_blk, used_blk);
+ } else if (tp_blocks * 4 > size_blk) {
+ ino_flags |= SP_FL_IEF_RELOC | SP_FL_TP_RELOC;
+ if (debug_flag & DBG_SCAN && ief_blocks != size_blk)
+ printf("%s Force add %lu to IEF/TP set ief:%lld "
+ "size_blk:%lld used_blk:%lld\n",
+ __func__, stat->st_ino, ief_blocks,
+ size_blk, used_blk);
} else if (debug_flag & DBG_SCAN) {
printf("%s Reject %lu from IEF set ief:%lld "
"size_blk:%lld used_blk:%lld\n",
@@ -1592,6 +1605,7 @@ static void pass3_prep(struct defrag_context *dfx)
unsigned good = 0;
unsigned count = 0;
unsigned ief_ok = 0;
+ unsigned force_reloc = 0;
if (verbose)
printf("Pass3_prep: Scan and rate cached extents\n");
@@ -1610,18 +1624,29 @@ static void pass3_prep(struct defrag_context *dfx)
print_spex("\t\t\t", ex);
if (prev_cluster != cluster) {
- ief_ok = 0;
+ force_reloc = ief_ok = 0;
+ /* Does the cluster have enough RO (good) data blocks? */
if (dfx->cluster_size >= used * dfx->weight_scale &&
- good * 1000 >= count * dfx->extents_quality &&
- cluster_node) {
+ good * 1000 >= count * dfx->extents_quality)
+ ief_ok = 1;
+
+ /* Thin provision corner case: if a cluster has a low number
+ * of data blocks, it should be relocated regardless of the
+ * blocks' quality in order to improve space efficiency */
+ if (dfx->cluster_size >= used * dfx->tp_weight_scale) {
+ ief_ok = 1;
+ force_reloc = 1;
+ }
+
+ if (ief_ok && cluster_node) {
while (cluster_node != node) {
struct spextent *se =
node_to_spextent(cluster_node);
- ief_ok = 1;
se->flags |= SP_FL_IEF_RELOC;
+ if (force_reloc)
+ se->flags |= SP_FL_TP_RELOC;
if (debug_flag & DBG_TREE)
print_spex("\t\t\t->IEF", se);
-
ext_to_move++;
blocks_to_move += se->count;
cluster_node =
@@ -2010,7 +2035,7 @@ static int do_iaf_defrag_one(struct defrag_context *dfx, int dirfd, const char *
}
if (st2.st_ino != stat->st_ino) {
- if (debug_flag & DBG_RT)
+ if (debug_flag & DBG_RT)
fprintf(stderr, "%s: Race while reopen\n", __func__);
goto out_fd;
}
@@ -2183,6 +2208,7 @@ static int ief_defrag_group(struct defrag_context *dfx, dgrp_t idx)
struct donor_info donor;
struct group_info * group = dfx->group[idx];
__u64 blocks;
+ int force_local = 0;
/*
* Prepare stage
* Walk inodes in block order in order to warm up the page cache
@@ -2278,6 +2304,7 @@ static int ief_defrag_group(struct defrag_context *dfx, dgrp_t idx)
donor.offset = 0;
next_cluster:
blocks = 0;
+ force_local = dfx->ief_force_local;
/* Divide inodes in to reallocation clusters */
for (rfh = group->next; rfh != NULL; rfh = rfh->next) {
assert(!(rfh->flags & SP_FL_IGNORE));
@@ -2286,6 +2313,8 @@ next_cluster:
break;
blocks += rfh->fec->fec_map[rfh->fec->fec_extents -1].lblk +
rfh->fec->fec_map[rfh->fec->fec_extents -1].len;
+ if (rfh->flags & SP_FL_TP_RELOC)
+ force_local = 0;
}
prev = rfh;
@@ -2293,7 +2322,7 @@ next_cluster:
if (!blocks)
return 0;
- ret = prepare_donor(dfx, idx, &donor, blocks, dfx->ief_force_local, 2);
+ ret = prepare_donor(dfx, idx, &donor, blocks, force_local, 2);
if (ret) {
if (debug_flag & DBG_SCAN)
fprintf(stderr, "%s group:%u Can not allocate donor"
@@ -2415,6 +2444,7 @@ static void usage(void)
fprintf(stderr, "\t-m: dump fs and memory statistics at the end\n");
fprintf(stderr, "\t-n: dry run\n");
fprintf(stderr, "\t-s: scale factor\n");
+ fprintf(stderr, "\t-S: thin provision scale factor\n");
fprintf(stderr, "\t-q: defragmentation quality factor\n");
fprintf(stderr, "\t-t: interpret inodes modified earlier than N seconds ago as RO files\n");
fprintf(stderr, "\t-T: same as '-t' but use an absolute value\n");
@@ -2432,6 +2462,7 @@ int main(int argc, char *argv[])
int cluster_size = 1 << 20;
int reloc_cluster_size = 0;
int scale = 2;
+ int tp_scale = 32; /* 1/32 ==> 3% */
int quality = 700;
dgrp_t nr_grp;
int flex_bg = 0;
@@ -2440,7 +2471,7 @@ int main(int argc, char *argv[])
add_error_table(&et_ext2_error_table);
gettimeofday(&time_start, 0);
- while ((c = getopt(argc, argv, "a:C:c:d:fF:hlmnt:s:T:vq:")) != EOF) {
+ while ((c = getopt(argc, argv, "a:C:c:d:fF:hlmnt:s:S:T:vq:")) != EOF) {
switch (c) {
case 'a':
min_frag_size = strtoul(optarg, &end, 0);
@@ -2486,6 +2517,10 @@ int main(int argc, char *argv[])
scale = strtoul(optarg, &end, 0);
break;
+ case 'S':
+ tp_scale = strtoul(optarg, &end, 0);
+ break;
+
case 'q':
quality = strtoul(optarg, &end, 0);
if (quality > 1000)
@@ -2557,6 +2592,7 @@ int main(int argc, char *argv[])
dfx.iaf_cluster_size = min_frag_size >> dfx.blocksize_bits;
dfx.weight_scale = scale;
+ dfx.tp_weight_scale = tp_scale;
dfx.extents_quality = quality;
dfx.ro_fs = dry_run;
dfx.sp_root = RB_ROOT;
--
1.7.1
More information about the Devel
mailing list