[Devel] [PATCH RHEL7 COMMIT] ext4: Make cache hits/misses per-cpu counts
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Apr 13 19:22:22 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.46.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.46.5
------>
commit e92c4dde4105d1f583f5d83f5c29e08f21f00203
Author: Waiman Long <Waiman.Long at hpe.com>
Date: Fri Apr 13 19:22:22 2018 +0300
ext4: Make cache hits/misses per-cpu counts
This patch changes the es_stats_cache_hits and es_stats_cache_misses
statistics counts to percpu counters to reduce cacheline contention
issues when multiple threads are trying to update those counts
simultaneously.
With a 38-thread fio I/O test with 2 shared files (on a DAX-mounted
NVDIMM) running on a 4-socket Haswell-EX server with a 4.6-rc1 kernel,
the aggregated bandwidths before and after the patch were:
Test          W/O patch    With patch    % change
----          ---------    ----------    --------
Read-only     16499MB/s    17215MB/s     +4.3%
Read-write     4361MB/s     4794MB/s     +9.9%
Signed-off-by: Waiman Long <Waiman.Long at hpe.com>
The patch is not in mainline yet, but has already been acked:
https://lkml.org/lkml/2016/4/29/584
This patch does not improve the performance of __es_shrink() itself,
but generally improves the fastpath.
Was found while digging:
https://jira.sw.ru/browse/PSBM-83335
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Acked-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
fs/ext4/extents_status.c | 38 +++++++++++++++++++++++++++++---------
fs/ext4/extents_status.h | 4 ++--
2 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 77f44d382aa5..a3b9c480ec20 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -769,6 +769,15 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
write_unlock(&EXT4_I(inode)->i_es_lock);
}
+/*
+ * For pure statistics count, use a large batch size to make sure that
+ * it does percpu update as much as possible.
+ */
+static inline void ext4_es_stats_inc(struct percpu_counter *fbc)
+{
+ __percpu_counter_add(fbc, 1, (1 << 30));
+}
+
/*
* ext4_es_lookup_extent() looks up an extent in extent status tree.
*
@@ -823,9 +832,9 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
- stats->es_stats_cache_hits++;
+ ext4_es_stats_inc(&stats->es_stats_cache_hits);
} else {
- stats->es_stats_cache_misses++;
+ ext4_es_stats_inc(&stats->es_stats_cache_misses);
}
read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -1122,9 +1131,9 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
- seq_printf(seq, " %lu/%lu cache hits/misses\n",
- es_stats->es_stats_cache_hits,
- es_stats->es_stats_cache_misses);
+ seq_printf(seq, " %lld/%lld cache hits/misses\n",
+ percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
+ percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
if (inode_cnt)
seq_printf(seq, " %d inodes on list\n", inode_cnt);
@@ -1188,8 +1197,6 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
sbi->s_es_nr_inode = 0;
spin_lock_init(&sbi->s_es_lock);
sbi->s_es_stats.es_stats_shrunk = 0;
- sbi->s_es_stats.es_stats_cache_hits = 0;
- sbi->s_es_stats.es_stats_cache_misses = 0;
sbi->s_es_stats.es_stats_scan_time = 0;
sbi->s_es_stats.es_stats_max_scan_time = 0;
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
@@ -1199,19 +1206,30 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
if (err)
goto err1;
+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0, GFP_KERNEL);
+ if (err)
+ goto err2;
+
+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0, GFP_KERNEL);
+ if (err)
+ goto err3;
+
sbi->s_es_shrinker.scan_objects = ext4_es_scan;
sbi->s_es_shrinker.count_objects = ext4_es_count;
sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
err = register_shrinker(&sbi->s_es_shrinker);
if (err)
- goto err2;
+ goto err4;
if (sbi->s_proc)
proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
&ext4_es_seq_shrinker_info_fops, sbi);
return 0;
-
+err4:
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
+err3:
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
err2:
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
err1:
@@ -1225,6 +1243,8 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
remove_proc_entry("es_shrinker_info", sbi->s_proc);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
unregister_shrinker(&sbi->s_es_shrinker);
}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index b0b78b95f481..ddebb0e255ca 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -66,10 +66,10 @@ struct ext4_es_tree {
struct ext4_es_stats {
unsigned long es_stats_shrunk;
- unsigned long es_stats_cache_hits;
- unsigned long es_stats_cache_misses;
u64 es_stats_scan_time;
u64 es_stats_max_scan_time;
+ struct percpu_counter es_stats_cache_hits;
+ struct percpu_counter es_stats_cache_misses;
struct percpu_counter es_stats_all_cnt;
struct percpu_counter es_stats_shk_cnt;
};
More information about the Devel
mailing list