[Devel] [PATCH RHEL8 COMMIT] mm/page_alloc: use sched_clock() instead of jiffies to measure latency
Konstantin Khorenko
khorenko at virtuozzo.com
Mon May 24 20:15:12 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.32
------>
commit 824728f8e7fb02c6dd50c32fe00c833d132a61c5
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Mon May 24 20:04:14 2021 +0300
mm/page_alloc: use sched_clock() instead of jiffies to measure latency
sched_clock() (which is rdtsc() on x86) gives us more precise result
than jiffies.
Q: Why do we need greater accuracy?
A: Because if we target, say, 10000 IOPS per CPU, then 1 ms of
memory allocation latency is already too much; we need to achieve
lower allocation latency, and for that we must be able to measure it.
https://pmc.acronis.com/browse/VSTOR-19040
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
(cherry-picked from vz7 commit 99407f6d6f50 ("mm/page_alloc: use
sched_clock() instead of jiffies to measure latency"))
To_merge: 928833c25f22 ("core: Add glob_kstat, percpu kstat and account mm
stat")
and other patches.
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
mm/page_alloc.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d3222ac7e7ca..b5afa2acc15a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4556,9 +4556,11 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
{
#ifdef CONFIG_VE
unsigned long flags;
+ u64 current_clock, delta;
int ind, cpu;
- time = jiffies_to_usecs(jiffies - time) * 1000;
+ current_clock = sched_clock();
+ delta = current_clock - time;
if (!(gfp_mask & __GFP_RECLAIM))
ind = KSTAT_ALLOCSTAT_ATOMIC;
else
@@ -4569,12 +4571,12 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
local_irq_save(flags);
cpu = smp_processor_id();
- KSTAT_LAT_PCPU_ADD(&kstat_glob.alloc_lat[ind], time);
+ KSTAT_LAT_PCPU_ADD(&kstat_glob.alloc_lat[ind], delta);
if (in_task()) {
- current->alloc_lat[ind].totlat += time;
+ current->alloc_lat[ind].totlat += delta;
current->alloc_lat[ind].count++;
- update_maxlat(&current->alloc_lat[ind], time, jiffies);
+ update_maxlat(&current->alloc_lat[ind], delta, current_clock);
}
if (!page)
@@ -4594,7 +4596,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
- cycles_t start;
+ u64 start;
gfp_mask &= gfp_allowed_mask;
alloc_mask = gfp_mask;
@@ -4606,7 +4608,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
finalise_ac(gfp_mask, &ac);
- start = jiffies;
+ start = sched_clock();
/* First allocation attempt */
page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
if (likely(page))