[Devel] [PATCH rh7 1/2] mm/memcontrol: fix memory.high

Andrey Ryabinin <aryabinin@virtuozzo.com>
Fri Jul 5 17:17:48 MSK 2019


Our commit b607d3e1a953 ("ms/mm: memcontrol: fold mem_cgroup_do_charge()")
accidentally lost the piece of code that makes memory.high work. Bring it
back.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
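
For testing, a minimal userspace sketch along these lines can exercise the
restored behaviour. The cgroup mount point, the v1 "tasks" interface and the
exact 32M/64M sizes are assumptions about the test setup, not part of this
patch; adjust them to the local configuration:

/*
 * Hypothetical smoke test: put ourselves in a memcg with a 32M
 * memory.high, then fault in 64M and watch usage get reclaimed back
 * towards the high limit on each return to userland.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#define CG "/sys/fs/cgroup/memory/high-test"	/* assumed v1 mount point */

static void write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	char buf[64];
	size_t i, sz = 64 << 20;	/* 64M, double the high limit */
	char *mem;

	mkdir(CG, 0755);
	write_file(CG "/memory.high", "33554432");	/* 32M */
	snprintf(buf, sizeof(buf), "%d", getpid());
	write_file(CG "/tasks", buf);

	mem = malloc(sz);
	for (i = 0; i < sz; i += 4096)	/* fault in pages past the limit */
		mem[i] = 1;

	/*
	 * Each page fault past the limit makes try_charge() set
	 * memcg_nr_pages_over_high; mem_cgroup_handle_over_high() then
	 * reclaims on the way back to userland. Inspect
	 * memory.usage_in_bytes from another shell while we sleep.
	 */
	pause();
	return 0;
}

With the patch applied, usage_in_bytes should settle near the 32M high limit
as the faulting task reclaims its own excess on each return to userland;
without it, usage grows unchecked to the full 64M.
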
 include/linux/memcontrol.h |  2 ++
 include/linux/sched.h      |  3 ++
 include/linux/tracehook.h  |  3 ++
 mm/memcontrol.c            | 65 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aa8cef097055..265b5e350779 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -138,6 +138,8 @@ extern void mem_cgroup_note_oom_kill(struct mem_cgroup *memcg,
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
 
+void mem_cgroup_handle_over_high(void);
+
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea08dfd17448..74e34bcd1e2d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1824,6 +1824,9 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
 	unsigned int memcg_kmem_skip_account;
+
+	/* Number of pages to reclaim on returning to userland: */
+	unsigned int			memcg_nr_pages_over_high;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1e98b5530425..f4e830954674 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -47,6 +47,7 @@
 #define _LINUX_TRACEHOOK_H	1
 
 #include <linux/sched.h>
+#include <linux/memcontrol.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/task_work.h>
@@ -194,6 +195,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 	smp_mb__after_clear_bit();
 	if (unlikely(current->task_works))
 		task_work_run();
+
+	mem_cgroup_handle_over_high();
 }
 
 #endif	/* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e123323f1044..cc8cf887c205 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -55,6 +55,7 @@
 #include <linux/oom.h>
 #include <linux/virtinfo.h>
 #include <linux/migrate.h>
+#include <linux/tracehook.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -311,6 +312,7 @@ struct mem_cgroup {
 
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
+	struct work_struct high_work;
 
 	/*
 	 * the counter to account for kernel memory usage.
@@ -2996,6 +2998,44 @@ static bool kmem_reclaim_is_low(struct mem_cgroup *memcg)
 	return dcache_is_low(memcg);
 }
 
+static void reclaim_high(struct mem_cgroup *memcg,
+			 unsigned int nr_pages,
+			 gfp_t gfp_mask)
+{
+	do {
+		if (page_counter_read(&memcg->memory) <= memcg->high)
+			continue;
+
+		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 0);
+	} while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, high_work);
+	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+	unsigned int nr_pages = current->memcg_nr_pages_over_high;
+	struct mem_cgroup *memcg;
+
+	if (likely(!nr_pages))
+		return;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
+	css_put(&memcg->css);
+	current->memcg_nr_pages_over_high = 0;
+}
+
 /**
  * mem_cgroup_try_charge - try charging a memcg
  * @memcg: memcg to charge
@@ -3195,6 +3235,28 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
 done:
+	/*
+	 * If the hierarchy is above the normal consumption range, schedule
+	 * reclaim on returning to userland.  We can perform reclaim here
+	 * if __GFP_RECLAIM but let's always punt for simplicity and so that
+	 * GFP_KERNEL can consistently be used during reclaim.  @memcg is
+	 * not recorded as it most likely matches current's and won't
+	 * change in the meantime.  As high limit is checked again before
+	 * reclaim, the cost of mismatch is negligible.
+	 */
+	do {
+		if (page_counter_read(&memcg->memory) > memcg->high) {
+			/* Don't bother a random interrupted task */
+			if (in_interrupt()) {
+				schedule_work(&memcg->high_work);
+				break;
+			}
+			current->memcg_nr_pages_over_high += batch;
+			set_notify_resume(current);
+			break;
+		}
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
 	return 0;
 }
 
@@ -6445,6 +6507,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
 	memcg->move_charge_at_immigrate = 0;
+	INIT_WORK(&memcg->high_work, high_work_func);
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
@@ -6635,6 +6698,8 @@ static void mem_cgroup_css_free(struct cgroup *cont)
 	mem_cgroup_reparent_charges(memcg);
 
 	vmpressure_cleanup(&memcg->vmpressure);
+	cancel_work_sync(&memcg->high_work);
+
 	memcg_destroy_kmem(memcg);
 	memcg_free_shrinker_maps(memcg);
 	__mem_cgroup_free(memcg);
-- 
2.21.0


