[Devel] [PATCH RHEL7 COMMIT] mm/memcontrol: fix memory.high
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Jul 8 13:28:40 MSK 2019
The commit is pushed to "vz7.96.12" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.12
------>
commit 222af0e107e39792184bb516e793a583ab386fe7
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Mon Jul 8 13:28:38 2019 +0300
mm/memcontrol: fix memory.high
Our commit b607d3e1a953 ("ms/mm: memcontrol: fold mem_cgroup_do_charge()")
accidentally dropped the piece of code that makes memory.high work.
Bring it back.
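In outline, the restored logic works like this (condensed from the
hunks below, not a literal copy):

	/* try_charge(): after a successful charge, walk up the hierarchy. */
	do {
		if (page_counter_read(&memcg->memory) > memcg->high) {
			if (in_interrupt())
				/* don't disturb a random interrupted task */
				schedule_work(&memcg->high_work);
			else {
				current->memcg_nr_pages_over_high += batch;
				set_notify_resume(current);
			}
			break;
		}
	} while ((memcg = parent_mem_cgroup(memcg)));

On the way back to userland, tracehook_notify_resume() calls
mem_cgroup_handle_over_high(), which reclaims the recorded number of
pages with GFP_KERNEL and resets memcg_nr_pages_over_high.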
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/memcontrol.h | 2 ++
include/linux/sched.h | 3 +++
include/linux/tracehook.h | 3 +++
mm/memcontrol.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 73 insertions(+)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aa8cef097055..265b5e350779 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -138,6 +138,8 @@ extern void mem_cgroup_note_oom_kill(struct mem_cgroup *memcg,
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
+void mem_cgroup_handle_over_high(void);
+
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea08dfd17448..74e34bcd1e2d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1824,6 +1824,9 @@ struct task_struct {
#endif
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
unsigned int memcg_kmem_skip_account;
+
+ /* Number of pages to reclaim on returning to userland: */
+ unsigned int memcg_nr_pages_over_high;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1e98b5530425..f4e830954674 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -47,6 +47,7 @@
#define _LINUX_TRACEHOOK_H 1
#include <linux/sched.h>
+#include <linux/memcontrol.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/task_work.h>
@@ -194,6 +195,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
smp_mb__after_clear_bit();
if (unlikely(current->task_works))
task_work_run();
+
+ mem_cgroup_handle_over_high();
}
#endif /* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d11aa24ed207..0e6911b2dcc3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -55,6 +55,7 @@
#include <linux/oom.h>
#include <linux/virtinfo.h>
#include <linux/migrate.h>
+#include <linux/tracehook.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -311,6 +312,7 @@ struct mem_cgroup {
/* vmpressure notifications */
struct vmpressure vmpressure;
+ struct work_struct high_work;
/*
* the counter to account for kernel memory usage.
@@ -2996,6 +2998,44 @@ static bool kmem_reclaim_is_low(struct mem_cgroup *memcg)
return dcache_is_low(memcg);
}
+static void reclaim_high(struct mem_cgroup *memcg,
+ unsigned int nr_pages,
+ gfp_t gfp_mask)
+{
+ do {
+ if (page_counter_read(&memcg->memory) <= memcg->high)
+ continue;
+
+ try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 0);
+ } while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+ struct mem_cgroup *memcg;
+
+ memcg = container_of(work, struct mem_cgroup, high_work);
+ reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+ unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ struct mem_cgroup *memcg;
+
+ if (likely(!nr_pages))
+ return;
+
+ memcg = get_mem_cgroup_from_mm(current->mm);
+ reclaim_high(memcg, nr_pages, GFP_KERNEL);
+ css_put(&memcg->css);
+ current->memcg_nr_pages_over_high = 0;
+}
+
/**
* mem_cgroup_try_charge - try charging a memcg
* @memcg: memcg to charge
@@ -3195,6 +3235,28 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
done:
+ /*
+ * If the hierarchy is above the normal consumption range, schedule
+ * reclaim on returning to userland. We can perform reclaim here
+ * if __GFP_RECLAIM but let's always punt for simplicity and so that
+ * GFP_KERNEL can consistently be used during reclaim. @memcg is
+ * not recorded as it most likely matches current's and won't
+ * change in the meantime. As high limit is checked again before
+ * reclaim, the cost of mismatch is negligible.
+ */
+ do {
+ if (page_counter_read(&memcg->memory) > memcg->high) {
+ /* Don't bother a random interrupted task */
+ if (in_interrupt()) {
+ schedule_work(&memcg->high_work);
+ break;
+ }
+ current->memcg_nr_pages_over_high += batch;
+ set_notify_resume(current);
+ break;
+ }
+ } while ((memcg = parent_mem_cgroup(memcg)));
+
return 0;
}
@@ -6435,6 +6497,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
memcg->last_scanned_node = MAX_NUMNODES;
INIT_LIST_HEAD(&memcg->oom_notify);
memcg->move_charge_at_immigrate = 0;
+ INIT_WORK(&memcg->high_work, high_work_func);
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
vmpressure_init(&memcg->vmpressure);
@@ -6625,6 +6688,8 @@ static void mem_cgroup_css_free(struct cgroup *cont)
mem_cgroup_reparent_charges(memcg);
vmpressure_cleanup(&memcg->vmpressure);
+ cancel_work_sync(&memcg->high_work);
+
memcg_destroy_kmem(memcg);
memcg_free_shrinker_maps(memcg);
__mem_cgroup_free(memcg);
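
For reference, one way to exercise this path from userspace. This is a
minimal test sketch, not part of the patch: it assumes a cgroup-v1
memory controller mounted at /sys/fs/cgroup/memory and a kernel that
exposes the memory.high file there; the group name "high_test" is
arbitrary.

	/*
	 * high_test.c - allocate past memory.high, then check usage.
	 * Hypothetical sketch; cgroup paths and file names are assumptions.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/stat.h>
	#include <sys/types.h>

	#define CG "/sys/fs/cgroup/memory/high_test"

	static void write_file(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");
		if (!f) { perror(path); exit(1); }
		fputs(val, f);
		fclose(f);
	}

	int main(void)
	{
		char buf[64];
		size_t i, sz = 64 << 20;	/* allocate 64M */
		char *mem;
		FILE *f;

		mkdir(CG, 0755);
		write_file(CG "/memory.high", "33554432");	/* high = 32M */
		snprintf(buf, sizeof(buf), "%d", getpid());
		write_file(CG "/tasks", buf);

		mem = malloc(sz);
		if (!mem)
			exit(1);
		/* Touch every page so each one is actually charged. */
		for (i = 0; i < sz; i += 4096)
			mem[i] = 1;

		/*
		 * With the fix, reclaim runs on return to userland, so
		 * usage should be pulled back toward the 32M limit
		 * instead of staying near 64M.
		 */
		f = fopen(CG "/memory.usage_in_bytes", "r");
		if (f && fgets(buf, sizeof(buf), f))
			printf("usage after allocation: %s", buf);
		return 0;
	}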