[Devel] [PATCH VZ9] mm/memcontrol: Add page cache limit to cgroup-v2

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Fri Mar 22 07:58:10 MSK 2024


The interface is slightly reworked to be more v2-like:

- rename memory.cache.limit/usage_in_bytes -> memory.cache.max/current
- show "max" when the limit is uninitialized and allow writing it back
  (see the usage sketch after this list)
- replace memcg_max_mutex together with page_counter_set_max by a
  simple xchg
- set the limit first, before looping, and only then try to enforce it
  if needed; there is no more enforce-before-setting logic
- retry reclaim a couple of times if it fails to enforce the limit and
  then just give up (memory_max_write triggers oom in this case, but we
  probably do not want to trigger oom because of the cache limit)

https://virtuozzo.atlassian.net/browse/PSBM-154207
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

Feature: mm: Memory cgroup page cache limit
---
 mm/memcontrol.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9000dc00ed03..28a39b50cbe1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -8810,3 +8810,78 @@ static int __init mem_cgroup_swap_init(void)
 subsys_initcall(mem_cgroup_swap_init);
 
 #endif /* CONFIG_SWAP */
+
+static int cache_max_show(struct seq_file *m, void *v)
+{
+	return seq_puts_memcg_tunable(m,
+		READ_ONCE(mem_cgroup_from_seq(m)->cache.max));
+}
+
+static ssize_t cache_max_write(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned int nr_reclaims = MAX_RECLAIM_RETRIES;
+	unsigned long max;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &max);
+	if (err)
+		return err;
+
+	xchg(&memcg->cache.max, max);
+
+	for (;;) {
+		unsigned long nr_cache = page_counter_read(&memcg->cache);
+
+		if (nr_cache <= max)
+			break;
+
+		if (signal_pending(current))
+			break;
+
+		if (!nr_reclaims)
+			break;
+
+		if (!try_to_free_mem_cgroup_pages(memcg, nr_cache - max,
+						  GFP_KERNEL, false))
+			nr_reclaims--;
+	}
+
+	memcg_wb_domain_size_changed(memcg);
+	return nbytes;
+}
+
+static u64 cache_current_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return (u64)page_counter_read(&memcg->cache) * PAGE_SIZE;
+}
+
+static struct cftype cache_files[] = {
+	{
+		.name = "cache.max",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cache_max_show,
+		.write = cache_max_write,
+	},
+	{
+		.name = "cache.current",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cache_current_read,
+	},
+	{ }	/* terminate */
+};
+
+static int __init mem_cgroup_cache_init(void)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+
+	WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, cache_files));
+	return 0;
+}
+subsys_initcall(mem_cgroup_cache_init);
-- 
2.43.0
