[Devel] [PATCH vz9 v3] mm: per memory cgroup page cache limit

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Jan 20 15:39:32 MSK 2023


From: Andrey Ryabinin <ryabinin.a.a at gmail.com>

Forward port feature: mm: per memory cgroup page cache limit.

The original implementation consisted of these commits:
commit 758d52e33a67 ("configs: Enable CONFIG_PAGE_EXTENSION")
commit 741beaa93c89 ("mm: introduce page vz extension (using page_ext)")
commit d42d3c8b849d ("mm/memcg: limit page cache in memcg hack")

This port drops the page vz extensions in favor of using a memcg_data
bit to mark a page as cache. The benefit is that the implementation
and porting got more simple. If we require new flags then the newly
introduced folio can be used.

Feature exposes two files to set limit and to check usage at
/sys/fs/cgroup/memory/pagecache_limiter/memory.cache.limit_in_bytes
/sys/fs/cgroup/memory/pagecache_limiter/memory.cache.usage_in_bytes
and is enabled via /sys/fs/cgroup/memory/pagecache_limiter/tasks.

https://jira.sw.ru/browse/PSBM-144609
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Signed-off-by: Andrey Ryabinin <ryabinin.a.a at gmail.com>
---
 include/linux/memcontrol.h |  29 ++++-
 mm/filemap.c               |   3 +-
 mm/memcontrol.c            | 219 ++++++++++++++++++++++++++++++-------
 3 files changed, 207 insertions(+), 44 deletions(-)

v1->v2: addressing Pavel's comments for v1
        - fixed compilation without MEMCG
        - try to preserve author
        - fixed line alignment
        - add missed bug traps and WARN_ONs
        - fixed spelling error
v2->v3:
	- fixed author and commit message
	- added missing spaces


diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 561db06f1fd8..6ba36eb051b8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -273,6 +273,7 @@ struct mem_cgroup {
 	/* Legacy consumer-oriented counters */
 	struct page_counter kmem;		/* v1 only */
 	struct page_counter tcpmem;		/* v1 only */
+	struct page_counter cache;
 
 	/* Range enforcement for interrupt charges */
 	struct work_struct high_work;
@@ -405,8 +406,10 @@ enum page_memcg_data_flags {
 	MEMCG_DATA_OBJCGS = (1UL << 0),
 	/* page has been accounted as a non-slab kernel page */
 	MEMCG_DATA_KMEM = (1UL << 1),
+	/* page has been accounted as a cache page */
+	MEMCG_DATA_PGCACHE = (1UL << 2),
 	/* the next bit after the last actual flag */
-	__NR_MEMCG_DATA_FLAGS  = (1UL << 2),
+	__NR_MEMCG_DATA_FLAGS  = (1UL << 3),
 };
 
 #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
@@ -771,11 +774,25 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 				    gfp_t gfp)
 {
-	if (mem_cgroup_disabled())
-		return 0;
 	return __mem_cgroup_charge(folio, mm, gfp);
 }
 
+int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm,
+			    gfp_t gfp);
+
+/*
+ * folio_memcg_cache - Check if the folio has the pgcache flag set.
+ * @folio: Pointer to the folio.
+ *
+ * Checks if the folio has page cache flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
+ */
+static inline bool folio_memcg_cache(struct folio *folio)
+{
+	return folio->memcg_data & MEMCG_DATA_PGCACHE;
+}
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -1339,6 +1356,12 @@ static inline int mem_cgroup_charge(struct folio *folio,
 	return 0;
 }
 
+static inline int mem_cgroup_charge_cache(struct folio *folio,
+					 struct mm_struct *mm, gfp_t gfp)
+{
+	return 0;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index 2d63e53980e4..d568ffc0d416 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -841,7 +841,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 	mapping_set_update(&xas, mapping);
 
 	if (!huge) {
-		int error = mem_cgroup_charge(folio, NULL, gfp);
+		int error = mem_cgroup_charge_cache(folio, NULL, gfp);
+
 		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 		if (error)
 			return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6fa13539f3e5..025e38242681 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -218,6 +218,7 @@ enum res_type {
 	_OOM_TYPE,
 	_KMEM,
 	_TCP,
+	_CACHE,
 };
 
 #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
@@ -2207,6 +2208,7 @@ struct memcg_stock_pcp {
 	int nr_slab_unreclaimable_b;
 #endif
 
+	unsigned int cache_nr_pages;
 	struct work_struct work;
 	unsigned long flags;
 #define FLUSHING_CACHED_CHARGE	0
@@ -2248,7 +2250,8 @@ static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
  *
  * returns true if successful, false otherwise.
  */
-static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  bool cache)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
@@ -2260,9 +2263,16 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
-		stock->nr_pages -= nr_pages;
-		ret = true;
+	if (memcg == stock->cached) {
+		if (cache && stock->cache_nr_pages >= nr_pages) {
+			stock->cache_nr_pages -= nr_pages;
+			ret = true;
+		}
+
+		if (!cache && stock->nr_pages >= nr_pages) {
+			stock->nr_pages -= nr_pages;
+			ret = true;
+		}
 	}
 
 	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
@@ -2276,15 +2286,20 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 static void drain_stock(struct memcg_stock_pcp *stock)
 {
 	struct mem_cgroup *old = stock->cached;
+	unsigned long nr_pages = stock->nr_pages + stock->cache_nr_pages;
 
 	if (!old)
 		return;
 
-	if (stock->nr_pages) {
-		page_counter_uncharge(&old->memory, stock->nr_pages);
+	if (stock->cache_nr_pages)
+		page_counter_uncharge(&old->cache, stock->cache_nr_pages);
+
+	if (nr_pages) {
+		page_counter_uncharge(&old->memory, nr_pages);
 		if (do_memsw_account())
-			page_counter_uncharge(&old->memsw, stock->nr_pages);
+			page_counter_uncharge(&old->memsw, nr_pages);
 		stock->nr_pages = 0;
+		stock->cache_nr_pages = 0;
 	}
 
 	css_put(&old->css);
@@ -2318,9 +2333,11 @@ static void drain_local_stock(struct work_struct *dummy)
  * Cache charges(val) to local per_cpu area.
  * This will be consumed by consume_stock() function, later.
  */
-static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  bool cache)
 {
 	struct memcg_stock_pcp *stock;
+	unsigned long stock_nr_pages;
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
@@ -2328,18 +2345,23 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		css_get(&memcg->css);
 		stock->cached = memcg;
 	}
-	stock->nr_pages += nr_pages;
+	if (!cache)
+		stock->nr_pages += nr_pages;
+	else
+		stock->cache_nr_pages += nr_pages;
 
-	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+	stock_nr_pages = stock->nr_pages + stock->cache_nr_pages;
+	if (stock_nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
 }
 
-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			bool cache)
 {
 	unsigned long flags;
 
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
-	__refill_stock(memcg, nr_pages);
+	__refill_stock(memcg, nr_pages, cache);
 	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 }
 
@@ -2366,10 +2388,12 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *memcg;
 		bool flush = false;
+		unsigned long nr_pages = stock->nr_pages +
+					 stock->cache_nr_pages;
 
 		rcu_read_lock();
 		memcg = stock->cached;
-		if (memcg && stock->nr_pages &&
+		if (memcg && nr_pages &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
 		else if (obj_stock_flush_required(stock, root_memcg))
@@ -2406,17 +2430,27 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 
 	do {
 		unsigned long pflags;
+		long cache_overused;
 
-		if (page_counter_read(&memcg->memory) <=
-		    READ_ONCE(memcg->memory.high))
-			continue;
+		if (page_counter_read(&memcg->memory) >
+		    READ_ONCE(memcg->memory.high)) {
+			memcg_memory_event(memcg, MEMCG_HIGH);
+
+			psi_memstall_enter(&pflags);
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
+					nr_pages, gfp_mask, true);
+			psi_memstall_leave(&pflags);
+		}
 
-		memcg_memory_event(memcg, MEMCG_HIGH);
+		cache_overused = page_counter_read(&memcg->cache) -
+				 memcg->cache.max;
 
-		psi_memstall_enter(&pflags);
-		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
-							     gfp_mask, true);
-		psi_memstall_leave(&pflags);
+		if (cache_overused > 0) {
+			psi_memstall_enter(&pflags);
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
+					cache_overused, gfp_mask, false);
+			psi_memstall_leave(&pflags);
+		}
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
 
@@ -2652,7 +2686,7 @@ void mem_cgroup_handle_over_high(void)
 }
 
 static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
-			unsigned int nr_pages)
+			    unsigned int nr_pages, bool cache_charge)
 {
 	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MAX_RECLAIM_RETRIES;
@@ -2666,8 +2700,8 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long pflags;
 
 retry:
-	if (consume_stock(memcg, nr_pages))
-		return 0;
+	if (consume_stock(memcg, nr_pages, cache_charge))
+		goto done;
 
 	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
@@ -2780,13 +2814,19 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	page_counter_charge(&memcg->memory, nr_pages);
 	if (do_memsw_account())
 		page_counter_charge(&memcg->memsw, nr_pages);
+	if (cache_charge)
+		page_counter_charge(&memcg->cache, nr_pages);
 
 	return 0;
 
 done_restock:
+	if (cache_charge)
+		page_counter_charge(&memcg->cache, batch);
+
 	if (batch > nr_pages)
-		refill_stock(memcg, batch - nr_pages);
+		refill_stock(memcg, batch - nr_pages, cache_charge);
 
+done:
 	/*
 	 * If the hierarchy is above the normal consumption range, schedule
 	 * reclaim on returning to userland.  We can perform reclaim here
@@ -2826,6 +2866,9 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			current->memcg_nr_pages_over_high += batch;
 			set_notify_resume(current);
 			break;
+		} else if (page_counter_read(&memcg->cache) > memcg->cache.max) {
+			if (!work_pending(&memcg->high_work))
+				schedule_work(&memcg->high_work);
 		}
 	} while ((memcg = parent_mem_cgroup(memcg)));
 
@@ -2833,12 +2876,12 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 }
 
 static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-			     unsigned int nr_pages)
+			     unsigned int nr_pages, bool cache_charge)
 {
 	if (mem_cgroup_is_root(memcg))
 		return 0;
 
-	return try_charge_memcg(memcg, gfp_mask, nr_pages);
+	return try_charge_memcg(memcg, gfp_mask, nr_pages, cache_charge);
 }
 
 static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -3024,7 +3067,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
 	memcg_account_kmem(memcg, -nr_pages);
-	refill_stock(memcg, nr_pages);
+	refill_stock(memcg, nr_pages, false);
 
 	css_put(&memcg->css);
 }
@@ -3045,7 +3088,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
 
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
-	ret = try_charge_memcg(memcg, gfp, nr_pages);
+	ret = try_charge_memcg(memcg, gfp, nr_pages, false);
 	if (ret)
 		goto out;
 
@@ -3204,7 +3247,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 			memcg = get_mem_cgroup_from_objcg(old);
 
 			memcg_account_kmem(memcg, -nr_pages);
-			__refill_stock(memcg, nr_pages);
+			__refill_stock(memcg, nr_pages, false);
 
 			css_put(&memcg->css);
 		}
@@ -3352,7 +3395,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 {
 	int ret = 0;
 
-	ret = try_charge(memcg, gfp, nr_pages);
+	ret = try_charge(memcg, gfp, nr_pages, false);
 	if (!ret)
 		page_counter_charge(&memcg->kmem, nr_pages);
 
@@ -3711,6 +3754,9 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	case _TCP:
 		counter = &memcg->tcpmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -3829,6 +3875,43 @@ static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
 	return ret;
 }
 
+static int memcg_update_cache_max(struct mem_cgroup *memcg,
+				 unsigned long limit)
+{
+	unsigned long nr_pages;
+	bool enlarge = false;
+	int ret;
+
+	do {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		mutex_lock(&memcg_max_mutex);
+
+		if (limit > memcg->cache.max)
+			enlarge = true;
+
+		ret = page_counter_set_max(&memcg->cache, limit);
+		mutex_unlock(&memcg_max_mutex);
+
+		if (!ret)
+			break;
+
+		nr_pages = max_t(long, 1, page_counter_read(&memcg->cache) - limit);
+		if (!try_to_free_mem_cgroup_pages(memcg, nr_pages,
+						GFP_KERNEL, false)) {
+			ret = -EBUSY;
+			break;
+		}
+	} while (1);
+
+	if (!ret && enlarge)
+		memcg_oom_recover(memcg);
+
+	return ret;
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -3865,6 +3948,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
 		case _TCP:
 			ret = memcg_update_tcp_max(memcg, nr_pages);
 			break;
+		case _CACHE:
+			ret = memcg_update_cache_max(memcg, nr_pages);
+			break;
 		}
 		break;
 	case RES_SOFT_LIMIT:
@@ -3898,6 +3984,9 @@ static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
 	case _TCP:
 		counter = &memcg->tcpmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -5541,6 +5630,17 @@ static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "pressure_level",
 	},
+	{
+		.name = "cache.limit_in_bytes",
+		.private = MEMFILE_PRIVATE(_CACHE, RES_LIMIT),
+		.write = mem_cgroup_write,
+		.read_u64 = mem_cgroup_read_u64,
+	},
+	{
+		.name = "cache.usage_in_bytes",
+		.private = MEMFILE_PRIVATE(_CACHE, RES_USAGE),
+		.read_u64 = mem_cgroup_read_u64,
+	},
 #ifdef CONFIG_NUMA
 	{
 		.name = "numa_stat",
@@ -5825,11 +5925,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->swap, &parent->swap);
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
+		page_counter_init(&memcg->cache, &parent->cache);
 	} else {
 		page_counter_init(&memcg->memory, NULL);
 		page_counter_init(&memcg->swap, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		page_counter_init(&memcg->tcpmem, NULL);
+		page_counter_init(&memcg->cache, NULL);
 
 		root_mem_cgroup = memcg;
 		return &memcg->css;
@@ -5950,6 +6052,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
+	page_counter_set_max(&memcg->cache, PAGE_COUNTER_MAX);
 	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
@@ -6051,7 +6154,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	int ret;
 
 	/* Try a single bulk charge without reclaim first, kswapd may wake */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count,
+			 false);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
@@ -6059,7 +6163,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 
 	/* Try charges one by one with reclaim, but do not retry */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1, false);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -7285,18 +7389,27 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 }
 
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
-			gfp_t gfp)
+			gfp_t gfp, bool cache_charge)
 {
 	long nr_pages = folio_nr_pages(folio);
 	int ret;
 
-	ret = try_charge(memcg, gfp, nr_pages);
+	ret = try_charge(memcg, gfp, nr_pages, cache_charge);
 	if (ret)
 		goto out;
 
 	css_get(&memcg->css);
 	commit_charge(folio, memcg);
 
+	/*
+	 * We always cleanup this flag on uncharging, it means
+	 * that during charging we shouldn't have this flag set
+	 */
+
+	VM_BUG_ON(folio_memcg_cache(folio));
+	if (cache_charge)
+		WRITE_ONCE(folio->memcg_data,
+			READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
 	local_irq_disable();
 	mem_cgroup_charge_statistics(memcg, nr_pages);
 	memcg_check_events(memcg, folio_nid(folio));
@@ -7305,18 +7418,32 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 	return ret;
 }
 
-int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+static int __mem_cgroup_charge_gen(struct folio *folio, struct mm_struct *mm,
+					gfp_t gfp_mask, bool cache_charge)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
+	if (mem_cgroup_disabled())
+		return 0;
+
 	memcg = get_mem_cgroup_from_mm(mm);
-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(folio, memcg, gfp_mask, cache_charge);
 	css_put(&memcg->css);
 
 	return ret;
 }
 
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+{
+	return __mem_cgroup_charge_gen(folio, mm, gfp, false);
+}
+
+int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+{
+	return __mem_cgroup_charge_gen(folio, mm, gfp, true);
+}
+
 /**
  * mem_cgroup_swapin_charge_page - charge a newly allocated page for swapin
  * @page: page to charge
@@ -7347,7 +7474,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 		memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();
 
-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(folio, memcg, gfp, false);
 
 	css_put(&memcg->css);
 	return ret;
@@ -7391,6 +7518,7 @@ struct uncharge_gather {
 	unsigned long nr_memory;
 	unsigned long pgpgout;
 	unsigned long nr_kmem;
+	unsigned long nr_pgcache;
 	int nid;
 };
 
@@ -7409,6 +7537,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 			page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
 		if (ug->nr_kmem)
 			memcg_account_kmem(ug->memcg, -ug->nr_kmem);
+		if (ug->nr_pgcache)
+			page_counter_uncharge(&ug->memcg->cache, ug->nr_pgcache);
+
 		memcg_oom_recover(ug->memcg);
 	}
 
@@ -7470,6 +7601,8 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 		folio->memcg_data = 0;
 		obj_cgroup_put(objcg);
 	} else {
+		if (folio_memcg_cache(folio))
+			ug->nr_pgcache += nr_pages;
 		/* LRU pages aren't accounted at the root level */
 		if (!mem_cgroup_is_root(memcg))
 			ug->nr_memory += nr_pages;
@@ -7553,6 +7686,12 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 			page_counter_charge(&memcg->memsw, nr_pages);
 	}
 
+	WARN_ON((!PageAnon(&new->page) && !PageSwapBacked(&new->page)) |
+		folio_memcg_cache(new));
+
+	if (folio_memcg_cache(new))
+		page_counter_charge(&memcg->cache, nr_pages);
+
 	css_get(&memcg->css);
 	commit_charge(new, memcg);
 
@@ -7621,7 +7760,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
 		return false;
 	}
 
-	if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+	if (try_charge(memcg, gfp_mask, nr_pages, false) == 0) {
 		mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
 		return true;
 	}
@@ -7643,7 +7782,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 
 	mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
 
-	refill_stock(memcg, nr_pages);
+	refill_stock(memcg, nr_pages, false);
 }
 
 static int __init cgroup_memory(char *s)
-- 
2.31.1



More information about the Devel mailing list