[Devel] [PATCH vz9 v2] mm: per memory cgroup page cache limit

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Jan 20 11:47:58 MSK 2023


From: Andrey Ryabinin <ryabinin.a.a at gmail.com>

Forward port feature: mm: per memory cgroup page cache limit.

The original implementation consisted of these commits:
commit 758d52e33a67 ("configs: Enable CONFIG_PAGE_EXTENSION")
commit 741beaa93c89 ("mm: introduce page vz extension (using page_ext)")
commit d42d3c8b849d ("mm/memcg: limit page cache in memcg hack")

This port drops the page vz extensions in favor of using a memcg_data
bit to mark a page as page cache. The benefit is that the implementation
and the porting become simpler. If additional flags are needed in the
future, the newly introduced folio can be used to carry them.
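
For reference, the mechanism boils down to the following sketch (names and
bit layout follow the hunks below; mark_folio_pgcache() is a hypothetical
helper added only for illustration, the patch itself open-codes the
WRITE_ONCE in charge_memcg()):

  /* One spare memcg_data bit marks a folio charged as page cache. */
  #define MEMCG_DATA_PGCACHE	(1UL << 2)

  /* Charge side, charge_memcg() with cache_charge == true: set the bit
   * after commit_charge() so that uncharge can find it later.
   */
  static void mark_folio_pgcache(struct folio *folio)
  {
  	WRITE_ONCE(folio->memcg_data,
  		   READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
  }

  /* Uncharge/migrate side, uncharge_folio() and mem_cgroup_migrate():
   * test the bit to know what to uncharge from memcg->cache.
   */
  static inline bool folio_memcg_cache(struct folio *folio)
  {
  	return folio->memcg_data & MEMCG_DATA_PGCACHE;
  }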

https://jira.sw.ru/browse/PSBM-144609
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Signed-off-by: Andrey Ryabinin <ryabinin.a.a at gmail.com>
---
 include/linux/memcontrol.h |  29 ++++-
 mm/filemap.c               |   3 +-
 mm/memcontrol.c            | 219 ++++++++++++++++++++++++++++++-------
 3 files changed, 207 insertions(+), 44 deletions(-)

v1->v2: addressed Pavel's comments on v1
        - fixed compilation without MEMCG
        - tried to preserve the original author
        - fixed line alignment
        - added missing bug traps and WARN_ONs
        - fixed a spelling error

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 561db06f1fd8..1a49416300c9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -273,6 +273,7 @@ struct mem_cgroup {
 	/* Legacy consumer-oriented counters */
 	struct page_counter kmem;		/* v1 only */
 	struct page_counter tcpmem;		/* v1 only */
+	struct page_counter cache;
 
 	/* Range enforcement for interrupt charges */
 	struct work_struct high_work;
@@ -405,8 +406,10 @@ enum page_memcg_data_flags {
 	MEMCG_DATA_OBJCGS = (1UL << 0),
 	/* page has been accounted as a non-slab kernel page */
 	MEMCG_DATA_KMEM = (1UL << 1),
+	/* page has been accounted as a cache page */
+	MEMCG_DATA_PGCACHE = (1UL << 2),
 	/* the next bit after the last actual flag */
-	__NR_MEMCG_DATA_FLAGS  = (1UL << 2),
+	__NR_MEMCG_DATA_FLAGS  = (1UL << 3),
 };
 
 #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
@@ -771,11 +774,25 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 				    gfp_t gfp)
 {
-	if (mem_cgroup_disabled())
-		return 0;
 	return __mem_cgroup_charge(folio, mm, gfp);
 }
 
+int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm,
+			   gfp_t gfp);
+
+/*
+ * folio_memcg_cache - Check if the folio has the pgcache flag set.
+ * @folio: Pointer to the folio.
+ *
+ * Checks if the folio has the page cache flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
+ */
+static inline bool folio_memcg_cache(struct folio *folio)
+{
+	return folio->memcg_data & MEMCG_DATA_PGCACHE;
+}
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -1339,6 +1356,12 @@ static inline int mem_cgroup_charge(struct folio *folio,
 	return 0;
 }
 
+static inline int mem_cgroup_charge_cache(struct folio *folio,
+					 struct mm_struct *mm, gfp_t gfp)
+{
+	return 0;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index 2d63e53980e4..d568ffc0d416 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -841,7 +841,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 	mapping_set_update(&xas, mapping);
 
 	if (!huge) {
-		int error = mem_cgroup_charge(folio, NULL, gfp);
+		int error = mem_cgroup_charge_cache(folio, NULL, gfp);
+
 		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 		if (error)
 			return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6fa13539f3e5..6b462152e77f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -218,6 +218,7 @@ enum res_type {
 	_OOM_TYPE,
 	_KMEM,
 	_TCP,
+	_CACHE,
 };
 
 #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
@@ -2207,6 +2208,7 @@ struct memcg_stock_pcp {
 	int nr_slab_unreclaimable_b;
 #endif
 
+	unsigned int cache_nr_pages;
 	struct work_struct work;
 	unsigned long flags;
 #define FLUSHING_CACHED_CHARGE	0
@@ -2248,7 +2250,8 @@ static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
  *
  * returns true if successful, false otherwise.
  */
-static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  bool cache)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
@@ -2260,9 +2263,16 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
-		stock->nr_pages -= nr_pages;
-		ret = true;
+	if (memcg == stock->cached) {
+		if (cache && stock->cache_nr_pages >= nr_pages) {
+			stock->cache_nr_pages -= nr_pages;
+			ret = true;
+		}
+
+		if (!cache && stock->nr_pages >= nr_pages) {
+			stock->nr_pages -= nr_pages;
+			ret = true;
+		}
 	}
 
 	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
@@ -2276,15 +2286,20 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 static void drain_stock(struct memcg_stock_pcp *stock)
 {
 	struct mem_cgroup *old = stock->cached;
+	unsigned long nr_pages = stock->nr_pages + stock->cache_nr_pages;
 
 	if (!old)
 		return;
 
-	if (stock->nr_pages) {
-		page_counter_uncharge(&old->memory, stock->nr_pages);
+	if (stock->cache_nr_pages)
+		page_counter_uncharge(&old->cache, stock->cache_nr_pages);
+
+	if (nr_pages) {
+		page_counter_uncharge(&old->memory, nr_pages);
 		if (do_memsw_account())
-			page_counter_uncharge(&old->memsw, stock->nr_pages);
+			page_counter_uncharge(&old->memsw, nr_pages);
 		stock->nr_pages = 0;
+		stock->cache_nr_pages = 0;
 	}
 
 	css_put(&old->css);
@@ -2318,9 +2333,11 @@ static void drain_local_stock(struct work_struct *dummy)
  * Cache charges(val) to local per_cpu area.
  * This will be consumed by consume_stock() function, later.
  */
-static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  bool cache)
 {
 	struct memcg_stock_pcp *stock;
+	unsigned long stock_nr_pages;
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
@@ -2328,18 +2345,23 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		css_get(&memcg->css);
 		stock->cached = memcg;
 	}
-	stock->nr_pages += nr_pages;
+	if (!cache)
+		stock->nr_pages += nr_pages;
+	else
+		stock->cache_nr_pages += nr_pages;
 
-	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+	stock_nr_pages = stock->nr_pages + stock->cache_nr_pages;
+	if (stock_nr_pages > MEMCG_CHARGE_BATCH)
 		drain_stock(stock);
 }
 
-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			bool cache)
 {
 	unsigned long flags;
 
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
-	__refill_stock(memcg, nr_pages);
+	__refill_stock(memcg, nr_pages, cache);
 	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 }
 
@@ -2366,10 +2388,12 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *memcg;
 		bool flush = false;
+		unsigned long nr_pages = stock->nr_pages +
+					 stock->cache_nr_pages;
 
 		rcu_read_lock();
 		memcg = stock->cached;
-		if (memcg && stock->nr_pages &&
+		if (memcg && nr_pages &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
 		else if (obj_stock_flush_required(stock, root_memcg))
@@ -2406,17 +2430,27 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 
 	do {
 		unsigned long pflags;
+		long cache_overused;
 
-		if (page_counter_read(&memcg->memory) <=
-		    READ_ONCE(memcg->memory.high))
-			continue;
+		if (page_counter_read(&memcg->memory) >
+		    READ_ONCE(memcg->memory.high)) {
+			memcg_memory_event(memcg, MEMCG_HIGH);
+
+			psi_memstall_enter(&pflags);
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
+					nr_pages, gfp_mask, true);
+			psi_memstall_leave(&pflags);
+		}
 
-		memcg_memory_event(memcg, MEMCG_HIGH);
+		cache_overused = page_counter_read(&memcg->cache) -
+				 memcg->cache.max;
 
-		psi_memstall_enter(&pflags);
-		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
-							     gfp_mask, true);
-		psi_memstall_leave(&pflags);
+		if (cache_overused > 0) {
+			psi_memstall_enter(&pflags);
+			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
+					cache_overused, gfp_mask, false);
+			psi_memstall_leave(&pflags);
+		}
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
 
@@ -2652,7 +2686,7 @@ void mem_cgroup_handle_over_high(void)
 }
 
 static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
-			unsigned int nr_pages)
+			   unsigned int nr_pages, bool cache_charge)
 {
 	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MAX_RECLAIM_RETRIES;
@@ -2666,8 +2700,8 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long pflags;
 
 retry:
-	if (consume_stock(memcg, nr_pages))
-		return 0;
+	if (consume_stock(memcg, nr_pages, cache_charge))
+		goto done;
 
 	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
@@ -2780,13 +2814,19 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	page_counter_charge(&memcg->memory, nr_pages);
 	if (do_memsw_account())
 		page_counter_charge(&memcg->memsw, nr_pages);
+	if (cache_charge)
+		page_counter_charge(&memcg->cache, nr_pages);
 
 	return 0;
 
 done_restock:
+	if (cache_charge)
+		page_counter_charge(&memcg->cache, batch);
+
 	if (batch > nr_pages)
-		refill_stock(memcg, batch - nr_pages);
+		refill_stock(memcg, batch - nr_pages, cache_charge);
 
+done:
 	/*
 	 * If the hierarchy is above the normal consumption range, schedule
 	 * reclaim on returning to userland.  We can perform reclaim here
@@ -2826,6 +2866,9 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			current->memcg_nr_pages_over_high += batch;
 			set_notify_resume(current);
 			break;
+		} else if (page_counter_read(&memcg->cache) > memcg->cache.max) {
+			if (!work_pending(&memcg->high_work))
+				schedule_work(&memcg->high_work);
 		}
 	} while ((memcg = parent_mem_cgroup(memcg)));
 
@@ -2833,12 +2876,12 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 }
 
 static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-			     unsigned int nr_pages)
+			     unsigned int nr_pages, bool cache_charge)
 {
 	if (mem_cgroup_is_root(memcg))
 		return 0;
 
-	return try_charge_memcg(memcg, gfp_mask, nr_pages);
+	return try_charge_memcg(memcg, gfp_mask, nr_pages, cache_charge);
 }
 
 static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -3024,7 +3067,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
 	memcg_account_kmem(memcg, -nr_pages);
-	refill_stock(memcg, nr_pages);
+	refill_stock(memcg, nr_pages, false);
 
 	css_put(&memcg->css);
 }
@@ -3045,7 +3088,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
 
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
-	ret = try_charge_memcg(memcg, gfp, nr_pages);
+	ret = try_charge_memcg(memcg, gfp, nr_pages, false);
 	if (ret)
 		goto out;
 
@@ -3204,7 +3247,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 			memcg = get_mem_cgroup_from_objcg(old);
 
 			memcg_account_kmem(memcg, -nr_pages);
-			__refill_stock(memcg, nr_pages);
+			__refill_stock(memcg, nr_pages, false);
 
 			css_put(&memcg->css);
 		}
@@ -3352,7 +3395,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
 {
 	int ret = 0;
 
-	ret = try_charge(memcg, gfp, nr_pages);
+	ret = try_charge(memcg, gfp, nr_pages, false);
 	if (!ret)
 		page_counter_charge(&memcg->kmem, nr_pages);
 
@@ -3711,6 +3754,9 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	case _TCP:
 		counter = &memcg->tcpmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -3829,6 +3875,43 @@ static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
 	return ret;
 }
 
+static int memcg_update_cache_max(struct mem_cgroup *memcg,
+				 unsigned long limit)
+{
+	unsigned long nr_pages;
+	bool enlarge = false;
+	int ret;
+
+	do {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		mutex_lock(&memcg_max_mutex);
+
+		if (limit > memcg->cache.max)
+			enlarge = true;
+
+		ret = page_counter_set_max(&memcg->cache, limit);
+		mutex_unlock(&memcg_max_mutex);
+
+		if (!ret)
+			break;
+
+		nr_pages = max_t(long, 1, page_counter_read(&memcg->cache) - limit);
+		if (!try_to_free_mem_cgroup_pages(memcg, nr_pages,
+						GFP_KERNEL, false)) {
+			ret = -EBUSY;
+			break;
+		}
+	} while (1);
+
+	if (!ret && enlarge)
+		memcg_oom_recover(memcg);
+
+	return ret;
+}
+
 /*
  * The user of this function is...
  * RES_LIMIT.
@@ -3865,6 +3948,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
 		case _TCP:
 			ret = memcg_update_tcp_max(memcg, nr_pages);
 			break;
+		case _CACHE:
+			ret = memcg_update_cache_max(memcg, nr_pages);
+			break;
 		}
 		break;
 	case RES_SOFT_LIMIT:
@@ -3898,6 +3984,9 @@ static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
 	case _TCP:
 		counter = &memcg->tcpmem;
 		break;
+	case _CACHE:
+		counter = &memcg->cache;
+		break;
 	default:
 		BUG();
 	}
@@ -5541,6 +5630,17 @@ static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "pressure_level",
 	},
+	{
+		.name = "cache.limit_in_bytes",
+		.private = MEMFILE_PRIVATE(_CACHE, RES_LIMIT),
+		.write = mem_cgroup_write,
+		.read_u64 = mem_cgroup_read_u64,
+	},
+	{
+		.name = "cache.usage_in_bytes",
+		.private = MEMFILE_PRIVATE(_CACHE, RES_USAGE),
+		.read_u64 = mem_cgroup_read_u64,
+	},
 #ifdef CONFIG_NUMA
 	{
 		.name = "numa_stat",
@@ -5825,11 +5925,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->swap, &parent->swap);
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
+		page_counter_init(&memcg->cache, &parent->cache);
 	} else {
 		page_counter_init(&memcg->memory, NULL);
 		page_counter_init(&memcg->swap, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		page_counter_init(&memcg->tcpmem, NULL);
+		page_counter_init(&memcg->cache, NULL);
 
 		root_mem_cgroup = memcg;
 		return &memcg->css;
@@ -5950,6 +6052,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
+	page_counter_set_max(&memcg->cache, PAGE_COUNTER_MAX);
 	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
@@ -6051,7 +6154,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	int ret;
 
 	/* Try a single bulk charge without reclaim first, kswapd may wake */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count,
+			 false);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
@@ -6059,7 +6163,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 
 	/* Try charges one by one with reclaim, but do not retry */
 	while (count--) {
-		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1, false);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -7285,18 +7389,27 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 }
 
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
-			gfp_t gfp)
+			gfp_t gfp, bool cache_charge)
 {
 	long nr_pages = folio_nr_pages(folio);
 	int ret;
 
-	ret = try_charge(memcg, gfp, nr_pages);
+	ret = try_charge(memcg, gfp, nr_pages, cache_charge);
 	if (ret)
 		goto out;
 
 	css_get(&memcg->css);
 	commit_charge(folio, memcg);
 
+	/*
+	 * We always clear this flag on uncharge, so it must
+	 * not be set here at charge time.
+	 */
+
+	VM_BUG_ON(folio_memcg_cache(folio));
+	if (cache_charge)
+		WRITE_ONCE(folio->memcg_data,
+			READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
 	local_irq_disable();
 	mem_cgroup_charge_statistics(memcg, nr_pages);
 	memcg_check_events(memcg, folio_nid(folio));
@@ -7305,18 +7418,32 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 	return ret;
 }
 
-int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+static int __mem_cgroup_charge_gen(struct folio *folio, struct mm_struct *mm,
+					gfp_t gfp_mask, bool cache_charge)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
+	if (mem_cgroup_disabled())
+		return 0;
+
 	memcg = get_mem_cgroup_from_mm(mm);
-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(folio, memcg, gfp_mask, cache_charge);
 	css_put(&memcg->css);
 
 	return ret;
 }
 
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+{
+	return __mem_cgroup_charge_gen(folio, mm, gfp, false);
+}
+
+int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
+{
+	return __mem_cgroup_charge_gen(folio, mm, gfp, true);
+}
+
 /**
  * mem_cgroup_swapin_charge_page - charge a newly allocated page for swapin
  * @page: page to charge
@@ -7347,7 +7474,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 		memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();
 
-	ret = charge_memcg(folio, memcg, gfp);
+	ret = charge_memcg(folio, memcg, gfp, false);
 
 	css_put(&memcg->css);
 	return ret;
@@ -7391,6 +7518,7 @@ struct uncharge_gather {
 	unsigned long nr_memory;
 	unsigned long pgpgout;
 	unsigned long nr_kmem;
+	unsigned long nr_pgcache;
 	int nid;
 };
 
@@ -7409,6 +7537,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 			page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
 		if (ug->nr_kmem)
 			memcg_account_kmem(ug->memcg, -ug->nr_kmem);
+		if (ug->nr_pgcache)
+			page_counter_uncharge(&ug->memcg->cache, ug->nr_pgcache);
+
 		memcg_oom_recover(ug->memcg);
 	}
 
@@ -7470,6 +7601,8 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 		folio->memcg_data = 0;
 		obj_cgroup_put(objcg);
 	} else {
+		if (folio_memcg_cache(folio))
+			ug->nr_pgcache += nr_pages;
 		/* LRU pages aren't accounted at the root level */
 		if (!mem_cgroup_is_root(memcg))
 			ug->nr_memory += nr_pages;
@@ -7553,6 +7686,12 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
 			page_counter_charge(&memcg->memsw, nr_pages);
 	}
 
+	WARN_ON((!PageAnon(&new->page) && !PageSwapBacked(&new->page)) |
+		folio_memcg_cache(new));
+
+	if (folio_memcg_cache(new))
+		page_counter_charge(&memcg->cache, nr_pages);
+
 	css_get(&memcg->css);
 	commit_charge(new, memcg);
 
@@ -7621,7 +7760,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
 		return false;
 	}
 
-	if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+	if (try_charge(memcg, gfp_mask, nr_pages, false) == 0) {
 		mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
 		return true;
 	}
@@ -7643,7 +7782,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 
 	mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
 
-	refill_stock(memcg, nr_pages);
+	refill_stock(memcg, nr_pages, false);
 }
 
 static int __init cgroup_memory(char *s)
-- 
2.31.1


