[Devel] [PATCH vz9 v2] mm: per memory cgroup page cache limit

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jan 20 14:34:06 MSK 2023


On 20.01.2023 09:47, Alexander Atanasov wrote:
>   From: Andrey Ryabinin <ryabinin.a.a at gmail.com>

After applying this patch using "git am", the original author is lost.
Please resend so the original author is kept.

> 
> Forward port feature: mm: per memory cgroup page cache limit.
> 
> The original implementation consisted of these commits:
> commit 758d52e33a67 ("configs: Enable CONFIG_PAGE_EXTENSION")
> commit 741beaa93c89 ("mm: introduce page vz extension (using page_ext)")
> commit d42d3c8b849d ("mm/memcg: limit page cache in memcg hack")
> 
> This port drops the page vz extensions in favor of using a memcg_data
> bit to mark a page as cache. The benefit is that the implementation
> and porting became simpler. If new flags are required later, the newly
> introduced folio can be used.
> 
> https://jira.sw.ru/browse/PSBM-144609
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
>   Signed-off-by: Andrey Ryabinin <ryabinin.a.a at gmail.com>
> ---
>   include/linux/memcontrol.h |  29 ++++-
>   mm/filemap.c               |   3 +-
>   mm/memcontrol.c            | 219 ++++++++++++++++++++++++++++++-------
>   3 files changed, 207 insertions(+), 44 deletions(-)
> 
> v1->v2: addressing Pavel's comments for v1
>          - fixed compilation without MEMCG
>          - try to preserve author
>          - fixed line alignment
>          - add missing bug traps and WARN_ONs
>          - fixed spelling error
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 561db06f1fd8..1a49416300c9 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -273,6 +273,7 @@ struct mem_cgroup {
>   	/* Legacy consumer-oriented counters */
>   	struct page_counter kmem;		/* v1 only */
>   	struct page_counter tcpmem;		/* v1 only */
> +	struct page_counter cache;
>   
>   	/* Range enforcement for interrupt charges */
>   	struct work_struct high_work;
> @@ -405,8 +406,10 @@ enum page_memcg_data_flags {
>   	MEMCG_DATA_OBJCGS = (1UL << 0),
>   	/* page has been accounted as a non-slab kernel page */
>   	MEMCG_DATA_KMEM = (1UL << 1),
> +	/* page has been accounted as a cache page */
> +	MEMCG_DATA_PGCACHE = (1UL << 2),
>   	/* the next bit after the last actual flag */
> -	__NR_MEMCG_DATA_FLAGS  = (1UL << 2),
> +	__NR_MEMCG_DATA_FLAGS  = (1UL << 3),
>   };
>   
>   #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
> @@ -771,11 +774,25 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
>   static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
>   				    gfp_t gfp)
>   {
> -	if (mem_cgroup_disabled())
> -		return 0;
>   	return __mem_cgroup_charge(folio, mm, gfp);
>   }
>   
> +int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm,
> +			   gfp_t gfp);
> +
> +/*
> + * folio_memcg_cache - Check if the folio has the pgcache flag set.
> + * @folio: Pointer to the folio.
> + *
> + * Checks if the folio has page cache flag set. The caller must ensure
> + * that the folio has an associated memory cgroup. It's not safe to call
> + * this function against some types of folios, e.g. slab folios.
> + */
> +static inline bool folio_memcg_cache(struct folio *folio)
> +{
> +	return folio->memcg_data & MEMCG_DATA_PGCACHE;
> +}
> +
>   int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
>   				  gfp_t gfp, swp_entry_t entry);
>   void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
> @@ -1339,6 +1356,12 @@ static inline int mem_cgroup_charge(struct folio *folio,
>   	return 0;
>   }
>   
> +static inline int mem_cgroup_charge_cache(struct folio *folio,
> +					 struct mm_struct *mm, gfp_t gfp)
> +{
> +	return 0;
> +}
> +
>   static inline int mem_cgroup_swapin_charge_page(struct page *page,
>   			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
>   {
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 2d63e53980e4..d568ffc0d416 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -841,7 +841,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
>   	mapping_set_update(&xas, mapping);
>   
>   	if (!huge) {
> -		int error = mem_cgroup_charge(folio, NULL, gfp);
> +		int error = mem_cgroup_charge_cache(folio, NULL, gfp);
> +
>   		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
>   		if (error)
>   			return error;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6fa13539f3e5..6b462152e77f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -218,6 +218,7 @@ enum res_type {
>   	_OOM_TYPE,
>   	_KMEM,
>   	_TCP,
> +	_CACHE,
>   };
>   
>   #define MEMFILE_PRIVATE(x, val)	((x) << 16 | (val))
> @@ -2207,6 +2208,7 @@ struct memcg_stock_pcp {
>   	int nr_slab_unreclaimable_b;
>   #endif
>   
> +	unsigned int cache_nr_pages;
>   	struct work_struct work;
>   	unsigned long flags;
>   #define FLUSHING_CACHED_CHARGE	0
> @@ -2248,7 +2250,8 @@ static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
>    *
>    * returns true if successful, false otherwise.
>    */
> -static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
> +static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
> +			  bool cache)
>   {
>   	struct memcg_stock_pcp *stock;
>   	unsigned long flags;
> @@ -2260,9 +2263,16 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>   	local_lock_irqsave(&memcg_stock.stock_lock, flags);
>   
>   	stock = this_cpu_ptr(&memcg_stock);
> -	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
> -		stock->nr_pages -= nr_pages;
> -		ret = true;
> +	if (memcg == stock->cached) {
> +		if (cache && stock->cache_nr_pages >= nr_pages) {
> +			stock->cache_nr_pages -= nr_pages;
> +			ret = true;
> +		}
> +
> +		if (!cache && stock->nr_pages >= nr_pages) {
> +			stock->nr_pages -= nr_pages;
> +			ret = true;
> +		}
>   	}
>   
>   	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
> @@ -2276,15 +2286,20 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>   static void drain_stock(struct memcg_stock_pcp *stock)
>   {
>   	struct mem_cgroup *old = stock->cached;
> +	unsigned long nr_pages = stock->nr_pages + stock->cache_nr_pages;
>   
>   	if (!old)
>   		return;
>   
> -	if (stock->nr_pages) {
> -		page_counter_uncharge(&old->memory, stock->nr_pages);
> +	if (stock->cache_nr_pages)
> +		page_counter_uncharge(&old->cache, stock->cache_nr_pages);
> +
> +	if (nr_pages) {
> +		page_counter_uncharge(&old->memory, nr_pages);
>   		if (do_memsw_account())
> -			page_counter_uncharge(&old->memsw, stock->nr_pages);
> +			page_counter_uncharge(&old->memsw, nr_pages);
>   		stock->nr_pages = 0;
> +		stock->cache_nr_pages = 0;
>   	}
>   
>   	css_put(&old->css);
> @@ -2318,9 +2333,11 @@ static void drain_local_stock(struct work_struct *dummy)
>    * Cache charges(val) to local per_cpu area.
>    * This will be consumed by consume_stock() function, later.
>    */
> -static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
> +static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
> +			  bool cache)
>   {
>   	struct memcg_stock_pcp *stock;
> +	unsigned long stock_nr_pages;
>   
>   	stock = this_cpu_ptr(&memcg_stock);
>   	if (stock->cached != memcg) { /* reset if necessary */
> @@ -2328,18 +2345,23 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
>   		css_get(&memcg->css);
>   		stock->cached = memcg;
>   	}
> -	stock->nr_pages += nr_pages;
> +	if (!cache)
> +		stock->nr_pages += nr_pages;
> +	else
> +		stock->cache_nr_pages += nr_pages;
>   
> -	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
> +	stock_nr_pages = stock->nr_pages + stock->cache_nr_pages;
> +	if (stock_nr_pages > MEMCG_CHARGE_BATCH)
>   		drain_stock(stock);
>   }
>   
> -static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
> +static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
> +			bool cache)
>   {
>   	unsigned long flags;
>   
>   	local_lock_irqsave(&memcg_stock.stock_lock, flags);
> -	__refill_stock(memcg, nr_pages);
> +	__refill_stock(memcg, nr_pages, cache);
>   	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
>   }
>   
> @@ -2366,10 +2388,12 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
>   		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
>   		struct mem_cgroup *memcg;
>   		bool flush = false;
> +		unsigned long nr_pages = stock->nr_pages +
> +					 stock->cache_nr_pages;
>   
>   		rcu_read_lock();
>   		memcg = stock->cached;
> -		if (memcg && stock->nr_pages &&
> +		if (memcg && nr_pages &&
>   		    mem_cgroup_is_descendant(memcg, root_memcg))
>   			flush = true;
>   		else if (obj_stock_flush_required(stock, root_memcg))
> @@ -2406,17 +2430,27 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
>   
>   	do {
>   		unsigned long pflags;
> +		long cache_overused;
>   
> -		if (page_counter_read(&memcg->memory) <=
> -		    READ_ONCE(memcg->memory.high))
> -			continue;
> +		if (page_counter_read(&memcg->memory) >
> +		    READ_ONCE(memcg->memory.high)) {
> +			memcg_memory_event(memcg, MEMCG_HIGH);
> +
> +			psi_memstall_enter(&pflags);
> +			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
> +					nr_pages, gfp_mask, true);
> +			psi_memstall_leave(&pflags);
> +		}
>   
> -		memcg_memory_event(memcg, MEMCG_HIGH);
> +		cache_overused = page_counter_read(&memcg->cache) -
> +				 memcg->cache.max;
>   
> -		psi_memstall_enter(&pflags);
> -		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
> -							     gfp_mask, true);
> -		psi_memstall_leave(&pflags);
> +		if (cache_overused > 0) {
> +			psi_memstall_enter(&pflags);
> +			nr_reclaimed += try_to_free_mem_cgroup_pages(memcg,
> +					cache_overused, gfp_mask, false);
> +			psi_memstall_leave(&pflags);
> +		}
>   	} while ((memcg = parent_mem_cgroup(memcg)) &&
>   		 !mem_cgroup_is_root(memcg));
>   
> @@ -2652,7 +2686,7 @@ void mem_cgroup_handle_over_high(void)
>   }
>   
>   static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
> -			unsigned int nr_pages)
> +			   unsigned int nr_pages, bool cache_charge)
>   {
>   	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
>   	int nr_retries = MAX_RECLAIM_RETRIES;
> @@ -2666,8 +2700,8 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
>   	unsigned long pflags;
>   
>   retry:
> -	if (consume_stock(memcg, nr_pages))
> -		return 0;
> +	if (consume_stock(memcg, nr_pages, cache_charge))
> +		goto done;
>   
>   	if (!do_memsw_account() ||
>   	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
> @@ -2780,13 +2814,19 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
>   	page_counter_charge(&memcg->memory, nr_pages);
>   	if (do_memsw_account())
>   		page_counter_charge(&memcg->memsw, nr_pages);
> +	if (cache_charge)
> +		page_counter_charge(&memcg->cache, nr_pages);
>   
>   	return 0;
>   
>   done_restock:
> +	if (cache_charge)
> +		page_counter_charge(&memcg->cache, batch);
> +
>   	if (batch > nr_pages)
> -		refill_stock(memcg, batch - nr_pages);
> +		refill_stock(memcg, batch - nr_pages, cache_charge);
>   
> +done:
>   	/*
>   	 * If the hierarchy is above the normal consumption range, schedule
>   	 * reclaim on returning to userland.  We can perform reclaim here
> @@ -2826,6 +2866,9 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
>   			current->memcg_nr_pages_over_high += batch;
>   			set_notify_resume(current);
>   			break;
> +		} else if (page_counter_read(&memcg->cache) > memcg->cache.max) {
> +			if (!work_pending(&memcg->high_work))
> +				schedule_work(&memcg->high_work);
>   		}
>   	} while ((memcg = parent_mem_cgroup(memcg)));
>   
> @@ -2833,12 +2876,12 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
>   }
>   
>   static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
> -			     unsigned int nr_pages)
> +			     unsigned int nr_pages, bool cache_charge)
>   {
>   	if (mem_cgroup_is_root(memcg))
>   		return 0;
>   
> -	return try_charge_memcg(memcg, gfp_mask, nr_pages);
> +	return try_charge_memcg(memcg, gfp_mask, nr_pages, cache_charge);
>   }
>   
>   static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
> @@ -3024,7 +3067,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
>   	memcg = get_mem_cgroup_from_objcg(objcg);
>   
>   	memcg_account_kmem(memcg, -nr_pages);
> -	refill_stock(memcg, nr_pages);
> +	refill_stock(memcg, nr_pages, false);
>   
>   	css_put(&memcg->css);
>   }
> @@ -3045,7 +3088,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
>   
>   	memcg = get_mem_cgroup_from_objcg(objcg);
>   
> -	ret = try_charge_memcg(memcg, gfp, nr_pages);
> +	ret = try_charge_memcg(memcg, gfp, nr_pages, false);
>   	if (ret)
>   		goto out;
>   
> @@ -3204,7 +3247,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
>   			memcg = get_mem_cgroup_from_objcg(old);
>   
>   			memcg_account_kmem(memcg, -nr_pages);
> -			__refill_stock(memcg, nr_pages);
> +			__refill_stock(memcg, nr_pages, false);
>   
>   			css_put(&memcg->css);
>   		}
> @@ -3352,7 +3395,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
>   {
>   	int ret = 0;
>   
> -	ret = try_charge(memcg, gfp, nr_pages);
> +	ret = try_charge(memcg, gfp, nr_pages, false);
>   	if (!ret)
>   		page_counter_charge(&memcg->kmem, nr_pages);
>   
> @@ -3711,6 +3754,9 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
>   	case _TCP:
>   		counter = &memcg->tcpmem;
>   		break;
> +	case _CACHE:
> +		counter = &memcg->cache;
> +		break;
>   	default:
>   		BUG();
>   	}
> @@ -3829,6 +3875,43 @@ static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
>   	return ret;
>   }
>   
> +static int memcg_update_cache_max(struct mem_cgroup *memcg,
> +				 unsigned long limit)
> +{
> +	unsigned long nr_pages;
> +	bool enlarge = false;
> +	int ret;
> +
> +	do {
> +		if (signal_pending(current)) {
> +			ret = -EINTR;
> +			break;
> +		}
> +		mutex_lock(&memcg_max_mutex);
> +
> +		if (limit > memcg->cache.max)
> +			enlarge = true;
> +
> +		ret = page_counter_set_max(&memcg->cache, limit);
> +		mutex_unlock(&memcg_max_mutex);
> +
> +		if (!ret)
> +			break;
> +
> +		nr_pages = max_t(long, 1, page_counter_read(&memcg->cache) - limit);
> +		if (!try_to_free_mem_cgroup_pages(memcg, nr_pages,
> +						GFP_KERNEL, false)) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +	} while (1);
> +
> +	if (!ret && enlarge)
> +		memcg_oom_recover(memcg);
> +
> +	return ret;
> +}
> +
>   /*
>    * The user of this function is...
>    * RES_LIMIT.
> @@ -3865,6 +3948,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
>   		case _TCP:
>   			ret = memcg_update_tcp_max(memcg, nr_pages);
>   			break;
> +		case _CACHE:
> +			ret = memcg_update_cache_max(memcg, nr_pages);
> +			break;
>   		}
>   		break;
>   	case RES_SOFT_LIMIT:
> @@ -3898,6 +3984,9 @@ static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
>   	case _TCP:
>   		counter = &memcg->tcpmem;
>   		break;
> +	case _CACHE:
> +		counter = &memcg->cache;
> +		break;
>   	default:
>   		BUG();
>   	}
> @@ -5541,6 +5630,17 @@ static struct cftype mem_cgroup_legacy_files[] = {
>   	{
>   		.name = "pressure_level",
>   	},
> +	{
> +		.name = "cache.limit_in_bytes",
> +		.private = MEMFILE_PRIVATE(_CACHE, RES_LIMIT),
> +		.write = mem_cgroup_write,
> +		.read_u64 = mem_cgroup_read_u64,
> +	},
> +	{
> +		.name = "cache.usage_in_bytes",
> +		.private = MEMFILE_PRIVATE(_CACHE, RES_USAGE),
> +		.read_u64 = mem_cgroup_read_u64,
> +	},
>   #ifdef CONFIG_NUMA
>   	{
>   		.name = "numa_stat",
> @@ -5825,11 +5925,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>   		page_counter_init(&memcg->swap, &parent->swap);
>   		page_counter_init(&memcg->kmem, &parent->kmem);
>   		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
> +		page_counter_init(&memcg->cache, &parent->cache);
>   	} else {
>   		page_counter_init(&memcg->memory, NULL);
>   		page_counter_init(&memcg->swap, NULL);
>   		page_counter_init(&memcg->kmem, NULL);
>   		page_counter_init(&memcg->tcpmem, NULL);
> +		page_counter_init(&memcg->cache, NULL);
>   
>   		root_mem_cgroup = memcg;
>   		return &memcg->css;
> @@ -5950,6 +6052,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
>   	page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX);
>   	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
>   	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
> +	page_counter_set_max(&memcg->cache, PAGE_COUNTER_MAX);
>   	page_counter_set_min(&memcg->memory, 0);
>   	page_counter_set_low(&memcg->memory, 0);
>   	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
> @@ -6051,7 +6154,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
>   	int ret;
>   
>   	/* Try a single bulk charge without reclaim first, kswapd may wake */
> -	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
> +	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count,
> +			 false);
>   	if (!ret) {
>   		mc.precharge += count;
>   		return ret;
> @@ -6059,7 +6163,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
>   
>   	/* Try charges one by one with reclaim, but do not retry */
>   	while (count--) {
> -		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
> +		ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1, false);
>   		if (ret)
>   			return ret;
>   		mc.precharge++;
> @@ -7285,18 +7389,27 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
>   }
>   
>   static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
> -			gfp_t gfp)
> +			gfp_t gfp, bool cache_charge)
>   {
>   	long nr_pages = folio_nr_pages(folio);
>   	int ret;
>   
> -	ret = try_charge(memcg, gfp, nr_pages);
> +	ret = try_charge(memcg, gfp, nr_pages, cache_charge);
>   	if (ret)
>   		goto out;
>   
>   	css_get(&memcg->css);
>   	commit_charge(folio, memcg);
>   
> +	/*
> +	 * We always cleanup this flag on uncharging, it means
> +	 * that during charging we shouldn't have this flag set
> +	 */
> +
> +	VM_BUG_ON(folio_memcg_cache(folio));
> +	if (cache_charge)
> +		WRITE_ONCE(folio->memcg_data,
> +			READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
>   	local_irq_disable();
>   	mem_cgroup_charge_statistics(memcg, nr_pages);
>   	memcg_check_events(memcg, folio_nid(folio));
> @@ -7305,18 +7418,32 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
>   	return ret;
>   }
>   
> -int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
> +static int __mem_cgroup_charge_gen(struct folio *folio, struct mm_struct *mm,
> +					gfp_t gfp_mask, bool cache_charge)
>   {
>   	struct mem_cgroup *memcg;
>   	int ret;
>   
> +	if (mem_cgroup_disabled())
> +		return 0;
> +
>   	memcg = get_mem_cgroup_from_mm(mm);
> -	ret = charge_memcg(folio, memcg, gfp);
> +	ret = charge_memcg(folio, memcg, gfp_mask, cache_charge);
>   	css_put(&memcg->css);
>   
>   	return ret;
>   }
>   
> +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
> +{
> +	return __mem_cgroup_charge_gen(folio, mm, gfp, false);
> +}
> +
> +int mem_cgroup_charge_cache(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
> +{
> +	return __mem_cgroup_charge_gen(folio, mm, gfp, true);
> +}
> +
>   /**
>    * mem_cgroup_swapin_charge_page - charge a newly allocated page for swapin
>    * @page: page to charge
> @@ -7347,7 +7474,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
>   		memcg = get_mem_cgroup_from_mm(mm);
>   	rcu_read_unlock();
>   
> -	ret = charge_memcg(folio, memcg, gfp);
> +	ret = charge_memcg(folio, memcg, gfp, false);
>   
>   	css_put(&memcg->css);
>   	return ret;
> @@ -7391,6 +7518,7 @@ struct uncharge_gather {
>   	unsigned long nr_memory;
>   	unsigned long pgpgout;
>   	unsigned long nr_kmem;
> +	unsigned long nr_pgcache;
>   	int nid;
>   };
>   
> @@ -7409,6 +7537,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
>   			page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
>   		if (ug->nr_kmem)
>   			memcg_account_kmem(ug->memcg, -ug->nr_kmem);
> +		if (ug->nr_pgcache)
> +			page_counter_uncharge(&ug->memcg->cache, ug->nr_pgcache);
> +
>   		memcg_oom_recover(ug->memcg);
>   	}
>   
> @@ -7470,6 +7601,8 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
>   		folio->memcg_data = 0;
>   		obj_cgroup_put(objcg);
>   	} else {
> +		if (folio_memcg_cache(folio))
> +			ug->nr_pgcache += nr_pages;
>   		/* LRU pages aren't accounted at the root level */
>   		if (!mem_cgroup_is_root(memcg))
>   			ug->nr_memory += nr_pages;
> @@ -7553,6 +7686,12 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>   			page_counter_charge(&memcg->memsw, nr_pages);
>   	}
>   
> +	WARN_ON((!PageAnon(&new->page) && !PageSwapBacked(&new->page)) |
> +		folio_memcg_cache(new));
> +
> +	if (folio_memcg_cache(new))
> +		page_counter_charge(&memcg->cache, nr_pages);
> +
>   	css_get(&memcg->css);
>   	commit_charge(new, memcg);
>   
> @@ -7621,7 +7760,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
>   		return false;
>   	}
>   
> -	if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
> +	if (try_charge(memcg, gfp_mask, nr_pages, false) == 0) {
>   		mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
>   		return true;
>   	}
> @@ -7643,7 +7782,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>   
>   	mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
>   
> -	refill_stock(memcg, nr_pages);
> +	refill_stock(memcg, nr_pages, false);
>   }
>   
>   static int __init cgroup_memory(char *s)


More information about the Devel mailing list