[Devel] [PATCH vz10 v3 1/2] mm/memcontrol: fix cache counter imbalance in mem_cgroup_replace_folio()

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Jul 30 06:19:07 MSK 2025


Looks good for all three patches:

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

On 7/29/25 00:46, Konstantin Khorenko wrote:
> The function was checking the cache flag on the new folio instead of the old one,
> causing a cache counter imbalance when replacing cache pages.
> The new folio is freshly allocated and never has the cache flag set, so
> we need to copy the flag from the old folio and charge the cache counter
> accordingly.
> 
> This fixes potential cache accounting issues during page cache replacement
> operations like page migration between NUMA nodes.
> 
> Another possible cause of the imbalance is the commit_charge() function,
> which unconditionally drops the MEMCG_DATA_PGCACHE flag on
> folio->memcg_data, so enhance commit_charge() to preserve the flag.
> 
> This way of adding the new arguments was chosen in order not to miss the
> appearance of new commit_charge() calls on rebases.
> 
> Fixes: 6a2ca4d515c5 ("mm: Memory cgroup page cache limit")
> https://virtuozzo.atlassian.net/browse/VSTOR-111756
> 
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> 
> Feature: mm: Memory cgroup page cache limit
> ---
> v3:
>   * enhance mem_cgroup_commit_charge() with cache parameter as well
>   * in mem_cgroup_replace_folio() put cache counter handling under
>     !mem_cgroup_is_root() condition like other memory counters
> 
> v2:
>   * fixed 2 other functions where commit_charge() could lose the cache flag
>   * fixed mem_cgroup_move_account() which also rewrites the memdata not
>     preserving cache flag
>   * moved sanity checks under if condition
> 
>   include/linux/memcontrol.h |  6 ++--
>   mm/hugetlb.c               |  3 +-
>   mm/memcontrol-v1.c         |  5 ++-
>   mm/memcontrol.c            | 70 +++++++++++++++++++-------------------
>   4 files changed, 45 insertions(+), 39 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index c97bd1a4b3c95..e0df2d395d03f 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -696,7 +696,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
>   		page_counter_read(&memcg->memory);
>   }
>   
> -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
> +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> +			      bool cache_charge);
>   
>   int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
>   
> @@ -1256,7 +1257,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
>   }
>   
>   static inline void mem_cgroup_commit_charge(struct folio *folio,
> -		struct mem_cgroup *memcg)
> +					    struct mem_cgroup *memcg,
> +					    bool cache_charge)
>   {
>   }
>   
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index bd46cf476a0e3..93886dba813e0 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3106,7 +3106,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>   	}
>   
>   	if (!memcg_charge_ret)
> -		mem_cgroup_commit_charge(folio, memcg);
> +		/* huge pages are not used for page cache */
> +		mem_cgroup_commit_charge(folio, memcg, false);
>   	mem_cgroup_put(memcg);
>   
>   	return folio;
> diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
> index 756bc518cee3f..fd0a563a352b1 100644
> --- a/mm/memcontrol-v1.c
> +++ b/mm/memcontrol-v1.c
> @@ -852,7 +852,10 @@ static int mem_cgroup_move_account(struct folio *folio,
>   
>   	/* Warning should never happen, so don't worry about refcount non-0 */
>   	WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
> -	folio->memcg_data = (unsigned long)to;
> +	/* Preserve MEMCG_DATA_PGCACHE flag if it was set */
> +	WRITE_ONCE(folio->memcg_data,
> +		   (unsigned long)to |
> +		   (READ_ONCE(folio->memcg_data) & MEMCG_DATA_PGCACHE));
>   
>   	__folio_memcg_unlock(from);
>   
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d0fe610306a02..28b8f50504e06 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2524,7 +2524,8 @@ void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
>   		page_counter_uncharge(&memcg->memsw, nr_pages);
>   }
>   
> -static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> +			  unsigned long cache_flag)
>   {
>   	VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
>   	/*
> @@ -2536,18 +2537,20 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
>   	 * - exclusive reference
>   	 * - mem_cgroup_trylock_pages()
>   	 */
> -	folio->memcg_data = (unsigned long)memcg;
> +	WRITE_ONCE(folio->memcg_data, (unsigned long)memcg | cache_flag);
>   }
>   
>   /**
>    * mem_cgroup_commit_charge - commit a previously successful try_charge().
>    * @folio: folio to commit the charge to.
>    * @memcg: memcg previously charged.
> + * @cache_charge: if we charge for page cache or not.
>    */
> -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> +			      bool cache_charge)
>   {
>   	css_get(&memcg->css);
> -	commit_charge(folio, memcg);
> +	commit_charge(folio, memcg, cache_charge ? MEMCG_DATA_PGCACHE : 0);
>   	memcg1_commit_charge(folio, memcg);
>   }
>   
> @@ -5267,18 +5270,13 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
>   	ret = try_charge(memcg, gfp, folio_nr_pages(folio), cache_charge);
>   	if (ret)
>   		goto out;
> -
> -	mem_cgroup_commit_charge(folio, memcg);
> -
>   	/*
>   	 * We always cleanup this flag on uncharging, it means
>   	 * that during charging we shouldn't have this flag set
>   	 */
> -
>   	VM_BUG_ON(folio_memcg_cache(folio));
> -	if (cache_charge)
> -		WRITE_ONCE(folio->memcg_data,
> -			READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
> +
> +	mem_cgroup_commit_charge(folio, memcg, cache_charge);
>   out:
>   	return ret;
>   }
> @@ -5551,6 +5549,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
>   {
>   	struct mem_cgroup *memcg;
>   	long nr_pages = folio_nr_pages(new);
> +	unsigned long cache_flag;
>   
>   	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
>   	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> @@ -5575,32 +5574,33 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
>   		if (do_memsw_account())
>   			page_counter_charge(&memcg->memsw, nr_pages);
>   	}
> -
> -	/*
> -	 * finist explained the idea behind adding a WARN_ON() here:
> -	 * - we do not want to check flags correctness on each flag change
> -	 *   because of performance
> -	 * - we do want to have a warning in case we somehow messed-up and
> -	 *   have got a folio with wrong bits set
> -	 * - we do not insist to catch every first buggy page with wrong
> -	 *   bits
> -	 *
> -	 * But we still want to get a warning about the problem sooner or
> -	 * later if the problem with flags exists.
> -	 *
> -	 * To achieve this check if a folio that is marked as cache does
> -	 * not have any other incompatible flags set.
> -	 */
> -	WARN_ON(folio_memcg_cache(new) &&
> -		(folio_test_slab(new)	      || folio_test_anon(new)	   ||
> -		 folio_test_swapbacked(new)   || folio_test_swapcache(new) ||
> -		 folio_test_mappedtodisk(new) || folio_test_ksm(new)));
> -
> -	if (folio_memcg_cache(new))
> +	cache_flag = READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE;
> +	if (cache_flag && !mem_cgroup_is_root(memcg)) {
>   		page_counter_charge(&memcg->cache, nr_pages);
>   
> +		/*
> +		 * finist explained the idea behind adding a WARN_ON() here:
> +		 * - we do not want to check flags correctness on each flag change
> +		 *   because of performance
> +		 * - we do want to have a warning in case we somehow messed-up and
> +		 *   have got a folio with wrong bits set
> +		 * - we do not insist to catch every first buggy page with wrong
> +		 *   bits
> +		 *
> +		 * But we still want to get a warning about the problem sooner or
> +		 * later if the problem with flags exists.
> +		 *
> +		 * To achieve this check if a folio that is marked as cache does
> +		 * not have any other incompatible flags set.
> +		 */
> +		WARN_ON(folio_test_slab(new) || folio_test_swapcache(new)  ||
> +			folio_test_anon(new) || folio_test_swapbacked(new) ||
> +			folio_test_ksm(new)  || folio_test_mappedtodisk(new));
> +	}
> +
>   	css_get(&memcg->css);
> -	commit_charge(new, memcg);
> +	commit_charge(new, memcg, cache_flag);
> +
>   	memcg1_commit_charge(new, memcg);
>   }
>   
> @@ -5639,7 +5639,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>   		return;
>   
>   	/* Transfer the charge and the css ref */
> -	commit_charge(new, memcg);
> +	commit_charge(new, memcg, READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE);
>   
>   	/* Warning should never happen, so don't worry about refcount non-0 */
>   	WARN_ON_ONCE(folio_unqueue_deferred_split(old));

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.



More information about the Devel mailing list