[Devel] [PATCH vz10 v3 1/2] mm/memcontrol: fix cache counter imbalance in mem_cgroup_replace_folio()
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Wed Jul 30 06:19:07 MSK 2025
Looks good for all three patches:
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
On 7/29/25 00:46, Konstantin Khorenko wrote:
> The function was checking the cache flag on the new folio instead of the old one,
> causing cache counter imbalance when replacing cache pages.
> The new folio is freshly allocated and never has the cache flag set, so
> we need to copy the flag from the old folio and charge the cache counter
> accordingly.
>
> This fixes potential cache accounting issues during page cache replacement
> operations like page migration between NUMA nodes.
>
> Another possible cause of the imbalance is the commit_charge() function
> which unconditionally drops MEMCG_DATA_PGCACHE flag on
> folio->memcg_data, so enhance commit_charge() to preserve the flag.
>
> The way of adding new arguments was chosen in order not to miss the
> appearance of new commit_charge() calls on rebases.
>
> Fixes: 6a2ca4d515c5 ("mm: Memory cgroup page cache limit")
> https://virtuozzo.atlassian.net/browse/VSTOR-111756
>
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
>
> Feature: mm: Memory cgroup page cache limit
> ---
> v3:
> * enhance mem_cgroup_commit_charge() with cache parameter as well
> * in mem_cgroup_replace_folio() put cache counter handling under
> !mem_cgroup_is_root() condition like other memory counters
>
> v2:
> * fixed 2 other functions where commit_charge() could lose the cache flag
> * fixed mem_cgroup_move_account() which also rewrites memcg_data without
> preserving the cache flag
> * moved sanity checks under if condition
>
> include/linux/memcontrol.h | 6 ++--
> mm/hugetlb.c | 3 +-
> mm/memcontrol-v1.c | 5 ++-
> mm/memcontrol.c | 70 +++++++++++++++++++-------------------
> 4 files changed, 45 insertions(+), 39 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index c97bd1a4b3c95..e0df2d395d03f 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -696,7 +696,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
> page_counter_read(&memcg->memory);
> }
>
> -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
> +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> + bool cache_charge);
>
> int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
>
> @@ -1256,7 +1257,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
> }
>
> static inline void mem_cgroup_commit_charge(struct folio *folio,
> - struct mem_cgroup *memcg)
> + struct mem_cgroup *memcg,
> + bool cache_charge)
> {
> }
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index bd46cf476a0e3..93886dba813e0 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3106,7 +3106,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> }
>
> if (!memcg_charge_ret)
> - mem_cgroup_commit_charge(folio, memcg);
> + /* huge pages are not used for page cache */
> + mem_cgroup_commit_charge(folio, memcg, false);
> mem_cgroup_put(memcg);
>
> return folio;
> diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
> index 756bc518cee3f..fd0a563a352b1 100644
> --- a/mm/memcontrol-v1.c
> +++ b/mm/memcontrol-v1.c
> @@ -852,7 +852,10 @@ static int mem_cgroup_move_account(struct folio *folio,
>
> /* Warning should never happen, so don't worry about refcount non-0 */
> WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
> - folio->memcg_data = (unsigned long)to;
> + /* Preserve MEMCG_DATA_PGCACHE flag if it was set */
> + WRITE_ONCE(folio->memcg_data,
> + (unsigned long)to |
> + (READ_ONCE(folio->memcg_data) & MEMCG_DATA_PGCACHE));
>
> __folio_memcg_unlock(from);
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d0fe610306a02..28b8f50504e06 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2524,7 +2524,8 @@ void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
> page_counter_uncharge(&memcg->memsw, nr_pages);
> }
>
> -static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> + unsigned long cache_flag)
> {
> VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
> /*
> @@ -2536,18 +2537,20 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> * - exclusive reference
> * - mem_cgroup_trylock_pages()
> */
> - folio->memcg_data = (unsigned long)memcg;
> + WRITE_ONCE(folio->memcg_data, (unsigned long)memcg | cache_flag);
> }
>
> /**
> * mem_cgroup_commit_charge - commit a previously successful try_charge().
> * @folio: folio to commit the charge to.
> * @memcg: memcg previously charged.
> + * @cache_charge: if we charge for page cache or not.
> */
> -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg,
> + bool cache_charge)
> {
> css_get(&memcg->css);
> - commit_charge(folio, memcg);
> + commit_charge(folio, memcg, cache_charge ? MEMCG_DATA_PGCACHE : 0);
> memcg1_commit_charge(folio, memcg);
> }
>
> @@ -5267,18 +5270,13 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
> ret = try_charge(memcg, gfp, folio_nr_pages(folio), cache_charge);
> if (ret)
> goto out;
> -
> - mem_cgroup_commit_charge(folio, memcg);
> -
> /*
> * We always cleanup this flag on uncharging, it means
> * that during charging we shouldn't have this flag set
> */
> -
> VM_BUG_ON(folio_memcg_cache(folio));
> - if (cache_charge)
> - WRITE_ONCE(folio->memcg_data,
> - READ_ONCE(folio->memcg_data) | MEMCG_DATA_PGCACHE);
> +
> + mem_cgroup_commit_charge(folio, memcg, cache_charge);
> out:
> return ret;
> }
> @@ -5551,6 +5549,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
> {
> struct mem_cgroup *memcg;
> long nr_pages = folio_nr_pages(new);
> + unsigned long cache_flag;
>
> VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
> VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
> @@ -5575,32 +5574,33 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
> if (do_memsw_account())
> page_counter_charge(&memcg->memsw, nr_pages);
> }
> -
> - /*
> - * finist explained the idea behind adding a WARN_ON() here:
> - * - we do not want to check flags correctness on each flag change
> - * because of performance
> - * - we do want to have a warning in case we somehow messed-up and
> - * have got a folio with wrong bits set
> - * - we do not insist to catch every first buggy page with wrong
> - * bits
> - *
> - * But we still want to get a warning about the problem sooner or
> - * later if the problem with flags exists.
> - *
> - * To achieve this check if a folio that is marked as cache does
> - * not have any other incompatible flags set.
> - */
> - WARN_ON(folio_memcg_cache(new) &&
> - (folio_test_slab(new) || folio_test_anon(new) ||
> - folio_test_swapbacked(new) || folio_test_swapcache(new) ||
> - folio_test_mappedtodisk(new) || folio_test_ksm(new)));
> -
> - if (folio_memcg_cache(new))
> + cache_flag = READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE;
> + if (cache_flag && !mem_cgroup_is_root(memcg)) {
> page_counter_charge(&memcg->cache, nr_pages);
>
> + /*
> + * finist explained the idea behind adding a WARN_ON() here:
> + * - we do not want to check flags correctness on each flag change
> + * because of performance
> + * - we do want to have a warning in case we somehow messed-up and
> + * have got a folio with wrong bits set
> + * - we do not insist to catch every first buggy page with wrong
> + * bits
> + *
> + * But we still want to get a warning about the problem sooner or
> + * later if the problem with flags exists.
> + *
> + * To achieve this check if a folio that is marked as cache does
> + * not have any other incompatible flags set.
> + */
> + WARN_ON(folio_test_slab(new) || folio_test_swapcache(new) ||
> + folio_test_anon(new) || folio_test_swapbacked(new) ||
> + folio_test_ksm(new) || folio_test_mappedtodisk(new));
> + }
> +
> css_get(&memcg->css);
> - commit_charge(new, memcg);
> + commit_charge(new, memcg, cache_flag);
> +
> memcg1_commit_charge(new, memcg);
> }
>
> @@ -5639,7 +5639,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
> return;
>
> /* Transfer the charge and the css ref */
> - commit_charge(new, memcg);
> + commit_charge(new, memcg, READ_ONCE(old->memcg_data) & MEMCG_DATA_PGCACHE);
>
> /* Warning should never happen, so don't worry about refcount non-0 */
> WARN_ON_ONCE(folio_unqueue_deferred_split(old));
--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.
More information about the Devel
mailing list