[Devel] [PATCH RHEL7 COMMIT] mm: fix division by zero in dcache_is_low()

Konstantin Khorenko khorenko at virtuozzo.com
Tue Jul 25 18:20:00 MSK 2017


Please consider preparing a ReadyKernel patch for this.

This should be done after the vz7 Update 5 release.

https://readykernel.com/

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 07/25/2017 06:05 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-514.26.1.vz7.33.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-514.26.1.vz7.33.16
> ------>
> commit edc10f4e06976abaa6008b3403f28722d4ca86db
> Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Date:   Tue Jul 25 19:05:53 2017 +0400
>
>     mm: fix division by zero in dcache_is_low()
>
>     At first we check whether sysctl_vfs_cache_min_ratio <= 0 and,
>     if it is not, we use it as the denominator. If sysctl_vfs_cache_min_ratio
>     is set to zero after the check but before the division, it causes a
>     division by zero and hence a kernel crash:
>
>     	divide error: 0000 [#1] SMP
>     	RIP: 0010:[<ffffffff81206fb5>]  [<ffffffff81206fb5>] mem_cgroup_dcache_is_low+0x55/0x80
>     	Call Trace:
>     	 super_cache_count+0xf4/0x180
>     	 shrink_slab+0x166/0x410
>     	 shrink_zone+0x11a/0x2d0
>     	 do_try_to_free_pages+0x1a0/0x570
>     	 try_to_free_mem_cgroup_pages+0xc6/0x160
>     	 mem_cgroup_reclaim+0x6b/0x180 [kpatch_cumulative_26_1_r1]
>     	 try_charge+0x18d/0x4e0 [kpatch_cumulative_26_1_r1]
>     	 mem_cgroup_try_charge+0x78/0x130
>     	 add_to_page_cache_locked+0x97/0x300
>     	 alloc_pages_current+0xaa/0x170
>     	 add_to_page_cache_lru+0x37/0xb0
>     	 grab_cache_page_write_begin+0x89/0xd0
>     	 ext4_da_write_begin+0xad/0x3a0 [ext4]
>     	 generic_file_buffered_write_iter+0x107/0x280
>     	 generic_file_write_iter+0x183/0x3c0
>     	 generic_file_aio_write+0x8b/0xb0
>     	 generic_file_aio_write+0x59/0xa0
>     	 ext4_file_write+0xdb/0x470 [ext4]
>     	 set_fd_set+0x21/0x30
>     	 core_sys_select+0x245/0x300
>     	 do_sync_write+0x90/0xe0
>     	 vfs_write+0xbd/0x1e0
>     	 SyS_write+0x7f/0xe0
>     	 system_call_fastpath+0x16/0x1b
>
>     Use READ_ONCE to cache the sysctl_vfs_cache_min_ratio value in a
>     local variable, so that we can use the cached value and not worry
>     about racy sysctl_vfs_cache_min_ratio updates.
>
>     https://jira.sw.ru/browse/PSBM-69018
>     Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> ---
>  fs/super.c                 | 7 ++++---
>  include/linux/memcontrol.h | 5 +++--
>  mm/memcontrol.c            | 4 ++--
>  3 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/fs/super.c b/fs/super.c
> index 7470621..aefaf15 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -51,18 +51,19 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
>  static bool dcache_is_low(struct mem_cgroup *memcg)
>  {
>  	unsigned long anon, file, dcache;
> +	int vfs_cache_min_ratio = READ_ONCE(sysctl_vfs_cache_min_ratio);
>
> -	if (sysctl_vfs_cache_min_ratio <= 0)
> +	if (vfs_cache_min_ratio <= 0)
>  		return false;
>
>  	if (memcg)
> -		return mem_cgroup_dcache_is_low(memcg);
> +		return mem_cgroup_dcache_is_low(memcg, vfs_cache_min_ratio);
>
>  	anon = global_page_state(NR_ANON_PAGES);
>  	file = global_page_state(NR_FILE_PAGES);
>  	dcache = global_page_state(NR_SLAB_RECLAIMABLE);
>
> -	return dcache / sysctl_vfs_cache_min_ratio <
> +	return dcache / vfs_cache_min_ratio <
>  			(anon + file + dcache) / 100;
>  }
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index f6747e4..1a52e58 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -96,7 +96,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
>  /*
>   * For memory reclaim.
>   */
> -bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg);
> +bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio);
>  bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
>  bool mem_cgroup_cleancache_disabled(struct page *page);
>  int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
> @@ -315,7 +315,8 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
>  {
>  }
>
> -static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
> +static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg,
> +	int vfs_cache_min_ratio)
>  {
>  	return false;
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1bc092..2dfe420 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1585,7 +1585,7 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
>  	return inactive * inactive_ratio < active;
>  }
>
> -bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
> +bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio)
>  {
>  	unsigned long anon, file, dcache;
>
> @@ -1593,7 +1593,7 @@ bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
>  	file = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
>  	dcache = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
>
> -	return dcache / sysctl_vfs_cache_min_ratio <
> +	return dcache / vfs_cache_min_ratio <
>  			(anon + file + dcache) / 100;
>  }
>
> .
>


More information about the Devel mailing list