[Devel] [PATCH rh7 4/4] ub: add heuristic check for memory overcommit

Andrey Ryabinin aryabinin at virtuozzo.com
Thu May 5 05:46:45 PDT 2016



On 05/05/2016 01:23 PM, Vladimir Davydov wrote:
> Currently, we only have the hard limit for virtual address space size
> inside containers. This is inflexible, because setting it to a small
> value will cause many normal allocations to fail. BTW that's why it is
> left unlimited by default in Vz7. OTOH allowing an application to
> allocate as much virtual address space as it wants may be bad for
> applications that expect to be stopped gracefully by mmap returning
> ENOMEM instead of being killed by the OOM killer.
> 
> So this patch introduces the "heuristic" mode of overcommit accounting
> inside containers similar to the one used on most hosts by default
> (vm.overcommit_memory sysctl set to 0). It can be toggled system-wide
> via the ubc.overcommit_memory sysctl. Per-beancounter
> configuration is not supported yet, but it may be added later if needed.
> 
> If enabled (ubc.overcommit_memory = 0, this is the default), an
> application inside a container will fail to allocate a virtual address
> range iff its length is greater than the amount of unused plus
> reclaimable memory accounted to the container. Note that the
> UBC_PRIVVMPAGES limit is still
> taken into account. If disabled (ubc.overcommit_memory = 1), only the
> UBC_PRIVVMPAGES limit will be checked.
> 
> https://jira.sw.ru/browse/PSBM-45695
> 
> Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>

Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>


> ---
>  include/bc/vmpages.h    |  4 ++++
>  kernel/bc/beancounter.c |  7 +++++++
>  kernel/bc/vm_pages.c    | 28 ++++++++++++++++++++++++++++
>  mm/memcontrol.c         | 21 +++++++++++++++++++++
>  mm/mmap.c               |  9 +++------
>  5 files changed, 63 insertions(+), 6 deletions(-)
> 
> diff --git a/include/bc/vmpages.h b/include/bc/vmpages.h
> index bf63b885441c..72a5d8ecb94b 100644
> --- a/include/bc/vmpages.h
> +++ b/include/bc/vmpages.h
> @@ -17,6 +17,8 @@
>  #include <bc/beancounter.h>
>  #include <bc/decl.h>
>  
> +extern int ub_overcommit_memory;
> +
>  /*
>   * Check whether vma has private or copy-on-write mapping.
>   */
> @@ -47,4 +49,6 @@ UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
>  UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
>  			unsigned long size))
>  
> +UB_DECLARE_FUNC(int, ub_enough_memory(struct mm_struct *mm, long pages))
> +
>  #endif /* __UB_PAGES_H_ */
> diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
> index 5023bd2b208d..18188f7a42e8 100644
> --- a/kernel/bc/beancounter.c
> +++ b/kernel/bc/beancounter.c
> @@ -1135,6 +1135,13 @@ static ctl_table ub_sysctl_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= &proc_resource_precharge,
>  	},
> +	{
> +		.procname	= "overcommit_memory",
> +		.data		= &ub_overcommit_memory,
> +		.maxlen		= sizeof(ub_overcommit_memory),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
>  #ifdef CONFIG_BC_IO_ACCOUNTING
>  	{
>  		.procname	= "dirty_ratio",
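
Judging by the table above, the knob lands next to the other ub sysctls.
A hypothetical userspace helper (not in the patch; the /proc path assumes
the ub table is registered under "ubc", matching the ubc.overcommit_memory
name from the changelog):

	#include <stdio.h>

	/* 0 = heuristic check (the default), 1 = UBC_PRIVVMPAGES limit only */
	static int set_ub_overcommit(int mode)
	{
		FILE *f = fopen("/proc/sys/ubc/overcommit_memory", "w");

		if (!f)
			return -1;
		fprintf(f, "%d\n", mode);
		return fclose(f);
	}

	int main(void)
	{
		return set_ub_overcommit(1) ? 1 : 0;
	}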
> diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
> index 5e588d1f036c..b04ea13d9fad 100644
> --- a/kernel/bc/vm_pages.c
> +++ b/kernel/bc/vm_pages.c
> @@ -24,6 +24,8 @@
>  #include <bc/vmpages.h>
>  #include <bc/proc.h>
>  
> +int ub_overcommit_memory;
> +
>  int ub_memory_charge(struct mm_struct *mm, unsigned long size,
>  		unsigned vm_flags, struct file *vm_file, int sv)
>  {
> @@ -119,6 +121,32 @@ void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
>  	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
>  }
>  
> +extern int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages);
> +
> +int ub_enough_memory(struct mm_struct *mm, long pages)
> +{
> +	struct user_beancounter *ub;
> +	struct cgroup_subsys_state *css;
> +	int ret;
> +
> +	if (!mm)
> +		return 0;
> +
> +	ub = mm->mm_ub;
> +
> +	if (ub->ub_parms[UB_PRIVVMPAGES].held >
> +	    ub->ub_parms[UB_PRIVVMPAGES].barrier)
> +		return -ENOMEM;
> +
> +	if (ub_overcommit_memory)
> +		return 0;
> +
> +	css = ub_get_mem_css(ub);
> +	ret = mem_cgroup_enough_memory(mem_cgroup_from_cont(css->cgroup), pages);
> +	css_put(css);
> +	return ret;
> +}
> +
>  static int bc_fill_sysinfo(struct user_beancounter *ub,
>  		unsigned long meminfo_val, struct sysinfo *si)
>  {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f52cd8ec02f0..b57705523be1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4976,6 +4976,27 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
>  	mi->shmem = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
>  }
>  
> +int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
> +{
> +	long free;
> +
> +	/* unused memory */
> +	free = (res_counter_read_u64(&memcg->memsw, RES_LIMIT) -
> +		res_counter_read_u64(&memcg->memsw, RES_USAGE)) >> PAGE_SHIFT;
> +
> +	/* reclaimable slabs */
> +	free += res_counter_read_u64(&memcg->dcache, RES_USAGE) >> PAGE_SHIFT;
> +
> +	/* assume file cache is reclaimable */
> +	free += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
> +
> +	/* but do not count shmem pages as they can't be purged,
> +	 * only swapped out */
> +	free -= mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
> +
> +	return free < pages ? -ENOMEM : 0;
> +}
> +
>  static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
>  {
>  	u64 val;
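
To make the estimate concrete, a worked example with made-up numbers:
say the container's memsw limit is 2 GiB with 1.5 GiB in use, plus
128 MiB of dcache and 512 MiB of page cache, 256 MiB of which is shmem.
In 4 KiB pages:

	free = (2048 - 1536) MiB	-> 131072 pages (unused)
	     +   128 MiB		->  32768 pages (dcache)
	     +   512 MiB		-> 131072 pages (cache)
	     -   256 MiB		->  65536 pages (shmem)
	     = 896 MiB			-> 229376 pages

so with overcommit_memory == 0 an attempt to map more than ~896 MiB of
private memory in one go would fail with ENOMEM.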
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 417163e18d32..fcd1ea3c327d 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -135,13 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
>  {
>  	unsigned long free, allowed, reserve;
>  
> -	vm_acct_memory(pages);
> +	if (mm && ub_enough_memory(mm, pages) != 0)
> +		return -ENOMEM;
>  
> -#ifdef CONFIG_BEANCOUNTERS
> -	if (mm && mm->mm_ub->ub_parms[UB_PRIVVMPAGES].held <=
> -			mm->mm_ub->ub_parms[UB_VMGUARPAGES].barrier)
> -		return 0;
> -#endif
> +	vm_acct_memory(pages);
>  
>  	/*
>  	 * Sometimes we want to use more memory than we have
> 

