[Devel] [PATCH rh7 4/4] ub: add heuristic check for memory overcommit
Andrey Ryabinin
aryabinin at virtuozzo.com
Thu May 5 05:46:45 PDT 2016
On 05/05/2016 01:23 PM, Vladimir Davydov wrote:
> Currently, we only have a hard limit for virtual address space size
> inside containers. This is inflexible, because setting it to a small
> value will cause many normal allocations to fail. BTW that's why it is
> left unlimited by default in Vz7. OTOH allowing an application to
> allocate as much virtual address space as it wants may be bad for
> applications that expect to be stopped gracefully by mmap returning
> ENOMEM instead of being killed by the OOM killer.
>
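[ Side note for the archive: "applications that expect to be stopped
gracefully" above just means programs that probe the address space with
mmap() and handle ENOMEM themselves instead of getting OOM-killed later.
Roughly something like this (illustrative only, not part of the patch):

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

/* Ask for len bytes of anonymous memory; on ENOMEM back off
 * instead of relying on the OOM killer to clean up later. */
static void *reserve(size_t len)
{
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
                if (errno == ENOMEM)
                        fprintf(stderr, "mmap(%zu): ENOMEM, backing off\n", len);
                return NULL;
        }
        return p;
}

int main(void)
{
        size_t want = 1UL << 30;        /* try 1G first ... */
        void *p = reserve(want);

        if (!p)
                p = reserve(want / 4);  /* ... then settle for 256M */
        return p ? 0 : 1;
}

With the heuristic check such a program gets -ENOMEM at mmap() time once
the request no longer fits into the CT, which is exactly the failure mode
it can cope with. ]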
> So this patch introduces the "heuristic" mode of overcommit accounting
> inside containers, similar to the one used on most hosts by default
> (vm.overcommit_memory sysctl set to 0). It can be toggled system-wide
> by changing the value of the ubc.overcommit_memory sysctl.
> Per-beancounter configuration is not supported yet, but it may be added
> later if needed.
>
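[ FWIW: since the entry goes into ub_sysctl_table (beancounter.c hunk
below), the knob should show up as /proc/sys/ubc/overcommit_memory next
to the other ubc sysctls, so it can be flipped at runtime with sysctl or
a plain echo; 0 keeps the heuristic, 1 falls back to the pure
privvmpages check. ]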
> If enabled (ubc.overcommit_memory = 0, which is the default), an
> application inside a container will fail to allocate a virtual address
> range iff its length is greater than the amount of unused plus
> reclaimable memory accounted to the container. Note that the
> UB_PRIVVMPAGES limit is still taken into account. If disabled
> (ubc.overcommit_memory = 1), only the UB_PRIVVMPAGES limit is checked.
>
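[ To put a number on "reclaimable memory": the estimate used by
mem_cgroup_enough_memory() below is unused memsw + dcache + page cache -
shmem. For a made-up CT with a 2G memsw limit, 1.5G memsw usage, 100M of
dcache and 300M of page cache of which 50M is shmem, that gives

    (2048M - 1536M) + 100M + 300M - 50M = 862M

so a single ~1G anonymous mapping gets -ENOMEM while a 500M one is still
allowed (provided privvmpages is under its barrier). The numbers are
invented, just to show the shape of the check. ]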
> https://jira.sw.ru/browse/PSBM-45695
>
> Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> ---
>  include/bc/vmpages.h    |  4 ++++
>  kernel/bc/beancounter.c |  7 +++++++
>  kernel/bc/vm_pages.c    | 28 ++++++++++++++++++++++++++++
>  mm/memcontrol.c         | 21 +++++++++++++++++++++
>  mm/mmap.c               |  9 +++------
>  5 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/include/bc/vmpages.h b/include/bc/vmpages.h
> index bf63b885441c..72a5d8ecb94b 100644
> --- a/include/bc/vmpages.h
> +++ b/include/bc/vmpages.h
> @@ -17,6 +17,8 @@
> #include <bc/beancounter.h>
> #include <bc/decl.h>
>
> +extern int ub_overcommit_memory;
> +
> /*
>  * Check whether vma has private or copy-on-write mapping.
>  */
> @@ -47,4 +49,6 @@ UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
> UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
>                 unsigned long size))
>
> +UB_DECLARE_FUNC(int, ub_enough_memory(struct mm_struct *mm, long pages))
> +
> #endif /* __UB_PAGES_H_ */
> diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
> index 5023bd2b208d..18188f7a42e8 100644
> --- a/kernel/bc/beancounter.c
> +++ b/kernel/bc/beancounter.c
> @@ -1135,6 +1135,13 @@ static ctl_table ub_sysctl_table[] = {
>                 .mode = 0644,
>                 .proc_handler = &proc_resource_precharge,
>         },
> +       {
> +               .procname = "overcommit_memory",
> +               .data = &ub_overcommit_memory,
> +               .maxlen = sizeof(ub_overcommit_memory),
> +               .mode = 0644,
> +               .proc_handler = proc_dointvec,
> +       },
> #ifdef CONFIG_BC_IO_ACCOUNTING
>         {
>                 .procname = "dirty_ratio",
> diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
> index 5e588d1f036c..b04ea13d9fad 100644
> --- a/kernel/bc/vm_pages.c
> +++ b/kernel/bc/vm_pages.c
> @@ -24,6 +24,8 @@
> #include <bc/vmpages.h>
> #include <bc/proc.h>
>
> +int ub_overcommit_memory;
> +
> int ub_memory_charge(struct mm_struct *mm, unsigned long size,
>                 unsigned vm_flags, struct file *vm_file, int sv)
> {
> @@ -119,6 +121,32 @@ void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
>         uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
> }
>
> +extern int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages);
> +
> +int ub_enough_memory(struct mm_struct *mm, long pages)
> +{
> +       struct user_beancounter *ub;
> +       struct cgroup_subsys_state *css;
> +       int ret;
> +
> +       if (!mm)
> +               return 0;
> +
> +       ub = mm->mm_ub;
> +
> +       if (ub->ub_parms[UB_PRIVVMPAGES].held >
> +           ub->ub_parms[UB_PRIVVMPAGES].barrier)
> +               return -ENOMEM;
> +
> +       if (ub_overcommit_memory)
> +               return 0;
> +
> +       css = ub_get_mem_css(ub);
> +       ret = mem_cgroup_enough_memory(mem_cgroup_from_cont(css->cgroup), pages);
> +       css_put(css);
> +       return ret;
> +}
> +
> static int bc_fill_sysinfo(struct user_beancounter *ub,
>                 unsigned long meminfo_val, struct sysinfo *si)
> {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f52cd8ec02f0..b57705523be1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4976,6 +4976,27 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
>         mi->shmem = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
> }
>
> +int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
> +{
> +       long free;
> +
> +       /* unused memory */
> +       free = (res_counter_read_u64(&memcg->memsw, RES_LIMIT) -
> +               res_counter_read_u64(&memcg->memsw, RES_USAGE)) >> PAGE_SHIFT;
> +
> +       /* reclaimable slabs */
> +       free += res_counter_read_u64(&memcg->dcache, RES_USAGE) >> PAGE_SHIFT;
> +
> +       /* assume file cache is reclaimable */
> +       free += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
> +
> +       /* but do not count shmem pages as they can't be purged,
> +        * only swapped out */
> +       free -= mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
> +
> +       return free < pages ? -ENOMEM : 0;
> +}
> +
> static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
> {
>         u64 val;
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 417163e18d32..fcd1ea3c327d 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -135,13 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
> {
>         unsigned long free, allowed, reserve;
>
> -       vm_acct_memory(pages);
> +       if (mm && ub_enough_memory(mm, pages) != 0)
> +               return -ENOMEM;
>
> -#ifdef CONFIG_BEANCOUNTERS
> -       if (mm && mm->mm_ub->ub_parms[UB_PRIVVMPAGES].held <=
> -           mm->mm_ub->ub_parms[UB_VMGUARPAGES].barrier)
> -               return 0;
> -#endif
> +       vm_acct_memory(pages);
>
>         /*
>          * Sometimes we want to use more memory than we have
>