[Devel] [PATCH rh7 4/4] ub: add heuristic check for memory overcommit

Vladimir Davydov vdavydov at virtuozzo.com
Thu May 5 03:23:29 PDT 2016


Currently, we only have the hard limit for virtual address space size
inside containers. This is inflexible, because setting it to a small
value will cause many normal allocations to fail. That is why it is
left unlimited by default in Vz7. OTOH allowing an application to
allocate as much virtual address space as it wants may be bad for some
applications that expect to be stopped gracefully by mmap returning ENOMEM
instead of being killed by OOM.

So this patch introduces the "heuristic" mode of overcommit accounting
inside containers similar to the one used on most hosts by default
(vm.overcommit_memory sysctl set to 0). It can be toggled system-wide by
changing the value of ubc.overcommit_memory sysctl. Per-beancounter
configuration is not supported yet, but it may be added later if needed.

If enabled (ubc.overcommit_memory = 0, this is the default), an
application inside a container will fail to allocate a virtual address
range iff its length is greater than the amount of reclaimable memory
accounted to the container. Note that the UBC_PRIVVMPAGES limit is still
taken into account. If disabled (ubc.overcommit_memory = 1), only the
UBC_PRIVVMPAGES limit will be checked.

https://jira.sw.ru/browse/PSBM-45695

Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 include/bc/vmpages.h    |  4 ++++
 kernel/bc/beancounter.c |  7 +++++++
 kernel/bc/vm_pages.c    | 28 ++++++++++++++++++++++++++++
 mm/memcontrol.c         | 21 +++++++++++++++++++++
 mm/mmap.c               |  9 +++------
 5 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/include/bc/vmpages.h b/include/bc/vmpages.h
index bf63b885441c..72a5d8ecb94b 100644
--- a/include/bc/vmpages.h
+++ b/include/bc/vmpages.h
@@ -17,6 +17,8 @@
 #include <bc/beancounter.h>
 #include <bc/decl.h>
 
+extern int ub_overcommit_memory;
+
 /*
  * Check whether vma has private or copy-on-write mapping.
  */
@@ -47,4 +49,6 @@ UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
 UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
 			unsigned long size))
 
+UB_DECLARE_FUNC(int, ub_enough_memory(struct mm_struct *mm, long pages))
+
 #endif /* __UB_PAGES_H_ */
diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
index 5023bd2b208d..18188f7a42e8 100644
--- a/kernel/bc/beancounter.c
+++ b/kernel/bc/beancounter.c
@@ -1135,6 +1135,13 @@ static ctl_table ub_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_resource_precharge,
 	},
+	{
+		.procname	= "overcommit_memory",
+		.data		= &ub_overcommit_memory,
+		.maxlen		= sizeof(ub_overcommit_memory),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_BC_IO_ACCOUNTING
 	{
 		.procname	= "dirty_ratio",
diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
index 5e588d1f036c..b04ea13d9fad 100644
--- a/kernel/bc/vm_pages.c
+++ b/kernel/bc/vm_pages.c
@@ -24,6 +24,8 @@
 #include <bc/vmpages.h>
 #include <bc/proc.h>
 
+int ub_overcommit_memory;
+
 int ub_memory_charge(struct mm_struct *mm, unsigned long size,
 		unsigned vm_flags, struct file *vm_file, int sv)
 {
@@ -119,6 +121,32 @@ void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
 	uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
 }
 
+extern int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages);
+
+int ub_enough_memory(struct mm_struct *mm, long pages)
+{
+	struct user_beancounter *ub;
+	struct cgroup_subsys_state *css;
+	int ret;
+
+	if (!mm)
+		return 0;
+
+	ub = mm->mm_ub;
+
+	if (ub->ub_parms[UB_PRIVVMPAGES].held >
+	    ub->ub_parms[UB_PRIVVMPAGES].barrier)
+		return -ENOMEM;
+
+	if (ub_overcommit_memory)
+		return 0;
+
+	css = ub_get_mem_css(ub);
+	ret = mem_cgroup_enough_memory(mem_cgroup_from_cont(css->cgroup), pages);
+	css_put(css);
+	return ret;
+}
+
 static int bc_fill_sysinfo(struct user_beancounter *ub,
 		unsigned long meminfo_val, struct sysinfo *si)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f52cd8ec02f0..b57705523be1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4976,6 +4976,27 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
 	mi->shmem = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
 }
 
+int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
+{
+	long free;
+
+	/* unused memory */
+	free = (res_counter_read_u64(&memcg->memsw, RES_LIMIT) -
+		res_counter_read_u64(&memcg->memsw, RES_USAGE)) >> PAGE_SHIFT;
+
+	/* reclaimable slabs */
+	free += res_counter_read_u64(&memcg->dcache, RES_USAGE) >> PAGE_SHIFT;
+
+	/* assume file cache is reclaimable */
+	free += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+
+	/* but do not count shmem pages as they can't be purged,
+	 * only swapped out */
+	free -= mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
+
+	return free < pages ? -ENOMEM : 0;
+}
+
 static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 {
 	u64 val;
diff --git a/mm/mmap.c b/mm/mmap.c
index 417163e18d32..fcd1ea3c327d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -135,13 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
 	unsigned long free, allowed, reserve;
 
-	vm_acct_memory(pages);
+	if (mm && ub_enough_memory(mm, pages) != 0)
+		return -ENOMEM;
 
-#ifdef CONFIG_BEANCOUNTERS
-	if (mm && mm->mm_ub->ub_parms[UB_PRIVVMPAGES].held <=
-			mm->mm_ub->ub_parms[UB_VMGUARPAGES].barrier)
-		return 0;
-#endif
+	vm_acct_memory(pages);
 
 	/*
 	 * Sometimes we want to use more memory than we have
-- 
2.1.4



More information about the Devel mailing list