[Devel] [patch rh7 1/2] vdso: x86 -- Eliminate race in vdso allocation, v2

Cyrill Gorcunov gorcunov at virtuozzo.com
Tue Jun 23 15:17:46 PDT 2015


As noted by vdavydov@, we have a race here if two tasks try to
allocate the vdso concurrently. Fix it with a lockless test plus a
mutex serializing the vdso allocation.

https://jira.sw.ru/browse/PSBM-30093
https://bugzilla.openvz.org/show_bug.cgi?id=2768
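
For reference, both files apply the same check / lock / re-check / publish
pattern. Below is a minimal sketch of that pattern, assuming a hypothetical
namespace structure "struct my_ns" with a @pages pointer; prep_pages(),
pages_mutex and my_ns are illustrative names only, not the actual kernel
symbols touched by this patch:

	static DEFINE_MUTEX(pages_mutex);

	static struct page **prep_pages(struct my_ns *ns)
	{
		struct page **p, **new_pages;

		/* Lockless fast path, paired with the smp_wmb() below. */
		p = ACCESS_ONCE(ns->pages);
		smp_read_barrier_depends();
		if (p)
			return p;

		mutex_lock(&pages_mutex);
		/* Recheck under the mutex: another task may have published first. */
		if (ns->pages) {
			p = ns->pages;
			goto out_unlock;
		}

		new_pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
		if (!new_pages) {
			p = ERR_PTR(-ENOMEM);
			goto out_unlock;
		}
		/* ... allocate and initialize the page(s) new_pages refers to ... */

		/* Make the initialized contents visible before publishing the pointer. */
		smp_wmb();
		p = ns->pages = new_pages;
	out_unlock:
		mutex_unlock(&pages_mutex);
		return p;
	}

The smp_wmb() before the final assignment pairs with the
smp_read_barrier_depends() on the lockless path, so a task that observes a
non-NULL @pages pointer also observes its initialized contents without
taking the mutex.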

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
CC: Vladimir Davydov <vdavydov at virtuozzo.com>
CC: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 arch/x86/vdso/vdso32-setup.c |   48 ++++++++++++++++++++++++++++++-----------
 arch/x86/vdso/vma.c          |   50 ++++++++++++++++++++++++++++++-------------
 2 files changed, 71 insertions(+), 27 deletions(-)

Index: linux-pcs7.git/arch/x86/vdso/vdso32-setup.c
===================================================================
--- linux-pcs7.git.orig/arch/x86/vdso/vdso32-setup.c
+++ linux-pcs7.git/arch/x86/vdso/vdso32-setup.c
@@ -307,6 +307,8 @@ int __init sysenter_setup(void)
 	return 0;
 }
 
+static DEFINE_MUTEX(vdso32_mutex);
+
 static struct page **uts_prep_vdso_pages_locked(int map)
 {
 	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
@@ -314,6 +316,7 @@ static struct page **uts_prep_vdso_pages
 	struct ve_struct *ve = get_exec_env();
 	struct page **pages = vdso32_pages;
 	int n1, n2, n3, new_version;
+	struct page **new_pages, **p;
 	void *addr;
 
 	/*
@@ -321,7 +324,16 @@ static struct page **uts_prep_vdso_pages
 	 */
 	if (uts_ns == &init_uts_ns)
 		return vdso32_pages;
-	else if (uts_ns->vdso32.pages)
+
+	/*
+	 * Dirty lockless hack. Strictly speaking
+	 * we need to return @p here if it's non-NULL,
+	 * but since there is only one transition possible
+	 * { =0 ; !=0 }, we simply return @uts_ns->vdso32.pages.
+	 */
+	p = ACCESS_ONCE(uts_ns->vdso32.pages);
+	smp_read_barrier_depends();
+	if (p)
 		return uts_ns->vdso32.pages;
 
 	up_write(&mm->mmap_sem);
@@ -346,41 +358,51 @@ static struct page **uts_prep_vdso_pages
 		goto out;
 	}
 
+	mutex_lock(&vdso32_mutex);
+	if (uts_ns->vdso32.pages) {
+		pages = uts_ns->vdso32.pages;
+		goto out_unlock;
+	}
+
 	uts_ns->vdso32.nr_pages		= 1;
 	uts_ns->vdso32.size		= PAGE_SIZE;
 	uts_ns->vdso32.version_off	= (unsigned long)VDSO32_SYMBOL(0, linux_version_code);
-	uts_ns->vdso32.pages		= kmalloc(sizeof(struct page *), GFP_KERNEL);
-	if (!uts_ns->vdso32.pages) {
+	new_pages			= kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!new_pages) {
 		pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
 		pages = ERR_PTR(-ENOMEM);
-		goto out;
+		goto out_unlock;
 	}
 
-	uts_ns->vdso32.pages[0] = alloc_page(GFP_KERNEL);
-	if (!uts_ns->vdso32.pages[0]) {
+	new_pages[0] = alloc_page(GFP_KERNEL);
+	if (!new_pages[0]) {
 		pr_err("Can't allocate page for VE %d\n", ve->veid);
-		kfree(uts_ns->vdso32.pages);
-		uts_ns->vdso32.pages = NULL;
+		kfree(new_pages);
 		pages = ERR_PTR(-ENOMEM);
-		goto out;
+		goto out_unlock;
 	}
 
-	copy_page(page_address(uts_ns->vdso32.pages[0]), page_address(vdso32_pages[0]));
-	pages = uts_ns->vdso32.pages;
+	copy_page(page_address(new_pages[0]), page_address(vdso32_pages[0]));
 
-	addr = page_address(uts_ns->vdso32.pages[0]);
+	addr = page_address(new_pages[0]);
 	*((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+	smp_wmb();
+
+	pages = uts_ns->vdso32.pages = new_pages;
+
 	pr_debug("vDSO version transition %d -> %d for VE %d\n",
 		 LINUX_VERSION_CODE, new_version, ve->veid);
 
 #ifdef CONFIG_X86_32
-	__set_fixmap(FIX_VDSO, page_to_pfn(uts_ns->vdso32.pages[0]) << PAGE_SHIFT,
+	__set_fixmap(FIX_VDSO, page_to_pfn(new_pages[0]) << PAGE_SHIFT,
 		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
 
 	/* flush stray tlbs */
 	flush_tlb_all();
 #endif
 
+out_unlock:
+	mutex_unlock(&vdso32_mutex);
 out:
 	down_write(&mm->mmap_sem);
 	return pages;
Index: linux-pcs7.git/arch/x86/vdso/vma.c
===================================================================
--- linux-pcs7.git.orig/arch/x86/vdso/vma.c
+++ linux-pcs7.git/arch/x86/vdso/vma.c
@@ -194,11 +194,14 @@ up_fail:
 	return ret;
 }
 
+static DEFINE_MUTEX(vdso_mutex);
+
 static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
 	struct ve_struct *ve = get_exec_env();
 	int i, n1, n2, n3, new_version;
+	struct page **new_pages, **p;
 
 	/*
 	 * For node or in case we've not changed UTS simply
@@ -209,7 +212,15 @@ static int uts_arch_setup_additional_pag
 	 */
 	if (uts_ns == &init_uts_ns)
 		goto map_init_uts;
-	else if (uts_ns->vdso.pages)
+	/*
+	 * Dirty lockless hack. Strictly speaking
+	 * we need to return @p here if it's non-NULL,
+	 * but since there is only one transition possible
+	 * { =0 ; !=0 }, we simply return @uts_ns->vdso.pages.
+	 */
+	p = ACCESS_ONCE(uts_ns->vdso.pages);
+	smp_read_barrier_depends();
+	if (p)
 		goto map_uts;
 
 	if (sscanf(uts_ns->name.release, "%d.%d.%d", &n1, &n2, &n3) == 3) {
@@ -232,13 +243,19 @@ static int uts_arch_setup_additional_pag
 		goto map_init_uts;
 	}
 
+	mutex_lock(&vdso_mutex);
+	if (uts_ns->vdso.pages) {
+		mutex_unlock(&vdso_mutex);
+		goto map_uts;
+	}
+
 	uts_ns->vdso.nr_pages	= init_uts_ns.vdso.nr_pages;
 	uts_ns->vdso.size	= init_uts_ns.vdso.size;
 	uts_ns->vdso.version_off= init_uts_ns.vdso.version_off;
-	uts_ns->vdso.pages	= kmalloc(sizeof(struct page *) * init_uts_ns.vdso.nr_pages, GFP_KERNEL);
-	if (!uts_ns->vdso.pages) {
+	new_pages		= kmalloc(sizeof(struct page *) * init_uts_ns.vdso.nr_pages, GFP_KERNEL);
+	if (!new_pages) {
 		pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
-		return -ENOMEM;
+		goto out_unlock;
 	}
 
 	for (i = 0; i < uts_ns->vdso.nr_pages; i++) {
@@ -246,26 +263,28 @@ static int uts_arch_setup_additional_pag
 		if (!p) {
 			pr_err("Can't allocate page for VE %d\n", ve->veid);
 			for (; i > 0; i--)
-				put_page(uts_ns->vdso.pages[i - 1]);
-			kfree(uts_ns->vdso.pages);
-			uts_ns->vdso.pages = NULL;
-			return -ENOMEM;
+				put_page(new_pages[i - 1]);
+			kfree(new_pages);
+			goto out_unlock;
 		}
-		uts_ns->vdso.pages[i] = p;
+		new_pages[i] = p;
 		copy_page(page_address(p), page_address(init_uts_ns.vdso.pages[i]));
 	}
 
-	uts_ns->vdso.addr = vmap(uts_ns->vdso.pages, uts_ns->vdso.nr_pages, 0, PAGE_KERNEL);
+	uts_ns->vdso.addr = vmap(new_pages, uts_ns->vdso.nr_pages, 0, PAGE_KERNEL);
 	if (!uts_ns->vdso.addr) {
 		pr_err("Can't map vDSO pages for VE %d\n", ve->veid);
 		for (i = 0; i < uts_ns->vdso.nr_pages; i++)
-			put_page(uts_ns->vdso.pages[i]);
-		kfree(uts_ns->vdso.pages);
-		uts_ns->vdso.pages = NULL;
-		return -ENOMEM;
+			put_page(new_pages[i]);
+		kfree(new_pages);
+		goto out_unlock;
 	}
 
 	*((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version;
+	smp_wmb();
+	uts_ns->vdso.pages = new_pages;
+	mutex_unlock(&vdso_mutex);
+
 	pr_debug("vDSO version transition %d -> %d for VE %d\n",
 		 LINUX_VERSION_CODE, new_version, ve->veid);
 
@@ -273,6 +292,9 @@ map_uts:
 	return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages, uts_ns->vdso.size);
 map_init_uts:
 	return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages, init_uts_ns.vdso.size);
+out_unlock:
+	mutex_unlock(&vdso_mutex);
+	return -ENOMEM;
 }
 
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
