[Devel] [patch rh7 1/2] vdso: x86 -- Eliminate race in vdso allocation, v2
Cyrill Gorcunov
gorcunov at virtuozzo.com
Tue Jun 23 15:17:46 PDT 2015
As noted by vdavydov@, we have a race here if two tasks try to
allocate the vdso concurrently. Fix it with a lockless test plus a
mutex around the vdso allocation.
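The fix follows the usual double-checked publication pattern: a lockless
read of the pages pointer on the fast path, and a mutex-protected slow
path that re-checks the pointer, builds the new pages, and only publishes
the pointer after the contents are written. A minimal userspace sketch of
that pattern (illustrative names only — get_pages, make_pages,
shared_pages are not part of the patch):

/*
 * Userspace analogue of the scheme applied below: lockless fast-path
 * check of a published pointer, then a mutex-guarded slow path that
 * re-checks, builds the object, and publishes it last.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(int *) shared_pages;	/* analogue of uts_ns->vdso.pages */
static pthread_mutex_t alloc_mutex = PTHREAD_MUTEX_INITIALIZER;

static int *make_pages(void)
{
	int *p = malloc(sizeof(*p));
	if (p)
		*p = 42;		/* fill before publishing */
	return p;
}

static int *get_pages(void)
{
	/* Lockless fast path: once non-NULL, the pointer never changes. */
	int *p = atomic_load_explicit(&shared_pages, memory_order_acquire);
	if (p)
		return p;

	pthread_mutex_lock(&alloc_mutex);
	/* Re-check under the lock: another task may have won the race. */
	p = atomic_load_explicit(&shared_pages, memory_order_relaxed);
	if (!p) {
		p = make_pages();
		if (p)
			/* Release store orders the fill before publication,
			 * playing the role of smp_wmb() in the patch. */
			atomic_store_explicit(&shared_pages, p,
					      memory_order_release);
	}
	pthread_mutex_unlock(&alloc_mutex);
	return p;
}

static void *worker(void *arg)
{
	(void)arg;
	return get_pages();
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return shared_pages ? 0 : 1;
}

The acquire load and release store here stand in for the
ACCESS_ONCE()/smp_read_barrier_depends() read and the smp_wmb() before
the publishing assignment used in the kernel code below.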
https://jira.sw.ru/browse/PSBM-30093
https://bugzilla.openvz.org/show_bug.cgi?id=2768
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
CC: Vladimir Davydov <vdavydov at virtuozzo.com>
CC: Konstantin Khorenko <khorenko at virtuozzo.com>
---
arch/x86/vdso/vdso32-setup.c | 48 ++++++++++++++++++++++++++++++-----------
arch/x86/vdso/vma.c | 50 ++++++++++++++++++++++++++++++-------------
2 files changed, 71 insertions(+), 27 deletions(-)
Index: linux-pcs7.git/arch/x86/vdso/vdso32-setup.c
===================================================================
--- linux-pcs7.git.orig/arch/x86/vdso/vdso32-setup.c
+++ linux-pcs7.git/arch/x86/vdso/vdso32-setup.c
@@ -307,6 +307,8 @@ int __init sysenter_setup(void)
return 0;
}
+static DEFINE_MUTEX(vdso32_mutex);
+
static struct page **uts_prep_vdso_pages_locked(int map)
{
struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
@@ -314,6 +316,7 @@ static struct page **uts_prep_vdso_pages
struct ve_struct *ve = get_exec_env();
struct page **pages = vdso32_pages;
int n1, n2, n3, new_version;
+ struct page **new_pages, **p;
void *addr;
/*
@@ -321,7 +324,16 @@ static struct page **uts_prep_vdso_pages
*/
if (uts_ns == &init_uts_ns)
return vdso32_pages;
- else if (uts_ns->vdso32.pages)
+
+ /*
+ * Dirty lockless hack. Strictly speaking
+ * we need to return @p here if it's non-nil,
+ * but since there is only one transition possible
+ * { =0 ; !=0 } we simply return @uts_ns->vdso32.pages
+ */
+ p = ACCESS_ONCE(uts_ns->vdso32.pages);
+ smp_read_barrier_depends();
+ if (p)
return uts_ns->vdso32.pages;
up_write(&mm->mmap_sem);
@@ -346,41 +358,51 @@ static struct page **uts_prep_vdso_pages
goto out;
}
+ mutex_lock(&vdso32_mutex);
+ if (uts_ns->vdso32.pages) {
+ pages = uts_ns->vdso32.pages;
+ goto out_unlock;
+ }
+
uts_ns->vdso32.nr_pages = 1;
uts_ns->vdso32.size = PAGE_SIZE;
uts_ns->vdso32.version_off = (unsigned long)VDSO32_SYMBOL(0, linux_version_code);
- uts_ns->vdso32.pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
- if (!uts_ns->vdso32.pages) {
+ new_pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+ if (!new_pages) {
pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
pages = ERR_PTR(-ENOMEM);
- goto out;
+ goto out_unlock;
}
- uts_ns->vdso32.pages[0] = alloc_page(GFP_KERNEL);
- if (!uts_ns->vdso32.pages[0]) {
+ new_pages[0] = alloc_page(GFP_KERNEL);
+ if (!new_pages[0]) {
pr_err("Can't allocate page for VE %d\n", ve->veid);
- kfree(uts_ns->vdso32.pages);
- uts_ns->vdso32.pages = NULL;
+ kfree(new_pages);
pages = ERR_PTR(-ENOMEM);
- goto out;
+ goto out_unlock;
}
- copy_page(page_address(uts_ns->vdso32.pages[0]), page_address(vdso32_pages[0]));
- pages = uts_ns->vdso32.pages;
+ copy_page(page_address(new_pages[0]), page_address(vdso32_pages[0]));
- addr = page_address(uts_ns->vdso32.pages[0]);
+ addr = page_address(new_pages[0]);
*((int *)(addr + uts_ns->vdso32.version_off)) = new_version;
+ smp_wmb();
+
+ pages = uts_ns->vdso32.pages = new_pages;
+
pr_debug("vDSO version transition %d -> %d for VE %d\n",
LINUX_VERSION_CODE, new_version, ve->veid);
#ifdef CONFIG_X86_32
- __set_fixmap(FIX_VDSO, page_to_pfn(uts_ns->vdso32.pages[0]) << PAGE_SHIFT,
+ __set_fixmap(FIX_VDSO, page_to_pfn(new_pages[0]) << PAGE_SHIFT,
map ? PAGE_READONLY_EXEC : PAGE_NONE);
/* flush stray tlbs */
flush_tlb_all();
#endif
+out_unlock:
+ mutex_unlock(&vdso32_mutex);
out:
down_write(&mm->mmap_sem);
return pages;
Index: linux-pcs7.git/arch/x86/vdso/vma.c
===================================================================
--- linux-pcs7.git.orig/arch/x86/vdso/vma.c
+++ linux-pcs7.git/arch/x86/vdso/vma.c
@@ -194,11 +194,14 @@ up_fail:
return ret;
}
+static DEFINE_MUTEX(vdso_mutex);
+
static int uts_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
struct ve_struct *ve = get_exec_env();
int i, n1, n2, n3, new_version;
+ struct page **new_pages, **p;
/*
* For node or in case we've not changed UTS simply
@@ -209,7 +212,15 @@ static int uts_arch_setup_additional_pag
*/
if (uts_ns == &init_uts_ns)
goto map_init_uts;
- else if (uts_ns->vdso.pages)
+ /*
+ * Dirty lockless hack. Strictly speaking
+ * we need to return @p here if it's non-nil,
+ * but since there is only one transition possible
+ * { =0 ; !=0 } we simply return @uts_ns->vdso.pages
+ */
+ p = ACCESS_ONCE(uts_ns->vdso.pages);
+ smp_read_barrier_depends();
+ if (p)
goto map_uts;
if (sscanf(uts_ns->name.release, "%d.%d.%d", &n1, &n2, &n3) == 3) {
@@ -232,13 +243,19 @@ static int uts_arch_setup_additional_pag
goto map_init_uts;
}
+ mutex_lock(&vdso_mutex);
+ if (uts_ns->vdso.pages) {
+ mutex_unlock(&vdso_mutex);
+ goto map_uts;
+ }
+
uts_ns->vdso.nr_pages = init_uts_ns.vdso.nr_pages;
uts_ns->vdso.size = init_uts_ns.vdso.size;
uts_ns->vdso.version_off= init_uts_ns.vdso.version_off;
- uts_ns->vdso.pages = kmalloc(sizeof(struct page *) * init_uts_ns.vdso.nr_pages, GFP_KERNEL);
- if (!uts_ns->vdso.pages) {
+ new_pages = kmalloc(sizeof(struct page *) * init_uts_ns.vdso.nr_pages, GFP_KERNEL);
+ if (!new_pages) {
pr_err("Can't allocate vDSO pages array for VE %d\n", ve->veid);
- return -ENOMEM;
+ goto out_unlock;
}
for (i = 0; i < uts_ns->vdso.nr_pages; i++) {
@@ -246,26 +263,28 @@ static int uts_arch_setup_additional_pag
if (!p) {
pr_err("Can't allocate page for VE %d\n", ve->veid);
for (; i > 0; i--)
- put_page(uts_ns->vdso.pages[i - 1]);
- kfree(uts_ns->vdso.pages);
- uts_ns->vdso.pages = NULL;
- return -ENOMEM;
+ put_page(new_pages[i - 1]);
+ kfree(new_pages);
+ goto out_unlock;
}
- uts_ns->vdso.pages[i] = p;
+ new_pages[i] = p;
copy_page(page_address(p), page_address(init_uts_ns.vdso.pages[i]));
}
- uts_ns->vdso.addr = vmap(uts_ns->vdso.pages, uts_ns->vdso.nr_pages, 0, PAGE_KERNEL);
+ uts_ns->vdso.addr = vmap(new_pages, uts_ns->vdso.nr_pages, 0, PAGE_KERNEL);
if (!uts_ns->vdso.addr) {
pr_err("Can't map vDSO pages for VE %d\n", ve->veid);
for (i = 0; i < uts_ns->vdso.nr_pages; i++)
- put_page(uts_ns->vdso.pages[i]);
- kfree(uts_ns->vdso.pages);
- uts_ns->vdso.pages = NULL;
- return -ENOMEM;
+ put_page(new_pages[i]);
+ kfree(new_pages);
+ goto out_unlock;
}
*((int *)(uts_ns->vdso.addr + uts_ns->vdso.version_off)) = new_version;
+ smp_wmb();
+ uts_ns->vdso.pages = new_pages;
+ mutex_unlock(&vdso_mutex);
+
pr_debug("vDSO version transition %d -> %d for VE %d\n",
LINUX_VERSION_CODE, new_version, ve->veid);
@@ -273,6 +292,9 @@ map_uts:
return setup_additional_pages(bprm, uses_interp, uts_ns->vdso.pages, uts_ns->vdso.size);
map_init_uts:
return setup_additional_pages(bprm, uses_interp, init_uts_ns.vdso.pages, init_uts_ns.vdso.size);
+out_unlock:
+ mutex_unlock(&vdso_mutex);
+ return -ENOMEM;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)