[Devel] [PATCH RHEL7 COMMIT] arch/x86: only enable cpuid faulting for containers

Konstantin Khorenko khorenko at virtuozzo.com
Tue Oct 20 09:00:21 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.8.9
------>
commit eaa837c8e6a3ea9910a3310973be4583f589a1c1
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Tue Oct 20 20:00:21 2015 +0400

    arch/x86: only enable cpuid faulting for containers
    
    Port diff-arch-x86-only-enable-cpuid-faulting-for-containers
    
    Hypervisors aren't ready for the fact that CPUID may fault, so let's
    only enable the feature for processes inside containers.
    
    The feature is flipped on context switch. To avoid overhead, we cache
    the current state (enabled/disabled) in a per cpu var. We also do not
    flip it when switching to a kernel thread (e.g. idle task). As a result,
    the overhead will be negligible unless VMs and containers are mixed
    on the same cpu.
    
    Note, the behaviour of /proc/cpuinfo already conforms to this change -
    we respect masking only inside containers.
    
    https://jira.sw.ru/browse/PSBM-30516
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    
    Reviewed-by: Denis V. Lunev <den at openvz.org>
    
    Q from den@:
    If an mm created (inherited through fork()) with another CPUID (i.e. in VE0)
    resides in a container and this feature is on, migration should be prohibited.
    
    Input from xemul@:
    in case a process entered a CT and did not exec, it still has glibc files
    pointing outside the CT => no online migration.
    =============================================================================
    
    Changes in port:
     - merge diff-ve-arch-x86-only-enable-cpuid-faulting-for-containers-a
     - use on_each_cpu for toggling running tasks' cpuid faulting feature
    
    https://jira.sw.ru/browse/PSBM-33638
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 arch/x86/include/asm/processor.h |  3 +++
 arch/x86/kernel/cpu/intel.c      | 19 ++++++++++++++-----
 arch/x86/kernel/cpuid_fault.c    | 23 +++++++++++++++++++++++
 kernel/sched/core.c              |  4 ++++
 kernel/ve/ve.c                   |  8 ++++++++
 5 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 37db43e..e3b63b9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -975,4 +975,7 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 
+extern void (*set_cpuid_faulting_cb)(bool enable);
+extern void set_cpuid_faulting(bool enable);
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 98da793..0c94651 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -352,20 +352,29 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
 	}
 }
 
+static void intel_set_cpuid_faulting(bool enable)
+{
+	unsigned int l1, l2;
+
+	rdmsr(MSR_MISC_FEATURES_ENABLES, l1, l2);
+	l1 &= ~1;
+	if (enable)
+		l1 |= 1;
+	wrmsr(MSR_MISC_FEATURES_ENABLES, l1, l2);
+}
+
 static void intel_cpuid_faulting_init(struct cpuinfo_x86 *c)
 {
 	unsigned int l1, l2;
 
-	/* check if cpuid faulting is supported */
 	if (rdmsr_safe(MSR_PLATFORM_INFO, &l1, &l2) != 0 ||
 	    !(l1 & (1 << 31)))
 		return;
 
-	/* enable cpuid faulting */
-	rdmsr(MSR_MISC_FEATURES_ENABLES, l1, l2);
-	wrmsr(MSR_MISC_FEATURES_ENABLES, l1 | 1, l2);
-
 	set_cpu_cap(c, X86_FEATURE_CPUID_FAULTING);
+	set_cpuid_faulting_cb = intel_set_cpuid_faulting;
+
+	intel_set_cpuid_faulting(false);
 }
 
 static void init_intel(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpuid_fault.c b/arch/x86/kernel/cpuid_fault.c
index 34dda40..fdfbf94 100644
--- a/arch/x86/kernel/cpuid_fault.c
+++ b/arch/x86/kernel/cpuid_fault.c
@@ -29,6 +29,29 @@ struct cpuid_override_table {
 static struct cpuid_override_table __rcu *cpuid_override;
 static DEFINE_SPINLOCK(cpuid_override_lock);
 
+#define cpuid_override_active		(!!rcu_access_pointer(cpuid_override))
+
+void (*set_cpuid_faulting_cb)(bool enable);
+static DEFINE_PER_CPU(bool, cpuid_faulting_enabled);
+
+void set_cpuid_faulting(bool enable)
+{
+	bool *enabled;
+
+	if (!cpu_has_cpuid_faulting)
+		return;
+	if (!cpuid_override_active)
+		enable = false;
+
+	enabled = &get_cpu_var(cpuid_faulting_enabled);
+	if (*enabled != enable) {
+		set_cpuid_faulting_cb(enable);
+		*enabled = enable;
+	}
+	put_cpu_var(cpuid_faulting_enabled);
+}
+EXPORT_SYMBOL(set_cpuid_faulting);
+
 static void cpuid_override_update(struct cpuid_override_table *new_table)
 {
 	struct cpuid_override_table *old_table;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1277c18..57649dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2200,6 +2200,10 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	}
 
 	tick_nohz_task_switch(current);
+
+	/* kernel threads don't care about cpuid faulting */
+	if (current->mm)
+		set_cpuid_faulting(!ve_is_super(get_exec_env()));
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 39a95e8..d86efb9 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -736,6 +736,11 @@ static int ve_can_attach(struct cgroup *cg, struct cgroup_taskset *tset)
 	return 0;
 }
 
+static void ve_update_cpuid_faulting(void *dummy)
+{
+	set_cpuid_faulting(!ve_is_super(get_exec_env()));
+}
+
 static void ve_attach(struct cgroup *cg, struct cgroup_taskset *tset)
 {
 	struct ve_struct *ve = cgroup_ve(cg);
@@ -755,6 +760,9 @@ static void ve_attach(struct cgroup *cg, struct cgroup_taskset *tset)
 
 		task->task_ve = ve;
 	}
+
+	/* Adjust cpuid faulting */
+	on_each_cpu(ve_update_cpuid_faulting, NULL, 1);
 }
 
 static int ve_state_read(struct cgroup *cg, struct cftype *cft,



More information about the Devel mailing list