[Devel] [PATCH vz8 2/3] x86: make ARCH_[SET|GET]_CPUID friends with /proc/vz/cpuid_override

Andrey Ryabinin aryabinin at virtuozzo.com
Fri Oct 30 14:45:14 MSK 2020


We use CPUID faulting to emulate the cpuid instruction in containers.
This conflicts with arch_prctl(ARCH_SET_CPUID, 0), which lets a task
enable CPUID faulting so that executing cpuid raises SIGSEGV.

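Add a TIF_CPUID_OVERRIDE thread info flag, which is set on all
!ve0 tasks, and check it together with TIF_NOCPUID to decide
whether CPUID faulting needs to be enabled or disabled. Also make
check_cpuid_fault() return 0 right away when TIF_NOCPUID is set, so
that a fault requested via arch_prctl() is not treated as one to
emulate.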

https://jira.sw.ru/browse/PSBM-121823
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 arch/x86/include/asm/thread_info.h |  4 +++-
 arch/x86/kernel/cpuid_fault.c      |  3 ++-
 arch/x86/kernel/process.c          | 13 +++++++++----
 arch/x86/kernel/traps.c            |  3 +++
 kernel/ve/ve.c                     |  1 +
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c0da378eed8b..6ffb64d25383 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,6 +92,7 @@ struct thread_info {
 #define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
+#define TIF_CPUID_OVERRIDE	18	/* CPUID emulation enabled */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
@@ -122,6 +123,7 @@ struct thread_info {
 #define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
+#define _TIF_CPUID_OVERRIDE	(1 << TIF_CPUID_OVERRIDE)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
@@ -153,7 +155,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE						\
 	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\
-	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
+	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_CPUID_OVERRIDE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/kernel/cpuid_fault.c b/arch/x86/kernel/cpuid_fault.c
index 339e2638c3b8..1e8ffacc4412 100644
--- a/arch/x86/kernel/cpuid_fault.c
+++ b/arch/x86/kernel/cpuid_fault.c
@@ -6,7 +6,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/ve.h>
-#include <asm/uaccess.h>
+#include <linux/veowner.h>
+#include <linux/uaccess.h>
 
 struct cpuid_override_entry {
 	unsigned int op;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e5c5b1d724ab..788b9b8f8f9c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -209,7 +209,8 @@ static void set_cpuid_faulting(bool on)
 static void disable_cpuid(void)
 {
 	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+	if (!test_and_set_thread_flag(TIF_NOCPUID) ||
+		test_thread_flag(TIF_CPUID_OVERRIDE)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOCPUID in the current running context.
@@ -222,7 +223,8 @@ static void disable_cpuid(void)
 static void enable_cpuid(void)
 {
 	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+	if (test_and_clear_thread_flag(TIF_NOCPUID) &&
+		!test_thread_flag(TIF_CPUID_OVERRIDE)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOCPUID in the current running context.
@@ -505,6 +507,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev, *next;
 	unsigned long tifp, tifn;
+	bool prev_cpuid, next_cpuid;
 
 	prev = &prev_p->thread;
 	next = &next_p->thread;
@@ -529,8 +532,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 	if ((tifp ^ tifn) & _TIF_NOTSC)
 		cr4_toggle_bits_irqsoff(X86_CR4_TSD);
 
-	if ((tifp ^ tifn) & _TIF_NOCPUID)
-		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+	prev_cpuid = (tifp & _TIF_NOCPUID) || (tifp & _TIF_CPUID_OVERRIDE);
+	next_cpuid = (tifn & _TIF_NOCPUID) || (tifn & _TIF_CPUID_OVERRIDE);
+	if (prev_cpuid != next_cpuid)
+		set_cpuid_faulting(next_cpuid);
 
 	if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
 		__speculation_ctrl_update(tifp, tifn);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c43e3b80e50f..d0b379cf0484 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -526,6 +526,9 @@ static int check_cpuid_fault(struct pt_regs *regs, long error_code)
 	if (error_code != 0)
 		return 0;
 
+	if (test_thread_flag(TIF_NOCPUID))
+		return 0;
+
 	addr = convert_ip_to_linear(current, regs);
 	if (get_user(opcode, (unsigned short __user *)addr))
 		return 0;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 935e13340051..4a7a66600da3 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -816,6 +816,7 @@ static void ve_attach(struct cgroup_taskset *tset)
 		/* Leave parent exec domain */
 		task->parent_exec_id--;
 
+		set_tsk_thread_flag(task, TIF_CPUID_OVERRIDE);
 		task->task_ve = ve;
 	}
 }
-- 
2.26.2



More information about the Devel mailing list