[Devel] [PATCH vz9 18/27] x86: make ARCH_[SET|GET]_CPUID friends with /proc/vz/cpuid_override

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Wed Oct 6 11:57:32 MSK 2021


From: Andrey Ryabinin <aryabinin at virtuozzo.com>

We use cpuid faulting to emulate cpuid in containers. This conflicts
with arch_prctl(ARCH_SET_CPUID, 0), which lets a task enable cpuid
faulting so that the cpuid instruction raises SIGSEGV.

Add a TIF_CPUID_OVERRIDE thread info flag, which is set on all
!ve0 tasks, and check it together with TIF_NOCPUID to decide
whether cpuid faulting needs to be enabled or disabled.

https://jira.sw.ru/browse/PSBM-121823

Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>

(cherry-picked from vz8 commit f7458a78593c ("x86: make ARCH_[SET|GET]_CPUID
friends with /proc/vz/cpuid_override"))

Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 arch/x86/include/asm/thread_info.h |  4 +++-
 arch/x86/kernel/process.c          | 13 +++++++++----
 arch/x86/kernel/traps.c            |  3 +++
 kernel/ve/ve.c                     |  3 +++
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index de406d93b515..850827fc0123 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -95,6 +95,7 @@ struct thread_info {
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
+#define TIF_CPUID_OVERRIDE	26	/* CPUID emulation enabled */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 
@@ -117,13 +118,14 @@ struct thread_info {
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
+#define _TIF_CPUID_OVERRIDE	(1 << TIF_CPUID_OVERRIDE)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
 	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
-	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD | _TIF_CPUID_OVERRIDE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ac178b578637..14fd45ae4a25 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -290,7 +290,8 @@ static void set_cpuid_faulting(bool on)
 static void disable_cpuid(void)
 {
 	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+	if (!test_and_set_thread_flag(TIF_NOCPUID) ||
+		test_thread_flag(TIF_CPUID_OVERRIDE)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOCPUID in the current running context.
@@ -303,7 +304,8 @@ static void disable_cpuid(void)
 static void enable_cpuid(void)
 {
 	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+	if (test_and_clear_thread_flag(TIF_NOCPUID) &&
+		!test_thread_flag(TIF_CPUID_OVERRIDE)) {
 		/*
 		 * Must flip the CPU state synchronously with
 		 * TIF_NOCPUID in the current running context.
@@ -636,6 +638,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	unsigned long tifp, tifn;
+	bool prev_cpuid, next_cpuid;
 
 	tifn = READ_ONCE(task_thread_info(next_p)->flags);
 	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
@@ -658,8 +661,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 	if ((tifp ^ tifn) & _TIF_NOTSC)
 		cr4_toggle_bits_irqsoff(X86_CR4_TSD);
 
-	if ((tifp ^ tifn) & _TIF_NOCPUID)
-		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+	prev_cpuid = (tifp & _TIF_NOCPUID) || (tifp & _TIF_CPUID_OVERRIDE);
+	next_cpuid = (tifn & _TIF_NOCPUID) || (tifn & _TIF_CPUID_OVERRIDE);
+	if (prev_cpuid != next_cpuid)
+		set_cpuid_faulting(next_cpuid);
 
 	if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
 		__speculation_ctrl_update(tifp, tifn);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b56deed1702c..20882bb12518 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ static int check_cpuid_fault(struct pt_regs *regs, long error_code)
 	if (error_code != 0)
 		return 0;
 
+	if (test_thread_flag(TIF_NOCPUID))
+		return 0;
+
 	addr = convert_ip_to_linear(current, regs);
 	if (get_user(opcode, (unsigned short __user *)addr))
 		return 0;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 4caa8a1f27ca..9dabec194988 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -875,6 +875,9 @@ static void ve_attach(struct cgroup_taskset *tset)
 		task->parent_exec_id--;
 
 		ve_set_task_start_time(ve, task);
+
+		set_tsk_thread_flag(task, TIF_CPUID_OVERRIDE);
+
 		task->task_ve = ve;
 	}
 }
-- 
2.30.2



More information about the Devel mailing list