[Devel] [PATCH 08/11] KVM: x86: Replace late check_nested_events() hack with more precise fix
Alexander Mikhalitsyn
alexander.mikhalitsyn at virtuozzo.com
Thu Jun 23 12:55:20 MSK 2022
From: Paolo Bonzini <pbonzini at redhat.com>
Add an argument to interrupt_allowed and nmi_allowed, to check whether
injection is blocked. Use the hook to handle the case where
an interrupt arrives between check_nested_events() and the injection
logic. Drop the retry of check_nested_events() that hack-a-fixed the
same condition.
Blocking injection is also a bit of a hack, e.g. KVM should do exiting
and non-exiting interrupt processing in a single pass, but it's a more
precise hack. The old comment is also misleading; e.g., KVM_REQ_EVENT is
purely an optimization, and setting it on every run loop (which KVM doesn't
do) should not affect functionality, only performance.
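The new contract, in a nutshell (an illustrative sketch only, not code from
this patch; example_interrupt_allowed() and interrupt_blocked() are
placeholder names, while is_guest_mode() and nested_exit_on_intr() are the
real helpers used on the VMX side):

static bool example_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	/*
	 * With for_injection == true the caller is about to inject: an IRQ
	 * that is supposed to cause a nested VM-Exit may be pending, but it
	 * must not be injected into L2.
	 */
	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
		return false;

	/* Otherwise fall back to the vendor-specific blocking check. */
	return !interrupt_blocked(vcpu);
}

Callers that only ask "is the vCPU interruptible?", such as
kvm_arch_interrupt_allowed(), pass for_injection == false and keep the old
behaviour.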
Signed-off-by: Sean Christopherson <sean.j.christopherson at intel.com>
Message-Id: <20200423022550.15113-13-sean.j.christopherson at intel.com>
[Extend to SVM, add SMI and NMI. Even though NMI and SMI cannot come
asynchronously right now, making the fix generic is easy and removes a
special case. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
(cherry picked from commit c300ab9f08df9e4b9f39d53a0691e234330df124)
1. inject_pending_event is significantly different in VZ7; we have no
kvm_event_needs_reinjection(vcpu) check.
2. The nested_svm_nmi() call [RH7 specific] is removed from svm_nmi_allowed().
3. vmx_interrupt_allowed() is also called directly from within vmx.c;
those call sites were adjusted to pass false as the second argument.
4. A nested_exit_on_smi() helper was added.
https://jira.sw.ru/browse/PSBM-139278
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
arch/x86/include/asm/kvm_host.h | 6 +++---
arch/x86/kvm/mmu.c | 2 +-
arch/x86/kvm/svm.c | 28 +++++++++++++++++++++++----
arch/x86/kvm/vmx.c | 24 +++++++++++++++++------
arch/x86/kvm/x86.c | 34 ++++++++++-----------------------
5 files changed, 56 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a5f9a82ccfac..494b271a9b3c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -995,8 +995,8 @@ struct kvm_x86_ops {
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
- int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
- int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ bool (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ bool (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
@@ -1095,7 +1095,7 @@ struct kvm_x86_ops {
int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
- int (*smi_allowed)(struct kvm_vcpu *vcpu);
+ bool (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 206b4dd9f6d1..c1b529cb515a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4014,7 +4014,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
- return kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_arch_interrupt_allowed(vcpu);
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index edaff4472958..c475139dd1e5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4990,13 +4990,17 @@ static bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
return ret;
}
-static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+static bool svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
return false;
- return !svm_nmi_blocked(vcpu) && nested_svm_nmi(svm); // CHECKME
+ /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
+ return false;
+
+ return !svm_nmi_blocked(vcpu);
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -5034,12 +5038,19 @@ static bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
return !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
}
-static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
+static bool svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
return false;
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
+ return false;
+
return !svm_interrupt_blocked(vcpu);
}
@@ -5783,12 +5794,21 @@ static bool svm_smi_blocked(struct kvm_vcpu *vcpu)
return is_smm(vcpu);
}
-static int svm_smi_allowed(struct kvm_vcpu *vcpu)
+static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
+{
+ return (svm->nested.intercept & (1ULL << INTERCEPT_SMI));
+}
+
+static bool svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nested.nested_run_pending)
return false;
+ /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
+ return false;
+
return !svm_smi_blocked(vcpu);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8a4b03e9934..f13f5616184d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5876,11 +5876,15 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
}
}
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+static bool vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
+ return false;
+
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
@@ -5889,11 +5893,19 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
| GUEST_INTR_STATE_NMI));
}
-static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+static bool vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
return false;
+ /*
+ * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+ * e.g. if the IRQ arrived asynchronously after checking nested events.
+ */
+ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+ return false;
+
+ /* An IRQ that is supposed to VM-Exit is never blocked by L2 state. */
if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
return true;
@@ -6750,7 +6762,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
while (vmx->emulation_required && count-- != 0) {
- if (intr_window_requested && vmx_interrupt_allowed(vcpu))
+ if (intr_window_requested && vmx_interrupt_allowed(vcpu, false))
return handle_interrupt_window(&vmx->vcpu);
if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
@@ -8806,7 +8818,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
get_vmcs12(vcpu))))) {
- if (vmx_interrupt_allowed(vcpu)) {
+ if (vmx_interrupt_allowed(vcpu, false)) {
vmx->soft_vnmi_blocked = 0;
} else if (vmx->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
@@ -9017,7 +9029,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
* is run without virtual interrupt delivery.
*/
if (!kvm_event_needs_reinjection(vcpu) &&
- vmx_interrupt_allowed(vcpu)) {
+ vmx_interrupt_allowed(vcpu, false)) {
kvm_queue_interrupt(vcpu, max_irr, false);
vmx_inject_irq(vcpu);
}
@@ -12390,7 +12402,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEATURE_CONTROL_LMCE;
}
-static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
+static bool vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
if (to_vmx(vcpu)->nested.nested_run_pending)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4207dbeb3768..de2d845810ca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6674,32 +6674,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
}
kvm_x86_ops->queue_exception(vcpu);
- } else if (vcpu->arch.smi_pending && kvm_x86_ops->smi_allowed(vcpu)) {
+ } else if (vcpu->arch.smi_pending && kvm_x86_ops->smi_allowed(vcpu, true)) {
vcpu->arch.smi_pending = false;
enter_smm(vcpu);
- } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+ } else if (vcpu->arch.nmi_pending &&
+ kvm_x86_ops->nmi_allowed(vcpu, true)) {
--vcpu->arch.nmi_pending;
vcpu->arch.nmi_injected = true;
kvm_x86_ops->set_nmi(vcpu);
- } else if (kvm_cpu_has_injectable_intr(vcpu)) {
- /*
- * TODO/FIXME: We are calling check_nested_events again
- * here to avoid a race condition. We should really be
- * setting KVM_REQ_EVENT only on certain events
- * and not unconditionally.
- * See https://lkml.org/lkml/2014/7/2/60 for discussion
- * about this proposal and current concerns
- */
- if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
- r = kvm_x86_ops->check_nested_events(vcpu);
- if (r != 0)
- return r;
- }
- if (kvm_x86_ops->interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
- false);
- kvm_x86_ops->set_irq(vcpu);
- }
+ } else if (kvm_cpu_has_injectable_intr(vcpu) &&
+ kvm_x86_ops->interrupt_allowed(vcpu, true)) {
+ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
+ kvm_x86_ops->set_irq(vcpu);
}
return 0;
@@ -8812,11 +8798,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (test_bit(KVM_REQ_NMI, &vcpu->requests) ||
(vcpu->arch.nmi_pending &&
- kvm_x86_ops->nmi_allowed(vcpu)))
+ kvm_x86_ops->nmi_allowed(vcpu, false)))
return true;
if (test_bit(KVM_REQ_SMI, &vcpu->requests) ||
- (vcpu->arch.smi_pending && kvm_x86_ops->smi_allowed(vcpu)))
+ (vcpu->arch.smi_pending && kvm_x86_ops->smi_allowed(vcpu, false)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8842,7 +8828,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops->interrupt_allowed(vcpu);
+ return kvm_x86_ops->interrupt_allowed(vcpu, false);
}
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
--
2.36.1