[Devel] [PATCH RHEL7] KVM: X86: Implement "send IPI" hypercall
Valeriy Vdovin
valeriy.vdovin at virtuozzo.com
Thu Aug 6 20:17:56 MSK 2020
From: Wanpeng Li <wanpengli at tencent.com>
Using hypercall to send IPIs by one vmexit instead of one by one for
xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster
mode. Intel guest can enter x2apic cluster mode when interrupt remmaping
is enabled in qemu, however, latest AMD EPYC still just supports xapic
mode which can get great improvement by Exit-less IPIs. This patchset
lets a guest send multicast IPIs, with at most 128 destinations per
hypercall in 64-bit mode and 64 vCPUs per hypercall in 32-bit mode.
Hardware: Xeon Skylake 2.5GHz, 2 sockets, 40 cores, 80 threads, the VM
is 80 vCPUs, IPI microbenchmark(https://lkml.org/lkml/2017/12/19/141):
x2apic cluster mode, vanilla
Dry-run: 0, 2392199 ns
Self-IPI: 6907514, 15027589 ns
Normal IPI: 223910476, 251301666 ns
Broadcast IPI: 0, 9282161150 ns
Broadcast lock: 0, 8812934104 ns
x2apic cluster mode, pv-ipi
Dry-run: 0, 2449341 ns
Self-IPI: 6720360, 15028732 ns
Normal IPI: 228643307, 255708477 ns
Broadcast IPI: 0, 7572293590 ns => 22% performance boost
Broadcast lock: 0, 8316124651 ns
x2apic physical mode, vanilla
Dry-run: 0, 3135933 ns
Self-IPI: 8572670, 17901757 ns
Normal IPI: 226444334, 255421709 ns
Broadcast IPI: 0, 19845070887 ns
Broadcast lock: 0, 19827383656 ns
x2apic physical mode, pv-ipi
Dry-run: 0, 2446381 ns
Self-IPI: 6788217, 15021056 ns
Normal IPI: 219454441, 249583458 ns
Broadcast IPI: 0, 7806540019 ns => 154% performance boost
Broadcast lock: 0, 9143618799 ns
Cc: Paolo Bonzini <pbonzini at redhat.com>
Cc: Radim Krčmář <rkrcmar at redhat.com>
Cc: Vitaly Kuznetsov <vkuznets at redhat.com>
Signed-off-by: Wanpeng Li <wanpengli at tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
(cherry-picked from commit 4180bf1b655a791a0a6ef93a2ffffc762722c782)
https://jira.sw.ru/browse/PSBM-104805
Signed-off-by: Valeriy.Vdovin <valeriy.vdovin at virtuozzo.com>
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
Documentation/virtual/kvm/cpuid.txt | 4 ++++
Documentation/virtual/kvm/hypercalls.txt | 20 ++++++++++++++++
arch/x86/include/asm/kvm_host.h | 4 ++++
arch/x86/include/uapi/asm/kvm_para.h | 3 ++-
arch/x86/kvm/cpuid.c | 3 ++-
arch/x86/kvm/lapic.c | 40 ++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 3 +++
include/uapi/linux/kvm_para.h | 2 ++
8 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 3c65feb..41217d6 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -54,6 +54,10 @@ KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
|| || before enabling paravirtualized
|| || spinlock support.
------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
+ || || before using paravirtualized
+ || || send IPIs.
+------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index 6b6dc98..8fa78a1 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -113,3 +113,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Send IPIs to multiple vCPUs.
+
+a0: lower part of the bitmap of destination APIC IDs
+a1: higher part of the bitmap of destination APIC IDs
+a2: the lowest APIC ID in bitmap
+a3: APIC ICR
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+128 destinations per hypercall in 64-bit mode and 64 vCPUs per
+hypercall in 32-bit mode. The destinations are represented by a
+bitmap contained in the first two arguments (a0 and a1). Bit 0 of
+a0 corresponds to the APIC ID in the third argument (a2), bit 1
+corresponds to the APIC ID a2+1, and so on.
+
+Returns the number of CPUs to which the IPIs were delivered successfully.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index daf419e..0942f29 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1433,6 +1433,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address);
u64 kvm_get_arch_capabilities(void);
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, int min,
+ unsigned long icr, int op_64_bit);
+
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index f1e4876..22b177ba 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -24,7 +24,8 @@
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
-#define KVM_FEATURE_POLL_CONTROL 12
+#define KVM_FEATURE_PV_SEND_IPI 11
+#define KVM_FEATURE_POLL_CONTROL 12
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fe8b927..96a6bac 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -600,7 +600,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
- (1 << KVM_FEATURE_POLL_CONTROL);
+ (1 << KVM_FEATURE_POLL_CONTROL) |
+ (1 << KVM_FEATURE_PV_SEND_IPI);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1bd31ee..6dc47c6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -542,6 +542,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, int min,
+ unsigned long icr, int op_64_bit)
+{
+ int i;
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu;
+ struct kvm_lapic_irq irq = {0};
+ int cluster_size = op_64_bit ? 64 : 32;
+ int count = 0;
+
+ irq.vector = icr & APIC_VECTOR_MASK;
+ irq.delivery_mode = icr & APIC_MODE_MASK;
+ irq.level = (icr & APIC_INT_ASSERT) != 0;
+ irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+ if (icr & APIC_DEST_MASK)
+ return -KVM_EINVAL;
+ if (icr & APIC_SHORT_MASK)
+ return -KVM_EINVAL;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ /* Bits above cluster_size are masked in the caller. */
+ for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+
+ min += cluster_size;
+ for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+
+ rcu_read_unlock();
+ return count;
+}
+
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f38b92f..4be882f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6538,6 +6538,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_CLOCK_PAIRING:
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
+ case KVM_HC_SEND_IPI:
+ ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+ break;
default:
ret = -KVM_ENOSYS;
break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index fed506a..633a77f 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -12,6 +12,7 @@
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_EFAULT EFAULT
+#define KVM_EINVAL EINVAL
#define KVM_E2BIG E2BIG
#define KVM_EPERM EPERM
#define KVM_EOPNOTSUPP 95
@@ -25,6 +26,7 @@
#define KVM_HC_MIPS_EXIT_VM 7
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9
+#define KVM_HC_SEND_IPI 10
/*
* hypercalls use architecture specific
--
1.8.3.1
More information about the Devel
mailing list