[Devel] [PATCH RHEL7 COMMIT] ms/KVM: x86: drop TSC offsetting kvm_x86_ops to fix KVM_GET/SET_CLOCK
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Nov 16 04:46:05 PST 2016
The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.19.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.19.9
------>
commit 4e92c1145557efd8aeb0a425b1809e65327de540
Author: Paolo Bonzini <pbonzini at redhat.com>
Date: Wed Nov 16 16:46:04 2016 +0400
ms/KVM: x86: drop TSC offsetting kvm_x86_ops to fix KVM_GET/SET_CLOCK
Since commit a545ab6a0085 ("kvm: x86: add tsc_offset field to struct
kvm_vcpu_arch", 2016-09-07) the offset between host and L1 TSC is
cached and need not be fished out of the VMCS or VMCB. This means
that we can implement adjust_tsc_offset_guest and read_l1_tsc
entirely in generic code. The simplification is particularly
significant for VMX code, where vmx->nested.vmcs01_tsc_offset
was duplicating what is now in vcpu->arch.tsc_offset. Therefore
the vmcs01_tsc_offset can be dropped completely.
More importantly, this fixes KVM_GET_CLOCK/KVM_SET_CLOCK
which, after commit 108b249c453d ("KVM: x86: introduce get_kvmclock_ns",
2016-09-01) called read_l1_tsc while the VMCS was not loaded.
It thus returned bogus values on Intel CPUs.
Fixes: 108b249c453dd7132599ab6dc7e435a7036c193f
Reported-by: Roman Kagan <rkagan at virtuozzo.com>
Reviewed-by: Radim KrÄmáŠ<rkrcmar at redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
(cherry-picked from commit ea26e4ec08d4727e3a9e48a6b74695861effcbd9)
modifications to hyperv.c added
fix #PSBM-54338
Signed-off-by: Denis Plotnikov <dplotnikov at virtuozzo.com>
Signed-off-by: Roman Kagan <rkagan at virtuozzo.com>
---
arch/x86/include/asm/kvm_host.h | 3 ---
arch/x86/kvm/hyperv.c | 2 +-
arch/x86/kvm/svm.c | 23 -----------------------
arch/x86/kvm/vmx.c | 39 +++------------------------------------
arch/x86/kvm/x86.c | 8 ++++----
5 files changed, 8 insertions(+), 67 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa86551..2348a55 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -821,7 +821,6 @@ struct kvm_x86_ops {
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
bool (*invpcid_supported)(void);
- void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -831,8 +830,6 @@ struct kvm_x86_ops {
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
-
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
int (*check_intercept)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b5ebcaf..9a5df5c 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -398,7 +398,7 @@ static u64 get_time_ref_counter(struct kvm *kvm)
return div_u64(get_kvmclock_ns(kvm), 100);
vcpu = kvm_get_vcpu(kvm, 0);
- tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+ tsc = kvm_read_l1_tsc(vcpu, native_read_tsc());
return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
+ hv->tsc_ref.tsc_offset;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9021e79..3e51a1f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -975,21 +975,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- svm->vmcb->control.tsc_offset += adjustment;
- if (is_guest_mode(vcpu))
- svm->nested.hsave->control.tsc_offset += adjustment;
- else
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- svm->vmcb->control.tsc_offset - adjustment,
- svm->vmcb->control.tsc_offset);
-
- mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-}
-
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -2961,12 +2946,6 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
-static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
- struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
- return vmcb->control.tsc_offset + host_tsc;
-}
-
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4341,8 +4320,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
.write_tsc_offset = svm_write_tsc_offset,
- .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
- .read_l1_tsc = svm_read_l1_tsc,
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 45007f1..6497232 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -396,7 +396,6 @@ struct nested_vmx {
/* vmcs02_list cache of VMCSs recently used to run L2 guests */
struct list_head vmcs02_pool;
int vmcs02_num;
- u64 vmcs01_tsc_offset;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
@@ -2189,20 +2188,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
}
/*
- * Like guest_read_tsc, but always returns L1's notion of the timestamp
- * counter, even if a nested guest (L2) is currently running.
- */
-static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
- u64 tsc_offset;
-
- tsc_offset = is_guest_mode(vcpu) ?
- to_vmx(vcpu)->nested.vmcs01_tsc_offset :
- vmcs_read64(TSC_OFFSET);
- return host_tsc + tsc_offset;
-}
-
-/*
* writes 'offset' into guest's timestamp counter offset register
*/
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -2215,7 +2200,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
* to the newly set TSC to get L2's TSC.
*/
struct vmcs12 *vmcs12;
- to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
/* recalculate vmcs02.TSC_OFFSET: */
vmcs12 = get_vmcs12(vcpu);
vmcs_write64(TSC_OFFSET, offset +
@@ -2228,19 +2212,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
}
}
-static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
- u64 offset = vmcs_read64(TSC_OFFSET);
-
- vmcs_write64(TSC_OFFSET, offset + adjustment);
- if (is_guest_mode(vcpu)) {
- /* Even when running L2, the adjustment needs to apply to L1 */
- to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
- } else
- trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
- offset + adjustment);
-}
-
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -8924,9 +8895,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
- vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+ vcpu->arch.tsc_offset + vmcs12->tsc_offset);
else
- vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset + vmcs12->tsc_offset);
if (enable_vpid) {
/*
@@ -9138,8 +9109,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enter_guest_mode(vcpu);
- vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
-
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -9651,7 +9620,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update TSC_OFFSET if TSC was changed while L2 ran */
- vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;
@@ -9851,8 +9820,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.write_tsc_offset = vmx_write_tsc_offset,
- .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
- .read_l1_tsc = vmx_read_l1_tsc,
.set_tdp_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ec55b1..ea058a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1381,7 +1381,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
+ return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
@@ -1519,7 +1519,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
- kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+ kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
}
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -1527,7 +1527,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
WARN_ON(adjustment < 0);
adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
- kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+ adjust_tsc_offset_guest(vcpu, adjustment);
}
#ifdef CONFIG_X86_64
@@ -1709,7 +1709,7 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
u8 flags;
if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
- u64 tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+ u64 tsc = kvm_read_l1_tsc(vcpu, native_read_tsc());
__pvclock_read_cycles(&vcpu->arch.hv_clock, &ns, &flags, tsc);
} else {
ns = ktime_to_ns(ktime_get_boottime()) + ka->kvmclock_offset;
More information about the Devel
mailing list