[Devel] [PATCH vz7.19 2/2] KVM: x86: avoid VMCS access from non-vCPU context

Roman Kagan rkagan at virtuozzo.com
Mon Oct 31 10:32:27 PDT 2016


Commit 108b249c453d ("KVM: x86: introduce get_kvmclock_ns") added a
function that returns the time as the guest would obtain it via
kvmclock, and used it throughout the code.
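
For context, "the time as the guest would obtain it" means the pvclock
computation sketched below, assuming the usual struct
pvclock_vcpu_time_info layout (version/seqlock handling omitted; the
real guest code is pvclock_clocksource_read() and uses a 64x32->96-bit
multiply helper rather than __int128):

  u64 kvmclock_read(struct pvclock_vcpu_time_info *ti)
  {
          u64 delta = rdtsc() - ti->tsc_timestamp;

          /* scale the TSC delta to nanoseconds (32.32 fixed point) */
          if (ti->tsc_shift >= 0)
                  delta <<= ti->tsc_shift;
          else
                  delta >>= -ti->tsc_shift;

          return ti->system_time +
                 (u64)(((unsigned __int128)delta * ti->tsc_to_system_mul) >> 32);
  }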

The problem is that it reads the guest TSC value via kvm_read_l1_tsc,
which (on Intel CPUs) reads VMCS(TSC_OFFSET) directly and therefore can
only be called in vCPU context, where the vCPU's VMCS is loaded on the
current CPU.  When called from kvm_arch_vm_ioctl for KVM_[GS]ET_CLOCK,
it returns bogus values, breaking save/restore.
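
For reference, on Intel that call bottoms out in roughly the following
(a simplified sketch of vmx.c, nested-guest handling omitted).
vmcs_read64() is a raw VMREAD, which only hits this vCPU's VMCS while
it is loaded on the current CPU, i.e. between vcpu_load() and
vcpu_put():

  static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
  {
          /* VMREAD against whatever VMCS is current on this CPU */
          return host_tsc + vmcs_read64(TSC_OFFSET);
  }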

Make this function take a vCPU pointer instead and use it only in vCPU
context (adjust Hyper-V's get_time_ref_counter similarly).  For
ioctl(KVM_[GS]ET_CLOCK), manipulate kvmclock_offset directly, under the
assumption that the user of those ioctls either is prepared to deal
with time warps or calls them while the vCPUs aren't running (i.e.
during save/restore).
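
Concretely, with the vCPUs stopped the guest-visible kvmclock value
reduces to

  kvmclock == ktime_get_boottime() + kvmclock_offset

so KVM_SET_CLOCK can solve this for kvmclock_offset and KVM_GET_CLOCK
can evaluate it directly, without touching the VMCS.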

Fixes: 108b249c453d ("KVM: x86: introduce get_kvmclock_ns")
Cc: Paolo Bonzini <pbonzini at redhat.com>
Upstream-Message-ID: <1477933936-3681-1-git-send-email-rkagan at virtuozzo.com>
Signed-off-by: Roman Kagan <rkagan at virtuozzo.com>
---
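
A minimal sketch of the userspace save/restore sequence these ioctls
serve (vm_fd is a hypothetical KVM VM file descriptor; vCPUs assumed
stopped, error handling omitted):

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  struct kvm_clock_data clk = { 0 };

  /* save: snapshot the guest-visible kvmclock value */
  ioctl(vm_fd, KVM_GET_CLOCK, &clk);
  /* ... serialize clk.clock with the rest of the VM state ... */

  /* restore: reinstall it in the new VM instance */
  ioctl(vm_fd, KVM_SET_CLOCK, &clk);
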
 arch/x86/kvm/hyperv.c | 16 +++++++---------
 arch/x86/kvm/x86.c    | 21 ++++++++-------------
 arch/x86/kvm/x86.h    |  2 +-
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b5ebcaf..9f64f32 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -384,10 +384,9 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
 	}
 }
 
-static u64 get_time_ref_counter(struct kvm *kvm)
+static u64 get_time_ref_counter(struct kvm_vcpu *vcpu)
 {
-	struct kvm_hv *hv = &kvm->arch.hyperv;
-	struct kvm_vcpu *vcpu;
+	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
 	u64 tsc;
 
 	/*
@@ -395,9 +394,8 @@ static u64 get_time_ref_counter(struct kvm *kvm)
 	 * stable, fall back to get_kvmclock_ns.
 	 */
 	if (!hv->tsc_ref.tsc_sequence)
-		return div_u64(get_kvmclock_ns(kvm), 100);
+		return div_u64(get_kvmclock_ns(vcpu), 100);
 
-	vcpu = kvm_get_vcpu(kvm, 0);
 	tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
 	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
 		+ hv->tsc_ref.tsc_offset;
@@ -451,7 +449,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
 	u64 time_now;
 	ktime_t ktime_now;
 
-	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
+	time_now = get_time_ref_counter(stimer_to_vcpu(stimer));
 	ktime_now = ktime_get();
 
 	if (stimer->config & HV_STIMER_PERIODIC) {
@@ -591,7 +589,7 @@ static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
 			(struct hv_timer_message_payload *)&msg->u.payload;
 
 	payload->expiration_time = stimer->exp_time;
-	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
+	payload->delivery_time = get_time_ref_counter(vcpu);
 	return synic_deliver_msg(vcpu_to_synic(vcpu),
 				 HV_STIMER_SINT(stimer->config), msg);
 }
@@ -626,7 +624,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
 
 				if (exp_time) {
 					time_now =
-						get_time_ref_counter(vcpu->kvm);
+						get_time_ref_counter(vcpu);
 					if (time_now >= exp_time)
 						stimer_expiration(stimer);
 				}
@@ -1051,7 +1049,7 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = hv->hv_hypercall;
 		break;
 	case HV_X64_MSR_TIME_REF_COUNT:
-		data = get_time_ref_counter(kvm);
+		data = get_time_ref_counter(vcpu);
 		break;
 	case HV_X64_MSR_REFERENCE_TSC:
 		data = hv->hv_tsc_page;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b1618b5..84c2981 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1595,10 +1595,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+static u64 __get_kvmclock_ns(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
-	struct kvm_arch *ka = &kvm->arch;
+	struct kvm_arch *ka = &vcpu->kvm->arch;
 	u64 ns;
 	u8 flags;
 
@@ -1612,13 +1611,13 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
 	return ns;
 }
 
-u64 get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
 	s64 ns;
 
 	local_irq_save(flags);
-	ns = __get_kvmclock_ns(kvm);
+	ns = __get_kvmclock_ns(vcpu);
 	local_irq_restore(flags);
 
 	return ns;
@@ -3971,7 +3970,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_SET_CLOCK: {
 		struct kvm_clock_data user_ns;
-		u64 now_ns;
 
 		r = -EFAULT;
 		if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
@@ -3982,19 +3980,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 
 		r = 0;
-		local_irq_disable();
-		now_ns = __get_kvmclock_ns(kvm);
-		kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-		local_irq_enable();
+		kvm->arch.kvmclock_offset = user_ns.clock -
+			ktime_to_ns(ktime_get_boottime());
 		kvm_gen_update_masterclock(kvm);
 		break;
 	}
 	case KVM_GET_CLOCK: {
 		struct kvm_clock_data user_ns;
-		u64 now_ns;
 
-		now_ns = get_kvmclock_ns(kvm);
-		user_ns.clock = now_ns;
+		user_ns.clock = ktime_to_ns(ktime_get_boottime()) +
+			kvm->arch.kvmclock_offset;
 		user_ns.flags = 0;
 		memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index be5ebaa..2161c77 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -150,7 +150,7 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
-u64 get_kvmclock_ns(struct kvm *kvm);
+u64 get_kvmclock_ns(struct kvm_vcpu *vcpu);
 
 int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
-- 
2.7.4


