[Devel] [PATCH RHEL7 COMMIT] kvm: x86: MMU support for EPT accessed/dirty bits

Konstantin Khorenko khorenko at virtuozzo.com
Wed May 16 12:50:45 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.50.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.47.6
------>
commit bf50228c46cb165fad213d464380cce5afdd08bf
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed May 16 12:50:45 2018 +0300

    kvm: x86: MMU support for EPT accessed/dirty bits
    
    This prepares the MMU paging code for EPT accessed and dirty bits,
    which can be enabled optionally at runtime.  Code that updates the
    accessed and dirty bits will need a pointer to the struct kvm_mmu.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Radim Krčmář <rkrcmar at redhat.com>
    
    (cherry picked from commit 86407bcb5c8320add17b57dc6af1586ff8e14bc0)
    Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
    
    =====================
    Patchset description:
    
    EPT fixes and enhancements
    
    Backport of EPT fixes from upstream for
    https://jira.sw.ru/browse/PSBM-84046
    
    Bandan Das (3):
      kvm: mmu: don't set the present bit unconditionally
      kvm: mmu: track read permission explicitly for shadow EPT page tables
      kvm: vmx: advertise support for ept execute only
    
    Junaid Shahid (2):
      kvm: x86: mmu: Use symbolic constants for EPT Violation Exit
        Qualifications
      kvm: x86: mmu: Rename EPT_VIOLATION_READ/WRITE/INSTR constants
    
    KarimAllah Ahmed (2):
      kvm: Map PFN-type memory regions as writable (if possible)
      KVM: x86: Update the exit_qualification access bits while walking an
        address
    
    Paolo Bonzini (5):
      KVM: nVMX: we support 1GB EPT pages
      kvm: x86: MMU support for EPT accessed/dirty bits
      kvm: nVMX: support EPT accessed/dirty bits
      KVM: MMU: return page fault error code from permission_fault
      KVM: nVMX: fix EPT permissions as reported in exit qualification
---
 arch/x86/kvm/paging_tmpl.h | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e318e4d55433..93cc0bd7caee 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -43,6 +43,7 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
 	#ifdef CONFIG_X86_64
 	#define PT_MAX_FULL_LEVELS 4
 	#define CMPXCHG cmpxchg
@@ -64,6 +65,7 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
 	#define CMPXCHG cmpxchg
 #elif PTTYPE == PTTYPE_EPT
 	#define pt_element_t u64
@@ -78,6 +80,7 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_GUEST_DIRTY_MASK 0
 	#define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
 	#define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
+	#define PT_HAVE_ACCESSED_DIRTY(mmu) false
 	#define CMPXCHG cmpxchg64
 	#define PT_MAX_FULL_LEVELS 4
 #else
@@ -111,12 +114,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
 }
 
-static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
+static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
+					     unsigned gpte)
 {
 	unsigned mask;
 
 	/* dirty bit is not supported, so no need to track it */
-	if (!PT_GUEST_DIRTY_MASK)
+	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
 		return;
 
 	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
@@ -171,7 +175,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 		goto no_present;
 
 	/* if accessed bit is not supported prefetch non accessed gpte */
-	if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK))
+	if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
 		goto no_present;
 
 	return false;
@@ -217,7 +221,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 	int ret;
 
 	/* dirty/accessed bits are not supported, so no need to update them */
-	if (!PT_GUEST_DIRTY_MASK)
+	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
 		return 0;
 
 	for (level = walker->max_level; level >= walker->level; --level) {
@@ -276,6 +280,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access, accessed_dirty;
 	gpa_t pte_gpa;
+	bool have_ad;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
@@ -288,6 +293,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 retry_walk:
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
+	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
 	if (walker->level == PT32E_ROOT_LEVEL) {
@@ -301,7 +307,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = PT_GUEST_ACCESSED_MASK;
+	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
 	pt_access = pte_access = ACC_ALL;
 	++walker->level;
 
@@ -384,7 +390,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(&pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -475,7 +481,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 
 	gfn = gpte_to_gfn(gpte);
 	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
-	FNAME(protect_clean_gpte)(&pte_access, gpte);
+	FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
 	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 			no_dirty_log && (pte_access & ACC_WRITE_MASK));
 	if (is_error_pfn(pfn))
@@ -965,7 +971,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
 		pte_access &= FNAME(gpte_access)(vcpu, gpte);
-		FNAME(protect_clean_gpte)(&pte_access, gpte);
+		FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
 
 		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
 		      &nr_present))
@@ -1006,3 +1012,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 #undef PT_GUEST_DIRTY_MASK
 #undef PT_GUEST_DIRTY_SHIFT
 #undef PT_GUEST_ACCESSED_SHIFT
+#undef PT_HAVE_ACCESSED_DIRTY
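
For context, PT_HAVE_ACCESSED_DIRTY(mmu) is still a compile-time constant in this patch (true for 32-bit and 64-bit guest page tables, false for shadow EPT); the point of threading the struct kvm_mmu pointer through is that the follow-up patch in this series, "kvm: nVMX: support EPT accessed/dirty bits", is expected to turn the EPT variant into a runtime check on per-MMU state. A minimal standalone sketch of that idea, assuming a hypothetical ept_ad flag on the MMU (the struct and field below are illustrative, not the kernel definitions):

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch only, not part of the patch: how the three paging_tmpl.h
 * instantiations differ once EPT A/D support is wired up.  The
 * "ept_ad" field is an assumption used for illustration.
 */
struct kvm_mmu_sketch {
	bool ept_ad;	/* true when the guest enabled EPT A/D bits */
};

/* PTTYPE == 32 or 64: accessed/dirty bits always exist in guest PTEs. */
#define PT_HAVE_ACCESSED_DIRTY_LEGACY(mmu)	true

/* PTTYPE == PTTYPE_EPT: depends on what the guest enabled at runtime. */
#define PT_HAVE_ACCESSED_DIRTY_EPT(mmu)		((mmu)->ept_ad)

int main(void)
{
	struct kvm_mmu_sketch ept_mmu = { .ept_ad = false };

	/*
	 * The walker now asks the MMU instance instead of testing the
	 * compile-time PT_GUEST_DIRTY_MASK, which is why this patch
	 * passes "mmu" down to protect_clean_gpte() and
	 * update_accessed_dirty_bits().
	 */
	printf("legacy: %d, ept: %d\n",
	       PT_HAVE_ACCESSED_DIRTY_LEGACY(&ept_mmu),
	       PT_HAVE_ACCESSED_DIRTY_EPT(&ept_mmu));
	return 0;
}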

