[Devel] [PATCH RHEL7 COMMIT] kvm: mmu: track read permission explicitly for shadow EPT page tables

Konstantin Khorenko khorenko at virtuozzo.com
Wed May 16 12:50:42 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.50.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.47.6
------>
commit 5235a2f708dfdd1611a700af0432ec626a38db1e
Author: Bandan Das <bsd at redhat.com>
Date:   Wed May 16 12:50:42 2018 +0300

    kvm: mmu: track read permission explicitly for shadow EPT page tables
    
    To support execute-only mappings on behalf of L1 hypervisors,
    reuse ACC_USER_MASK to signify whether the L1 hypervisor has the R bit
    set.
    
    For the nested EPT case, we assumed that the U bit was always set
    since there was no equivalent in EPT page tables.  Strictly
    speaking, this was not necessary because handle_ept_violation
    never set PFERR_USER_MASK in the error code (uf=0 in the
    parlance of update_permission_bitmask).  We now have to set
    both U and UF correctly, respectively in FNAME(gpte_access)
    and in handle_ept_violation.
    
    Also, in handle_ept_violation, bit 3 of the exit qualification is
    not enough to detect a present PTE; all three bits 3-5 have to
    be checked.
    
    Signed-off-by: Bandan Das <bsd at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    
    (cherry picked from commit d95c55687e11febe3ab1aacfe82b58b1822c52c4)
    Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
    
    =====================
    Patchset description:
    
    EPT fixes and enhancements
    
    Backport of EPT fixes from upstream for
    https://jira.sw.ru/browse/PSBM-84046
    
    Bandan Das (3):
      kvm: mmu: don't set the present bit unconditionally
      kvm: mmu: track read permission explicitly for shadow EPT page tables
      kvm: vmx: advertise support for ept execute only
    
    Junaid Shahid (2):
      kvm: x86: mmu: Use symbolic constants for EPT Violation Exit
        Qualifications
      kvm: x86: mmu: Rename EPT_VIOLATION_READ/WRITE/INSTR constants
    
    KarimAllah Ahmed (2):
      kvm: Map PFN-type memory regions as writable (if possible)
      KVM: x86: Update the exit_qualification access bits while walking an
        address
    
    Paolo Bonzini (5):
      KVM: nVMX: we support 1GB EPT pages
      kvm: x86: MMU support for EPT accessed/dirty bits
      kvm: nVMX: support EPT accessed/dirty bits
      KVM: MMU: return page fault error code from permission_fault
      KVM: nVMX: fix EPT permissions as reported in exit qualification
---
 arch/x86/kvm/mmu.c         | 10 +++++++---
 arch/x86/kvm/paging_tmpl.h |  8 +++++++-
 arch/x86/kvm/vmx.c         | 15 +++++++++------
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7fa10bf2e2b0..00cafeec60ca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2513,6 +2513,12 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
+	/*
+	 * For the EPT case, shadow_present_mask is 0 if hardware
+	 * supports exec-only page table entries.  In that case,
+	 * ACC_USER_MASK and shadow_user_mask are used to represent
+	 * read access.  See FNAME(gpte_access) in paging_tmpl.h.
+	 */
 	spte |= shadow_present_mask;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
@@ -3952,9 +3958,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				 *   clearer.
 				 */
 				smap = cr4_smap && u && !uf && !ff;
-			} else
-				/* Not really needed: no U/S accesses on ept  */
-				u = 1;
+			}
 
 			fault = (ff && !x) || (uf && !u) || (wf && !w) ||
 				(smapf && smap);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc6a43aadea0..e318e4d55433 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,13 +181,19 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability since it isn't used in the EPT case
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b64a96f401a..8d07bd9b6a92 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5947,12 +5947,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
 
-	/* It is a write fault? */
-	error_code = exit_qualification & PFERR_WRITE_MASK;
+	/* it is a read fault? */
+	error_code = (exit_qualification << 2) & PFERR_USER_MASK;
+	/* it is a write fault? */
+	error_code |= exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
 	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
+	error_code |= (exit_qualification & 0x38) != 0;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -6301,11 +6303,12 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-			0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_READABLE_MASK);
+			0ull, VMX_EPT_EXECUTABLE_MASK,
+			cpu_has_vmx_ept_execute_only() ?
+				      0ull : VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
 	} else


More information about the Devel mailing list