[Devel] [PATCH RHEL9 COMMIT] kvm: move actual VM memory shrink out of kvm_lock

Konstantin Khorenko khorenko at virtuozzo.com
Wed Oct 20 11:40:38 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.12
------>
commit 2b66d27732f04aaf9df6722c2914ff76446dea94
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date:   Wed Oct 20 11:40:38 2021 +0300

    kvm: move actual VM memory shrink out of kvm_lock
    
    We face a situation when a node with many cpu cores (88) and a lot
    of RAM (1Tb) and many VMs (300) has almost all cpu cores busy in
    mmu_shrink_scan():
    all but one just wait for kvm_lock,
    the last one is busy with actual memory shrink for a VM.
    
    Let's allow parallel VM shrinking:
    - just inc the VM usage count, so it's not destroyed under us
    - drop the kvm_lock, so other shrinkers are free to go
    - and shrink our VM without holding the kvm_lock
    - dec the VM usage count after we finish with shrinking
    
    As we shrink only a single VM, we don't need to protect the vm_list
    all the way during shrink.
    
    https://jira.sw.ru/browse/PSBM-95077
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    (cherry-picked from vz7 commit bbacd5e44b5b ("kvm: move actual VM memory
    shrink out of kvm_lock"))
    
    +++
    kvm: fix race between mmu_shrink_scan() and VM destroy
    
    Honor the race between VM memory shrink and VM destroy,
    just skip dying VMs, shrink alive ones.
    
    Fixes: 05a623470d4b ("kvm: move actual VM memory shrink out of kvm_lock")
    https://jira.sw.ru/browse/PSBM-95077
    
    Reported-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    (cherry-picked from vz7 commit f0eae7e0c99f ("kvm: fix race between
    mmu_shrink_scan() and VM destroy"))
    
    +++
    kvm: unlock kvm_lock in case no VMs to shrink
    
    If vm_list is empty kvm_lock is acquired and never released in
    mmu_shrink_scan(), fix this.
    
    https://jira.sw.ru/browse/PSBM-100474
    
    Fixes: bbacd5e44b5b ("kvm: move actual VM memory shrink out of
    kvm_lock")
    https://jira.sw.ru/browse/PSBM-96262
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    (cherry-picked from vz7 commit c3ad21d01436 ("kvm: unlock kvm_lock in
    case no VMs to shrink"))
    
    https://jira.sw.ru/browse/PSBM-127849
    Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
    
    Rebased to vz9:
     - dropped LIST_HEAD(invalid_list) as it is not used anymore
    
    (cherry picked from vz8 commit 7334a675287a07644d2bcf95df26bfc118b0394e)
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 arch/x86/kvm/mmu/mmu.c   | 48 +++++++++++++++++++++++++++++++++---------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  6 ++++++
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index aa68ae5e4318..6c84f2a5c399 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5837,12 +5837,11 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	struct kvm *kvm, *tmp;
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;
+	int idx, found = 0;
 
 	mutex_lock(&kvm_lock);
 
 	list_for_each_entry_safe(kvm, tmp, &vm_list, vm_list) {
-		int idx;
-		LIST_HEAD(invalid_list);
 
 		/*
 		 * Never scan more than sc->nr_to_scan VM instances.
@@ -5872,25 +5871,44 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		    !kvm_has_zapped_obsolete_pages(kvm))
 			continue;
 
-		idx = srcu_read_lock(&kvm->srcu);
-		write_lock(&kvm->mmu_lock);
+		/*
+		 * If try_get fails, we race with last kvm_put_kvm(),
+		 * so skip the VM, it will die soon anyway.
+		 */
+		if (!kvm_try_get_kvm(kvm))
+			continue;
+		/*
+		 * We found VM to shrink, and as we shrink only one VM per
+		 * function call, break the cycle and do actual shrink out of
+		 * the cycle.
+		 */
+		found = 1;
+		break;
+	}
 
-		if (kvm_has_zapped_obsolete_pages(kvm)) {
-			kvm_mmu_commit_zap_page(kvm,
-			      &kvm->arch.zapped_obsolete_pages);
-			goto unlock;
-		}
+	mutex_unlock(&kvm_lock);
 
-		freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
+	/* If not found a VM to shrink, just exit. */
+	if (!found)
+		return freed;
 
-unlock:
-		write_unlock(&kvm->mmu_lock);
-		srcu_read_unlock(&kvm->srcu, idx);
+	idx = srcu_read_lock(&kvm->srcu);
+	write_lock(&kvm->mmu_lock);
 
-		break;
+	if (kvm_has_zapped_obsolete_pages(kvm)) {
+		kvm_mmu_commit_zap_page(kvm,
+					&kvm->arch.zapped_obsolete_pages);
+		goto unlock;
 	}
 
-	mutex_unlock(&kvm_lock);
+	freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
+
+unlock:
+	write_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	kvm_put_kvm(kvm);
+
 	return freed;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b9103940859a..915472bca8fc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -720,6 +720,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
+int kvm_try_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 bool file_is_kvm(struct file *file);
 void kvm_put_kvm_no_destroy(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b50dbe269f4b..c128df397282 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1134,6 +1134,12 @@ void kvm_get_kvm(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_get_kvm);
 
+int kvm_try_get_kvm(struct kvm *kvm)
+{
+	return refcount_inc_not_zero(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_try_get_kvm);
+
 void kvm_put_kvm(struct kvm *kvm)
 {
 	if (refcount_dec_and_test(&kvm->users_count))


More information about the Devel mailing list