[Devel] [PATCH RHEL8 COMMIT] kvm: move actual VM memory shrink out of kvm_lock
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jun 10 16:35:03 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.39
------>
commit 8944494eb394b86b538cbd6d7a9038ddd2006f0c
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date: Tue Jun 8 20:58:06 2021 +0300
kvm: move actual VM memory shrink out of kvm_lock
We face a situation when a node with many CPU cores (88), a lot of
RAM (1 TB) and many VMs (300) has almost all CPU cores busy in
mmu_shrink_scan():
all but one just wait for kvm_lock,
while the last one performs the actual memory shrink for a single VM.
Let's allow parallel VM shrinking:
- just inc the VM usage count, so it's not destroyed under us
- drop the kvm_lock, so other shrinkers are free to go
- then shrink our VM without holding the kvm_lock
- dec the VM usage count after we finish with shrinking
As we shrink only a single VM, we don't need to protect the vm_list
for the whole duration of the shrink (see the sketch below).
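For illustration only (not part of the patch), the pattern boils down
to roughly the following sketch; shrink_one_vm() is a hypothetical
placeholder for the actual zap logic:

	mutex_lock(&kvm_lock);
	list_for_each_entry_safe(kvm, tmp, &vm_list, vm_list) {
		/* ... pick a suitable VM ... */
		kvm_get_kvm(kvm);	/* VM can't be destroyed under us */
		found = 1;
		break;
	}
	mutex_unlock(&kvm_lock);	/* other shrinkers are free to go */

	if (found) {
		shrink_one_vm(kvm);	/* actual shrink, kvm_lock not held */
		kvm_put_kvm(kvm);	/* may drop the last reference */
	}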
https://jira.sw.ru/browse/PSBM-95077
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry-picked from vz7 commit bbacd5e44b5b ("kvm: move actual VM memory
shrink out of kvm_lock"))
+++
kvm: fix race between mmu_shrink_scan() and VM destroy
Handle the race between VM memory shrink and VM destroy:
just skip dying VMs and shrink only the alive ones.
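Schematically (a sketch; the helper itself is introduced by the patch
below), the check in the VM selection loop becomes:

	/*
	 * refcount_inc_not_zero() fails once the last kvm_put_kvm()
	 * has run, i.e. the VM is already being destroyed.
	 */
	if (!kvm_try_get_kvm(kvm))
		continue;	/* dying VM, skip it */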
Fixes: 05a623470d4b ("kvm: move actual VM memory shrink out of kvm_lock")
https://jira.sw.ru/browse/PSBM-95077
Reported-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry-picked from vz7 commit f0eae7e0c99f ("kvm: fix race between
mmu_shrink_scan() and VM destroy"))
+++
kvm: unlock kvm_lock in case no VMs to shrink
If vm_list is empty, kvm_lock is acquired in mmu_shrink_scan() and
never released; fix this.
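Schematically (a sketch of the resulting control flow, not the literal
hunk), the unlock now happens before the early-exit path as well:

	mutex_unlock(&kvm_lock);	/* reached even when vm_list is empty */

	if (!found)
		return freed;	/* nothing to shrink, lock already dropped */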
https://jira.sw.ru/browse/PSBM-100474
Fixes: bbacd5e44b5b ("kvm: move actual VM memory shrink out of kvm_lock")
https://jira.sw.ru/browse/PSBM-96262
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
(cherry-picked from vz7 commit c3ad21d01436 ("kvm: unlock kvm_lock in
case no VMs to shrink"))
https://jira.sw.ru/browse/PSBM-127849
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
---
 arch/x86/kvm/mmu/mmu.c   | 56 ++++++++++++++++++++++++++++++++----------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  6 ++++++
 3 files changed, 45 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9e58984fdfaf..474f441711ea 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6077,12 +6077,12 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	struct kvm *kvm, *tmp;
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;
+	int idx, found = 0;
+	LIST_HEAD(invalid_list);
 
 	mutex_lock(&kvm_lock);
 
 	list_for_each_entry_safe(kvm, tmp, &vm_list, vm_list) {
-		int idx;
-		LIST_HEAD(invalid_list);
 
 		/*
 		 * Never scan more than sc->nr_to_scan VM instances.
@@ -6090,8 +6090,9 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		 * to shrink more than one VM and it is very unlikely to see
 		 * !n_used_mmu_pages so many times.
 		 */
-		if (!nr_to_scan--)
+		if (!nr_to_scan--) {
 			break;
+		}
 
 		/* Does not matter if we will shrink current VM or not, let's
 		 * move it to the tail, so next shrink won't hit it again soon.
@@ -6112,27 +6113,46 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		    !kvm_has_zapped_obsolete_pages(kvm))
 			continue;
 
-		idx = srcu_read_lock(&kvm->srcu);
-		spin_lock(&kvm->mmu_lock);
+		/*
+		 * If try_get fails, we race with last kvm_put_kvm(),
+		 * so skip the VM, it will die soon anyway.
+		 */
+		if (!kvm_try_get_kvm(kvm))
+			continue;
+		/*
+		 * We found VM to shrink, and as we shrink only one VM per
+		 * function call, break the cycle and do actual shrink out of
+		 * the cycle.
+		 */
+		found = 1;
+		break;
+	}
 
-		if (kvm_has_zapped_obsolete_pages(kvm)) {
-			kvm_mmu_commit_zap_page(kvm,
-					&kvm->arch.zapped_obsolete_pages);
-			goto unlock;
-		}
+	mutex_unlock(&kvm_lock);
 
-		if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
-			freed++;
-		kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	/* If not found a VM to shrink, just exit. */
+	if (!found)
+		return freed;
 
-unlock:
-		spin_unlock(&kvm->mmu_lock);
-		srcu_read_unlock(&kvm->srcu, idx);
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
 
-		break;
+	if (kvm_has_zapped_obsolete_pages(kvm)) {
+		kvm_mmu_commit_zap_page(kvm,
+				&kvm->arch.zapped_obsolete_pages);
+		goto unlock;
 	}
 
-	mutex_unlock(&kvm_lock);
+	if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
+		freed++;
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+
+unlock:
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	kvm_put_kvm(kvm);
+
 	return freed;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fcee0b5522d1..cad5f2cec162 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -626,6 +626,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
+int kvm_try_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 void kvm_put_kvm_no_destroy(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 51572037eb0c..477c67fcd41e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -845,6 +845,12 @@ void kvm_get_kvm(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_get_kvm);
 
+int kvm_try_get_kvm(struct kvm *kvm)
+{
+	return refcount_inc_not_zero(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_try_get_kvm);
+
 void kvm_put_kvm(struct kvm *kvm)
 {
 	if (refcount_dec_and_test(&kvm->users_count))