[PATCH v2 12/13] KVM: s390: pv: add OOM notifier for lazy destroy

From: Claudio Imbrenda
Date: Wed Jul 28 2021 - 10:27:18 EST


Add a per-VM OOM notifier for lazy destroy.

When a protected VM is undergoing deferred teardown, register an OOM
notifier. This allows an OOM situation to be handled by just waiting a
little.

The deferred destroy process will now keep a running tally of the
number of pages freed. The OOM notifier will check the number of pages
freed before and after waiting: it sleeps for 20ms and then reports the
number of pages freed by the deferred destroy mechanism during that
time.

If more than 1024 pages have already been freed in the current OOM
situation, no action is taken by the OOM notifier and no wait is
performed. This avoids excessive waiting times in case many VMs are
being destroyed at the same time, once enough memory has been freed.

Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
arch/s390/kvm/pv.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 088b94512af3..390b57307f24 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -15,8 +15,12 @@
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/oom.h>
#include "kvm-s390.h"

+#define KVM_S390_PV_LAZY_DESTROY_OOM_NOTIFY_PRIORITY 70
+
struct deferred_priv {
struct mm_struct *mm;
bool has_mm;
@@ -24,6 +28,8 @@ struct deferred_priv {
u64 handle;
void *stor_var;
unsigned long stor_base;
+ unsigned long n_pages_freed;
+ struct notifier_block oom_nb;
};

static int lazy_destroy = 1;
@@ -249,6 +255,24 @@ static int kvm_s390_pv_deinit_vm_now(struct kvm *kvm, u16 *rc, u16 *rrc)
return cc ? -EIO : 0;
}

+static int kvm_s390_pv_oom_notify(struct notifier_block *nb,
+ unsigned long dummy, void *parm)
+{ /* OOM notifier callback: sleep briefly, then credit pages freed meanwhile by the deferred destroy */
+ unsigned long *freed = parm; /* parm is used as a running count of freed pages (presumably the OOM chain's counter) */
+ unsigned long free_before;
+ struct deferred_priv *p;
+/* More than 1024 pages already counted as freed: return without sleeping. */
+ if (*freed > 1024)
+ return NOTIFY_OK;
+/* nb is the oom_nb member embedded in a struct deferred_priv; recover the enclosing object. */
+ p = container_of(nb, struct deferred_priv, oom_nb);
+ free_before = READ_ONCE(p->n_pages_freed); /* snapshot of the tally before waiting */
+ msleep(20); /* wait; n_pages_freed is advanced by the destroy thread in the meantime */
+ *freed += READ_ONCE(p->n_pages_freed) - free_before; /* add only the pages freed during the sleep */
+/* NOTIFY_OK is returned on both paths, whether or not the callback slept. */
+ return NOTIFY_OK;
+}
+
static int kvm_s390_pv_destroy_vm_thread(void *priv)
{
struct destroy_page_lazy *lazy, *next;
@@ -256,12 +280,20 @@ static int kvm_s390_pv_destroy_vm_thread(void *priv)
u16 rc, rrc;
int r;

+ p->oom_nb.priority = KVM_S390_PV_LAZY_DESTROY_OOM_NOTIFY_PRIORITY;
+ p->oom_nb.notifier_call = kvm_s390_pv_oom_notify;
+ r = register_oom_notifier(&p->oom_nb);
+
list_for_each_entry_safe(lazy, next, &p->mm->context.deferred_list, list) {
list_del(&lazy->list);
s390_uv_destroy_pfns(lazy->count, lazy->pfns);
+ WRITE_ONCE(p->n_pages_freed, p->n_pages_freed + lazy->count + 1);
free_page(__pa(lazy));
}

+ if (!r)
+ unregister_oom_notifier(&p->oom_nb);
+
if (p->has_mm) {
/* Clear all the pages as long as we are not the only users of the mm */
s390_uv_destroy_range(p->mm, 1, 0, TASK_SIZE_MAX);
--
2.31.1