[PATCH v2 1/2] mm: memory-failure: Add memory_failure_queue_delayed() helper

From: Qiuxu Zhuo
Date: Fri Dec 22 2023 - 01:28:43 EST


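Introduce the memory_failure_queue_delayed() helper, which queues a memory
failure entry and schedules its handling after a given delay. To support
this, the per-CPU work item is converted from a work_struct to a
delayed_work. memory_failure_queue() becomes a wrapper that calls the
helper with a delay of 0. This prepares for later re-splitting of a
hardware-poisoned huge page.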

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
---
Prepares for patch 2/2.

mm/memory-failure.c | 51 +++++++++++++++++++++++++--------------------
1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 660c21859118..8f2c11863bae 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2385,28 +2385,12 @@ struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
spinlock_t lock;
- struct work_struct work;
+ struct delayed_work work;
};

static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);

-/**
- * memory_failure_queue - Schedule handling memory failure of a page.
- * @pfn: Page Number of the corrupted page
- * @flags: Flags for memory failure handling
- *
- * This function is called by the low level hardware error handler
- * when it detects hardware memory corruption of a page. It schedules
- * the recovering of error page, including dropping pages, killing
- * processes etc.
- *
- * The function is primarily of use for corruptions that
- * happen outside the current execution context (e.g. when
- * detected by a background scrubber)
- *
- * Can run in IRQ context.
- */
-void memory_failure_queue(unsigned long pfn, int flags)
+static void memory_failure_queue_delayed(unsigned long pfn, int flags, unsigned long delay)
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
@@ -2418,13 +2402,34 @@ void memory_failure_queue(unsigned long pfn, int flags)
mf_cpu = &get_cpu_var(memory_failure_cpu);
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
if (kfifo_put(&mf_cpu->fifo, entry))
- schedule_work_on(smp_processor_id(), &mf_cpu->work);
+ schedule_delayed_work_on(smp_processor_id(), &mf_cpu->work, delay);
else
pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
+
+/**
+ * memory_failure_queue - Schedule handling memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @flags: Flags for memory failure handling
+ *
+ * This function is called by the low-level hardware error handler
+ * when it detects hardware memory corruption of a page. It schedules
+ * recovery of the error page, including dropping pages, killing
+ * processes, etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber).
+ *
+ * Can run in IRQ context.
+ */
+void memory_failure_queue(unsigned long pfn, int flags)
+{
+ memory_failure_queue_delayed(pfn, flags, 0); /* queue with a delay of 0 */
+}
EXPORT_SYMBOL_GPL(memory_failure_queue);

static void memory_failure_work_func(struct work_struct *work)
@@ -2434,7 +2439,7 @@ static void memory_failure_work_func(struct work_struct *work)
unsigned long proc_flags;
int gotten;

- mf_cpu = container_of(work, struct memory_failure_cpu, work);
+ mf_cpu = container_of(work, struct memory_failure_cpu, work.work);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
@@ -2457,8 +2462,8 @@ void memory_failure_queue_kick(int cpu)
struct memory_failure_cpu *mf_cpu;

mf_cpu = &per_cpu(memory_failure_cpu, cpu);
- cancel_work_sync(&mf_cpu->work);
- memory_failure_work_func(&mf_cpu->work);
+ cancel_delayed_work_sync(&mf_cpu->work);
+ memory_failure_work_func(&mf_cpu->work.work);
}

static int __init memory_failure_init(void)
@@ -2470,7 +2475,7 @@ static int __init memory_failure_init(void)
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
- INIT_WORK(&mf_cpu->work, memory_failure_work_func);
+ INIT_DELAYED_WORK(&mf_cpu->work, memory_failure_work_func);
}

register_sysctl_init("vm", memory_failure_table);
--
2.17.1