[PATCH 1/3] mm/memory-failure: Prepare for mass memory_failure()

From: Dan Williams
Date: Thu Mar 18 2021 - 00:10:08 EST


Currently memory_failure() assumes an infrequent report on a handful of
pages. A new use case for surprise removal of a persistent memory device
needs to trigger memory_failure() on a large range. Rate limit
memory_failure() error logging, and allow the
memory_failure_dev_pagemap() helper to be called directly.

Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
mm/memory-failure.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 24210c9bd843..43ba4307c526 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -395,8 +395,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* signal and then access the memory. Just kill it.
*/
if (fail || tk->addr == -EFAULT) {
- pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
- pfn, tk->tsk->comm, tk->tsk->pid);
+ pr_err_ratelimited(
+ "Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
tk->tsk, PIDTYPE_PID);
}
@@ -408,8 +409,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* process anyways.
*/
else if (kill_proc(tk, pfn, flags) < 0)
- pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
- pfn, tk->tsk->comm, tk->tsk->pid);
+ pr_err_ratelimited(
+ "Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
}
put_task_struct(tk->tsk);
kfree(tk);
@@ -919,8 +921,8 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
{
trace_memory_failure_event(pfn, type, result);

- pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
- pfn, action_page_types[type], action_name[result]);
+ pr_err_ratelimited("Memory failure: %#lx: recovery action for %s: %s\n",
+ pfn, action_page_types[type], action_name[result]);
}

static int page_action(struct page_state *ps, struct page *p,
@@ -1375,8 +1377,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
unlock:
dax_unlock_page(page, cookie);
out:
- /* drop pgmap ref acquired in caller */
- put_dev_pagemap(pgmap);
action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
return rc;
}
@@ -1415,9 +1415,12 @@ int memory_failure(unsigned long pfn, int flags)
if (!p) {
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags,
- pgmap);
+ if (pgmap) {
+ res = memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ put_dev_pagemap(pgmap);
+ return res;
+ }
}
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);