[PATCH v1] mm/hwpoison: set PageHWPoison after taking page lock in memory_failure_hugetlb()

From: Naoya Horiguchi
Date: Wed Mar 09 2022 - 04:15:27 EST


From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>

There is a race condition between memory_failure_hugetlb() and hugetlb
free/demotion, which causes setting PageHWPoison flag on the wrong page
(which was a hugetlb when memory_failrue() was called, but was removed
or demoted when memory_failure_hugetlb() is called). This results in
killing wrong processes. So set PageHWPoison flag with holding page lock,

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
---
mm/memory-failure.c | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ac6492e36978..fe25eee8f9d6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1494,24 +1494,11 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
int res;
unsigned long page_flags;

- if (TestSetPageHWPoison(head)) {
- pr_err("Memory failure: %#lx: already hardware poisoned\n",
- pfn);
- res = -EHWPOISON;
- if (flags & MF_ACTION_REQUIRED)
- res = kill_accessing_process(current, page_to_pfn(head), flags);
- return res;
- }
-
- num_poisoned_pages_inc();
-
if (!(flags & MF_COUNT_INCREASED)) {
res = get_hwpoison_page(p, flags);
if (!res) {
lock_page(head);
if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
unlock_page(head);
return -EOPNOTSUPP;
}
@@ -1544,13 +1531,16 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
page_flags = head->flags;

if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
put_page(p);
res = -EOPNOTSUPP;
goto out;
}

+ if (TestSetPageHWPoison(head))
+ goto already_hwpoisoned;
+
+ num_poisoned_pages_inc();
+
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
@@ -1576,6 +1566,13 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
out:
unlock_page(head);
return res;
+already_hwpoisoned:
+ unlock_page(head);
+ pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+ res = -EHWPOISON;
+ if (flags & MF_ACTION_REQUIRED)
+ res = kill_accessing_process(current, page_to_pfn(head), flags);
+ return res;
}

static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
--
2.25.1