Re: [PATCH v1 1/3] mm/memory-failure: Use a mutex to avoid memory_failure() races

From: HORIGUCHI NAOYA(堀口 直也)
Date: Tue Apr 20 2021 - 03:46:32 EST


On Mon, Apr 19, 2021 at 07:05:38PM +0200, Borislav Petkov wrote:
> On Tue, Apr 13, 2021 at 07:43:18AM +0900, Naoya Horiguchi wrote:
> > From: Tony Luck <tony.luck@xxxxxxxxx>
> >
> > There can be races when multiple CPUs consume poison from the same
> > page. The first into memory_failure() atomically sets the HWPoison
> > page flag and begins hunting for tasks that map this page. Eventually
> > it invalidates those mappings and may send a SIGBUS to the affected
> > tasks.
> >
> > But while all that work is going on, other CPUs see a "success"
> > return code from memory_failure() and so they believe the error
> > has been handled and continue executing.
> >
> > Fix by wrapping most of the internal parts of memory_failure() in
> > a mutex.
> >
> > Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> > Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
> > ---
> > mm/memory-failure.c | 19 +++++++++++++++++--
> > 1 file changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git v5.12-rc5/mm/memory-failure.c v5.12-rc5_patched/mm/memory-failure.c
> > index 24210c9bd843..c1509f4b565e 100644
> > --- v5.12-rc5/mm/memory-failure.c
> > +++ v5.12-rc5_patched/mm/memory-failure.c
> > @@ -1381,6 +1381,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
> > return rc;
> > }
> >
> > +static DEFINE_MUTEX(mf_mutex);
> > +
> > /**
> > * memory_failure - Handle memory failure of a page.
> > * @pfn: Page Number of the corrupted page
> > @@ -1424,12 +1426,18 @@ int memory_failure(unsigned long pfn, int flags)
> > return -ENXIO;
> > }
>
> So the locking patterns are done in two different ways, which are
> confusing when following this code:
>
> > + mutex_lock(&mf_mutex);
> > +
> > try_again:
> > - if (PageHuge(p))
> > - return memory_failure_hugetlb(pfn, flags);
> > + if (PageHuge(p)) {
> > + res = memory_failure_hugetlb(pfn, flags);
> > + goto out2;
> > + }
>
> You have the goto to a label where you do the unlocking (btw, pls do
> s/out2/out_unlock/g;)...
>
> > +
> > if (TestSetPageHWPoison(p)) {
> > pr_err("Memory failure: %#lx: already hardware poisoned\n",
> > pfn);
> > + mutex_unlock(&mf_mutex);
> > return 0;
>
> ... and you have the other case where you unlock before returning.
>
> Since you've added the label, I think *all* the unlocking should do
> "goto out_unlock" instead of doing either/or.

Makes sense, so I updated the patch as below. The existing label "out"
can be renamed in the same manner, so I replaced it with "unlock_page"
and "out2" with "unlock_mutex". I considered "out_unlock_{page,mutex}",
but it seems clear enough without the "out_" prefix.
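
For clarity, a rough sketch of the intended exit paths (illustration
only, heavily trimmed; the actual change is in the patch below):

    static DEFINE_MUTEX(mf_mutex);

    int memory_failure(unsigned long pfn, int flags)
    {
            int res = 0;
            ...
            mutex_lock(&mf_mutex);

            if (PageHuge(p)) {
                    res = memory_failure_hugetlb(pfn, flags);
                    goto unlock_mutex;      /* page lock not taken yet */
            }
            ...
            lock_page(p);
            ...
            if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
                    res = -EBUSY;
                    goto unlock_page;       /* drop page lock, then mutex */
            }

            res = identify_page_state(pfn, p, page_flags);
    unlock_page:
            unlock_page(p);
    unlock_mutex:
            mutex_unlock(&mf_mutex);
            return res;
    }

So every return path goes through one of the two labels, and the page
lock is always released before the mutex.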

If you have any other suggestion, please let me know.

Thanks,
Naoya Horiguchi

---
From 19dbd0e9cd2c7febf67370ac9957bdde83084316 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@xxxxxxxxx>
Date: Tue, 20 Apr 2021 16:42:01 +0900
Subject: [PATCH 1/3] mm/memory-failure: Use a mutex to avoid memory_failure()
races

There can be races when multiple CPUs consume poison from the same
page. The first into memory_failure() atomically sets the HWPoison
page flag and begins hunting for tasks that map this page. Eventually
it invalidates those mappings and may send a SIGBUS to the affected
tasks.

But while all that work is going on, other CPUs see a "success"
return code from memory_failure() and so they believe the error
has been handled and continue executing.

Fix by wrapping most of the internal parts of memory_failure() in
a mutex.

Along with introducing an additional goto label, this patch also
aligns the return paths with the "goto out" pattern and renames
the existing label "out" to a clearer name.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
---
mm/memory-failure.c | 37 ++++++++++++++++++++++++-------------
1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 24210c9bd843..4087308e4b32 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1381,6 +1381,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
return rc;
}

+static DEFINE_MUTEX(mf_mutex);
+
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
@@ -1404,7 +1406,7 @@ int memory_failure(unsigned long pfn, int flags)
struct page *hpage;
struct page *orig_head;
struct dev_pagemap *pgmap;
- int res;
+ int res = 0;
unsigned long page_flags;
bool retry = true;

@@ -1424,13 +1426,18 @@ int memory_failure(unsigned long pfn, int flags)
return -ENXIO;
}

+ mutex_lock(&mf_mutex);
+
try_again:
- if (PageHuge(p))
- return memory_failure_hugetlb(pfn, flags);
+ if (PageHuge(p)) {
+ res = memory_failure_hugetlb(pfn, flags);
+ goto unlock_mutex;
+ }
+
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
- return 0;
+ goto unlock_mutex;
}

orig_head = hpage = compound_head(p);
@@ -1463,17 +1470,19 @@ int memory_failure(unsigned long pfn, int flags)
res = MF_FAILED;
}
action_result(pfn, MF_MSG_BUDDY, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
+ res = res == MF_RECOVERED ? 0 : -EBUSY;
} else {
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
- return -EBUSY;
+ res = -EBUSY;
}
+ goto unlock_mutex;
}

if (PageTransHuge(hpage)) {
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
- return -EBUSY;
+ res = -EBUSY;
+ goto unlock_mutex;
}
VM_BUG_ON_PAGE(!page_count(p), p);
}
@@ -1497,7 +1506,7 @@ int memory_failure(unsigned long pfn, int flags)
if (PageCompound(p) && compound_head(p) != orig_head) {
action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}

/*
@@ -1517,14 +1526,14 @@ int memory_failure(unsigned long pfn, int flags)
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
- return 0;
+ goto unlock_mutex;
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
- return 0;
+ goto unlock_mutex;
}

if (!PageTransTail(p) && !PageLRU(p))
@@ -1543,7 +1552,7 @@ int memory_failure(unsigned long pfn, int flags)
if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}

/*
@@ -1552,13 +1561,15 @@ int memory_failure(unsigned long pfn, int flags)
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}

identify_page_state:
res = identify_page_state(pfn, p, page_flags);
-out:
+unlock_page:
unlock_page(p);
+unlock_mutex:
+ mutex_unlock(&mf_mutex);
return res;
}
EXPORT_SYMBOL_GPL(memory_failure);
--
2.25.1