Re: [PATCH v3] mm/memory-failure: fix hugetlb_lock AA deadlock in get_huge_page_for_hwpoison
From: Kefeng Wang
Date: Tue May 19 2026 - 22:42:12 EST
You should remove the v3 since this is the first version for mainline.
On 5/20/2026 10:01 AM, Wupeng Ma wrote:
madvise(MADV_HWPOISON) can trigger a recursive spinlock self-deadlock
(AA deadlock) on hugetlb_lock due to a race with concurrent folio
unmapping. The race scenario:
Thread 1 (madvise MADV_HWPOISON) Thread 2 (unmap)
------------------------------- -----------------
madvise_inject_error()
get_user_pages_fast() <- refcount++
memory_failure(MF_COUNT_INCREASED)
get_huge_page_for_hwpoison()
spin_lock_irq(&hugetlb_lock)
// refcount == 2 (gup + map)
// MF_COUNT_INCREASED path:
count_increased = true
zap_pte_range()
page_remove_rmap()
put_page() <- drops map ref
// refcount: 2 -> 1
hugetlb_update_hwpoison()
-> MF_HUGETLB_FOLIO_PRE_POISONED
-> goto out
out:
folio_put(folio) <- drops gup ref
// refcount: 1 -> 0
free_huge_folio()
spin_lock_irq(&hugetlb_lock) <- AA DEADLOCK
When Thread 2's put_page() drops the mapping reference while Thread 1
holds hugetlb_lock, the folio refcount drops to 1. The subsequent
folio_put() at the out: label frees the folio, and free_huge_folio()
attempts to re-acquire the non-recursive hugetlb_lock on the same CPU,
resulting in an AA self-deadlock.
The same deadlock can also occur on the folio_try_get() path: when a
migratable folio is found and folio_try_get() succeeds (refcount rises
to refcount+1), a concurrent unmap and a hugetlb_update_hwpoison()
returning pre-poisoned status will land at out: where folio_put() again
may free the folio under hugetlb_lock.
Fix this by removing the hugetlb_lock wrapper from hugetlb.c and
moving the lock acquisition directly into get_huge_page_for_hwpoison()
(formerly __get_huge_page_for_hwpoison) in memory-failure.c. Place
spin_unlock_irq() before folio_put() at the out: label so that the
folio is always released outside the lock, preventing any recursive
lock acquisition.
The following comment is not useful and correct for this version.
Remove the now-incorrect "Called from hugetlb code
with hugetlb_lock held" comment, and update the stale
__get_huge_page_for_hwpoison declarations in include/linux/mm.h.
The change is lgtm, Reviewed-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
Signed-off-by: Wupeng Ma <mawupeng1@xxxxxxxxxx>
---
include/linux/hugetlb.h | 8 --------
include/linux/mm.h | 8 --------
mm/hugetlb.c | 11 -----------
mm/memory-failure.c | 8 ++++----
4 files changed, 4 insertions(+), 31 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65910437be1ca..aa3eb42e0a01a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
@@ -422,12 +420,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
return 0;
}
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void folio_putback_hugetlb(struct folio *folio)
{
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index abb4963c1f064..46e5936dabaa8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4602,8 +4602,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
*/
extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
@@ -4611,12 +4609,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
{
}
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- return 0;
-}
-
static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 327eaa4074d39..4c99bb868ad08 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7170,17 +7170,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
return ret;
}
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
- bool *migratable_cleared)
-{
- int ret;
-
- spin_lock_irq(&hugetlb_lock);
- ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
- spin_unlock_irq(&hugetlb_lock);
- return ret;
-}
-
/**
* folio_putback_hugetlb - unisolate a hugetlb folio
* @folio: the isolated hugetlb folio
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ee42d43613097..28522180cf7f8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1966,10 +1966,7 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
folio_free_raw_hwp(folio, true);
}
-/*
- * Called from hugetlb code with hugetlb_lock held.
- */
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
struct page *page = pfn_to_page(pfn);
@@ -1977,6 +1974,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool count_increased = false;
int ret, rc;
+ spin_lock_irq(&hugetlb_lock);
if (!folio_test_hugetlb(folio)) {
ret = MF_HUGETLB_NON_HUGEPAGE;
goto out;
@@ -2013,8 +2011,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
*migratable_cleared = true;
}
+ spin_unlock_irq(&hugetlb_lock);
return ret;
out:
+ spin_unlock_irq(&hugetlb_lock);
if (count_increased)
folio_put(folio);
return ret;