Re: [PATCH v3] mm/memory-failure: fix hugetlb_lock AA deadlock in get_huge_page_for_hwpoison
From: mawupeng
Date: Tue May 19 2026 - 22:47:41 EST
On 周三 2026-5-20 10:40, Kefeng Wang wrote:
> You should remove the v3 since this is the first version for mainline.
Thanks for reewing.
Sorry for my mistack, it will be removed next time.
>
> On 5/20/2026 10:01 AM, Wupeng Ma wrote:
>> madvise(MADV_HWPOISON) can trigger a recursive spinlock self-deadlock
>> (AA deadlock) on hugetlb_lock due to a race with concurrent folio
>> unmapping. The race scenario:
>>
>> Thread 1 (madvise MADV_HWPOISON) Thread 2 (unmap)
>> ------------------------------- -----------------
>> madvise_inject_error()
>> get_user_pages_fast() <- refcount++
>> memory_failure(MF_COUNT_INCREASED)
>> get_huge_page_for_hwpoison()
>> spin_lock_irq(&hugetlb_lock)
>> // refcount == 2 (gup + map)
>> // MF_COUNT_INCREASED path:
>> count_increased = true
>> zap_pte_range()
>> page_remove_rmap()
>> put_page() <- drops map ref
>> // refcount: 2 -> 1
>> hugetlb_update_hwpoison()
>> -> MF_HUGETLB_FOLIO_PRE_POISONED
>> -> goto out
>> out:
>> folio_put(folio) <- drops gup ref
>> // refcount: 1 -> 0
>> free_huge_folio()
>> spin_lock_irq(&hugetlb_lock) <- AA DEADLOCK
>>
>> When Thread 2's put_page() drops the mapping reference while Thread 1
>> holds hugetlb_lock, the folio refcount drops to 1. The subsequent
>> folio_put() at the out: label frees the folio, and free_huge_folio()
>> attempts to re-acquire the non-recursive hugetlb_lock on the same CPU,
>> resulting in an AA self-deadlock.
>>
>> The same deadlock can also occur on the folio_try_get() path: when a
>> migratable folio is found and folio_try_get() succeeds (refcount rises
>> to refcount+1), a concurrent unmap and a hugetlb_update_hwpoison()
>> returning pre-poisoned status will land at out: where folio_put() again
>> may free the folio under hugetlb_lock.
>>
>> Fix this by removing the hugetlb_lock wrapper from hugetlb.c and
>> moving the lock acquisition directly into get_huge_page_for_hwpoison()
>> (formerly __get_huge_page_for_hwpoison) in memory-failure.c. Place
>> spin_unlock_irq() before folio_put() at the out: label so that the
>> folio is always released outside the lock, preventing any recursive
>> lock acquisition.
>
>
> The following comment is not useful and correct for this version.
Thanks for reviewing.
Will be removed in later version.
>
> Remove the now-incorrect "Called from hugetlb code
>> with hugetlb_lock held" comment, and update the stale
>> __get_huge_page_for_hwpoison declarations in include/linux/mm.h.
>>
>
> The change is lgtm, Reviewed-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
>
>
>> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
>> Signed-off-by: Wupeng Ma <mawupeng1@xxxxxxxxxx>
>> ---
>> include/linux/hugetlb.h | 8 --------
>> include/linux/mm.h | 8 --------
>> mm/hugetlb.c | 11 -----------
>> mm/memory-failure.c | 8 ++++----
>> 4 files changed, 4 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 65910437be1ca..aa3eb42e0a01a 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
>> long freed);
>> bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
>> int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
>> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> - bool *migratable_cleared);
>> void folio_putback_hugetlb(struct folio *folio);
>> void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
>> void hugetlb_fix_reserve_counts(struct inode *inode);
>> @@ -422,12 +420,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
>> return 0;
>> }
>> -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> - bool *migratable_cleared)
>> -{
>> - return 0;
>> -}
>> -
>> static inline void folio_putback_hugetlb(struct folio *folio)
>> {
>> }
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index abb4963c1f064..46e5936dabaa8 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -4602,8 +4602,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
>> */
>> extern const struct attribute_group memory_failure_attr_group;
>> extern void memory_failure_queue(unsigned long pfn, int flags);
>> -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> - bool *migratable_cleared);
>> void num_poisoned_pages_inc(unsigned long pfn);
>> void num_poisoned_pages_sub(unsigned long pfn, long i);
>> #else
>> @@ -4611,12 +4609,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
>> {
>> }
>> -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> - bool *migratable_cleared)
>> -{
>> - return 0;
>> -}
>> -
>> static inline void num_poisoned_pages_inc(unsigned long pfn)
>> {
>> }
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 327eaa4074d39..4c99bb868ad08 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -7170,17 +7170,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
>> return ret;
>> }
>> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> - bool *migratable_cleared)
>> -{
>> - int ret;
>> -
>> - spin_lock_irq(&hugetlb_lock);
>> - ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
>> - spin_unlock_irq(&hugetlb_lock);
>> - return ret;
>> -}
>> -
>> /**
>> * folio_putback_hugetlb - unisolate a hugetlb folio
>> * @folio: the isolated hugetlb folio
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index ee42d43613097..28522180cf7f8 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -1966,10 +1966,7 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
>> folio_free_raw_hwp(folio, true);
>> }
>> -/*
>> - * Called from hugetlb code with hugetlb_lock held.
>> - */
>> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> +static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> bool *migratable_cleared)
>> {
>> struct page *page = pfn_to_page(pfn);
>> @@ -1977,6 +1974,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> bool count_increased = false;
>> int ret, rc;
>> + spin_lock_irq(&hugetlb_lock);
>> if (!folio_test_hugetlb(folio)) {
>> ret = MF_HUGETLB_NON_HUGEPAGE;
>> goto out;
>> @@ -2013,8 +2011,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> *migratable_cleared = true;
>> }
>> + spin_unlock_irq(&hugetlb_lock);
>> return ret;
>> out:
>> + spin_unlock_irq(&hugetlb_lock);
>> if (count_increased)
>> folio_put(folio);
>> return ret;
>