Re: [PATCH v3] mm/memory-failure: fix hugetlb_lock AA deadlock in get_huge_page_for_hwpoison

From: mawupeng

Date: Tue May 19 2026 - 22:47:41 EST




On 周三 2026-5-20 10:40, Kefeng Wang wrote:
> You should remove the v3 since this is the first version for mainline.

Thanks for reewing.

Sorry for my mistack, it will be removed next time.

>
> On 5/20/2026 10:01 AM, Wupeng Ma wrote:
>> madvise(MADV_HWPOISON) can trigger a recursive spinlock self-deadlock
>> (AA deadlock) on hugetlb_lock due to a race with concurrent folio
>> unmapping.  The race scenario:
>>
>>    Thread 1 (madvise MADV_HWPOISON)          Thread 2 (unmap)
>>    -------------------------------          -----------------
>>    madvise_inject_error()
>>      get_user_pages_fast()  <- refcount++
>>      memory_failure(MF_COUNT_INCREASED)
>>        get_huge_page_for_hwpoison()
>>          spin_lock_irq(&hugetlb_lock)
>>          // refcount == 2 (gup + map)
>>          // MF_COUNT_INCREASED path:
>>          count_increased = true
>>                                             zap_pte_range()
>>                                               page_remove_rmap()
>>                                               put_page()  <- drops map ref
>>                                               // refcount: 2 -> 1
>>          hugetlb_update_hwpoison()
>>            -> MF_HUGETLB_FOLIO_PRE_POISONED
>>            -> goto out
>>          out:
>>            folio_put(folio)  <- drops gup ref
>>              // refcount: 1 -> 0
>>              free_huge_folio()
>>                spin_lock_irq(&hugetlb_lock)  <- AA DEADLOCK
>>
>> When Thread 2's put_page() drops the mapping reference while Thread 1
>> holds hugetlb_lock, the folio refcount drops to 1.  The subsequent
>> folio_put() at the out: label frees the folio, and free_huge_folio()
>> attempts to re-acquire the non-recursive hugetlb_lock on the same CPU,
>> resulting in an AA self-deadlock.
>>
>> The same deadlock can also occur on the folio_try_get() path: when a
>> migratable folio is found and folio_try_get() succeeds (refcount rises
>> to refcount+1), a concurrent unmap and a hugetlb_update_hwpoison()
>> returning pre-poisoned status will land at out: where folio_put() again
>> may free the folio under hugetlb_lock.
>>
>> Fix this by removing the hugetlb_lock wrapper from hugetlb.c and
>> moving the lock acquisition directly into get_huge_page_for_hwpoison()
>> (formerly __get_huge_page_for_hwpoison) in memory-failure.c.  Place
>> spin_unlock_irq() before folio_put() at the out: label so that the
>> folio is always released outside the lock, preventing any recursive
>> lock acquisition. 
>
>
> The following comment is not useful and correct for this version.

Thanks for reviewing.

Will be removed in later version.

>
> Remove the now-incorrect "Called from hugetlb code
>> with hugetlb_lock held" comment, and update the stale
>> __get_huge_page_for_hwpoison declarations in include/linux/mm.h.
>>
>
> The change is lgtm,  Reviewed-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
>
>
>> Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()")
>> Signed-off-by: Wupeng Ma <mawupeng1@xxxxxxxxxx>
>> ---
>>   include/linux/hugetlb.h |  8 --------
>>   include/linux/mm.h      |  8 --------
>>   mm/hugetlb.c            | 11 -----------
>>   mm/memory-failure.c     |  8 ++++----
>>   4 files changed, 4 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 65910437be1ca..aa3eb42e0a01a 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
>>                           long freed);
>>   bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
>>   int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
>> -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> -                bool *migratable_cleared);
>>   void folio_putback_hugetlb(struct folio *folio);
>>   void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
>>   void hugetlb_fix_reserve_counts(struct inode *inode);
>> @@ -422,12 +420,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb,
>>       return 0;
>>   }
>>   -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> -                    bool *migratable_cleared)
>> -{
>> -    return 0;
>> -}
>> -
>>   static inline void folio_putback_hugetlb(struct folio *folio)
>>   {
>>   }
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index abb4963c1f064..46e5936dabaa8 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -4602,8 +4602,6 @@ extern int soft_offline_page(unsigned long pfn, int flags);
>>    */
>>   extern const struct attribute_group memory_failure_attr_group;
>>   extern void memory_failure_queue(unsigned long pfn, int flags);
>> -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> -                    bool *migratable_cleared);
>>   void num_poisoned_pages_inc(unsigned long pfn);
>>   void num_poisoned_pages_sub(unsigned long pfn, long i);
>>   #else
>> @@ -4611,12 +4609,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags)
>>   {
>>   }
>>   -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> -                    bool *migratable_cleared)
>> -{
>> -    return 0;
>> -}
>> -
>>   static inline void num_poisoned_pages_inc(unsigned long pfn)
>>   {
>>   }
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 327eaa4074d39..4c99bb868ad08 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -7170,17 +7170,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison
>>       return ret;
>>   }
>>   -int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> -                bool *migratable_cleared)
>> -{
>> -    int ret;
>> -
>> -    spin_lock_irq(&hugetlb_lock);
>> -    ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
>> -    spin_unlock_irq(&hugetlb_lock);
>> -    return ret;
>> -}
>> -
>>   /**
>>    * folio_putback_hugetlb - unisolate a hugetlb folio
>>    * @folio: the isolated hugetlb folio
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index ee42d43613097..28522180cf7f8 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -1966,10 +1966,7 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
>>       folio_free_raw_hwp(folio, true);
>>   }
>>   -/*
>> - * Called from hugetlb code with hugetlb_lock held.
>> - */
>> -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>> +static int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>>                    bool *migratable_cleared)
>>   {
>>       struct page *page = pfn_to_page(pfn);
>> @@ -1977,6 +1974,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>>       bool count_increased = false;
>>       int ret, rc;
>>   +    spin_lock_irq(&hugetlb_lock);
>>       if (!folio_test_hugetlb(folio)) {
>>           ret = MF_HUGETLB_NON_HUGEPAGE;
>>           goto out;
>> @@ -2013,8 +2011,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>>           *migratable_cleared = true;
>>       }
>>   +    spin_unlock_irq(&hugetlb_lock);
>>       return ret;
>>   out:
>> +    spin_unlock_irq(&hugetlb_lock);
>>       if (count_increased)
>>           folio_put(folio);
>>       return ret;
>