[RFC PATCH v2 13/16] mm,hwpoison: Take pages off the buddy when hard-offlining
From: Oscar Salvador
Date: Thu Oct 17 2019 - 10:21:55 EST
Hard-offline needs to do what soft-offline already does and take
poisoned pages off the buddy allocator.
Otherwise we could run into the problem reported in [1] here as well.
[1] https://lore.kernel.org/linux-mm/20190826104144.GA7849@linux/T/#u
Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
mm/memory-failure.c | 33 ++++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 48eb314598e0..3d491c0d3f91 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -791,6 +791,14 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
return MF_FAILED;
}
+static int me_huge_free_page(struct page *p)
+{
+	/* MF_RECOVERED only when page_handle_poison() returns non-zero. */
+	if (!page_handle_poison(p, true, false))
+		return MF_FAILED;
+	return MF_RECOVERED;
+}
+
/*
* Huge pages. Needs work.
* Issues:
@@ -818,8 +826,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
*/
if (PageAnon(hpage))
put_page(hpage);
- dissolve_free_huge_page(p);
- res = MF_RECOVERED;
+ res = me_huge_free_page(p);
lock_page(hpage);
}
@@ -1145,8 +1152,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
}
}
unlock_page(head);
- dissolve_free_huge_page(p);
- action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+ res = me_huge_free_page(p);
+ if (res == MF_FAILED)
+ num_poisoned_pages_dec();
+ action_result(pfn, MF_MSG_FREE_HUGE, res);
return 0;
}
@@ -1307,6 +1316,12 @@ int memory_failure(unsigned long pfn, int flags)
if (PageHuge(p))
return memory_failure_hugetlb(pfn, flags);
+
+ if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+ action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
+ return 0;
+ }
+
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
@@ -1328,10 +1343,10 @@ int memory_failure(unsigned long pfn, int flags)
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
if (!get_hwpoison_page(p)) {
- if (is_free_buddy_page(p)) {
- action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
+ if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+ action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED);
return 0;
- } else {
+ } else if(!is_free_buddy_page(p)) {
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
return -EBUSY;
}
@@ -1354,8 +1369,8 @@ int memory_failure(unsigned long pfn, int flags)
*/
shake_page(p, 0);
/* shake_page could have turned it free. */
- if (!PageLRU(p) && is_free_buddy_page(p)) {
- action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
+ if (!PageLRU(p) && is_free_buddy_page(p) && page_handle_poison(p, true, false)) {
+ action_result(pfn, MF_MSG_BUDDY_2ND, MF_RECOVERED);
return 0;
}
--
2.12.3