[PATCH 3/6] mm/memory-failure.c: add code to resolve quasi-hwpoisoned page
From: Naoya Horiguchi
Date: Thu Mar 13 2014 - 18:25:15 EST
This patch introduces three ways to resolve quasi-hwpoisoned pages:
1. unpoison: this is a test feature, but if users can accept the data loss
(and continue by rereading the old data from storage), this could be tolerable.
2. truncate: if discarding a part of a file which includes a memory error
is OK for your applications, this could be reasonable too.
3. full page overwrite: if your application is prepared for dirty pagecache
errors and has a copy of the data (or can recreate the proper data),
it can overwrite the page-sized address range containing the error
and continue to run without caring about the error.
Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
include/linux/pagemap.h | 16 +++++++++++++
mm/filemap.c | 14 ++++++++---
mm/memory-failure.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-
mm/truncate.c | 7 ++++++
4 files changed, 95 insertions(+), 4 deletions(-)
diff --git v3.14-rc6.orig/include/linux/pagemap.h v3.14-rc6/include/linux/pagemap.h
index 5e234d0d0baf..715962f7ea7a 100644
--- v3.14-rc6.orig/include/linux/pagemap.h
+++ v3.14-rc6/include/linux/pagemap.h
@@ -589,12 +589,28 @@ static inline int add_to_page_cache(struct page *page,
#ifdef CONFIG_MEMORY_FAILURE
bool mapping_hwpoisoned_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte);
+bool page_quasi_hwpoisoned(struct address_space *mapping, struct page *page);
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+ struct page *page, bool free);
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+ loff_t pos, size_t count);
#else
static inline bool mapping_hwpoisoned_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
return false;
}
+static inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+ struct page *page)
+{
+ return false;
+}
+#define hwpoison_resolve_pagecache_error(mapping, page, free) do {} while (0)
+static inline bool hwpoison_partial_overwrite(struct address_space *mapping,
+ loff_t pos, size_t count)
+{
+ return false;
+}
#endif /* CONFIG_MEMORY_FAILURE */
#endif /* _LINUX_PAGEMAP_H */
diff --git v3.14-rc6.orig/mm/filemap.c v3.14-rc6/mm/filemap.c
index 887f2dfaf185..f58b36e313ad 100644
--- v3.14-rc6.orig/mm/filemap.c
+++ v3.14-rc6/mm/filemap.c
@@ -2110,8 +2110,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
if (unlikely(*pos < 0))
return -EINVAL;
- if (unlikely(mapping_hwpoisoned_range(file->f_mapping, *pos,
- *pos + *count)))
+ if (unlikely(hwpoison_partial_overwrite(file->f_mapping, *pos, *count)))
return -EHWPOISON;
if (!isblk) {
@@ -2222,7 +2221,13 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
+ /*
+ * When the write range includes a hwpoisoned region (in which case
+ * written is -EHWPOISON), we have already confirmed in
+ * generic_write_checks() that it is a full page overwrite, so we can
+ * safely invalidate the error and the write doesn't have to fail.
+ */
+ if (written && written != -EHWPOISON)
goto out;
/*
@@ -2362,6 +2367,9 @@ static ssize_t generic_perform_write(struct file *file,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
+ if (page_quasi_hwpoisoned(mapping, page))
+ hwpoison_resolve_pagecache_error(mapping, page, false);
+
pagefault_disable();
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
pagefault_enable();
diff --git v3.14-rc6.orig/mm/memory-failure.c v3.14-rc6/mm/memory-failure.c
index 34f2c046af22..0eca5449d251 100644
--- v3.14-rc6.orig/mm/memory-failure.c
+++ v3.14-rc6/mm/memory-failure.c
@@ -665,6 +665,57 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock);
}
+inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+ struct page *page)
+{
+ if (!sysctl_memory_failure_recovery)
+ return false;
+ return unlikely(get_pagecache_tag_hwpoison(mapping, page_index(page)));
+}
+
+/*
+ * This function clears a quasi-hwpoisoned page and turns it into a normal
+ * LRU page. Callers should check that @page is really quasi-hwpoisoned,
+ * and must not call this for real error pages.
+ */
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+ struct page *page, bool free)
+{
+ VM_BUG_ON(PageLRU(page));
+ VM_BUG_ON(!PageLocked(page));
+
+ ClearPageHWPoison(page);
+ clear_pagecache_tag_hwpoison(mapping, page_index(page));
+ dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
+ putback_lru_page(page);
+ if (free) {
+ lru_add_drain_all();
+ delete_from_page_cache(page);
+ }
+ iput(mapping->host);
+}
+
+/*
+ * Return true if the given range [pos, pos+count) *partially* overlaps a
+ * hwpoisoned page. Effectively, only the boundary pages are checked for overlap.
+ */
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+ loff_t pos, size_t count)
+{
+ if (!sysctl_memory_failure_recovery)
+ return false;
+ if (!mapping_hwpoisoned_range(mapping, pos, pos + count))
+ return false;
+
+ if (!PAGE_ALIGNED(pos) &&
+ get_pagecache_tag_hwpoison(mapping, pos >> PAGE_SHIFT))
+ return true;
+ if (!PAGE_ALIGNED(pos + count) &&
+ get_pagecache_tag_hwpoison(mapping, (pos + count) >> PAGE_SHIFT))
+ return true;
+ return false;
+}
+
/*
* Dirty pagecache page
*
@@ -691,7 +742,10 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
*
* This quasi-hwpoisoned page works to keep reporting the error for all
* processes which try to access to the error address until it is resolved
- * or the system reboots.
+ * or the system reboots. Quasi-hwpoisoned pages can be resolved by unpoison,
+ * truncate, or full page overwrite. With a full page overwrite, the quasi-
+ * hwpoisoned page safely turns into a normal LRU page, so we expect
+ * userspace to do this, if possible, when it receives the error report.
*
* Issues: when the error hit a hole page the error is not properly
* propagated.
@@ -1496,12 +1550,18 @@ int unpoison_memory(unsigned long pfn)
* the free buddy page pool.
*/
if (TestClearPageHWPoison(page)) {
+ struct address_space *mapping = page_mapping(page);
+ if (mapping && page_quasi_hwpoisoned(mapping, page)) {
+ hwpoison_resolve_pagecache_error(mapping, page, true);
+ goto unlock;
+ }
pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
atomic_long_sub(nr_pages, &num_poisoned_pages);
freeit = 1;
if (PageHuge(page))
clear_page_hwpoison_huge_page(page);
}
+unlock:
unlock_page(page);
put_page(page);
diff --git v3.14-rc6.orig/mm/truncate.c v3.14-rc6/mm/truncate.c
index 353b683afd6e..92d7097dfc6d 100644
--- v3.14-rc6.orig/mm/truncate.c
+++ v3.14-rc6/mm/truncate.c
@@ -103,6 +103,10 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
cancel_dirty_page(page, PAGE_CACHE_SIZE);
ClearPageMappedToDisk(page);
+
+ if (page_quasi_hwpoisoned(mapping, page))
+ hwpoison_resolve_pagecache_error(mapping, page, false);
+
delete_from_page_cache(page);
return 0;
}
@@ -439,6 +443,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
return 0;
+ if (page_quasi_hwpoisoned(mapping, page))
+ hwpoison_resolve_pagecache_error(mapping, page, false);
+
spin_lock_irq(&mapping->tree_lock);
if (PageDirty(page))
goto failed;
--
1.8.5.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/