[PATCH RESEND] mm, kasan: don't poison boot memory

From: Andrey Konovalov
Date: Wed Feb 17 2021 - 16:00:15 EST


During boot, all non-reserved memblock memory is exposed to the buddy
allocator. Poisoning all that memory with KASAN lengthens boot time,
especially on systems with large amount of RAM. This patch makes
page_alloc to not call kasan_free_pages() on all new memory.

__free_pages_core() is used when exposing fresh memory during system
boot and when onlining memory during hotplug. This patch adds a new
FPI_SKIP_KASAN_POISON flag and passes it to __free_pages_ok() through
free_pages_prepare() from __free_pages_core().

This has little impact on KASAN memory tracking.

Assuming that there are no references to newly exposed pages before they
are ever allocated, there won't be any intended (but buggy) accesses to
that memory that KASAN would normally detect.

However, with this patch, KASAN stops detecting wild and large
out-of-bounds accesses that happen to land on a fresh memory page that
was never allocated. This is taken as an acceptable trade-off.

All memory allocated normally when the boot is over keeps getting
poisoned as usual.

Signed-off-by: Andrey Konovalov <andreyknvl@xxxxxxxxxx>
---

Resending with Change-Id dropped.

---
mm/page_alloc.c | 43 ++++++++++++++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0b55c9c95364..f10966e3b4a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -108,6 +108,17 @@ typedef int __bitwise fpi_t;
*/
#define FPI_TO_TAIL ((__force fpi_t)BIT(1))

+/*
+ * Don't poison memory with KASAN.
+ * During boot, all non-reserved memblock memory is exposed to the buddy
+ * allocator. Poisoning all that memory lengthens boot time, especially on
+ * systems with large amount of RAM. This flag is used to skip that poisoning.
+ * Assuming that there are no references to those newly exposed pages before
+ * they are ever allocated, this has little effect on KASAN memory tracking.
+ * All memory allocated normally after boot gets poisoned as usual.
+ */
+#define FPI_SKIP_KASAN_POISON ((__force fpi_t)BIT(2))
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
@@ -384,10 +395,14 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
* on-demand allocation and then freed again before the deferred pages
* initialization is done, but this is not likely to happen.
*/
-static inline void kasan_free_nondeferred_pages(struct page *page, int order)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ fpi_t fpi_flags)
{
- if (!static_branch_unlikely(&deferred_pages))
- kasan_free_pages(page, order);
+ if (static_branch_unlikely(&deferred_pages))
+ return;
+ if (fpi_flags & FPI_SKIP_KASAN_POISON)
+ return;
+ kasan_free_pages(page, order);
}

/* Returns true if the struct page for the pfn is uninitialised */
@@ -438,7 +453,13 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
return false;
}
#else
-#define kasan_free_nondeferred_pages(p, o) kasan_free_pages(p, o)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ fpi_t fpi_flags)
+{
+ if (fpi_flags & FPI_SKIP_KASAN_POISON)
+ return;
+ kasan_free_pages(page, order);
+}

static inline bool early_page_uninitialised(unsigned long pfn)
{
@@ -1216,7 +1237,7 @@ static void kernel_init_free_pages(struct page *page, int numpages)
}

static __always_inline bool free_pages_prepare(struct page *page,
- unsigned int order, bool check_free)
+ unsigned int order, bool check_free, fpi_t fpi_flags)
{
int bad = 0;

@@ -1290,7 +1311,7 @@ static __always_inline bool free_pages_prepare(struct page *page,

debug_pagealloc_unmap_pages(page, 1 << order);

- kasan_free_nondeferred_pages(page, order);
+ kasan_free_nondeferred_pages(page, order, fpi_flags);

return true;
}
@@ -1303,7 +1324,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
*/
static bool free_pcp_prepare(struct page *page)
{
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE);
}

static bool bulkfree_pcp_prepare(struct page *page)
@@ -1323,9 +1344,9 @@ static bool bulkfree_pcp_prepare(struct page *page)
static bool free_pcp_prepare(struct page *page)
{
if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE);
else
- return free_pages_prepare(page, 0, false);
+ return free_pages_prepare(page, 0, false, FPI_NONE);
}

static bool bulkfree_pcp_prepare(struct page *page)
@@ -1533,7 +1554,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,
int migratetype;
unsigned long pfn = page_to_pfn(page);

- if (!free_pages_prepare(page, order, true))
+ if (!free_pages_prepare(page, order, true, fpi_flags))
return;

migratetype = get_pfnblock_migratetype(page, pfn);
@@ -1570,7 +1591,7 @@ void __free_pages_core(struct page *page, unsigned int order)
* Bypass PCP and place fresh pages right to the tail, primarily
* relevant for memory onlining.
*/
- __free_pages_ok(page, order, FPI_TO_TAIL);
+ __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
}

#ifdef CONFIG_NEED_MULTIPLE_NODES
--
2.30.0.478.g8a0d178c01-goog