[PATCH 1/5] mm: Add support for unaccepted memory
From: Kirill A. Shutemov
Date: Tue Aug 10 2021 - 02:26:44 EST
UEFI Specification version 2.9 introduces the concept of memory acceptance:
Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
require memory to be accepted before it can be used by the guest.
Accepting happens via a protocol specific to the Virtual Machine
platform.
Accepting memory is costly and it makes the VMM allocate memory for the
accepted guest physical address range. It's better to postpone memory
acceptance until memory is needed. It lowers boot time and reduces
memory overhead.
Support of such memory requires a few changes in core-mm code:
- memblock has to accept memory on allocation;
- page allocator has to accept memory on the first allocation of the
page;
Memblock change is trivial.
Page allocator is modified to accept pages on the first allocation.
PageOffline() is used to indicate that the page requires acceptance.
The flag is currently used by hotplug and balloon. Such pages are not
available to the page allocator.
An architecture has to provide three helpers if it wants to support
unaccepted memory:
- accept_memory() makes a range of physical addresses accepted.
- maybe_set_page_offline() marks a page PageOffline() if it requires
acceptance. Used during boot to put pages on free lists.
- clear_page_offline() makes a page accepted and clears
PageOffline().
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
mm/internal.h | 14 ++++++++++++++
mm/memblock.c | 1 +
mm/page_alloc.c | 13 ++++++++++++-
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/mm/internal.h b/mm/internal.h
index 31ff935b2547..d2fc8a17fbe0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -662,4 +662,18 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);
+#ifndef CONFIG_UNACCEPTED_MEMORY
+static inline void maybe_set_page_offline(struct page *page, unsigned int order)
+{
+}
+
+static inline void clear_page_offline(struct page *page, unsigned int order)
+{
+}
+
+static inline void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+}
+#endif
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index 28a813d9e955..8c1bf08f2b0b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1370,6 +1370,7 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
*/
kmemleak_alloc_phys(found, size, 0, 0);
+ accept_memory(found, found + size);
return found;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 856b175c15a4..892347d9a507 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -899,6 +899,9 @@ static inline bool page_is_buddy(struct page *page, struct page *buddy,
if (buddy_order(buddy) != order)
return false;
+ if (PageOffline(buddy) || PageOffline(page))
+ return false;
+
/*
* zone check is done late to avoid uselessly calculating
* zone/node ids for pages that could never merge.
@@ -1001,6 +1004,9 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
if (page_reported(page))
__ClearPageReported(page);
+ if (PageOffline(page))
+ clear_page_offline(page, order);
+
list_del(&page->lru);
__ClearPageBuddy(page);
set_page_private(page, 0);
@@ -1165,7 +1171,8 @@ static inline void __free_one_page(struct page *page,
static inline bool page_expected_state(struct page *page,
unsigned long check_flags)
{
- if (unlikely(atomic_read(&page->_mapcount) != -1))
+ if (unlikely(atomic_read(&page->_mapcount) != -1) &&
+ !PageOffline(page))
return false;
if (unlikely((unsigned long)page->mapping |
@@ -1748,6 +1755,8 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
{
if (early_page_uninitialised(pfn))
return;
+
+ maybe_set_page_offline(page, order);
__free_pages_core(page, order);
}
@@ -1839,10 +1848,12 @@ static void __init deferred_free_range(unsigned long pfn,
if (nr_pages == pageblock_nr_pages &&
(pfn & (pageblock_nr_pages - 1)) == 0) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ maybe_set_page_offline(page, pageblock_order);
__free_pages_core(page, pageblock_order);
return;
}
+ accept_memory(pfn << PAGE_SHIFT, (pfn + nr_pages) << PAGE_SHIFT);
for (i = 0; i < nr_pages; i++, page++, pfn++) {
if ((pfn & (pageblock_nr_pages - 1)) == 0)
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
--
2.31.1