[PATCH v6 08/11] x86/tdx: Add APIs to support Dynamic PAMT ops from KVM's fault path
From: Rick Edgecombe
Date: Mon May 25 2026 - 22:37:40 EST
When handling an EPT violation, KVM holds a spinlock while manipulating
the EPT. Before entering the spinlock it doesn't know how many EPT page
tables will need to be installed or whether a huge page will be used. For
this reason it allocates a worst case number of page tables that it might
need as part of servicing the EPT violation.
Under Dynamic PAMT these pre-allocated pages will potentially need to have
Dynamic PAMT backing pages installed for them. KVM already has helpers to
manage topping up page caches before taking the MMU lock, but those caches
cannot be passed from KVM to arch/x86 code.
The problem of how and when to install the DPAMT backing pages for the
pages given to the TDX module during the fault path has had a lot of
design attempts.
- Extracting KVM's MMU caches requires too much inlined code added to
headers.
- A few varieties of installing Dynamic PAMT backing when allocating the
S-EPT page tables. [0][1]
- Using mempool_t to transfer the pages between KVM and arch/x86 doesn't
work because the component is designed more around maintaining a pool
of pages than around topping up a continually drained cache.
So don't do these as they all had various problems. Instead just create a
small simple data structure to use for handing a pre-allocated list of
pages between KVM and arch/x86 code. Model this on KVM's existing MMU
memory caches.
Add a tdx_pamt_cache arg to tdx_pamt_get() so it can draw pages from a
cache when needed. Not all DPAMT page installations will happen under
spinlock, for example control pages. So have tdx_pamt_get() maintain the
existing behavior of allocating from the page allocator when NULL is
passed for the struct tdx_pamt_cache arg. This prevents excess allocations
in cases where they can be avoided.
Export the new helpers for KVM.
Assisted-by: GitHub Copilot:claude-opus-4-6 Claude:claude-opus-4-7
Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
Link: https://lore.kernel.org/kvm/de05853257e9cc66998101943f78a4b7e6e3d741.camel@xxxxxxxxx/ [0]
Link: https://lore.kernel.org/kvm/aYprxnSHKHUtk7pt@xxxxxxxxxx/ [1]
---
v6:
- Filled out log from Sean's series
---
arch/x86/include/asm/tdx.h | 17 ++++++++++
arch/x86/virt/vmx/tdx/tdx.c | 65 +++++++++++++++++++++++++++++++++----
2 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 74e75db5728c7..191da84bbf2a1 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -155,6 +155,23 @@ static inline bool tdx_supports_dynamic_pamt(const struct tdx_sys_info *sysinfo)
return false; /* To be enabled when kernel is ready */
}
+/* Simple structure for pre-allocating Dynamic PAMT pages outside of locks. */
+struct tdx_pamt_cache {
+ struct list_head page_list; /* cached pages, linked through page->lru */
+ int cnt; /* number of pages currently on page_list */
+};
+
+static inline void tdx_init_pamt_cache(struct tdx_pamt_cache *cache)
+{
+ INIT_LIST_HEAD(&cache->page_list); /* start with an empty page list */
+ cache->cnt = 0; /* keep the count in step with the (empty) list */
+}
+
+void tdx_free_pamt_cache(struct tdx_pamt_cache *cache);
+int tdx_topup_pamt_cache(struct tdx_pamt_cache *cache, unsigned long npages);
+int tdx_pamt_get(kvm_pfn_t pfn, struct tdx_pamt_cache *cache);
+void tdx_pamt_put(kvm_pfn_t pfn);
+
int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index c41c632a4cdf2..3544794fb092a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1971,12 +1971,33 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn)
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
-static int alloc_pamt_array(struct page **pamt_pages)
+static struct page *tdx_alloc_page_pamt_cache(struct tdx_pamt_cache *cache)
+{
+ struct page *page;
+
+ page = list_first_entry_or_null(&cache->page_list, struct page, lru);
+ if (page) { /* cache not empty: detach the page and account for it */
+ list_del(&page->lru);
+ cache->cnt--;
+ }
+
+ return page; /* NULL when the cache has no pages left */
+}
+
+static struct page *alloc_dpamt_page(struct tdx_pamt_cache *cache)
+{
+ if (cache) /* cache supplied: draw from it only, no allocator fallback */
+ return tdx_alloc_page_pamt_cache(cache);
+
+ return alloc_page(GFP_KERNEL_ACCOUNT); /* NULL cache: allocate directly */
+}
+
+static int alloc_pamt_array(struct page **pamt_pages, struct tdx_pamt_cache *cache)
{
int i, j;
for (i = 0; i < TDX_DPAMT_ENTRY_PAGE_CNT; i++) {
- pamt_pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
+ pamt_pages[i] = alloc_dpamt_page(cache);
if (!pamt_pages[i])
goto err;
}
@@ -2047,7 +2068,7 @@ static u64 tdh_phymem_pamt_remove(kvm_pfn_t pfn, struct page **pamt_pages)
static DEFINE_SPINLOCK(pamt_lock);
/* Bump PAMT refcount for the given page and allocate PAMT memory if needed */
-static int tdx_pamt_get(kvm_pfn_t pfn)
+int tdx_pamt_get(kvm_pfn_t pfn, struct tdx_pamt_cache *cache)
{
struct page *pamt_pages[TDX_DPAMT_ENTRY_PAGE_CNT];
atomic_t *pamt_refcount;
@@ -2066,7 +2087,7 @@ static int tdx_pamt_get(kvm_pfn_t pfn)
if (atomic_inc_not_zero(pamt_refcount))
return 0;
- ret = alloc_pamt_array(pamt_pages);
+ ret = alloc_pamt_array(pamt_pages, cache);
if (ret)
return ret;
@@ -2106,12 +2127,13 @@ static int tdx_pamt_get(kvm_pfn_t pfn)
free_pamt_array(pamt_pages);
return ret;
}
+EXPORT_SYMBOL_FOR_KVM(tdx_pamt_get);
/*
* Drop PAMT refcount for the given page and free PAMT memory if it is no
* longer needed.
*/
-static void tdx_pamt_put(kvm_pfn_t pfn)
+void tdx_pamt_put(kvm_pfn_t pfn)
{
struct page *pamt_pages[TDX_DPAMT_ENTRY_PAGE_CNT] = {};
atomic_t *pamt_refcount;
@@ -2152,6 +2174,37 @@ static void tdx_pamt_put(kvm_pfn_t pfn)
free_pamt_array(pamt_pages);
}
+EXPORT_SYMBOL_FOR_KVM(tdx_pamt_put);
+
+void tdx_free_pamt_cache(struct tdx_pamt_cache *cache)
+{
+ struct page *page;
+
+ while ((page = tdx_alloc_page_pamt_cache(cache))) /* pop until empty */
+ __free_page(page); /* hand each page back to the page allocator */
+}
+EXPORT_SYMBOL_FOR_KVM(tdx_free_pamt_cache);
+
+int tdx_topup_pamt_cache(struct tdx_pamt_cache *cache, unsigned long npages)
+{
+ if (WARN_ON_ONCE(!tdx_supports_dynamic_pamt(&tdx_sysinfo)))
+ return 0; /* warn once, add nothing, and still report success */
+
+ npages *= TDX_DPAMT_ENTRY_PAGE_CNT; /* scale to the per-entry page count */
+
+ while (cache->cnt < npages) { /* only allocate the shortfall */
+ struct page *page = alloc_page(GFP_KERNEL_ACCOUNT);
+
+ if (!page)
+ return -ENOMEM; /* pages added so far stay in the cache */
+
+ list_add(&page->lru, &cache->page_list);
+ cache->cnt++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_FOR_KVM(tdx_topup_pamt_cache);
/*
* Return a page that can be gifted to the TDX-Module for use as a "control"
@@ -2167,7 +2220,7 @@ struct page *tdx_alloc_control_page(void)
if (!page)
return NULL;
- if (tdx_pamt_get(page_to_pfn(page))) {
+ if (tdx_pamt_get(page_to_pfn(page), NULL)) {
__free_page(page);
return NULL;
}
--
2.54.0