[PATCH v3 3/6] x86/sev: Add support to perform RMP optimizations asynchronously
From: Ashish Kalra
Date: Mon Mar 30 2026 - 18:37:33 EST
From: Ashish Kalra <ashish.kalra@xxxxxxx>
As SEV-SNP is enabled by default on boot when an RMP table is
allocated by BIOS, the hypervisor and non-SNP guests are subject to
RMP write checks to provide integrity of SNP guest memory.
RMPOPT is a new instruction that minimizes the performance overhead of
RMP checks on the hypervisor and on non-SNP guests by allowing RMP
checks to be skipped for 1GB regions of memory that are known not to
contain any SEV-SNP guest memory.
Add support for performing RMP optimizations asynchronously using a
dedicated workqueue, scheduling delayed work to perform RMP
optimizations every 10 seconds.
Enable RMPOPT optimizations globally for all system RAM up to 2TB at
RMP initialization time. RMP checks can initially be skipped for 1GB
memory ranges that do not contain SEV-SNP guest memory (excluding
preassigned pages such as the RMP table and firmware pages). As SNP
guests are launched, RMPUPDATE will disable the corresponding RMPOPT
optimizations.
Suggested-by: Thomas Lendacky <thomas.lendacky@xxxxxxx>
Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
arch/x86/virt/svm/sev.c | 114 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index dc6a8e102cdc..1644f8a9b2a2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -19,6 +19,7 @@
#include <linux/iommu.h>
#include <linux/amd-iommu.h>
#include <linux/nospec.h>
+#include <linux/workqueue.h>
#include <asm/sev.h>
#include <asm/processor.h>
@@ -124,6 +125,19 @@ static void *rmp_bookkeeping __ro_after_init;
static u64 probed_rmp_base, probed_rmp_size;
+/* Function codes for the RMPOPT instruction (passed in RCX). */
+enum rmpopt_function {
+ RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS,
+ RMPOPT_FUNC_REPORT_STATUS
+};
+
+/* Delay, in milliseconds, between asynchronous RMPOPT work runs. */
+#define RMPOPT_WORK_TIMEOUT 10000
+
+/* Dedicated workqueue and work item driving the asynchronous RMPOPT pass. */
+static struct workqueue_struct *rmpopt_wq;
+static struct delayed_work rmpopt_delayed_work;
+
+/* One thread per core; these CPUs issue RMPOPT in parallel. */
+static cpumask_t primary_threads_cpumask;
+/* 1GB-aligned physical address range covered by the RMPOPT pass. */
+static phys_addr_t rmpopt_pa_start, rmpopt_pa_end;
+
static LIST_HEAD(snp_leaked_pages_list);
static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
@@ -477,6 +491,75 @@ static bool __init setup_rmptable(void)
return true;
}
+/*
+ * Issue the RMPOPT instruction (opcode F2 0F 01 FC).
+ *
+ * @rax: system physical address of the region to optimize (callers pass
+ *       a 1GB-aligned address)
+ * @rcx: RMPOPT function code (enum rmpopt_function)
+ *
+ * Returns the resulting carry flag; presumably set when the region was
+ * successfully optimized -- confirm against the APM RMPOPT description.
+ *
+ * Note: the "=@ccc" flag-output constraint already declares the condition
+ * codes as an output, and GCC rejects combining a flag output with an
+ * explicit "cc" clobber, so only "memory" is listed as clobbered.
+ */
+static inline bool __rmpopt(u64 rax, u64 rcx)
+{
+	bool optimized;
+
+	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
+		     : "=@ccc" (optimized)
+		     : "a" (rax), "c" (rcx)
+		     : "memory");
+
+	return optimized;
+}
+
+/*
+ * on_each_cpu_mask()-compatible wrapper around __rmpopt().
+ *
+ * 'val' is a system physical address; it is rounded down to the
+ * containing 1GB boundary before the instruction is issued.
+ */
+static void rmpopt(void *val)
+{
+	u64 gb_aligned_pa = ALIGN_DOWN((u64)val, SZ_1G);
+
+	__rmpopt(gb_aligned_pa, RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS);
+}
+
+/*
+ * Workqueue handler that walks [rmpopt_pa_start, rmpopt_pa_end) in 1GB
+ * steps and issues RMPOPT for each range, first locally and then on the
+ * remaining primary threads.
+ */
+static void rmpopt_work_handler(struct work_struct *work)
+{
+	phys_addr_t pa;
+	int cpu;
+
+	pr_info("Attempt RMP optimizations on physical address range @1GB alignment [0x%016llx - 0x%016llx]\n",
+		rmpopt_pa_start, rmpopt_pa_end);
+
+	/*
+	 * RMPOPT optimizations skip RMP checks at 1GB granularity if this
+	 * range of memory does not contain any SNP guest memory. Optimize
+	 * each range on one CPU first, then let other CPUs execute RMPOPT
+	 * in parallel so they can skip most work as the range has already
+	 * been optimized.
+	 */
+
+	/*
+	 * This handler runs on an unbound workqueue, i.e. in preemptible
+	 * context where the task may migrate between CPUs. Sample the CPU
+	 * number once (raw_smp_processor_id() avoids the DEBUG_PREEMPT
+	 * splat) so that the same CPU that is cleared from
+	 * primary_threads_cpumask is restored to it at the end, even if
+	 * the task migrates in between.
+	 */
+	cpu = raw_smp_processor_id();
+	cpumask_clear_cpu(cpu, &primary_threads_cpumask);
+
+	/* First pass: optimize every 1GB range on the current CPU. */
+	for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G)
+		rmpopt((void *)pa);
+
+	/* Second pass: the remaining primary threads, in parallel. */
+	for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G) {
+		on_each_cpu_mask(&primary_threads_cpumask, rmpopt,
+				 (void *)pa, true);
+
+		/* Give a chance for other threads to run */
+		cond_resched();
+	}
+
+	cpumask_set_cpu(cpu, &primary_threads_cpumask);
+}
+
+/*
+ * Schedule (or re-arm) the asynchronous RMPOPT pass over all physical
+ * memory. An "early" request runs almost immediately; otherwise the
+ * work is delayed by RMPOPT_WORK_TIMEOUT milliseconds. No-op when the
+ * dedicated workqueue was never created.
+ */
+static void rmpopt_all_physmem(bool early)
+{
+	unsigned long delay_ms = early ? 1 : RMPOPT_WORK_TIMEOUT;
+
+	if (!rmpopt_wq)
+		return;
+
+	queue_delayed_work(rmpopt_wq, &rmpopt_delayed_work,
+			   msecs_to_jiffies(delay_ms));
+}
+
static __init void configure_and_enable_rmpopt(void)
{
phys_addr_t pa_start = ALIGN_DOWN(PFN_PHYS(min_low_pfn), SZ_1G);
@@ -499,6 +582,37 @@ static __init void configure_and_enable_rmpopt(void)
*/
for_each_online_cpu(cpu)
wrmsrq_on_cpu(cpu, MSR_AMD64_RMPOPT_BASE, rmpopt_base);
+
+ /*
+ * Create an RMPOPT-specific workqueue to avoid scheduling
+ * RMPOPT workitem on the global system workqueue.
+ */
+ rmpopt_wq = alloc_workqueue("rmpopt_wq", WQ_UNBOUND, 1);
+ if (!rmpopt_wq)
+ return;
+
+ INIT_DELAYED_WORK(&rmpopt_delayed_work, rmpopt_work_handler);
+
+ rmpopt_pa_start = pa_start;
+ rmpopt_pa_end = ALIGN(PFN_PHYS(max_pfn), SZ_1G);
+
+ /* Limit memory scanning to the first 2 TB of RAM */
+ if ((rmpopt_pa_end - rmpopt_pa_start) > SZ_2T)
+ rmpopt_pa_end = rmpopt_pa_start + SZ_2T;
+
+ /* Only one thread per core needs to issue RMPOPT instruction */
+ for_each_online_cpu(cpu) {
+ if (!topology_is_primary_thread(cpu))
+ continue;
+
+ cpumask_set_cpu(cpu, &primary_threads_cpumask);
+ }
+
+ /*
+ * Once all per-CPU RMPOPT tables have been configured, enable RMPOPT
+ * optimizations on all physical memory.
+ */
+ rmpopt_all_physmem(true);
}
/*
--
2.43.0