[PATCH v4 4/7] x86/sev: Add support to perform RMP optimizations asynchronously
From: Ashish Kalra
Date: Mon Apr 13 2026 - 15:45:51 EST
From: Ashish Kalra <ashish.kalra@xxxxxxx>
When SEV-SNP is enabled, all writes to memory are checked to ensure
the integrity of SNP guest memory. This imposes a performance overhead
on the whole system.
RMPOPT is a new instruction that minimizes the performance overhead of
RMP checks on the hypervisor and on non-SNP guests by allowing RMP
checks to be skipped for 1GB regions of memory that are known not to
contain any SEV-SNP guest memory.
Add support for performing RMP optimizations asynchronously using a
dedicated workqueue.
Enable RMPOPT optimizations globally for all system RAM up to 2TB at
RMP initialization time. RMP checks can initially be skipped for 1GB
memory ranges that do not contain SEV-SNP guest memory (excluding
preassigned pages such as the RMP table and firmware pages). As SNP
guests are launched, RMPUPDATE will disable the corresponding RMPOPT
optimizations.
Suggested-by: Thomas Lendacky <thomas.lendacky@xxxxxxx>
Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
arch/x86/virt/svm/sev.c | 117 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 115 insertions(+), 2 deletions(-)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 4f942abaf86e..56c9fc3fe53a 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -19,6 +19,7 @@
#include <linux/iommu.h>
#include <linux/amd-iommu.h>
#include <linux/nospec.h>
+#include <linux/workqueue.h>
#include <asm/sev.h>
#include <asm/processor.h>
@@ -125,7 +126,17 @@ static void *rmp_bookkeeping __ro_after_init;
static u64 probed_rmp_base, probed_rmp_size;
static cpumask_t rmpopt_cpumask;
-static phys_addr_t rmpopt_pa_start;
+static phys_addr_t rmpopt_pa_start, rmpopt_pa_end;
+
+enum rmpopt_function {
+ RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS,
+ RMPOPT_FUNC_REPORT_STATUS
+};
+
+#define RMPOPT_WORK_TIMEOUT 10000
+
+static struct workqueue_struct *rmpopt_wq;
+static struct delayed_work rmpopt_delayed_work;
static LIST_HEAD(snp_leaked_pages_list);
static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
@@ -560,6 +571,83 @@ void snp_shutdown(void)
}
EXPORT_SYMBOL_FOR_MODULES(snp_shutdown, "ccp");
+/*
+ * Execute the RMPOPT instruction (opcode F2 0F 01 FC).
+ *
+ * @rax: 1GB-aligned system physical address of the range to optimize.
+ * @rcx: RMPOPT function code (enum rmpopt_function).
+ *
+ * Returns the state of the carry flag set by the instruction, which
+ * indicates whether the optimization was applied.
+ *
+ * Note: a "cc" clobber must not be combined with a flag output operand
+ * ("=@ccc") - GCC rejects that combination on x86. The flag output
+ * already tells the compiler the condition codes are modified.
+ */
+static inline bool __rmpopt(u64 rax, u64 rcx)
+{
+	bool optimized;
+
+	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
+		     : "=@ccc" (optimized)
+		     : "a" (rax), "c" (rcx)
+		     : "memory");
+
+	return optimized;
+}
+
+/*
+ * SMP callback: apply the RMPOPT optimization for the 1GB-aligned
+ * region containing the given system physical address.
+ *
+ * 'val' is a system physical address cast to a pointer, as required
+ * by the on_each_cpu_mask() callback signature.
+ */
+static void rmpopt_smp(void *val)
+{
+	u64 region = ALIGN_DOWN((u64)val, SZ_1G);
+
+	__rmpopt(region, RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS);
+}
+
+/* Apply the RMPOPT optimization for the 1GB region containing @pa. */
+static void rmpopt(u64 pa)
+{
+	__rmpopt(ALIGN_DOWN(pa, SZ_1G), RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS);
+}
+
+/*
+ * RMPOPT optimizations skip RMP checks at 1GB granularity if this
+ * range of memory does not contain any SNP guest memory.
+ */
+static void rmpopt_work_handler(struct work_struct *work)
+{
+	bool current_cpu_cleared;
+	phys_addr_t pa;
+	int cpu;
+
+	pr_info("Attempt RMP optimizations on physical address range @1GB alignment [0x%016llx - 0x%016llx]\n",
+		rmpopt_pa_start, rmpopt_pa_end);
+
+	/*
+	 * RMPOPT scans the RMP table and stores the result of the scan in
+	 * reserved processor memory. The RMP scan is the most expensive
+	 * part. If a second RMPOPT occurs, it can skip the expensive scan
+	 * if it finds a cached result in the reserved processor memory.
+	 *
+	 * Do RMPOPT on one CPU alone. Then, follow that up with RMPOPT
+	 * on every other primary thread. This potentially allows the
+	 * followers to use the "cached" scan results to avoid repeating
+	 * full scans.
+	 */
+
+	/*
+	 * This work item runs on an unbound workqueue, so the task is
+	 * preemptible and may migrate between CPUs. Pin the CPU while
+	 * sampling it so that the same CPU that is cleared from the
+	 * RMPOPT cpumask is the one restored to it below.
+	 */
+	cpu = get_cpu();
+	current_cpu_cleared = cpumask_test_and_clear_cpu(cpu, &rmpopt_cpumask);
+	put_cpu();
+
+	/* Do the expensive, cache-warming scans on the current CPU first. */
+	for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G)
+		rmpopt(pa);
+
+	/* Follow up on the remaining primary threads, reusing cached results. */
+	for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G) {
+		on_each_cpu_mask(&rmpopt_cpumask, rmpopt_smp,
+				 (void *)pa, true);
+
+		/* Give a chance for other threads to run */
+		cond_resched();
+	}
+
+	if (current_cpu_cleared)
+		cpumask_set_cpu(cpu, &rmpopt_cpumask);
+}
+
void snp_setup_rmpopt(void)
{
u64 rmpopt_base;
@@ -568,9 +656,20 @@ void snp_setup_rmpopt(void)
if (!cpu_feature_enabled(X86_FEATURE_RMPOPT))
return;
+ /*
+ * Create an RMPOPT-specific workqueue to avoid scheduling
+ * RMPOPT workitem on the global system workqueue.
+ */
+ rmpopt_wq = alloc_workqueue("rmpopt_wq", WQ_UNBOUND, 1);
+ if (!rmpopt_wq) {
+ setup_clear_cpu_cap(X86_FEATURE_RMPOPT);
+ return;
+ }
+
/*
* RMPOPT_BASE MSR is per-core, so only one thread per core needs to
- * setup RMPOPT_BASE MSR.
+ * setup RMPOPT_BASE MSR. Additionally only one thread per core needs
+ * to issue the RMPOPT instruction.
*/
for_each_online_cpu(cpu) {
@@ -590,6 +689,20 @@ void snp_setup_rmpopt(void)
* up to 2 TB of system RAM on all CPUs.
*/
wrmsrq_on_cpus(&rmpopt_cpumask, MSR_AMD64_RMPOPT_BASE, rmpopt_base);
+
+ INIT_DELAYED_WORK(&rmpopt_delayed_work, rmpopt_work_handler);
+
+ rmpopt_pa_end = ALIGN(PFN_PHYS(max_pfn), SZ_1G);
+
+ /* Limit memory scanning to the first 2 TB of RAM */
+ if ((rmpopt_pa_end - rmpopt_pa_start) > SZ_2T)
+ rmpopt_pa_end = rmpopt_pa_start + SZ_2T;
+
+ /*
+ * Once all per-CPU RMPOPT tables have been configured, enable RMPOPT
+ * optimizations on all physical memory.
+ */
+ queue_delayed_work(rmpopt_wq, &rmpopt_delayed_work, 0);
}
EXPORT_SYMBOL_FOR_MODULES(snp_setup_rmpopt, "ccp");
--
2.43.0