On 12/2/2021 10:42 PM, Tom Lendacky wrote:
On 12/1/21 10:02 AM, Tianyu Lan wrote:
From: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>
In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
extra address space which is above shared_gpa_boundary (E.G 39 bit
address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
physical address will be original physical address + shared_gpa_boundary.
The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
memory(vTOM). Memory addresses below vTOM are automatically treated as
private while memory above vTOM is treated as shared.
Expose swiotlb_unencrypted_base for platforms to set unencrypted
memory base offset and platform calls swiotlb_update_mem_attributes()
to remap swiotlb mem to unencrypted address space. memremap() can
not be called in the early stage and so put remapping code into
swiotlb_update_mem_attributes(). Store remap address and use it to copy
data from/to swiotlb bounce buffer.
Signed-off-by: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>
This patch results in the following stack trace during a bare-metal boot
on my EPYC system with SME active (e.g. mem_encrypt=on):
[ 0.123932] BUG: Bad page state in process swapper pfn:108001
[ 0.123942] page:(____ptrval____) refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x108001
[ 0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
[ 0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 0000000000000000
[ 0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 0000000000000000
[ 0.123955] page dumped because: nonzero mapcount
[ 0.123957] Modules linked in:
[ 0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 5.16.0-rc3-sos-custom #2
[ 0.123964] Hardware name: AMD Corporation
[ 0.123967] Call Trace:
[ 0.123971] <TASK>
[ 0.123975] dump_stack_lvl+0x48/0x5e
[ 0.123985] bad_page.cold+0x65/0x96
[ 0.123990] __free_pages_ok+0x3a8/0x410
[ 0.123996] memblock_free_all+0x171/0x1dc
[ 0.124005] mem_init+0x1f/0x14b
[ 0.124011] start_kernel+0x3b5/0x6a1
[ 0.124016] secondary_startup_64_no_verify+0xb0/0xbb
[ 0.124022] </TASK>
I see ~40 of these traces, each for different pfns.
Thanks,
Tom
Hi Tom:
Thanks for your test. Could you help to test the following patch and check whether it can fix the issue.
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
* @end: The end address of the swiotlb memory pool. Used to do a quick
* range check to see if the memory was in fact allocated by this
* API.
+ * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ * may be remapped in the memory encrypted case and store virtual
+ * address for bounce buffer operation.
* @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
* @end. For default swiotlb, this is command line adjustable via
* setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
struct io_tlb_mem {
phys_addr_t start;
phys_addr_t end;
+ void *vaddr;
unsigned long nslabs;
unsigned long used;
unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
}
#endif /* CONFIG_DMA_RESTRICTED_POOL */
+extern phys_addr_t swiotlb_unencrypted_base;
+
#endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..34e6ade4f73c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@
#include <asm/io.h>
#include <asm/dma.h>
+#include <linux/io.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
struct io_tlb_mem io_tlb_default_mem;
+phys_addr_t swiotlb_unencrypted_base;
+
/*
* Max segment that we can provide which (if pages are contingous) will
* not be bounced (unless SWIOTLB_FORCE is set).
@@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
return DIV_ROUND_UP(val, IO_TLB_SIZE);
}
+/*
+ * Remap swioltb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+ void *vaddr = NULL;
+
+ if (swiotlb_unencrypted_base) {
+ phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+ vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+ if (!vaddr)
+ pr_err("Failed to map the unencrypted memory %llx size %lx.\n",
+ paddr, bytes);
+ }
+
+ return vaddr;
+}
+
/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
@@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
- memset(vaddr, 0, bytes);
+
+ mem->vaddr = swiotlb_mem_remap(mem, bytes);
+ if (!mem->vaddr)
+ mem->vaddr = vaddr;
+
+ memset(mem->vaddr, 0, bytes);
}
static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
@@ -196,7 +225,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
}
+
+ /*
+ * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+ * be remapped and cleared in swiotlb_update_mem_attributes.
+ */
+ if (swiotlb_unencrypted_base)
+ return;
+
memset(vaddr, 0, bytes);
+ mem->vaddr = vaddr;
+ return;
}
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
- unsigned char *vaddr = phys_to_virt(tlb_addr);
+ unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
unsigned int tlb_offset, orig_addr_offset;
if (orig_addr == INVALID_PHYS_ADDR)
Thanks.