Re: [PATCH] riscv: Add RISC-V svpbmt extension

From: Nick Kossifidis
Date: Thu Sep 23 2021 - 05:48:51 EST


Hello Guo,

Στις 2021-09-23 10:27, guoren@xxxxxxxxxx έγραψε:
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index e534f6a7cfa1..1825cd8db0de 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -56,7 +56,9 @@ properties:
enum:
- riscv,sv32
- riscv,sv39
+ - riscv,sv39,svpbmt
- riscv,sv48
+ - riscv,sv48,svpbmt
- riscv,none

Isn't svpbmt orthogonal to the mmu type ? It's a functionality that can be present on either sv39/48/57 so why not have another "svpbmt" property directly on the cpu node ?

+ * rv64 PTE format:
+ * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 |
3 | 2 | 1 | 0
+ * N MT RSV PFN reserved for SW D A G U
X W R V
+ * [62:61] Memory Type definitions:
+ * 00 - PMA Normal Cacheable, No change to implied PMA memory type
+ * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory
+ * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory
+ * 11 - Rsvd Reserved for future standard use
+ */
+#define _PAGE_MT_MASK ((u64)0x3 << 61)
+#define _PAGE_MT_PMA ((u64)0x0 << 61)
+#define _PAGE_MT_NC ((u64)0x1 << 61)
+#define _PAGE_MT_IO ((u64)0x2 << 61)
+

It'd be cleaner IMHO if you defined _PAGE_MT_MASK as (_PAGE_MT_PMA | _PAGE_MT_NC | _PAGE_MT_IO), like other masks are defined (e.g. _PAGE_CHG_MASK on the same file). I also suggest you use unsigned long instead of u64 for consistency.

+enum {
+ MT_PMA,
+ MT_NC,
+ MT_IO,
+ MT_MAX
+};
+
+extern struct __riscv_svpbmt_struct {
+ unsigned long mask;
+ unsigned long mt[MT_MAX];
+} __riscv_svpbmt;
+
+#define _PAGE_DMA_MASK __riscv_svpbmt.mask
+#define _PAGE_DMA_PMA __riscv_svpbmt.mt[MT_PMA]
+#define _PAGE_DMA_NC __riscv_svpbmt.mt[MT_NC]
+#define _PAGE_DMA_IO __riscv_svpbmt.mt[MT_IO]
+#else
+#define _PAGE_DMA_MASK 0
+#define _PAGE_DMA_PMA 0
+#define _PAGE_DMA_NC 0
+#define _PAGE_DMA_IO 0
+#endif /* CONFIG_64BIT */
+#endif /* __ASSEMBLY__ */
+
#define _PAGE_SPECIAL _PAGE_SOFT
#define _PAGE_TABLE _PAGE_PRESENT


This struct is not useful as part of enabling the standard Svpbmt extension on Linux, we can set _PAGE_DMA_* macros directly on this patch and introduce the struct approach later on, when we also define alternative values for _PAGE_DMA_* flags. Also to someone reading the code the struct doesn't make sense without some documentation on why it's needed. Finally why the enum / array ? Why not just have different fields on the struct ?

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 39b550310ec6..d07ba586c866 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -136,7 +136,8 @@
| _PAGE_PRESENT \
| _PAGE_ACCESSED \
| _PAGE_DIRTY \
- | _PAGE_GLOBAL)
+ | _PAGE_GLOBAL \
+ | _PAGE_DMA_PMA)


That's a bit misleading, it's like marking the kernel pages as DMAable.

-/*
- * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
- * change the properties of memory regions.
- */
-#define _PAGE_IOREMAP _PAGE_KERNEL
+#define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_DMA_MASK) | _PAGE_DMA_IO)
+
+#define PAGE_IOREMAP __pgprot(_PAGE_IOREMAP)

This isn't used anywhere.

@@ -490,6 +489,28 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}

+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t _prot)
+{
+ unsigned long prot = pgprot_val(_prot);
+
+ prot &= ~_PAGE_DMA_MASK;
+ prot |= _PAGE_DMA_IO;
+
+ return __pgprot(prot);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
+{
+ unsigned long prot = pgprot_val(_prot);
+
+ prot &= ~_PAGE_DMA_MASK;
+ prot |= _PAGE_DMA_NC;
+
+ return __pgprot(prot);
+}
+

We also have the IO type, we should also define pgprot_device to also ensure ordering, or else it'll fallback to pgprot_noncached, which in our case won't work well due to RVWMO:
https://elixir.bootlin.com/linux/latest/source/include/linux/pgtable.h#L930

+void __init riscv_svpbmt(void)
+{
+#ifdef CONFIG_64BIT
+ struct device_node *node;
+ const char *str;
+
+ for_each_of_cpu_node(node) {
+ if (of_property_read_string(node, "mmu-type", &str)) {
+ continue;
+ }
+
+ if (!strncmp(str + 11, "svpbmt", 6)) {
+ __riscv_svpbmt.mask = _PAGE_MT_MASK;
+ __riscv_svpbmt.mt[MT_PMA] = _PAGE_MT_PMA;
+ __riscv_svpbmt.mt[MT_NC] = _PAGE_MT_NC;
+ __riscv_svpbmt.mt[MT_IO] = _PAGE_MT_IO;
+ break;
+ }
+ }
+#endif
+}

You break; here the first time you find a cpu node with svpbmt enabled, shouldn't we make sure that all used cpu nodes support svpbmt before using the extension ?

Regards,
Nick