[PATCH 4/4] x86/mm/cpa: Better use clflushopt

From: Peter Zijlstra
Date: Fri Nov 30 2018 - 09:06:28 EST


Currently we issue an MFENCE before and after flushing a range. This
means that if we flush a bunch of single page ranges -- like with the
cpa array, we issue a whole bunch of superfluous MFENCEs.

Reorgainze the code a little to avoid this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/mm/pageattr.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)

--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -251,15 +251,7 @@ static unsigned long __cpa_addr(struct c
* Flushing functions
*/

-/**
- * clflush_cache_range - flush a cache range with clflush
- * @vaddr: virtual start address
- * @size: number of bytes to flush
- *
- * clflushopt is an unordered instruction which needs fencing with mfence or
- * sfence to avoid ordering issues.
- */
-void clflush_cache_range(void *vaddr, unsigned int size)
+static void clflush_cache_range_opt(void *vaddr, unsigned int size)
{
const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
@@ -268,11 +260,22 @@ void clflush_cache_range(void *vaddr, un
if (p >= vend)
return;

- mb();
-
for (; p < vend; p += clflush_size)
clflushopt(p);
+}

+/**
+ * clflush_cache_range - flush a cache range with clflush
+ * @vaddr: virtual start address
+ * @size: number of bytes to flush
+ *
+ * clflushopt is an unordered instruction which needs fencing with mfence or
+ * sfence to avoid ordering issues.
+ */
+void clflush_cache_range(void *vaddr, unsigned int size)
+{
+ mb();
+ clflush_cache_range_opt(vaddr, size);
mb();
}
EXPORT_SYMBOL_GPL(clflush_cache_range);
@@ -339,6 +342,7 @@ static void cpa_flush(struct cpa_data *d
if (!cache)
return;

+ mb();
for (i = 0; i < cpa->numpages; i++) {
unsigned long addr = __cpa_addr(cpa, i);
unsigned int level;
@@ -349,8 +353,9 @@ static void cpa_flush(struct cpa_data *d
* Only flush present addresses:
*/
if (pte && (pte_val(*pte) & _PAGE_PRESENT))
- clflush_cache_range((void *)addr, PAGE_SIZE);
+ clflush_cache_range_opt((void *)addr, PAGE_SIZE);
}
+ mb();
}

static bool overlaps(unsigned long r1_start, unsigned long r1_end,