[PATCH] perf bench mem: Always memset source before memcpy

From: Vincent Whitchurch
Date: Mon Aug 10 2020 - 09:34:29 EST


For memcpy, the source pages are memset to zero only when --cycles is
used. This leads to wildly different results with or without --cycles,
since all sources pages are likely to be mapped to the same zero page
without explicit writes.

Before this fix:

$ export cmd="./perf stat -e LLC-loads -- ./perf bench \
mem memcpy -s 1024MB -l 100 -f default"
$ $cmd

2,935,826 LLC-loads
3.821677452 seconds time elapsed

$ $cmd --cycles

217,533,436 LLC-loads
8.616725985 seconds time elapsed

After this fix:

$ $cmd

214,459,686 LLC-loads
8.674301124 seconds time elapsed

$ $cmd --cycles

214,758,651 LLC-loads
8.644480006 seconds time elapsed

Fixes: 47b5757bac03c3387c ("perf bench mem: Move boilerplate memory allocation to the infrastructure")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@xxxxxxxx>
---
tools/perf/bench/mem-functions.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}

-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memcpy_t fn = r->fn.memcpy;
- int i;
-
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
memset(src, 0, size);

@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
* to not measure page fault overhead:
*/
fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);

cycle_start = get_cycles();
for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
memcpy_t fn = r->fn.memcpy;
int i;

- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, src, size);
+ memcpy_prefault(fn, size, src, dst);

BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < nr_loops; ++i)
--
2.25.1