[PATCH 2/2] perf bench: Add epoll-wait-mp1c benchmark calling epoll_wait(2)

From: Roman Penyaev
Date: Thu Jan 03 2019 - 10:00:48 EST


The intention of this benchmark is to measure event delivery
bandwidth: N threads produce events and 1 thread consumes them
by calling epoll_wait(2).

The benchmark takes measurements for 8, 16, 32, 64 and 128 threads in
a loop.

This one differs from epoll-wait-1pmc in that it produces events
from many threads and consumes from one, thus mp1c (many producers
1 consumer).

Signed-off-by: Roman Penyaev <rpenyaev@xxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Davidlohr Bueso <dbueso@xxxxxxx>
Cc: Jason Baron <jbaron@xxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
tools/perf/bench/Build | 1 +
tools/perf/bench/bench.h | 1 +
tools/perf/bench/epoll-wait-mp1c.c | 175 +++++++++++++++++++++++++++++
tools/perf/builtin-bench.c | 1 +
4 files changed, 178 insertions(+)
create mode 100644 tools/perf/bench/epoll-wait-mp1c.c

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index ef07fc40bc35..570df3f475b8 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -8,6 +8,7 @@ perf-y += futex-requeue.o
perf-y += futex-lock-pi.o

perf-y += epoll-wait-1pmc.o
+perf-y += epoll-wait-mp1c.o
perf-y += epoll-ctl.o

perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fb9782624644..2ee7e7256e23 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -39,6 +39,7 @@ int bench_futex_requeue(int argc, const char **argv);
int bench_futex_lock_pi(int argc, const char **argv);

int bench_epoll_wait_1pmc(int argc, const char **argv);
+int bench_epoll_wait_mp1c(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);

#define BENCH_FORMAT_DEFAULT_STR "default"
diff --git a/tools/perf/bench/epoll-wait-mp1c.c b/tools/perf/bench/epoll-wait-mp1c.c
new file mode 100644
index 000000000000..44b06ae86e5b
--- /dev/null
+++ b/tools/perf/bench/epoll-wait-mp1c.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef HAVE_EVENTFD
+/*
+ * Copyright (C) 2019 Roman Penyaev
+ *
+ * This program benchmarks the bandwidth of events delivered from many
+ * threads (many producers) to a single consumer, which waits for the
+ * events by calling epoll_wait(2).
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+#include <err.h>
+#include <linux/kernel.h> /* For ARRAY_SIZE only */
+#include "bench.h"
+#include "cpumap.h"
+
+#define ITERS 1000000ull /* number of eventfd writes issued by each producer */
+
+struct thread_ctx { /* per-producer state; do_bench() keeps one per thread */
+ pthread_t thread; /* handle that do_bench() passes to pthread_join() */
+ int efd; /* eventfd the producer writes to and the consumer reads from */
+};
+
+static volatile unsigned int thr_ready; /* incremented once by each producer on entry */
+static volatile unsigned int start; /* producers spin until this is non-zero */
+
+static inline unsigned long long nsecs(void) /* monotonic "now" in ns */
+{
+	struct timespec now = { .tv_sec = 0, .tv_nsec = 0 };
+
+	clock_gettime(CLOCK_MONOTONIC, &now); /* return value is not checked */
+	return 1000000000ull * (unsigned long long)now.tv_sec + now.tv_nsec;
+}
+
+static void *thread_work(void *arg)
+{
+	/* Producer: report readiness, wait for the start flag, post ITERS events. */
+	const struct thread_ctx *self = arg;
+	const uint64_t one = 1;
+	unsigned int left = ITERS;
+	int written;
+
+	__atomic_add_fetch(&thr_ready, 1, __ATOMIC_RELAXED);
+	while (start == 0)
+		continue; /* spin until do_bench() sets the start flag */
+
+	/* Every write adds 1 to this thread's eventfd counter. */
+	while (left--) {
+		written = write(self->efd, &one, sizeof(one));
+		assert(written == sizeof(one));
+	}
+	return NULL;
+}
+
+static int do_bench(struct cpu_map *cpu, unsigned int nthreads)
+{
+	/*
+	 * One round: start @nthreads producers pinned round-robin over @cpu,
+	 * drain their eventfds via epoll_wait(2) until nthreads * ITERS events
+	 * are counted, print one result row. Returns 0; setup errors exit via err().
+	 */
+	struct epoll_event ev, events[nthreads];
+	struct thread_ctx threads[nthreads];
+	unsigned long long ucnt, ucnt_sum = 0, epoll_nsecs, msecs;
+	pthread_attr_t thrattr;
+	struct thread_ctx *ctx;
+	int rc, epfd, nfds;
+	cpu_set_t cpuset;
+	unsigned int i;
+
+	/* The handshake flags are file-scope: rearm them for every round. */
+	thr_ready = 0;
+	start = 0;
+
+	epfd = epoll_create1(0);
+	if (epfd < 0)
+		err(EXIT_FAILURE, "epoll_create1");
+
+	for (i = 0; i < nthreads; i++) {
+		ctx = &threads[i];
+		ctx->efd = eventfd(0, EFD_NONBLOCK);
+		if (ctx->efd < 0)
+			err(EXIT_FAILURE, "eventfd");
+
+		ev.events = EPOLLIN;
+		ev.data.ptr = ctx;
+		rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ctx->efd, &ev);
+		if (rc)
+			err(EXIT_FAILURE, "epoll_ctl");
+
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+		pthread_attr_init(&thrattr);
+		rc = pthread_attr_setaffinity_np(&thrattr, sizeof(cpu_set_t),
+						 &cpuset);
+		if (rc) {
+			errno = rc;
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+		}
+		rc = pthread_create(&ctx->thread, &thrattr, thread_work, ctx);
+		if (rc) {
+			errno = rc;
+			err(EXIT_FAILURE, "pthread_create");
+		}
+		pthread_attr_destroy(&thrattr);
+	}
+
+	/* Hold the start signal back until every producer has checked in. */
+	while (thr_ready < nthreads)
+		;
+	start = 1;
+
+	epoll_nsecs = nsecs();
+	while (1) {
+		nfds = epoll_wait(epfd, events, nthreads, -1);
+		if (nfds < 0)
+			err(EXIT_FAILURE, "epoll_wait");
+
+		for (i = 0; i < (unsigned int)nfds; ++i) {
+			ctx = events[i].data.ptr;
+			rc = read(ctx->efd, &ucnt, sizeof(ucnt));
+			assert(rc == sizeof(ucnt));
+			ucnt_sum += ucnt;
+			if (ucnt_sum == nthreads * ITERS)
+				goto end;
+		}
+	}
+end:
+	epoll_nsecs = nsecs() - epoll_nsecs;
+	/* Clamp to 1 ms so a sub-millisecond run cannot divide by zero below. */
+	msecs = epoll_nsecs < 1000000 ? 1 : epoll_nsecs / 1000000;
+
+	for (i = 0; i < nthreads; i++) {
+		ctx = &threads[i];
+		pthread_join(ctx->thread, NULL);
+		close(ctx->efd);
+	}
+	close(epfd);
+
+	printf("%7u %8llu %8llu\n", nthreads, ITERS * nthreads / msecs, msecs);
+	return 0;
+}
+
+int bench_epoll_wait_mp1c(int argc, const char **argv)
+{
+	/* Prints a table header, then one do_bench() row per thread count. */
+	static const unsigned int counts[] = {8, 16, 32, 64, 128};
+	struct cpu_map *cpus = cpu_map__new(NULL);
+	unsigned int idx = 0;
+
+	(void)argc; /* command-line arguments are not used */
+	(void)argv;
+	if (cpus == NULL) {
+		errno = ENOMEM;
+		err(EXIT_FAILURE, "cpu_map__new");
+	}
+
+	printf("threads events/ms run-time ms\n");
+	while (idx < ARRAY_SIZE(counts))
+		do_bench(cpus, counts[idx++]);
+
+	cpu_map__put(cpus);
+	return 0;
+}
+
+#endif // HAVE_EVENTFD
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 6926b5a5eebf..4426b7afadf3 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -71,6 +71,7 @@ static struct bench futex_benchmarks[] = {
#ifdef HAVE_EVENTFD
static struct bench epoll_benchmarks[] = {
{ "wait-1pmc", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait_1pmc },
+ { "wait-mp1c", "Benchmark epoll events delivery bandwidth", bench_epoll_wait_mp1c },
{ "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl },
{ "all", "Run all futex benchmarks", NULL },
{ NULL, NULL, NULL }
--
2.19.1