Re: [PATCH 1/3] mm/gup_benchmark: Support threading

From: John Hubbard
Date: Fri May 07 2021 - 00:37:45 EST


On 5/6/21 4:25 PM, Peter Xu wrote:
Add a new parameter "-j N" to support concurrent gup test.

I had a long-standing personal TODO item that said, "decide if it's
worth it, to add pthread support to gup_test". So now, at least one
other person has decided that it *is* worth it, and you've also written
the patch. OK, then. Sweet! :)

This looks correct to me. I have a couple of minor comments below, but
either way you can add:

Reviewed-by: John Hubbard <jhubbard@xxxxxxxxxx>



Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
tools/testing/selftests/vm/gup_test.c | 94 ++++++++++++++++++---------
1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1e662d59c502a..3f0fcc697c3fc 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -6,6 +6,7 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <pthread.h>
#include "../../../../mm/gup_test.h"
#define MB (1UL << 20)
@@ -15,6 +16,12 @@
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
static char *cmd_to_str(unsigned long cmd)
{
switch (cmd) {
@@ -34,17 +41,55 @@ static char *cmd_to_str(unsigned long cmd)
return "Unknown command";
}
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup))
+ perror("ioctl"), exit(1);
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ printf("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+ return NULL;
+}
+
int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
- unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
- unsigned long cmd = GUP_FAST_BENCHMARK;
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, threads = 1;


"nthreads" is a little more accurate for this.


int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
+ pthread_t *tid;
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
switch (opt) {
case 'a':
cmd = PIN_FAST_BENCHMARK;
@@ -74,6 +119,9 @@ int main(int argc, char **argv)
/* strtol, so you can pass flags in hex form */
gup.gup_flags = strtol(optarg, 0, 0);
break;
+ case 'j':
+ threads = atoi(optarg);
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -154,8 +202,8 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- fd = open("/sys/kernel/debug/gup_test", O_RDWR);
- if (fd == -1) {
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1) {
perror("open");
exit(1);
}
@@ -185,32 +233,16 @@ int main(int argc, char **argv)
p[0] = 0;
}
- /* Only report timing information on the *_BENCHMARK commands: */
- if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
- (cmd == PIN_LONGTERM_BENCHMARK)) {
- for (i = 0; i < repeats; i++) {
- gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
-
- printf("%s: Time: get:%lld put:%lld us",
- cmd_to_str(cmd), gup.get_delta_usec,
- gup.put_delta_usec);
- if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
- }
- } else {
- gup.size = size;
- if (ioctl(fd, cmd, &gup)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("%s: done\n", cmd_to_str(cmd));
- if (gup.size != size)
- printf("Truncated (size: %lld)\n", gup.size);
+ tid = malloc(sizeof(pthread_t) * threads);
+ if (!tid) {
+ perror("malloc()");
+ exit(1);
}
+ for (i = 0; i < threads; i++)
+ pthread_create(&tid[i], NULL, gup_thread, &gup);

It might be nice to check the return value of pthread_create(), and
just exit out with an error if it fails. On the other hand, it seems
spectacularly unlikely for it to fail here...on the other-other hand,
I always say that, just before writing code that doesn't check an
error, and the error somehow happens anyway and costs someone some
wasted time.

Your call.

+ for (i = 0; i < threads; i++)
+ pthread_join(tid[i], NULL);
+ free(tid);
return 0;
}


thanks,
--
John Hubbard
NVIDIA