Re: [PATCH net-next v3 10/12] test/vsock: MSG_ZEROCOPY flag tests

From: Arseniy Krasnov
Date: Tue Oct 10 2023 - 03:21:04 EST




On 10.10.2023 10:19, Stefano Garzarella wrote:
> On Mon, Oct 09, 2023 at 11:24:18PM +0300, Arseniy Krasnov wrote:
>>
>>
>> On 09.10.2023 18:17, Stefano Garzarella wrote:
>>> On Sat, Oct 07, 2023 at 08:21:37PM +0300, Arseniy Krasnov wrote:
>>>> This adds three tests for MSG_ZEROCOPY feature:
>>>> 1) SOCK_STREAM tx with different buffers.
>>>> 2) SOCK_SEQPACKET tx with different buffers.
>>>> 3) SOCK_STREAM test to read empty error queue of the socket.
>>>>
>>>> Patch also works as preparation for the next patches for tools in this
>>>> patchset: vsock_perf and vsock_uring_test:
>>>> 1) Adds several new functions to util.c - they will be also used by
>>>>   vsock_uring_test.
>>>> 2) Adds two new functions for MSG_ZEROCOPY handling to a new header
>>>>   file - such header will be shared between vsock_test, vsock_perf and
>>>>   vsock_uring_test, thus avoiding code copy-pasting.
>>>>
>>>> Signed-off-by: Arseniy Krasnov <avkrasnov@xxxxxxxxxxxxxxxxx>
>>>> ---
>>>> Changelog:
>>>> v1 -> v2:
>>>>  * Move 'SOL_VSOCK' and 'VSOCK_RECVERR' from 'util.c' to 'util.h'.
>>>> v2 -> v3:
>>>>  * Patch was reworked. Now it is also preparation patch (see commit
>>>>    message). Shared stuff for 'vsock_perf' and tests is placed to a
>>>>    new header file, while shared code between current test tool and
>>>>    future uring test is placed to the 'util.c'. I think, that making
>>>>    this patch as preparation allows to reduce number of changes in the
>>>>    next patches in this patchset.
>>>>  * Make 'struct vsock_test_data' private by placing it to the .c file.
>>>>    Also add comments to this struct to clarify sense of its fields.
>>>>
>>>> tools/testing/vsock/Makefile              |   2 +-
>>>> tools/testing/vsock/msg_zerocopy_common.h |  92 ++++++
>>>> tools/testing/vsock/util.c                | 110 +++++++
>>>> tools/testing/vsock/util.h                |   5 +
>>>> tools/testing/vsock/vsock_test.c          |  16 +
>>>> tools/testing/vsock/vsock_test_zerocopy.c | 367 ++++++++++++++++++++++
>>>> tools/testing/vsock/vsock_test_zerocopy.h |  15 +
>>>> 7 files changed, 606 insertions(+), 1 deletion(-)
>>>> create mode 100644 tools/testing/vsock/msg_zerocopy_common.h
>>>> create mode 100644 tools/testing/vsock/vsock_test_zerocopy.c
>>>> create mode 100644 tools/testing/vsock/vsock_test_zerocopy.h
>>>>
>>>> diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
>>>> index 21a98ba565ab..1a26f60a596c 100644
>>>> --- a/tools/testing/vsock/Makefile
>>>> +++ b/tools/testing/vsock/Makefile
>>>> @@ -1,7 +1,7 @@
>>>> # SPDX-License-Identifier: GPL-2.0-only
>>>> all: test vsock_perf
>>>> test: vsock_test vsock_diag_test
>>>> -vsock_test: vsock_test.o timeout.o control.o util.o
>>>> +vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o
>>>> vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
>>>> vsock_perf: vsock_perf.o
>>>>
>>>> diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h
>>>> new file mode 100644
>>>> index 000000000000..ce89f1281584
>>>> --- /dev/null
>>>> +++ b/tools/testing/vsock/msg_zerocopy_common.h
>>>> @@ -0,0 +1,92 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>> +#ifndef MSG_ZEROCOPY_COMMON_H
>>>> +#define MSG_ZEROCOPY_COMMON_H
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <stdlib.h>
>>>> +#include <sys/types.h>
>>>> +#include <sys/socket.h>
>>>> +#include <linux/errqueue.h>
>>>> +
>>>> +#ifndef SOL_VSOCK
>>>> +#define SOL_VSOCK    287
>>>> +#endif
>>>> +
>>>> +#ifndef VSOCK_RECVERR
>>>> +#define VSOCK_RECVERR    1
>>>> +#endif
>>>> +
>>>> +static void enable_so_zerocopy(int fd)
>>>> +{
>>>> +    int val = 1;
>>>> +
>>>> +    if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
>>>> +        perror("setsockopt");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void vsock_recv_completion(int fd, const bool *zerocopied) __maybe_unused;
>>>
>>> To avoid this, maybe we can implement those functions in .c file and
>>> link the object.
>>>
>>> WDYT?
>>>
>>> Ah, here (cc (GCC) 13.2.1 20230728 (Red Hat 13.2.1-1)) the build is
>>> failing:
>>>
>>> In file included from vsock_perf.c:23:
>>> msg_zerocopy_common.h: In function ‘vsock_recv_completion’:
>>> msg_zerocopy_common.h:29:67: error: expected declaration specifiers before ‘__maybe_unused’
>>>    29 | static void vsock_recv_completion(int fd, const bool *zerocopied) __maybe_unused;
>>>       |                                                                   ^~~~~~~~~~~~~~
>>> msg_zerocopy_common.h:31:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
>>>    31 | {
>>>       | ^
>>>
>>>> +static void vsock_recv_completion(int fd, const bool *zerocopied)
>>>> +{
>>>> +    struct sock_extended_err *serr;
>>>> +    struct msghdr msg = { 0 };
>>>> +    char cmsg_data[128];
>>>> +    struct cmsghdr *cm;
>>>> +    ssize_t res;
>>>> +
>>>> +    msg.msg_control = cmsg_data;
>>>> +    msg.msg_controllen = sizeof(cmsg_data);
>>>> +
>>>> +    res = recvmsg(fd, &msg, MSG_ERRQUEUE);
>>>> +    if (res) {
>>>> +        fprintf(stderr, "failed to read error queue: %zi\n", res);
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    cm = CMSG_FIRSTHDR(&msg);
>>>> +    if (!cm) {
>>>> +        fprintf(stderr, "cmsg: no cmsg\n");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    if (cm->cmsg_level != SOL_VSOCK) {
>>>> +        fprintf(stderr, "cmsg: unexpected 'cmsg_level'\n");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    if (cm->cmsg_type != VSOCK_RECVERR) {
>>>> +        fprintf(stderr, "cmsg: unexpected 'cmsg_type'\n");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    serr = (void *)CMSG_DATA(cm);
>>>> +    if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
>>>> +        fprintf(stderr, "serr: wrong origin: %u\n", serr->ee_origin);
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    if (serr->ee_errno) {
>>>> +        fprintf(stderr, "serr: wrong error code: %u\n", serr->ee_errno);
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    /* This flag is used for tests, to check that transmission was
>>>> +     * performed as expected: zerocopy or fallback to copy. If NULL
>>>> +     * - don't care.
>>>> +     */
>>>> +    if (!zerocopied)
>>>> +        return;
>>>> +
>>>> +    if (*zerocopied && (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) {
>>>> +        fprintf(stderr, "serr: was copy instead of zerocopy\n");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    if (!*zerocopied && !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) {
>>>> +        fprintf(stderr, "serr: was zerocopy instead of copy\n");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +}
>>>> +
>>>> +#endif /* MSG_ZEROCOPY_COMMON_H */
>>>> diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
>>>> index 6779d5008b27..b1770edd8cc1 100644
>>>> --- a/tools/testing/vsock/util.c
>>>> +++ b/tools/testing/vsock/util.c
>>>> @@ -11,10 +11,12 @@
>>>> #include <stdio.h>
>>>> #include <stdint.h>
>>>> #include <stdlib.h>
>>>> +#include <string.h>
>>>> #include <signal.h>
>>>> #include <unistd.h>
>>>> #include <assert.h>
>>>> #include <sys/epoll.h>
>>>> +#include <sys/mman.h>
>>>>
>>>> #include "timeout.h"
>>>> #include "control.h"
>>>> @@ -444,3 +446,111 @@ unsigned long hash_djb2(const void *data, size_t len)
>>>>
>>>>     return hash;
>>>> }
>>>> +
>>>> +size_t iovec_bytes(const struct iovec *iov, size_t iovnum)
>>>> +{
>>>> +    size_t bytes;
>>>> +    int i;
>>>> +
>>>> +    for (bytes = 0, i = 0; i < iovnum; i++)
>>>> +        bytes += iov[i].iov_len;
>>>> +
>>>> +    return bytes;
>>>> +}
>>>> +
>>>> +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum)
>>>> +{
>>>> +    unsigned long hash;
>>>> +    size_t iov_bytes;
>>>> +    size_t offs;
>>>> +    void *tmp;
>>>> +    int i;
>>>> +
>>>> +    iov_bytes = iovec_bytes(iov, iovnum);
>>>> +
>>>> +    tmp = malloc(iov_bytes);
>>>> +    if (!tmp) {
>>>> +        perror("malloc");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    for (offs = 0, i = 0; i < iovnum; i++) {
>>>> +        memcpy(tmp + offs, iov[i].iov_base, iov[i].iov_len);
>>>> +        offs += iov[i].iov_len;
>>>> +    }
>>>> +
>>>> +    hash = hash_djb2(tmp, iov_bytes);
>>>> +    free(tmp);
>>>> +
>>>> +    return hash;
>>>> +}
>>>> +
>>>> +struct iovec *iovec_from_test_data(const struct iovec *test_iovec, int iovnum)
>>>
>>> From the name this function seems related to vsock_test_data, so I'd
>>> suggest to move this and free_iovec_test_data() in vsock_test_zerocopy.c
>>>
>>>> +{
>>>> +    struct iovec *iovec;
>>>> +    int i;
>>>> +
>>>> +    iovec = malloc(sizeof(*iovec) * iovnum);
>>>> +    if (!iovec) {
>>>> +        perror("malloc");
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < iovnum; i++) {
>>>> +        iovec[i].iov_len = test_iovec[i].iov_len;
>>>> +
>>>> +        iovec[i].iov_base = mmap(NULL, iovec[i].iov_len,
>>>> +                     PROT_READ | PROT_WRITE,
>>>> +                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
>>>> +                     -1, 0);
>>>> +        if (iovec[i].iov_base == MAP_FAILED) {
>>>> +            perror("mmap");
>>>> +            exit(EXIT_FAILURE);
>>>> +        }
>>>> +
>>>> +        if (test_iovec[i].iov_base != MAP_FAILED)
>>>> +            iovec[i].iov_base += (uintptr_t)test_iovec[i].iov_base;
>>>> +    }
>>>> +
>>>> +    /* Unmap "invalid" elements. */
>>>> +    for (i = 0; i < iovnum; i++) {
>>>> +        if (test_iovec[i].iov_base == MAP_FAILED) {
>>>> +            if (munmap(iovec[i].iov_base, iovec[i].iov_len)) {
>>>> +                perror("munmap");
>>>> +                exit(EXIT_FAILURE);
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < iovnum; i++) {
>>>> +        int j;
>>>> +
>>>> +        if (test_iovec[i].iov_base == MAP_FAILED)
>>>> +            continue;
>>>> +
>>>> +        for (j = 0; j < iovec[i].iov_len; j++)
>>>> +            ((uint8_t *)iovec[i].iov_base)[j] = rand() & 0xff;
>>>> +    }
>>>> +
>>>> +    return iovec;
>>>> +}
>>>> +
>>>> +void free_iovec_test_data(const struct iovec *test_iovec,
>>>> +              struct iovec *iovec, int iovnum)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < iovnum; i++) {
>>>> +        if (test_iovec[i].iov_base != MAP_FAILED) {
>>>> +            if (test_iovec[i].iov_base)
>>>> +                iovec[i].iov_base -= (uintptr_t)test_iovec[i].iov_base;
>>>> +
>>>> +            if (munmap(iovec[i].iov_base, iovec[i].iov_len)) {
>>>> +                perror("munmap");
>>>> +                exit(EXIT_FAILURE);
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +
>>>> +    free(iovec);
>>>> +}
>>>> diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
>>>> index e5407677ce05..4cacb8d804c1 100644
>>>> --- a/tools/testing/vsock/util.h
>>>> +++ b/tools/testing/vsock/util.h
>>>> @@ -53,4 +53,9 @@ void list_tests(const struct test_case *test_cases);
>>>> void skip_test(struct test_case *test_cases, size_t test_cases_len,
>>>>            const char *test_id_str);
>>>> unsigned long hash_djb2(const void *data, size_t len);
>>>> +size_t iovec_bytes(const struct iovec *iov, size_t iovnum);
>>>> +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum);
>>>> +struct iovec *iovec_from_test_data(const struct iovec *test_iovec, int iovnum);
>>>> +void free_iovec_test_data(const struct iovec *test_iovec,
>>>> +              struct iovec *iovec, int iovnum);
>>>> #endif /* UTIL_H */
>>>> diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
>>>> index da4cb819a183..c1f7bc9abd22 100644
>>>> --- a/tools/testing/vsock/vsock_test.c
>>>> +++ b/tools/testing/vsock/vsock_test.c
>>>> @@ -21,6 +21,7 @@
>>>> #include <poll.h>
>>>> #include <signal.h>
>>>>
>>>> +#include "vsock_test_zerocopy.h"
>>>> #include "timeout.h"
>>>> #include "control.h"
>>>> #include "util.h"
>>>> @@ -1269,6 +1270,21 @@ static struct test_case test_cases[] = {
>>>>         .run_client = test_stream_shutrd_client,
>>>>         .run_server = test_stream_shutrd_server,
>>>>     },
>>>> +    {
>>>> +        .name = "SOCK_STREAM MSG_ZEROCOPY",
>>>> +        .run_client = test_stream_msgzcopy_client,
>>>> +        .run_server = test_stream_msgzcopy_server,
>>>> +    },
>>>> +    {
>>>> +        .name = "SOCK_SEQPACKET MSG_ZEROCOPY",
>>>> +        .run_client = test_seqpacket_msgzcopy_client,
>>>> +        .run_server = test_seqpacket_msgzcopy_server,
>>>> +    },
>>>> +    {
>>>> +        .name = "SOCK_STREAM MSG_ZEROCOPY empty MSG_ERRQUEUE",
>>>> +        .run_client = test_stream_msgzcopy_empty_errq_client,
>>>> +        .run_server = test_stream_msgzcopy_empty_errq_server,
>>>> +    },
>>>>     {},
>>>> };
>>>>
>>>> diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c
>>>> new file mode 100644
>>>> index 000000000000..af14efdf334b
>>>> --- /dev/null
>>>> +++ b/tools/testing/vsock/vsock_test_zerocopy.c
>>>> @@ -0,0 +1,367 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/* MSG_ZEROCOPY feature tests for vsock
>>>> + *
>>>> + * Copyright (C) 2023 SberDevices.
>>>> + *
>>>> + * Author: Arseniy Krasnov <avkrasnov@xxxxxxxxxxxxxxxxx>
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <stdlib.h>
>>>> +#include <string.h>
>>>> +#include <sys/mman.h>
>>>> +#include <unistd.h>
>>>> +#include <poll.h>
>>>> +#include <linux/errqueue.h>
>>>> +#include <linux/kernel.h>
>>>> +#include <errno.h>
>>>> +
>>>> +#include "control.h"
>>>> +#include "vsock_test_zerocopy.h"
>>>> +#include "msg_zerocopy_common.h"
>>>> +
>>>> +#define PAGE_SIZE        4096
>>>
>>> In some tests I saw `sysconf(_SC_PAGESIZE)` is used,
>>> e.g. in selftests/ptrace/peeksiginfo.c:
>>>
>>> #ifndef PAGE_SIZE
>>> #define PAGE_SIZE sysconf(_SC_PAGESIZE)
>>> #endif
>>>
>>> WDYT?
>>
>> Only small problem with that - in this case I can't use PAGE_SIZE
>> as array initializer. I think to add some reserved constant value
>> to designate that iov element must be size of page, then use this
>> value as initializer and handle it during test iov creating...
>
> Okay I see. Maybe I'm overthinking!
> It is just a test, let's do not complicate it.
>
> Feel free to use the previous version, I'd just add the guards.

Ok, got it!

Thanks, Arseniy

>
> Thanks,
> Stefano
>