Re: [PATCH net-next v1 6/7] net: fix SO_DEVMEM_DONTNEED looping too long
From: Stanislav Fomichev
Date: Wed Oct 30 2024 - 10:36:57 EST
On 10/29, Mina Almasry wrote:
> Check we're going to free a reasonable number of frags in token_count
> before starting the loop, to prevent looping too long.
>
> Also minor code cleanups:
> - Flip checks to reduce indentation.
> - Use sizeof(*tokens) everywhere for consistency.
>
> Cc: Yi Lai <yi1.lai@xxxxxxxxxxxxxxx>
>
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
>
> ---
> net/core/sock.c | 46 ++++++++++++++++++++++++++++------------------
> 1 file changed, 28 insertions(+), 18 deletions(-)
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7f398bd07fb7..8603b8d87f2e 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1047,11 +1047,12 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
>
> #ifdef CONFIG_PAGE_POOL
>
> -/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
> +/* This is the number of frags that the user can SO_DEVMEM_DONTNEED in
> * 1 syscall. The limit exists to limit the amount of memory the kernel
> - * allocates to copy these tokens.
> + * allocates to copy these tokens, and to prevent looping over the frags for
> + * too long.
> */
> -#define MAX_DONTNEED_TOKENS 128
> +#define MAX_DONTNEED_FRAGS 1024
>
> static noinline_for_stack int
> sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> @@ -1059,43 +1060,52 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> unsigned int num_tokens, i, j, k, netmem_num = 0;
> struct dmabuf_token *tokens;
> netmem_ref netmems[16];
> + u64 num_frags = 0;
> int ret = 0;
>
> if (!sk_is_tcp(sk))
> return -EBADF;
>
> - if (optlen % sizeof(struct dmabuf_token) ||
> - optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
> + if (optlen % sizeof(*tokens) ||
> + optlen > sizeof(*tokens) * MAX_DONTNEED_FRAGS)
> return -EINVAL;
>
> - tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
> + num_tokens = optlen / sizeof(*tokens);
> + tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
> if (!tokens)
> return -ENOMEM;
>
> - num_tokens = optlen / sizeof(struct dmabuf_token);
> if (copy_from_sockptr(tokens, optval, optlen)) {
> kvfree(tokens);
> return -EFAULT;
> }
>
> + for (i = 0; i < num_tokens; i++) {
> + num_frags += tokens[i].token_count;
> + if (num_frags > MAX_DONTNEED_FRAGS) {
> + kvfree(tokens);
> + return -E2BIG;
> + }
> + }
> +
> xa_lock_bh(&sk->sk_user_frags);
> for (i = 0; i < num_tokens; i++) {
> for (j = 0; j < tokens[i].token_count; j++) {
> netmem_ref netmem = (__force netmem_ref)__xa_erase(
> &sk->sk_user_frags, tokens[i].token_start + j);
>
> - if (netmem &&
> - !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
> - netmems[netmem_num++] = netmem;
> - if (netmem_num == ARRAY_SIZE(netmems)) {
> - xa_unlock_bh(&sk->sk_user_frags);
> - for (k = 0; k < netmem_num; k++)
> - WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
> - netmem_num = 0;
> - xa_lock_bh(&sk->sk_user_frags);
> - }
> - ret++;
[..]
> + if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
> + continue;
Any reason we are not returning an explicit error to the callers here?
That would probably need some mechanism to signal which particular one
failed, so that users can restart.