[PATCH AUTOSEL 4.18 09/33] xsk: do not call synchronize_net() under RCU read lock

From: Sasha Levin
Date: Tue Oct 30 2018 - 09:27:32 EST


From: Björn Töpel <bjorn.topel@xxxxxxxxx>

[ Upstream commit cee271678d0e3177a25d0fcb2fa5e051d48e4262 ]

The XSKMAP update and delete functions called synchronize_net(), which
can sleep. It is not allowed to sleep during an RCU read section.

Instead we need to make sure that the sock sk_destruct (xsk_destruct)
function is asynchronously called after an RCU grace period. Setting
the SOCK_RCU_FREE flag for XDP sockets takes care of this.

Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
Reported-by: Eric Dumazet <eric.dumazet@xxxxxxxxx>
Signed-off-by: Björn Töpel <bjorn.topel@xxxxxxxxx>
Acked-by: Song Liu <songliubraving@xxxxxx>
Signed-off-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
kernel/bpf/xskmap.c | 10 ++--------
net/xdp/xsk.c | 2 ++
2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index b3c557476a8d..c98501a04742 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -191,11 +191,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
sock_hold(sock->sk);

old_xs = xchg(&m->xsk_map[i], xs);
- if (old_xs) {
- /* Make sure we've flushed everything. */
- synchronize_net();
+ if (old_xs)
sock_put((struct sock *)old_xs);
- }

sockfd_put(sock);
return 0;
@@ -211,11 +208,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;

old_xs = xchg(&m->xsk_map[k], NULL);
- if (old_xs) {
- /* Make sure we've flushed everything. */
- synchronize_net();
+ if (old_xs)
sock_put((struct sock *)old_xs);
- }

return 0;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e937cd7c17d..661504042d30 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -744,6 +744,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_destruct = xsk_destruct;
sk_refcnt_debug_inc(sk);

+ sock_set_flag(sk, SOCK_RCU_FREE);
+
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);
--
2.17.1