[PATCH v4 net 5/5] tcp: Fix data races around icsk->icsk_af_ops.

From: Kuniyuki Iwashima
Date: Tue Oct 04 2022 - 13:41:18 EST


setsockopt(IPV6_ADDRFORM) and tcp_v6_connect() change icsk->icsk_af_ops
under lock_sock(), but tcp_(get|set)sockopt() read it locklessly. To
avoid load/store tearing, we need to add READ_ONCE() and WRITE_ONCE()
for the reads and writes.

Thanks to Eric Dumazet for providing the syzbot report:

BUG: KCSAN: data-race in tcp_setsockopt / tcp_v6_connect

write to 0xffff88813c624518 of 8 bytes by task 23936 on cpu 0:
tcp_v6_connect+0x5b3/0xce0 net/ipv6/tcp_ipv6.c:240
__inet_stream_connect+0x159/0x6d0 net/ipv4/af_inet.c:660
inet_stream_connect+0x44/0x70 net/ipv4/af_inet.c:724
__sys_connect_file net/socket.c:1976 [inline]
__sys_connect+0x197/0x1b0 net/socket.c:1993
__do_sys_connect net/socket.c:2003 [inline]
__se_sys_connect net/socket.c:2000 [inline]
__x64_sys_connect+0x3d/0x50 net/socket.c:2000
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff88813c624518 of 8 bytes by task 23937 on cpu 1:
tcp_setsockopt+0x147/0x1c80 net/ipv4/tcp.c:3789
sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3585
__sys_setsockopt+0x212/0x2b0 net/socket.c:2252
__do_sys_setsockopt net/socket.c:2263 [inline]
__se_sys_setsockopt net/socket.c:2260 [inline]
__x64_sys_setsockopt+0x62/0x70 net/socket.c:2260
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0xffffffff8539af68 -> 0xffffffff8539aff8

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 23937 Comm: syz-executor.5 Not tainted
6.0.0-rc4-syzkaller-00331-g4ed9c1e971b1-dirty #0

Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 08/26/2022

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@xxxxxxxxxxxxxxxx>
Reported-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
---
net/ipv4/tcp.c | 10 ++++++----
net/ipv6/ipv6_sockglue.c | 3 ++-
net/ipv6/tcp_ipv6.c | 6 ++++--
3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 997a80ce1e13..08db82c05a4a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3797,8 +3797,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
const struct inet_connection_sock *icsk = inet_csk(sk);

if (level != SOL_TCP)
- return icsk->icsk_af_ops->setsockopt(sk, level, optname,
- optval, optlen);
+ /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+ return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname,
+ optval, optlen);
return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(tcp_setsockopt);
@@ -4396,8 +4397,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
struct inet_connection_sock *icsk = inet_csk(sk);

if (level != SOL_TCP)
- return icsk->icsk_af_ops->getsockopt(sk, level, optname,
- optval, optlen);
+ /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+ return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname,
+ optval, optlen);
return do_tcp_getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(tcp_getsockopt);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2fb9ee413c53..19ac75c2cd54 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -479,7 +479,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,

/* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */
WRITE_ONCE(sk->sk_prot, &tcp_prot);
- icsk->icsk_af_ops = &ipv4_specific;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e54eee80ce5f..8680aa83f0b9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -237,7 +237,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

- icsk->icsk_af_ops = &ipv6_mapped;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -249,7 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,

if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &ipv6_specific;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
--
2.30.2