Re: [RFC PATCH 2/3] ipv6: Run a reverse sk_lookup on sendmsg.

From: Martin KaFai Lau
Date: Fri Sep 13 2024 - 14:24:40 EST


On 9/13/24 2:39 AM, Tiago Lam wrote:
This follows the same rationale provided for the ipv4 counterpart, where
it now runs a reverse socket lookup when source addresses and/or ports
are changed, on sendmsg, to check whether egress traffic should be
allowed to go through or not.

As with ipv4, the ipv6 sendmsg path is also extended here to support the
IPV6_ORIGDSTADDR ancillary message to be able to specify a source
address/port.

Suggested-by: Jakub Sitnicki <jakub@xxxxxxxxxxxxxx>
Signed-off-by: Tiago Lam <tiagolam@xxxxxxxxxxxxxx>
---
net/ipv6/datagram.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/udp.c | 8 ++++--
2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fff78496803d..4214dda1c320 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -756,6 +756,27 @@ void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
+static inline bool reverse_sk_lookup(struct flowi6 *fl6, struct sock *sk,
+ struct in6_addr *saddr, __be16 sport)
+{
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ (saddr && sport) &&
+ (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, saddr) || inet_sk(sk)->inet_sport != sport)) {
+ struct sock *sk_egress;
+
+ bpf_sk_lookup_run_v6(sock_net(sk), IPPROTO_UDP, &fl6->daddr, fl6->fl6_dport,
+ saddr, ntohs(sport), 0, &sk_egress);

IIRC, in the ingress path the sk could also have been selected by a tc bpf prog calling bpf_sk_assign. In that case, couldn't this re-run of sk_lookup give an incorrect result?

In general, is it necessary to re-run any bpf prog if user space has specified IP[v6]_ORIGDSTADDR?

+ if (!IS_ERR_OR_NULL(sk_egress) &&
+ atomic64_read(&sk_egress->sk_cookie) == atomic64_read(&sk->sk_cookie))
+ return true;
+
+ net_info_ratelimited("No reverse socket lookup match for local addr %pI6:%d remote addr %pI6:%d\n",
+ &saddr, ntohs(sport), &fl6->daddr, ntohs(fl6->fl6_dport));
+ }
+
+ return false;
+}
+
int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
struct ipcm6_cookie *ipc6)
@@ -844,7 +865,62 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
break;
}
+ case IPV6_ORIGDSTADDR:
+ {
+ struct sockaddr_in6 *sockaddr_in;
+ struct net_device *dev = NULL;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct sockaddr_in6))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ sockaddr_in = (struct sockaddr_in6 *)CMSG_DATA(cmsg);
+
+ addr_type = __ipv6_addr_type(&sockaddr_in->sin6_addr);
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL)
+ return -EINVAL;
+
+ /* If we're egressing with a different source address and/or port, we
+ * perform a reverse socket lookup. The rationale behind this is that we
+ * can allow return UDP traffic that has ingressed through sk_lookup to
+ * also egress correctly. In case the reverse lookup fails, we
+ * continue with the normal path.
+ *
+ * The lookup is performed if either source address and/or port changed, and
+ * neither is "0".
+ */
+ if (reverse_sk_lookup(fl6, sk, &sockaddr_in->sin6_addr,
+ sockaddr_in->sin6_port)) {
+ /* Override the source port and address to use with the one we
+ * got in cmsg and bail early.
+ */
+ fl6->saddr = sockaddr_in->sin6_addr;
+ fl6->fl6_sport = sockaddr_in->sin6_port;
+ break;
+ }
+ if (addr_type != IPV6_ADDR_ANY) {
+ int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
+
+ if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) &&
+ !ipv6_chk_addr_and_flags(net,
+ &sockaddr_in->sin6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
+ !ipv6_chk_acast_addr_src(net, dev,
+ &sockaddr_in->sin6_addr))
+ err = -EINVAL;
+ else
+ fl6->saddr = sockaddr_in->sin6_addr;
+ }
+
+ if (err)
+ goto exit_f;
+
+ break;
+ }
case IPV6_FLOWINFO:
if (cmsg->cmsg_len < CMSG_LEN(4)) {
err = -EINVAL;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6602a2e9cdb5..6121cbb71ad3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1476,6 +1476,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6->flowi6_uid = sk->sk_uid;
+ /* We use fl6's daddr and fl6_sport in the reverse sk_lookup done
+ * within ip6_datagram_send_ctl() now.
+ */
+ fl6->daddr = *daddr;
+ fl6->fl6_sport = inet->inet_sport;
+
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
@@ -1511,10 +1517,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6->flowi6_proto = sk->sk_protocol;
fl6->flowi6_mark = ipc6.sockc.mark;
- fl6->daddr = *daddr;
if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
fl6->saddr = np->saddr;
- fl6->fl6_sport = inet->inet_sport;
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,