From: Leonard Crestez <cdleonard@xxxxxxxxx>
Date: Mon, 19 Jul 2021 14:24:46 +0300
This is similar to TCP MD5 in functionality but it's sufficiently
different that userspace interface and wire formats are incompatible.
Compared to TCP-MD5 more algorithms are supported and multiple keys can
be used on the same connection but there is still no negotiation
mechanism.
Expected use-case is protecting long-duration BGP/LDP connections
between routers using pre-shared keys.
This is an early version which focuses on getting the correct
signature bits on the wire in a way that can interoperate with other
implementations. Major issues still need to be solved:
* Lockdep warnings (incorrect context for initializing shash)
* Support for aes-128-cmac-96
* Binding keys to addresses and/or interfaces similar to md5
* Sequence Number Extension
A small test suite is here: https://github.com/cdleonard/tcp-authopt-test
The tests work by establishing loopback TCP connections, capturing
packets with scapy and validating signatures.
Changes for yabgp are here:
https://github.com/cdleonard/yabgp/commits/tcp_authopt
The patched version of yabgp can establish a BGP session protected by
TCP Authentication Option with a Cisco IOS-XR router.
I'm especially interested in feedback regarding ABI and testing.
Signed-off-by: Leonard Crestez <cdleonard@xxxxxxxxx>
---
Allocating shash requires user context but holding a struct tfm in
tcp_authopt_key_info allocated by tcp_set_authopt_key doesn't work
because when a server handshake is successful the server socket needs to
copy the keys of the listen socket in softirq context.
Sharing the crypto_shash tfm between listen and server sockets doesn't
work well either because keys for each connection (and each syn packet)
are different and the hmac or cmac key is per-tfm rather than per
shash_desc. The server sockets would need locking to access their shared
tfm.
Simplest solution would be to allocate one shash for each CPU and borrow
it for each hashing operation. TCP-MD5 allocates one ahash globally but
that can't work for hmac/cmac because of setkey.
Signed-off-by: Leonard Crestez <cdleonard@xxxxxxxxx>
---
include/linux/tcp.h | 6 +
include/net/tcp.h | 1 +
include/net/tcp_authopt.h | 103 ++++++
include/uapi/linux/snmp.h | 1 +
include/uapi/linux/tcp.h | 40 +++
net/ipv4/Kconfig | 14 +
net/ipv4/Makefile | 1 +
net/ipv4/proc.c | 1 +
net/ipv4/tcp.c | 7 +
net/ipv4/tcp_authopt.c | 718 ++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_input.c | 17 +
net/ipv4/tcp_ipv4.c | 5 +
net/ipv4/tcp_minisocks.c | 2 +
net/ipv4/tcp_output.c | 65 +++-
14 files changed, 980 insertions(+), 1 deletion(-)
create mode 100644 include/net/tcp_authopt.h
create mode 100644 net/ipv4/tcp_authopt.c
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 48d8a363319e..cfddfc720b00 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -140,10 +140,12 @@ struct tcp_request_sock {
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
{
return (struct tcp_request_sock *)req;
}
+struct tcp_authopt_info;
+
struct tcp_sock {
/* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn;
u16 tcp_header_len; /* Bytes of tcp header to send */
u16 gso_segs; /* Max number of segs per GSO packet */
@@ -403,10 +405,14 @@ struct tcp_sock {
/* TCP MD5 Signature Option information */
struct tcp_md5sig_info __rcu *md5sig_info;
#endif
+#ifdef CONFIG_TCP_AUTHOPT
+ struct tcp_authopt_info __rcu *authopt_info;
+#endif
+
/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
/* fastopen_rsk points to request_sock that resulted in this big
* socket. Used to retransmit SYNACKs etc.
*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 17df9b047ee4..767611fd5ec3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -182,10 +182,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_WINDOW 3 /* Window scaling */
#define TCPOPT_SACK_PERM 4 /* SACK Permitted */
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_AUTHOPT 29 /* Auth Option (RFC5925) */
#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt
diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
new file mode 100644
index 000000000000..aaab5c955984
--- /dev/null
+++ b/include/net/tcp_authopt.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_TCP_AUTHOPT_H
+#define _LINUX_TCP_AUTHOPT_H
+
+#include <uapi/linux/tcp.h>
+
+/* Representation of a Master Key Tuple as per RFC5925 */
+struct tcp_authopt_key_info {
+ struct hlist_node node; /* entry in the per-socket tcp_authopt_info.head list */
+ /* Local identifier */
+ u32 local_id; /* copied from tcp_authopt_key.local_id; key lookups match on it */
+ u32 flags; /* tcp_set_authopt_key() keeps only TCP_AUTHOPT_KEY_EXCLUDE_OPTS here */
+ /* Wire identifiers */
+ u8 send_id, recv_id;
+ u8 alg; /* one of the TCP_AUTHOPT_ALG_* values */
+ u8 keylen; /* number of bytes of key[] supplied by userspace */
+ u8 key[TCP_AUTHOPT_MAXKEYLEN];
+ u8 maclen; /* set to 12 for both algorithms accepted by tcp_set_authopt_key() */
+ u8 traffic_key_len; /* 20 for HMAC-SHA-1-96, 16 for AES-128-CMAC-96 */
+ struct rcu_head rcu; /* used by kfree_rcu() when the key is deleted */
+};
+
+/* Per-socket information regarding tcp_authopt */
+struct tcp_authopt_info {
+ struct hlist_head head; /* list of struct tcp_authopt_key_info, linked through .node */
+ u32 local_send_id; /* set from tcp_authopt.local_send_id by tcp_set_authopt() */
+ u32 src_isn; /* presumably the local initial sequence number - not used in the visible code, confirm */
+ u32 dst_isn; /* presumably the peer initial sequence number - not used in the visible code, confirm */
+ u8 rnextkeyid; /* NOTE(review): looks like the RFC5925 RNextKeyID - confirm against the users */
+ struct rcu_head rcu; /* used by kfree_rcu() in __tcp_authopt_info_free() */
+};
+
+#ifdef CONFIG_TCP_AUTHOPT
+struct tcp_authopt_key_info *tcp_authopt_key_info_lookup(struct sock *sk, int key_id);
+void tcp_authopt_clear(struct sock *sk);
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen);
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen);
+int tcp_authopt_hash(
+ char *hash_location,
+ struct tcp_authopt_key_info *key,
+ struct sock *sk, struct sk_buff *skb);
+int __tcp_authopt_openreq(struct sock *newsk, const struct sock *oldsk, struct request_sock *req);
+static inline int tcp_authopt_openreq(
+ struct sock *newsk,
+ const struct sock *oldsk,
+ struct request_sock *req)
+{
+ if (!rcu_dereference(tcp_sk(oldsk)->authopt_info)) /* pointer is only tested for NULL, never dereferenced here */
s/rcu_dereference/rcu_access_pointer/
+ return 0; /* oldsk has no authopt state: nothing to do for newsk */
+ else
+ return __tcp_authopt_openreq(newsk, oldsk, req); /* out-of-line slow path does the real work */
nit: 'else' can be removed.
+}
+int __tcp_authopt_inbound_check(
+ struct sock *sk,
+ struct sk_buff *skb,
+ struct tcp_authopt_info *info);
+static inline int tcp_authopt_inbound_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_authopt_info *info = rcu_dereference(tcp_sk(sk)->authopt_info);
+
+ if (info) /* only sockets with authopt state take the out-of-line check */
+ return __tcp_authopt_inbound_check(sk, skb, info);
+ else
+ return 0; /* no authopt state: return 0 without inspecting skb */
Same with the above and can be formatted like:
if (!info)
return 0;
return __tcp_authopt_inbound_check(sk, skb, info);
+}
+#else
+static inline struct tcp_authopt_key_info *tcp_authopt_key_info_lookup(
+ struct sock *sk,
+ int key_id)
+{
+ return NULL; /* CONFIG_TCP_AUTHOPT is off: no key can ever be found */
+}
+static inline int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ return -ENOPROTOOPT; /* TCP_AUTHOPT sockopt is rejected without CONFIG_TCP_AUTHOPT */
+}
+static inline void tcp_authopt_clear(struct sock *sk)
+{ /* nothing to free without CONFIG_TCP_AUTHOPT */
+}
+static inline int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ return -ENOPROTOOPT; /* TCP_AUTHOPT_KEY sockopt is rejected without CONFIG_TCP_AUTHOPT */
+}
+static inline int tcp_authopt_hash(
+ char *hash_location,
+ struct tcp_authopt_key_info *key,
+ struct sock *sk, struct sk_buff *skb)
+{
+ return -EINVAL; /* hashing is unavailable without CONFIG_TCP_AUTHOPT */
+}
+static inline int tcp_authopt_openreq(struct sock *newsk,
+ const struct sock *oldsk,
+ struct request_sock *req)
+{
+ return 0; /* no-op without CONFIG_TCP_AUTHOPT */
+}
+static inline int tcp_authopt_inbound_check(struct sock *sk, struct sk_buff *skb)
+{
+ return 0; /* no-op without CONFIG_TCP_AUTHOPT */
+}
+#endif
+
+#endif /* _LINUX_TCP_AUTHOPT_H */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 904909d020e2..1d96030889a1 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -290,10 +290,11 @@ enum
LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */
LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
+ LINUX_MIB_TCPAUTHOPTFAILURE, /* TCPAuthOptFailure */
__LINUX_MIB_MAX
};
/* linux Xfrm mib definitions */
enum
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..30b8ad769871 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -126,10 +126,12 @@ enum {
#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */
#define TCP_CM_INQ TCP_INQ
#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */
+#define TCP_AUTHOPT 38 /* TCP Authentication Option (RFC5925) */
+#define TCP_AUTHOPT_KEY 39 /* TCP Authentication Option update key (RFC5925) */
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF 0
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
@@ -340,10 +342,48 @@ struct tcp_diag_md5sig {
__u16 tcpm_keylen;
__be32 tcpm_addr[4];
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
};
+/* for TCP_AUTHOPT socket option */
+#define TCP_AUTHOPT_MAXKEYLEN 80
+
+#define TCP_AUTHOPT_ALG_HMAC_SHA_1_96 1
+#define TCP_AUTHOPT_ALG_AES_128_CMAC_96 2
+
+/* Per-socket options, passed to setsockopt(TCP_AUTHOPT).
+ * tcp_set_authopt() allocates the per-socket state on the first call.
+ */
+struct tcp_authopt {
+ /* No flags currently defined; tcp_set_authopt() does not store this value */
+ __u32 flags;
+ /* local_id of preferred output key */
+ __u32 local_send_id;
+};
+
+/* Delete the key by local_id and ignore all fields */
+#define TCP_AUTHOPT_KEY_DEL (1 << 0)
+/* Exclude TCP options from signature */
+#define TCP_AUTHOPT_KEY_EXCLUDE_OPTS (1 << 1)
+
+/* Per-key options, passed to setsockopt(TCP_AUTHOPT_KEY).
+ * Each key is identified by a non-zero local_id which is managed by the application.
+ */
+struct tcp_authopt_key {
+ /* Mix of TCP_AUTHOPT_KEY_ flags */
+ __u32 flags;
+ /* Local identifier; zero is rejected with EINVAL */
+ __u32 local_id;
+ /* SendID on the network */
+ __u8 send_id;
+ /* RecvID on the network */
+ __u8 recv_id;
+ /* One of the TCP_AUTHOPT_ALG_* constants */
+ __u8 alg;
+ /* Length of the key buffer, at most TCP_AUTHOPT_MAXKEYLEN */
+ __u8 keylen;
+ __u8 key[TCP_AUTHOPT_MAXKEYLEN];
+};
+
/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
struct tcp_zerocopy_receive {
__u64 address; /* in: address of mapping */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..6459f4ea6f1d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -740,5 +740,19 @@ config TCP_MD5SIG
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
on the Internet.
If unsure, say N.
+
+config TCP_AUTHOPT
+ bool "TCP: Authentication Option support (RFC5925)"
+ select CRYPTO
+ select CRYPTO_SHA1
+ select CRYPTO_HMAC
+ select CRYPTO_AES
+ select CRYPTO_CMAC
+ help
+ RFC5925 specifies a new method of giving protection to TCP sessions.
+ Its intended use is to protect BGP sessions between core routers
+ on the Internet. It obsoletes TCP MD5 (RFC2385) but is incompatible.
+
+ If unsure, say N.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..d336f32ce177 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -59,10 +59,11 @@ obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_TCP_AUTHOPT) += tcp_authopt.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b0d3a09dc84e..61dd06f8389c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -295,10 +295,11 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
+ SNMP_MIB_ITEM("TCPAuthOptFailure", LINUX_MIB_TCPAUTHOPTFAILURE),
SNMP_MIB_SENTINEL
};
static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
unsigned short *type, int count)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8cb44040ec68..3c29bb579d27 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,10 +271,11 @@
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/tcp_authopt.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
#include <linux/uaccess.h>
@@ -3573,10 +3574,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
+ case TCP_AUTHOPT:
+ err = tcp_set_authopt(sk, optval, optlen);
+ break;
+ case TCP_AUTHOPT_KEY:
+ err = tcp_set_authopt_key(sk, optval, optlen);
+ break;
case TCP_USER_TIMEOUT:
/* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
if (val < 0)
diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
new file mode 100644
index 000000000000..40ee83fc0afe
--- /dev/null
+++ b/net/ipv4/tcp_authopt.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <net/tcp_authopt.h>
+#include <crypto/hash.h>
+#include <trace/events/tcp.h>
+
+/* All current algorithms have a mac length of 12 but crypto API digestsize can be larger */
+#define TCP_AUTHOPT_MAXMACBUF 20
+#define TCP_AUTHOPT_MAX_TRAFFIC_KEY_LEN 20
+
+/* Walk the key list of @info and return the entry whose local_id equals
+ * @key_id, or NULL when no entry matches.
+ *
+ * The list is traversed with the RCU iterator; lockdep_sock_is_held(sk) is
+ * passed as the alternative condition under which the walk is legal.
+ */
+struct tcp_authopt_key_info *__tcp_authopt_key_info_lookup(struct sock *sk,
+							   struct tcp_authopt_info *info,
+							   int key_id)
+{
+	struct tcp_authopt_key_info *cur;
+
+	hlist_for_each_entry_rcu(cur, &info->head, node, lockdep_sock_is_held(sk)) {
+		if (cur->local_id != key_id)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+struct tcp_authopt_key_info *tcp_authopt_key_info_lookup(struct sock *sk, int key_id)
+{
+ struct tcp_authopt_info *info;
+ struct tcp_authopt_key_info *key;
+
+ info = rcu_dereference_check(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk)); /* legal under rcu_read_lock() or with the socket lock held */
+ if (!info)
+ return NULL; /* no authopt state on this socket, hence no keys */
+
+ hlist_for_each_entry_rcu(key, &info->head, node, lockdep_sock_is_held(sk))
+ if (key->local_id == key_id) /* key_id is matched against local_id, not the wire send_id/recv_id */
+ return key;
+
+ return NULL;
The loop and 'return' can be replaced by
return __tcp_authopt_key_info_lookup(sk, info, key_id);
+}
+
+int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_authopt opt;
+ struct tcp_authopt_info *info;
+
+ if (optlen < sizeof(opt)) /* short buffers are rejected; only sizeof(opt) bytes are read below */
+ return -EINVAL;
+
+ WARN_ON(!lockdep_sock_is_held(sk));
+ if (copy_from_sockptr(&opt, optval, sizeof(opt)))
+ return -EFAULT;
+
+ info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+ if (!info) { /* first TCP_AUTHOPT call on this socket: allocate zeroed state */
+ info = kmalloc(sizeof(*info), GFP_KERNEL | __GFP_ZERO);
+ if (!info)
+ return -ENOMEM;
+
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK); /* NOTE(review): presumably disables GSO so each segment is signed individually - confirm */
+ INIT_HLIST_HEAD(&info->head);
+ rcu_assign_pointer(tp->authopt_info, info); /* publish only after the list head is initialised */
+ }
info->flags = opt.flags;
In case we forget to add this in the future.
+ info->local_send_id = opt.local_send_id; /* updated on every call, not just the first */
+
+ return 0;
+}
+
+static void tcp_authopt_key_del(struct sock *sk, struct tcp_authopt_key_info *key)
+{
+ hlist_del_rcu(&key->node); /* unlink from the per-socket list; RCU readers may still hold the key */
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc); /* keys come from sock_kmalloc() in tcp_set_authopt_key(); presumably this undoes its accounting - confirm */
Should this be done after actually freeing the key?
+ kfree_rcu(key, rcu); /* the memory itself is released after an RCU grace period */
+}
+
+/* Release @info together with every key linked on it.
+ *
+ * tp->authopt_info is deliberately left alone; updating that pointer is the
+ * caller's job. Keys are removed through tcp_authopt_key_del() and @info
+ * itself is handed to kfree_rcu().
+ */
+void __tcp_authopt_info_free(struct sock *sk, struct tcp_authopt_info *info)
+{
+	struct tcp_authopt_key_info *entry;
+	struct hlist_node *next;
+
+	/* _safe iterator: each entry is unlinked while the list is walked */
+	hlist_for_each_entry_safe(entry, next, &info->head, node) {
+		tcp_authopt_key_del(sk, entry);
+	}
+
+	kfree_rcu(info, rcu);
+}
+
+/* Free everything and clear tcp_sock.authopt_info to NULL.
+ *
+ * The pointer is unpublished before the info and its keys are queued for
+ * kfree_rcu(). A reader that enters its RCU read-side section after the
+ * free was queued is not covered by that grace period, so it must not be
+ * able to fetch the old pointer any more.
+ *
+ * Does nothing when the socket has no authopt state.
+ */
+void tcp_authopt_clear(struct sock *sk)
+{
+	struct tcp_authopt_info *info;
+
+	info = rcu_dereference_protected(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+	if (!info)
+		return;
+
+	/* authopt_info is __rcu annotated, so store through the RCU helper;
+	 * writing NULL needs no publish barrier.
+	 */
+	RCU_INIT_POINTER(tcp_sk(sk)->authopt_info, NULL);
+	__tcp_authopt_info_free(sk, info);
+}
+
+/* Handle setsockopt(TCP_AUTHOPT_KEY): add, replace or delete one key.
+ *
+ * @sk:     socket the key belongs to; the RCU accessors below assume the
+ *          socket lock is held
+ * @optval: user buffer holding a struct tcp_authopt_key
+ * @optlen: size of @optval, at least sizeof(struct tcp_authopt_key)
+ *
+ * Returns 0 on success, or:
+ *   -EINVAL  short buffer, local_id of zero, TCP_AUTHOPT not set on the
+ *            socket yet, oversized key or unknown algorithm
+ *   -EFAULT  @optval could not be read
+ *   -ENOENT  TCP_AUTHOPT_KEY_DEL for a local_id that does not exist
+ *   -ENOMEM  the new key could not be allocated; an existing key with the
+ *            same local_id is left in place in that case
+ */
+int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_authopt_key opt;
+	struct tcp_authopt_info *info;
+	struct tcp_authopt_key_info *old_key, *new_key;
+	u8 traffic_key_len, maclen;
+
+	if (optlen < sizeof(opt))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&opt, optval, sizeof(opt)))
+		return -EFAULT;
+
+	if (opt.local_id == 0)
+		return -EINVAL;
+
+	/* must set authopt before setting keys */
+	info = rcu_dereference_protected(tcp_sk(sk)->authopt_info, lockdep_sock_is_held(sk));
+	if (!info)
+		return -EINVAL;
+
+	/* Deletion is keyed on local_id alone and is documented to ignore all
+	 * other fields, so it is handled before keylen/alg are validated.
+	 */
+	if (opt.flags & TCP_AUTHOPT_KEY_DEL) {
+		old_key = __tcp_authopt_key_info_lookup(sk, info, opt.local_id);
+		if (!old_key)
+			return -ENOENT;
+		tcp_authopt_key_del(sk, old_key);
+		return 0;
+	}
+
+	if (opt.keylen > TCP_AUTHOPT_MAXKEYLEN)
+		return -EINVAL;
+
+	/* check the algorithm */
+	if (opt.alg == TCP_AUTHOPT_ALG_HMAC_SHA_1_96) {
+		traffic_key_len = 20;
+		maclen = 12;
+	} else if (opt.alg == TCP_AUTHOPT_ALG_AES_128_CMAC_96) {
+		traffic_key_len = 16;
+		maclen = 12;
+	} else {
+		return -EINVAL;
+	}
+
+	/* Allocate the replacement before touching the list so that an
+	 * allocation failure cannot lose a key that is already installed.
+	 * Old and new key are both charged to the socket for a moment.
+	 */
+	new_key = sock_kmalloc(sk, sizeof(*new_key), GFP_KERNEL | __GFP_ZERO);
+	if (!new_key)
+		return -ENOMEM;
+	new_key->local_id = opt.local_id;
+	new_key->flags = opt.flags & TCP_AUTHOPT_KEY_EXCLUDE_OPTS;
+	new_key->send_id = opt.send_id;
+	new_key->recv_id = opt.recv_id;
+	new_key->alg = opt.alg;
+	new_key->keylen = opt.keylen;
+	memcpy(new_key->key, opt.key, opt.keylen);
+	new_key->maclen = maclen;
+	new_key->traffic_key_len = traffic_key_len;
+
+	/* If an old value exists for same local_id it is deleted */
+	old_key = __tcp_authopt_key_info_lookup(sk, info, opt.local_id);
+	if (old_key)
+		tcp_authopt_key_del(sk, old_key);
+
+	/* publish only once every field is filled in */
+	hlist_add_head_rcu(&new_key->node, &info->head);
+
+	return 0;
+}
I have looked up to here and will continue tomorrow.
BTW, this patch seems a bit large to me, so splitting it will make it
easier to read.