[PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support

From: Cole Dishington
Date: Wed Apr 21 2021 - 22:35:30 EST


Add support for masquerading into a restricted subset of ports, as
defined by the Port Set ID (PSID) values from RFC 7597 Section 5.1.
This is part of the support for MAP-E and Lightweight 4over6, which
allow multiple devices to share an IPv4 address by splitting the L4
port / ICMP id space into ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx>
Co-developed-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Blair Steven <blair.steven@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Cole Dishington <Cole.Dishington@xxxxxxxxxxxxxxxxxxx>
---
include/net/netfilter/nf_conntrack.h | 2 +
.../netfilter/nf_conntrack_tuple_common.h | 5 +
include/uapi/linux/netfilter/nf_nat.h | 3 +-
net/netfilter/nf_nat_core.c | 101 ++++++++++++++++--
net/netfilter/nf_nat_ftp.c | 23 ++--
net/netfilter/nf_nat_helper.c | 15 ++-
6 files changed, 120 insertions(+), 29 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 439379ca9ffa..d63d38aa7188 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -92,6 +92,8 @@ struct nf_conn {
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;

+ struct nf_nat_range2 *range;
+
#if defined(CONFIG_NF_CONNTRACK_MARK)
u_int32_t mark;
#endif
diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
index 64390fac6f7e..36d16d47c2b0 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
@@ -39,6 +39,11 @@ union nf_conntrack_man_proto {
struct {
__be16 key; /* GRE key is 32bit, PPtP only uses 16bit */
} gre;
+ struct {
+ unsigned char psid_length;
+ unsigned char offset;
+ __be16 psid;
+ } psid;
};

#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
#define NF_NAT_RANGE_NETMAP (1 << 6)
+#define NF_NAT_RANGE_PSID (1 << 7)

#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
- NF_NAT_RANGE_NETMAP)
+ NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)

struct nf_nat_ipv4_range {
unsigned int flags;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7c3c902290f..7730ce4ca9a9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -232,13 +232,33 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
+ const union nf_conntrack_man_proto *max,
+ bool is_psid)
{
__be16 port;

+ int m = 0;
+ u16 offset_mask = 0;
+ u16 psid_mask = 0;
+
+ /* PSID mode: match the PSID bits instead of a min/max port range */
+ if (is_psid) {
+ /* m = number of bits in each valid range */
+ m = 16 - min->psid.psid_length - min->psid.offset;
+ offset_mask = ((1 << min->psid.offset) - 1) <<
+ (16 - min->psid.offset);
+ psid_mask = ((1 << min->psid.psid_length) - 1) << m;
+ }
+
switch (tuple->dst.protonum) {
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
+ if (is_psid) {
+ return ((ntohs(tuple->src.u.icmp.id) & offset_mask) !=
+ 0) &&
+ ((ntohs(tuple->src.u.icmp.id) & psid_mask) ==
+ min->psid.psid);
+ }
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
case IPPROTO_GRE: /* all fall though */
@@ -252,6 +272,11 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
else
port = tuple->dst.u.all;

+ if (is_psid) {
+ return ((ntohs(port) & offset_mask) != 0) &&
+ (((ntohs(port) & psid_mask) >> m) ==
+ min->psid.psid);
+ }
return ntohs(port) >= ntohs(min->all) &&
ntohs(port) <= ntohs(max->all);
default:
@@ -274,9 +299,9 @@ static int in_range(const struct nf_conntrack_tuple *tuple,

if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
return 1;
-
return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
- &range->min_proto, &range->max_proto);
+ &range->min_proto, &range->max_proto,
+ range->flags & NF_NAT_RANGE_PSID);
}

static inline int
@@ -397,10 +422,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
*
* Per-protocol part of tuple is initialized to the incoming packet.
*/
-static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
{
unsigned int range_size, min, max, i, attempts;
__be16 *keyptr;
@@ -457,6 +482,50 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
return;
}

+ if (range->flags & NF_NAT_RANGE_PSID) {
+ /* Find the non-PSID parts of the port.
+ * To do this we look for an unused port that is
+ * comprised of [t_chunk|PSID|b_chunk]. The size of
+ * these pieces is defined by the psid_length and
+ * offset.
+ */
+ int m = 16 - range->min_proto.psid.psid_length -
+ range->min_proto.psid.offset;
+ int available;
+ int range_count = ((1 << range->min_proto.psid.offset) - 1);
+
+ /* Calculate the size of the bottom block */
+ range_size = (1 << m);
+
+ /* Calculate the total IDs to check */
+ available = range_size * range_count;
+ if (!available)
+ available = range_size;
+
+ off = ntohs(*keyptr);
+ for (i = 0;; ++off) {
+ int b_chunk = off % range_size;
+ int t_chunk = 0;
+
+ /* Move up to avoid the all-zeroes reserved chunk
+ * (if there is one).
+ */
+ if (range->min_proto.psid.offset > 0) {
+ t_chunk = (off >> m) % range_count;
+ ++t_chunk;
+ t_chunk <<= (m +
+ range->min_proto.psid.psid_length);
+ }
+
+ *keyptr = htons(t_chunk |
+ (range->min_proto.psid.psid << m)
+ | b_chunk);
+
+ if (++i >= available || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ }
+
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
@@ -566,11 +635,18 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,

/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
- if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+ /* Now that the PSID mode is present we always need to check
+ * to see if the source ports are in range.
+ */
+ if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
+ (range->flags & NF_NAT_RANGE_PSID &&
+ !in_range(orig_tuple, range))) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
- &range->min_proto,
- &range->max_proto) &&
+ &range->min_proto,
+ &range->max_proto,
+ range->flags &
+ NF_NAT_RANGE_PSID) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
return;
@@ -623,6 +699,11 @@ nf_nat_setup_info(struct nf_conn *ct,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
+ if (range) {
+ if (!ct->range)
+ ct->range = kmalloc(sizeof(*ct->range), 0);
+ memcpy(ct->range, range, sizeof(*ct->range));
+ }

if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
struct nf_conntrack_tuple reply;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..006b7e1836ff 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -17,6 +17,10 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_ftp.h>
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct);

#define NAT_HELPER_NAME "ftp"

@@ -86,19 +90,12 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
* this one. */
exp->expectfn = nf_nat_follow_master;

- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- port = 0;
- break;
- }
- }
+ /* Find a port that matches the MASQ rule. */
+ nf_nat_l4proto_unique_tuple(&exp->tuple, ct->range,
+ dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
+ ct);
+ port = ntohs(exp->tuple.dst.u.tcp.port);
+ nf_ct_expect_related(exp, 0);

if (port == 0) {
nf_ct_helper_log(skb, ct, "all ports in use");
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..090153475d4d 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -184,11 +184,16 @@ void nf_nat_follow_master(struct nf_conn *ct,
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);

- /* Change src to where master sends to */
- range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_addr = range.max_addr
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
- nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+ if (exp->master && exp->master->range && !exp->dir) {
+ range = *exp->master->range;
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+ } else {
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+ }

/* For DST manip, map port here to where it's expected. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
--
2.31.1