[PATCH] net: netfilter: Add RFC-7597 Section 5.1 PSID support

From: Cole Dishington
Date: Mon Jun 28 2021 - 20:48:39 EST


Add support for masquerading into a restricted subset of ports, as
defined by the PSID values from RFC 7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allow multiple
devices to share an IPv4 address by splitting the L4 port / id space
into ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Anthony Lineham <anthony.lineham@xxxxxxxxxxxxxxxxxxx>
Co-developed-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Scott Parlane <scott.parlane@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Blair Steven <blair.steven@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Cole Dishington <Cole.Dishington@xxxxxxxxxxxxxxxxxxx>
---

Notes:
Thanks for your time reviewing. I have also submitted a patch to netfilter iptables for these changes.

Comments:
Selecting the ports for PSID needs to be done in nf_nat_core because a PSID does not map to a single contiguous range. E.g. offset=1024, PSID=0, psid_length=8 generates the ranges 1024-1027, 2048-2051, ..., 63488-63491, ... (example taken from RFC 7597, Appendix B.2).
This is why it is not enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the upper/lower boundaries.

Changes in v2:
- Moved cached range2 from struct nf_conn to nf_conn_nat.
- Moved psid fields out of union nf_conntrack_man_proto. Now using range2 fields src, dst, and base to store psid parameters.
- Re-added the previously removed error check for nf_ct_expect_related().
- Added new version to masquerade iptables extension to use the range2 base field.

include/net/netfilter/nf_nat.h | 1 +
include/uapi/linux/netfilter/nf_nat.h | 3 +-
net/netfilter/nf_nat_core.c | 69 +++++++++++++++++++++++----
net/netfilter/nf_nat_ftp.c | 29 ++++++-----
net/netfilter/nf_nat_helper.c | 16 +++++--
net/netfilter/nf_nat_masquerade.c | 13 +++--
net/netfilter/xt_MASQUERADE.c | 44 +++++++++++++++--
7 files changed, 140 insertions(+), 35 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 987111ae5240..67cc033f76bb 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,6 +32,7 @@ struct nf_conn_nat {
union nf_conntrack_nat_help help;
#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE)
int masq_index;
+ struct nf_nat_range2 *range;
#endif
};

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
#define NF_NAT_RANGE_NETMAP (1 << 6)
+#define NF_NAT_RANGE_PSID (1 << 7)

#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
- NF_NAT_RANGE_NETMAP)
+ NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)

struct nf_nat_ipv4_range {
unsigned int flags;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7307bb28ece2 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,32 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
+ const union nf_conntrack_man_proto *max,
+ const union nf_conntrack_man_proto *base,
+ bool is_psid)
{
__be16 port;
+ u16 offset_mask = 0;
+ u16 psid_mask = 0;
+ u16 psid = 0;
+
+ /* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+ if (is_psid) {
+ u16 j = ntohs(max->all) - ntohs(min->all) + 1;
+ u16 a = (1 << 16) / ntohs(base->all);
+
+ offset_mask = (a - 1) * ntohs(base->all);
+ psid_mask = ((ntohs(base->all) / j) << 1) - 1;
+ psid = ntohs(min->all) & psid_mask;
+ }

switch (tuple->dst.protonum) {
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
+ if (is_psid) {
+ return ((ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+ ((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+ }
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
case IPPROTO_GRE: /* all fall though */
@@ -215,6 +234,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
else
port = tuple->dst.u.all;

+ if (is_psid) {
+ return ((ntohs(port) & offset_mask) != 0) &&
+ ((ntohs(port) & psid_mask) == psid);
+ }
return ntohs(port) >= ntohs(min->all) &&
ntohs(port) <= ntohs(max->all);
default:
@@ -239,7 +262,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
return 1;

return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
- &range->min_proto, &range->max_proto);
+ &range->min_proto, &range->max_proto, &range->base_proto,
+ range->flags & NF_NAT_RANGE_PSID);
}

static inline int
@@ -360,10 +384,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
*
* Per-protocol part of tuple is initialized to the incoming packet.
*/
-static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
{
unsigned int range_size, min, max, i, attempts;
__be16 *keyptr;
@@ -420,6 +444,25 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
return;
}

+ if (range->flags & NF_NAT_RANGE_PSID) {
+ /* PSID defines a group of port ranges, per PSID. PSID
+ * is already contained in min and max.
+ */
+ unsigned int min_to_max, base;
+
+ min = ntohs(range->min_proto.all);
+ max = ntohs(range->max_proto.all);
+ base = ntohs(range->base_proto.all);
+ min_to_max = max - min;
+ for (; max <= (1 << 16) - 1; min += base, max = min + min_to_max) {
+ for (off = 0; off <= min_to_max; off++) {
+ *keyptr = htons(min + off);
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ }
+ }
+
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
@@ -529,11 +572,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,

/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
- if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+ /* When PSID mode is set, always check whether
+ * the source port is already in range.
+ */
+ if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED ||
+ (range->flags & NF_NAT_RANGE_PSID &&
+ !in_range(orig_tuple, range))) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
- &range->min_proto,
- &range->max_proto) &&
+ &range->min_proto,
+ &range->max_proto,
+ &range->base_proto,
+ range->flags &
+ NF_NAT_RANGE_PSID) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
return;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..f65163278db0 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -17,6 +17,10 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_ftp.h>
+void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct);

#define NAT_HELPER_NAME "ftp"

@@ -72,8 +76,13 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
u_int16_t port;
int dir = CTINFO2DIR(ctinfo);
struct nf_conn *ct = exp->master;
+ struct nf_conn_nat *nat = nfct_nat(ct);
char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
unsigned int buflen;
+ int ret;
+
+ if (WARN_ON_ONCE(!nat))
+ return NF_DROP;

pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);

@@ -86,18 +95,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
* this one. */
exp->expectfn = nf_nat_follow_master;

- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- port = 0;
- break;
- }
+ /* Find a port that matches the MASQ rule. */
+ nf_nat_l4proto_unique_tuple(&exp->tuple, nat->range,
+ dir ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST,
+ ct);
+ ret = nf_ct_expect_related(exp, 0);
+ port = ntohs(exp->tuple.dst.u.tcp.port);
+ if (ret != 0 && ret != -EBUSY) {
+ port = 0;
}

if (port == 0) {
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..2d105e4eb8f8 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -179,15 +179,23 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct nf_conn_nat *nat = NULL;
struct nf_nat_range2 range;

/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);

- /* Change src to where master sends to */
- range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_addr = range.max_addr
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ if (exp->master && !exp->dir) {
+ nat = nfct_nat(exp->master);
+ if (nat)
+ range = *nat->range;
+ }
+ if (!nat) {
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ }
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);

/* For DST manip, map port here to where it's expected. */
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..d83cd3d8ad3f 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -45,10 +45,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
return NF_DROP;
}

- nat = nf_ct_nat_ext_add(ct);
- if (nat)
- nat->masq_index = out->ifindex;
-
/* Transfer from original range. */
memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
@@ -57,6 +53,15 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
newrange.max_addr.ip = newsrc;
newrange.min_proto = range->min_proto;
newrange.max_proto = range->max_proto;
+ newrange.base_proto = range->base_proto;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat) {
+ nat->masq_index = out->ifindex;
+ if (!nat->range)
+ nat->range = kmalloc(sizeof(*nat->range), 0);
+ memcpy(nat->range, &newrange, sizeof(*nat->range));
+ }

/* Hand modified range to generic setup. */
return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@xxxxxxxxxxxxx>");
MODULE_DESCRIPTION("Xtables: automatic-address SNAT");

/* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
{
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;

@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
return nf_ct_netns_get(par->net, par->family);
}

+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ return nf_ct_netns_get(par->net, par->family);
+}
+
static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
struct nf_nat_range2 range;
const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
xt_out(par));
}

+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range2 *range = par->targinfo;
+
+ return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+ xt_out(par));
+}
+
static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
{
nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
{
#if IS_ENABLED(CONFIG_IPV6)
.name = "MASQUERADE",
+ .revision = 0,
.family = NFPROTO_IPV6,
.target = masquerade_tg6,
.targetsize = sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
}, {
#endif
.name = "MASQUERADE",
+ .revision = 0,
.family = NFPROTO_IPV4,
- .target = masquerade_tg,
+ .target = masquerade_tg_v0,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = 1 << NF_INET_POST_ROUTING,
- .checkentry = masquerade_tg_check,
+ .checkentry = masquerade_tg_check_v0,
.destroy = masquerade_tg_destroy,
.me = THIS_MODULE,
- }
+ },
+ {
+ .name = "MASQUERADE",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg_v1,
+ .targetsize = sizeof(struct nf_nat_range2),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check_v1,
+ .destroy = masquerade_tg_destroy,
+ .me = THIS_MODULE,
+ },
};

static int __init masquerade_tg_init(void)
--
2.32.0