[net-next v5 1/2] seg6: inherit DSCP of inner IPv4 packets

From: Ahmed Abdelsalam
Date: Tue Aug 25 2020 - 08:18:33 EST


This patch allows SRv6 encapsulation to inherit the DSCP value of
the inner IPv4 packet.

This allows packets to be forwarded across the SRv6 fabric based on their
original traffic class.

The option is controlled through a sysctl (seg6_inherit_inner_ipv4_dscp).
The sysctl has to be set to 1 to enable this feature.

Signed-off-by: Ahmed Abdelsalam <ahabdels@xxxxxxxxx>
---
include/net/netns/ipv6.h | 1 +
net/ipv6/seg6_iptunnel.c | 37 ++++++++++++++++++++-----------------
net/ipv6/sysctl_net_ipv6.c | 9 +++++++++
3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5ec054473d81..6ed73951f479 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -50,6 +50,7 @@ struct netns_sysctl_ipv6 {
int max_dst_opts_len;
int max_hbh_opts_len;
int seg6_flowlabel;
+ bool seg6_inherit_inner_ipv4_dscp;
bool skip_notify_on_dev_down;
};

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..9cc168462e11 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -104,8 +104,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}

/* Compute flowlabel for outer IPv6 header */
-static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
- struct ipv6hdr *inner_hdr)
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb)
{
int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
__be32 flowlabel = 0;
@@ -116,7 +115,7 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
- flowlabel = ip6_flowlabel(inner_hdr);
+ flowlabel = ip6_flowlabel(ipv6_hdr(skb));
}
return flowlabel;
}
@@ -129,6 +128,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
+ u8 tos = 0, hop_limit;
__be32 flowlabel;

hdrlen = (osrh->hdrlen + 1) << 3;
@@ -138,30 +138,33 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
if (unlikely(err))
return err;

- inner_hdr = ipv6_hdr(skb);
- flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
-
- skb_push(skb, tot_len);
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
- hdr = ipv6_hdr(skb);
-
/* inherit tc, flowlabel and hlim
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/

+ flowlabel = seg6_make_flowlabel(net, skb);
+ hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
if (skb->protocol == htons(ETH_P_IPV6)) {
- ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- flowlabel);
- hdr->hop_limit = inner_hdr->hop_limit;
+ inner_hdr = ipv6_hdr(skb);
+ hop_limit = inner_hdr->hop_limit;
+ tos = ip6_tclass(ip6_flowinfo(inner_hdr));
+ } else if (skb->protocol == htons(ETH_P_IP)) {
+ if (net->ipv6.sysctl.seg6_inherit_inner_ipv4_dscp)
+ tos = ip_hdr(skb)->tos;
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
} else {
- ip6_flow_hdr(hdr, 0, flowlabel);
- hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
-
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
}

+ skb_push(skb, tot_len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ ip6_flow_hdr(hdr, tos, flowlabel);
+ hdr->hop_limit = hop_limit;
hdr->nexthdr = NEXTHDR_ROUTING;

isrh = (void *)hdr + sizeof(*hdr);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index fac2135aa47b..4b2cf8764524 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -159,6 +159,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "seg6_inherit_inner_ipv4_dscp",
+ .data = &init_net.ipv6.sysctl.seg6_inherit_inner_ipv4_dscp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};

--
2.17.1