[PATCH 4.4 068/312] GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU

From: Greg Kroah-Hartman
Date: Fri May 08 2020 - 09:21:15 EST


From: Alexander Duyck <aduyck@xxxxxxxxxxxx>

commit a0ca153f98db8cf25298565a09e11fe9d82846ad upstream.

This patch fixes an issue I found in which we were dropping frames if we
had enabled checksums on GRE headers that were encapsulated by either FOU
or GUE. Without this patch I was barely able to get 1 Gb/s of throughput.
With this patch applied I am now at least getting around 6 Gb/s.

The issue is due to the fact that with FOU or GUE applied we do not provide
a transport offset pointing to the GRE header, nor do we offload it in
software as the GRE header is completely skipped by GSO and treated like a
VXLAN or GENEVE type header. As such we need to prevent the stack from
generating segmentation-offloaded frames of this kind (GRE with checksum
enabled, encapsulated in FOU or GUE) and also prevent GRE from generating
them via any interface we create.

Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE")
Signed-off-by: Alexander Duyck <aduyck@xxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
include/linux/netdevice.h | 5 ++++-
net/core/dev.c | 1 +
net/ipv4/fou.c | 6 ++++++
net/ipv4/gre_offload.c | 8 ++++++++
net/ipv4/ip_gre.c | 13 ++++++++++---
5 files changed, 29 insertions(+), 4 deletions(-)

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2013,7 +2013,10 @@ struct napi_gro_cb {
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;

- /* 3 bit hole */
+ /* Used in GRE, set in fou/gue_gro_receive */
+ u8 is_fou:1;
+
+ /* 2 bit hole */

/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4320,6 +4320,7 @@ static enum gro_result dev_gro_receive(s
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
NAPI_GRO_CB(skb)->recursion_counter = 0;
+ NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;

/* Setup for GRO checksum validation */
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -205,6 +205,9 @@ static struct sk_buff **fou_gro_receive(
*/
NAPI_GRO_CB(skb)->encap_mark = 0;

+ /* Flag this frame as already having an outer encap header */
+ NAPI_GRO_CB(skb)->is_fou = 1;
+
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
@@ -372,6 +375,9 @@ static struct sk_buff **gue_gro_receive(
*/
NAPI_GRO_CB(skb)->encap_mark = 0;

+ /* Flag this frame as already having an outer encap header */
+ NAPI_GRO_CB(skb)->is_fou = 1;
+
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[guehdr->proto_ctype]);
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -151,6 +151,14 @@ static struct sk_buff **gre_gro_receive(
if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0)
goto out;

+ /* We can only support GRE_CSUM if we can track the location of
+ * the GRE header. In the case of FOU/GUE we cannot because the
+ * outer UDP header displaces the GRE header leaving us in a state
+ * of limbo.
+ */
+ if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou)
+ goto out;
+
type = greh->protocol;

rcu_read_lock();
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -851,9 +851,16 @@ static void __gre_tunnel_init(struct net
dev->hw_features |= GRE_FEATURES;

if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported. */
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
/* Can use a lockless transmit, unless we generate
* output sequences
*/