[PATCH 4.9 54/71] inet: frags: break the 2GB limit for frags storage

From: Greg Kroah-Hartman
Date: Tue Oct 16 2018 - 13:26:02 EST


4.9-stable review patch. If anyone has any objections, please let me know.

------------------

From: Eric Dumazet <edumazet@xxxxxxxxxx>

Some users are willing to provision huge amounts of memory to be able
to perform reassembly reasonnably well under pressure.

Current memory tracking is using one atomic_t and integers.

Switch to atomic_long_t so that 64bit arches can use more than 2GB,
without any cost for 32bit arches.

Note that this patch avoids an overflow error, if high_thresh was set
to ~2GB, since this test in inet_frag_alloc() was never true :

if (... || frag_mem_limit(nf) > nf->high_thresh)

Tested:

$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh

<frag DDOS>

$ grep FRAG /proc/net/sockstat
FRAG: inuse 14705885 memory 16000002880

$ nstat -n ; sleep 1 ; nstat | grep Reas
IpReasmReqds 3317150 0.0
IpReasmFails 3317112 0.0

Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
(cherry picked from commit 3e67f106f619dcfaf6f4e2039599bdb69848c714)
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
Documentation/networking/ip-sysctl.txt | 4 ++--
include/net/inet_frag.h | 20 ++++++++++----------
net/ieee802154/6lowpan/reassembly.c | 10 +++++-----
net/ipv4/ip_fragment.c | 10 +++++-----
net/ipv4/proc.c | 2 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++-----
net/ipv6/proc.c | 2 +-
net/ipv6/reassembly.c | 6 +++---
8 files changed, 32 insertions(+), 32 deletions(-)

--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -122,10 +122,10 @@ min_adv_mss - INTEGER

IP Fragmentation:

-ipfrag_high_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
Maximum memory used to reassemble IP fragments.

-ipfrag_low_thresh - INTEGER
+ipfrag_low_thresh - LONG INTEGER
(Obsolete since linux-4.17)
Maximum memory used to reassemble IP fragments before the kernel
begins to remove incomplete fragment queues to free up resources.
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -7,11 +7,11 @@ struct netns_frags {
struct rhashtable rhashtable ____cacheline_aligned_in_smp;

/* Keep atomic mem on separate cachelines in structs that include it */
- atomic_t mem ____cacheline_aligned_in_smp;
+ atomic_long_t mem ____cacheline_aligned_in_smp;
/* sysctls */
+ long high_thresh;
+ long low_thresh;
int timeout;
- int high_thresh;
- int low_thresh;
int max_dist;
struct inet_frags *f;
};
@@ -101,7 +101,7 @@ void inet_frags_fini(struct inet_frags *

static inline int inet_frags_init_net(struct netns_frags *nf)
{
- atomic_set(&nf->mem, 0);
+ atomic_long_set(&nf->mem, 0);
return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
}
void inet_frags_exit_net(struct netns_frags *nf);
@@ -118,19 +118,19 @@ static inline void inet_frag_put(struct

/* Memory Tracking Functions. */

-static inline int frag_mem_limit(struct netns_frags *nf)
+static inline long frag_mem_limit(const struct netns_frags *nf)
{
- return atomic_read(&nf->mem);
+ return atomic_long_read(&nf->mem);
}

-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_sub(i, &nf->mem);
+ atomic_long_sub(val, &nf->mem);
}

-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_add(i, &nf->mem);
+ atomic_long_add(val, &nf->mem);
}

/* RFC 3168 support :
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -410,23 +410,23 @@ err:
}

#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;

static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -681,23 +681,23 @@ struct sk_buff *ip_check_defrag(struct n
EXPORT_SYMBOL(ip_check_defrag);

#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;

static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv4.frags.high_thresh
},
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -73,7 +73,7 @@ static int sockstat_seq_show(struct seq_
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- seq_printf(seq, "FRAG: inuse %u memory %u\n",
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
atomic_read(&net->ipv4.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv4.frags));
return 0;
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;

#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;

static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysc
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
atomic_read(&net->ipv6.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv6.frags));
return 0;
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -546,15 +546,15 @@ static struct ctl_table ip6_frags_ns_ctl
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,