All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: davem@davemloft.net, gregkh@linuxfoundation.org
Cc: netdev@vger.kernel.org, stable@vger.kernel.org, edumazet@google.com
Subject: [PATCH v3 13/30] inet: frags: break the 2GB limit for frags storage
Date: Thu, 13 Sep 2018 07:58:45 -0700	[thread overview]
Message-ID: <20180913145902.17531-14-sthemmin@microsoft.com> (raw)
In-Reply-To: <20180913145902.17531-1-sthemmin@microsoft.com>

From: Eric Dumazet <edumazet@google.com>

Some users are willing to provision huge amounts of memory to be able
to perform reassembly reasonnably well under pressure.

Current memory tracking is using one atomic_t and integers.

Switch to atomic_long_t so that 64bit arches can use more than 2GB,
without any cost for 32bit arches.

Note that this patch avoids an overflow error, if high_thresh was set
to ~2GB, since this test in inet_frag_alloc() was never true :

if (... || frag_mem_limit(nf) > nf->high_thresh)

Tested:

$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh

<frag DDOS>

$ grep FRAG /proc/net/sockstat
FRAG: inuse 14705885 memory 16000002880

$ nstat -n ; sleep 1 ; nstat | grep Reas
IpReasmReqds                    3317150            0.0
IpReasmFails                    3317112            0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3e67f106f619dcfaf6f4e2039599bdb69848c714)
---
 Documentation/networking/ip-sysctl.txt  |  4 ++--
 include/net/inet_frag.h                 | 20 ++++++++++----------
 net/ieee802154/6lowpan/reassembly.c     | 10 +++++-----
 net/ipv4/ip_fragment.c                  | 10 +++++-----
 net/ipv4/proc.c                         |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++-----
 net/ipv6/proc.c                         |  2 +-
 net/ipv6/reassembly.c                   |  6 +++---
 8 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f23582a3c661..a054b5ad410a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -133,10 +133,10 @@ min_adv_mss - INTEGER
 
 IP Fragmentation:
 
-ipfrag_high_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
 	Maximum memory used to reassemble IP fragments.
 
-ipfrag_low_thresh - INTEGER
+ipfrag_low_thresh - LONG INTEGER
 	(Obsolete since linux-4.17)
 	Maximum memory used to reassemble IP fragments before the kernel
 	begins to remove incomplete fragment queues to free up resources.
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 95e353e3305b..a52e7273e7a5 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -8,11 +8,11 @@ struct netns_frags {
 	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
 
 	/* Keep atomic mem on separate cachelines in structs that include it */
-	atomic_t		mem ____cacheline_aligned_in_smp;
+	atomic_long_t		mem ____cacheline_aligned_in_smp;
 	/* sysctls */
+	long			high_thresh;
+	long			low_thresh;
 	int			timeout;
-	int			high_thresh;
-	int			low_thresh;
 	int			max_dist;
 	struct inet_frags	*f;
 };
@@ -102,7 +102,7 @@ void inet_frags_fini(struct inet_frags *);
 
 static inline int inet_frags_init_net(struct netns_frags *nf)
 {
-	atomic_set(&nf->mem, 0);
+	atomic_long_set(&nf->mem, 0);
 	return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
 }
 void inet_frags_exit_net(struct netns_frags *nf);
@@ -119,19 +119,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 
 /* Memory Tracking Functions. */
 
-static inline int frag_mem_limit(struct netns_frags *nf)
+static inline long frag_mem_limit(const struct netns_frags *nf)
 {
-	return atomic_read(&nf->mem);
+	return atomic_long_read(&nf->mem);
 }
 
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
 {
-	atomic_sub(i, &nf->mem);
+	atomic_long_sub(val, &nf->mem);
 }
 
-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
 {
-	atomic_add(i, &nf->mem);
+	atomic_long_add(val, &nf->mem);
 }
 
 /* RFC 3168 support :
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 1aec71a3f904..44f148a6bb57 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -411,23 +411,23 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
 }
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_high_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_low_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
 	},
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 38cbf56bb48e..dc3ed0ac4c58 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -683,23 +683,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 EXPORT_SYMBOL(ip_check_defrag);
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ipfrag_high_thresh",
 		.data		= &init_net.ipv4.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
 		.data		= &init_net.ipv4.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.ipv4.frags.high_thresh
 	},
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 01a337c3a36b..8fbd5633e544 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -71,7 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
-	seq_printf(seq,  "FRAG: inuse %u memory %u\n",
+	seq_printf(seq,  "FRAG: inuse %u memory %lu\n",
 		   atomic_read(&net->ipv4.frags.rhashtable.nelems),
 		   frag_mem_limit(&net->ipv4.frags));
 	return 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 54ce1d2a9a9d..6613f81e553a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
@@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_low_thresh",
 		.data		= &init_net.nf_frag.frags.low_thresh,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.nf_frag.frags.high_thresh
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
 		.data		= &init_net.nf_frag.frags.high_thresh,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.nf_frag.frags.low_thresh
 	},
 	{ }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 5704ec3d3178..dc04c024986c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 			sock_prot_inuse_get(net, &udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
-	seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+	seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
 		   atomic_read(&net->ipv6.frags.rhashtable.nelems),
 		   frag_mem_limit(&net->ipv6.frags));
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2a77fda5e3bc..905a8aee2671 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -552,15 +552,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ip6frag_high_thresh",
 		.data		= &init_net.ipv6.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ipv6.frags.low_thresh
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
 		.data		= &init_net.ipv6.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
-- 
2.18.0

  parent reply	other threads:[~2018-09-13 20:09 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-13 14:58 [PATCH v3 00/30] backport of IP fragmentation fixes Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 01/30] inet: frags: change inet_frags_init_net() return value Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 02/30] inet: frags: add a pointer to struct netns_frags Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 03/30] inet: frags: refactor ipfrag_init() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 04/30] inet: frags: Convert timers to use timer_setup() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 05/30] inet: frags: refactor ipv6_frag_init() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 06/30] inet: frags: refactor lowpan_net_frag_init() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 07/30] ipv6: export ip6 fragments sysctl to unprivileged users Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 08/30] rhashtable: add schedule points Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 09/30] inet: frags: use rhashtables for reassembly units Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 10/30] inet: frags: remove some helpers Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 11/30] inet: frags: get rif of inet_frag_evicting() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 12/30] inet: frags: remove inet_frag_maybe_warn_overflow() Stephen Hemminger
2018-09-13 14:58 ` Stephen Hemminger [this message]
2018-09-13 14:58 ` [PATCH v3 14/30] inet: frags: do not clone skb in ip_expire() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 15/30] ipv6: frags: rewrite ip6_expire_frag_queue() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 16/30] rhashtable: reorganize struct rhashtable layout Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 17/30] inet: frags: reorganize struct netns_frags Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 18/30] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 19/30] inet: frags: fix ip6frag_low_thresh boundary Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 20/30] ip: discard IPv4 datagrams with overlapping segments Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 21/30] net: speed up skb_rbtree_purge() Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 22/30] net: modify skb_rbtree_purge to return the truesize of all purged skbs Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 23/30] ipv6: defrag: drop non-last frags smaller than min mtu Stephen Hemminger
2019-01-10 19:30   ` Tom Herbert
2019-01-10 22:22     ` Florian Westphal
2019-01-11 10:57       ` Eric Dumazet
2019-01-11 12:21         ` Michal Kubecek
2019-01-11 12:27           ` Eric Dumazet
2019-01-11 12:52             ` Michal Kubecek
2019-01-11 13:07               ` Eric Dumazet
     [not found]                 ` <CAOSSMjUODMbBuW=GgwcEt6avKoyYD5A9CzdBtE6NR6dz4pnD6w@mail.gmail.com>
2019-01-11 14:09                   ` Eric Dumazet
2019-01-11 14:21                   ` Michal Kubecek
     [not found]                     ` <CAOSSMjVMVWxzkT5M2LHgf0+GPHdaWHV01a6mBqbGRVXOaQ04PQ@mail.gmail.com>
2019-01-11 17:09                       ` Peter Oskolkov
2019-01-11 18:10                         ` Michal Kubecek
2019-01-12  3:21                           ` Tom Herbert
2018-09-13 14:58 ` [PATCH v3 24/30] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 25/30] net: add rb_to_skb() and other rb tree helpers Stephen Hemminger
2018-09-13 14:58 ` [PATCH v3 26/30] net: sk_buff rbnode reorg Stephen Hemminger
2018-10-18 16:01   ` Christoph Paasch
2018-09-13 14:58 ` [PATCH v3 27/30] ipv4: frags: precedence bug in ip_expire() Stephen Hemminger
2018-09-13 14:59 ` [PATCH v3 28/30] ip: add helpers to process in-order fragments faster Stephen Hemminger
2018-09-13 14:59 ` [PATCH v3 29/30] ip: process in-order fragments efficiently Stephen Hemminger
2018-09-13 14:59 ` [PATCH v3 30/30] ip: frags: fix crash in ip_do_fragment() Stephen Hemminger
2018-09-17 12:47 ` [PATCH v3 00/30] backport of IP fragmentation fixes Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180913145902.17531-14-sthemmin@microsoft.com \
    --to=stephen@networkplumber.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=netdev@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.