stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Hannes Frederic Sowa <hannes@stressinduktion.org>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Jesper Dangaard Brouer <jbrouer@redhat.com>,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [ 14/72] inet: limit length of fragment queue hash table bucket lists
Date: Tue, 26 Mar 2013 15:50:57 -0700	[thread overview]
Message-ID: <20130326224921.206950724@linuxfoundation.org> (raw)
In-Reply-To: <20130326224919.675227837@linuxfoundation.org>

3.4-stable review patch.  If anyone has any objections, please let me know.

------------------


From: Hannes Frederic Sowa <hannes@stressinduktion.org>

[ Upstream commit 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 ]

This patch introduces a constant limit of the fragment queue hash
table bucket list lengths. Currently the limit 128 is chosen somewhat
arbitrarily and just ensures that we can fill up the fragment cache with
empty packets up to the default ipfrag_high_thresh limits. It should
just protect from list iteration eating considerable amounts of cpu.

If we reach the maximum length in one hash bucket a warning is printed.
This is implemented on the caller side of inet_frag_find to distinguish
between the different users of inet_fragment.c.

I dropped the out of memory warning in the ipv4 fragment lookup path,
because we already get a warning by the slab allocator.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jesper Dangaard Brouer <jbrouer@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet_frag.h                 |    9 +++++++++
 net/ipv4/inet_fragment.c                |   20 +++++++++++++++++++-
 net/ipv4/ip_fragment.c                  |   11 ++++-------
 net/ipv6/netfilter/nf_conntrack_reasm.c |   11 ++++++-----
 net/ipv6/reassembly.c                   |    8 ++++++--
 5 files changed, 44 insertions(+), 15 deletions(-)

--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -33,6 +33,13 @@ struct inet_frag_queue {
 
 #define INETFRAGS_HASHSZ		64
 
+/* averaged:
+ * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
+ *	       rounded up (SKB_TRUESIZE(0) + sizeof(struct ipq or
+ *	       struct frag_queue))
+ */
+#define INETFRAGS_MAXDEPTH		128
+
 struct inet_frags {
 	struct hlist_head	hash[INETFRAGS_HASHSZ];
 	rwlock_t		lock;
@@ -64,6 +71,8 @@ int inet_frag_evictor(struct netns_frags
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock);
+void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+				   const char *prefix);
 
 static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
 {
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -21,6 +21,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
+#include <net/sock.h>
 #include <net/inet_frag.h>
 
 static void inet_frag_secret_rebuild(unsigned long dummy)
@@ -271,6 +272,7 @@ struct inet_frag_queue *inet_frag_find(s
 {
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
+	int depth = 0;
 
 	hlist_for_each_entry(q, n, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
@@ -278,9 +280,25 @@ struct inet_frag_queue *inet_frag_find(s
 			read_unlock(&f->lock);
 			return q;
 		}
+		depth++;
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key);
+	if (depth <= INETFRAGS_MAXDEPTH)
+		return inet_frag_create(nf, f, key);
+	else
+		return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+				   const char *prefix)
+{
+	static const char msg[] = "inet_frag_find: Fragment hash bucket"
+		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
+		". Dropping fragment.\n";
+
+	if (PTR_ERR(q) == -ENOBUFS)
+		LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
+}
+EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -295,14 +295,11 @@ static inline struct ipq *ip_find(struct
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
-	if (q == NULL)
-		goto out_nomem;
-
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
+	}
 	return container_of(q, struct ipq, q);
-
-out_nomem:
-	LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
-	return NULL;
 }
 
 /* Is the fragment too far ahead to be part of ipq? */
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -14,6 +14,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv6-nf: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -176,13 +178,12 @@ fq_find(__be32 id, u32 user, struct in6_
 
 	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	local_bh_enable();
-	if (q == NULL)
-		goto oom;
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
+	}
 
 	return container_of(q, struct nf_ct_frag6_queue, q);
-
-oom:
-	return NULL;
 }
 
 
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -26,6 +26,9 @@
  *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
  *				calculate ICV correctly.
  */
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -240,9 +243,10 @@ fq_find(struct net *net, __be32 id, cons
 	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
-	if (q == NULL)
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
-
+	}
 	return container_of(q, struct frag_queue, q);
 }
 



  parent reply	other threads:[~2013-03-26 22:50 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-26 22:50 [ 00/72] 3.4.38-stable review Greg Kroah-Hartman
2013-03-26 22:50 ` [ 01/72] Revert "USB: EHCI: dont check DMA values in QH overlays" Greg Kroah-Hartman
2013-03-26 22:50 ` [ 02/72] sunsu: Fix panic in case of nonexistent port at "console=ttySY" cmdline option Greg Kroah-Hartman
2013-03-26 22:50 ` [ 03/72] net/ipv4: Ensure that location of timestamp option is stored Greg Kroah-Hartman
2013-03-26 22:50 ` [ 04/72] netconsole: dont call __netpoll_cleanup() while atomic Greg Kroah-Hartman
2013-03-26 22:50 ` [ 05/72] bonding: dont call update_speed_duplex() under spinlocks Greg Kroah-Hartman
2013-03-26 22:50 ` [ 06/72] tg3: 5715 does not link up when autoneg off Greg Kroah-Hartman
2013-03-26 22:50 ` [ 07/72] sctp: Use correct sideffect command in duplicate cookie handling Greg Kroah-Hartman
2013-03-26 22:50 ` [ 08/72] sctp: dont break the loop while meeting the active_path so as to find the matched transport Greg Kroah-Hartman
2013-03-26 22:50 ` [ 09/72] ipv4: fix definition of FIB_TABLE_HASHSZ Greg Kroah-Hartman
2013-03-26 22:50 ` [ 10/72] tcp: fix skb_availroom() Greg Kroah-Hartman
2013-03-26 22:50 ` [ 11/72] rtnetlink: Mask the rta_type when range checking Greg Kroah-Hartman
2013-03-26 22:50 ` [ 12/72] vhost/net: fix heads usage of ubuf_info Greg Kroah-Hartman
2013-03-26 22:50 ` [ 13/72] bnx2x: fix occasional statistics off-by-4GB error Greg Kroah-Hartman
2013-03-26 22:50 ` Greg Kroah-Hartman [this message]
2013-03-26 22:50 ` [ 15/72] sfc: Do not attempt to flush queues if DMA is disabled Greg Kroah-Hartman
2013-03-26 22:50 ` [ 16/72] sfc: Convert firmware subtypes to native byte order in efx_mcdi_get_board_cfg() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 17/72] sfc: Add parentheses around use of bitfield macro arguments Greg Kroah-Hartman
2013-03-26 22:51 ` [ 18/72] sfc: Fix MCDI structure field lookup Greg Kroah-Hartman
2013-03-26 22:51 ` [ 19/72] sfc: Really disable flow control while flushing Greg Kroah-Hartman
2013-03-26 22:51 ` [ 20/72] sfc: Work-around flush timeout when flushes have completed Greg Kroah-Hartman
2013-03-26 22:51 ` [ 21/72] sfc: lock TX queues when calling netif_device_detach() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 22/72] sfc: Fix timekeeping in efx_mcdi_poll() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 23/72] sfc: Disable VF queues during register self-test Greg Kroah-Hartman
2013-03-26 22:51 ` [ 24/72] sfc: Avoid generating over-length MC_CMD_FLUSH_RX_QUEUES request Greg Kroah-Hartman
2013-03-26 22:51 ` [ 25/72] sfc: Correctly initialise reset_method in siena_test_chip() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 26/72] sfc: Properly sync RX DMA buffer when it is not the last in the page Greg Kroah-Hartman
2013-03-26 22:51 ` [ 27/72] sfc: Fix efx_rx_buf_offset() in the presence of swiotlb Greg Kroah-Hartman
2013-03-26 22:51 ` [ 28/72] sfc: Detach net device when stopping queues for reconfiguration Greg Kroah-Hartman
2013-03-26 22:51 ` [ 29/72] sfc: Disable soft interrupt handling during efx_device_detach_sync() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 30/72] sfc: Only use TX push if a single descriptor is to be written Greg Kroah-Hartman
2013-03-26 22:51 ` [ 31/72] ALSA: hda/cirrus - Fix the digital beep registration Greg Kroah-Hartman
2013-03-26 22:51 ` [ 32/72] ALSA: hda - Fix typo in checking IEC958 emphasis bit Greg Kroah-Hartman
2013-03-26 22:51 ` [ 33/72] ALSA: snd-usb: mixer: propagate errors up the call chain Greg Kroah-Hartman
2013-03-26 22:51 ` [ 34/72] ALSA: snd-usb: mixer: ignore -EINVAL in snd_usb_mixer_controls() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 35/72] drm/i915: restrict kernel address leak in debugfs Greg Kroah-Hartman
2013-03-26 22:51 ` [ 36/72] tracing: Fix race in snapshot swapping Greg Kroah-Hartman
2013-03-26 22:51 ` [ 37/72] tracing: Fix free of probe entry by calling call_rcu_sched() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 38/72] rtlwifi: rtl8192cu: Fix schedule while atomic bug splat Greg Kroah-Hartman
2013-03-26 22:51 ` [ 39/72] rtlwifi: rtl8192cu: Fix problem that prevents reassociation Greg Kroah-Hartman
2013-03-26 22:51 ` [ 40/72] mwifiex: fix potential out-of-boundary access to ibss rate table Greg Kroah-Hartman
2013-03-26 22:51 ` [ 41/72] drm/i915: bounds check execbuffer relocation count Greg Kroah-Hartman
2013-03-26 22:51 ` [ 42/72] KMS: fix EDID detailed timing vsync parsing Greg Kroah-Hartman
2013-03-26 22:51 ` [ 43/72] KMS: fix EDID detailed timing frame rate Greg Kroah-Hartman
2013-03-26 22:51 ` [ 44/72] mm/hugetlb: fix total hugetlbfs pages count when using memory overcommit accouting Greg Kroah-Hartman
2013-03-26 22:51 ` [ 45/72] target/iscsi: Fix mutual CHAP auth on big-endian arches Greg Kroah-Hartman
2013-03-26 22:51 ` [ 46/72] drm/radeon: add Richland pci ids Greg Kroah-Hartman
2013-03-26 22:51 ` [ 47/72] drm/radeon: add support for Richland APUs Greg Kroah-Hartman
2013-03-26 22:51 ` [ 48/72] drm/radeon/benchmark: make sure bo blit copy exists before using it Greg Kroah-Hartman
2013-03-26 22:51 ` [ 49/72] cifs: ignore everything in SPNEGO blob after mechTypes Greg Kroah-Hartman
2013-03-26 22:51 ` [ 50/72] jbd2: fix use after free in jbd2_journal_dirty_metadata() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 51/72] ext4: fix the wrong number of the allocated blocks in ext4_split_extent() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 52/72] usb-storage: add unusual_devs entry for Samsung YP-Z3 mp3 player Greg Kroah-Hartman
2013-03-26 22:51 ` [ 53/72] ext4: fix data=journal fast mount/umount hang Greg Kroah-Hartman
2013-03-26 22:51 ` [ 54/72] IPoIB: Fix send lockup due to missed TX completion Greg Kroah-Hartman
2013-03-26 22:51 ` [ 55/72] clockevents: Dont allow dummy broadcast timers Greg Kroah-Hartman
2013-03-26 22:51 ` [ 56/72] x86-64: Fix the failure case in copy_user_handle_tail() Greg Kroah-Hartman
2013-03-26 22:51 ` [ 57/72] USB: xhci - fix bit definitions for IMAN register Greg Kroah-Hartman
2013-03-26 22:51 ` [ 58/72] USB: xhci: correctly enable interrupts Greg Kroah-Hartman
2013-03-26 22:51 ` [ 59/72] USB: cdc-acm: fix device unregistration Greg Kroah-Hartman
2013-03-26 22:51 ` [ 60/72] USB: serial: fix interface refcounting Greg Kroah-Hartman
2013-03-26 22:51 ` [ 61/72] nohz: Make tick_nohz_irq_exit() irq safe Greg Kroah-Hartman
2013-03-26 22:51 ` [ 62/72] udf: Fix bitmap overflow on large filesystems with small block size Greg Kroah-Hartman
2013-03-26 22:51 ` [ 63/72] USB: garmin_gps: fix memory leak on disconnect Greg Kroah-Hartman
2013-03-26 22:51 ` [ 64/72] USB: io_ti: fix get_icount for two port adapters Greg Kroah-Hartman
2013-03-26 22:51 ` [ 65/72] key: Fix resource leak Greg Kroah-Hartman
2013-03-26 22:51 ` [ 66/72] isofs: avoid info leak on export Greg Kroah-Hartman
2013-03-26 22:51 ` [ 67/72] udf: " Greg Kroah-Hartman
2013-03-26 22:51 ` [ 68/72] tools: hv: Netlink source address validation allows DoS Greg Kroah-Hartman
2013-03-26 22:51 ` [ 69/72] i915: initialize CADL in opregion Greg Kroah-Hartman
2013-03-26 22:51 ` [ 70/72] exec: use -ELOOP for max recursion depth Greg Kroah-Hartman
2013-03-26 22:51 ` [ 71/72] rt2x00: error in configurations with mesh support disabled Greg Kroah-Hartman
2013-03-26 22:51 ` [ 72/72] asus-laptop: Do not call HWRS on init Greg Kroah-Hartman
2013-03-27 18:33 ` [ 00/72] 3.4.38-stable review Shuah Khan
2013-03-28 14:17 ` Satoru Takeuchi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130326224921.206950724@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=eric.dumazet@gmail.com \
    --cc=hannes@stressinduktion.org \
    --cc=jbrouer@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).