stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Willem de Bruijn <willemb@google.com>,
	Peter Oskolkov <posk@google.com>,
	Eric Dumazet <edumazet@google.com>,
	Florian Westphal <fw@strlen.de>,
	"David S. Miller" <davem@davemloft.net>,
	Mao Wenan <maowenan@huawei.com>
Subject: [PATCH 4.4 61/65] ip: add helpers to process in-order fragments faster.
Date: Mon,  4 Feb 2019 11:36:54 +0100	[thread overview]
Message-ID: <20190204103620.364941364@linuxfoundation.org> (raw)
In-Reply-To: <20190204103610.583715954@linuxfoundation.org>

4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Peter Oskolkov <posk@google.com>

commit 353c9cb360874e737fb000545f783df756c06f9a upstream.

This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.

The new logic (fully implemented in the second patch) is as follows:

* Nodes in the rb-tree will now contain not single fragments, but lists
  of consecutive fragments ("runs").

* At each point in time, the current "active" run at the tail is
  maintained/tracked. Fragments that arrive in-order, adjacent
  to the previous tail fragment, are added to this tail run without
  triggering the re-balancing of the rb-tree.

* If a fragment arrives out of order with the offset _before_ the tail run,
  it is inserted into the rb-tree as a single fragment.

* If a fragment arrives after the current tail fragment (with a gap),
  it starts a new "tail" run, as is inserted into the rb-tree
  at the end as the head of the new run.

skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).

Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet_frag.h |    4 ++
 net/ipv4/ip_fragment.c  |   74 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 74 insertions(+), 4 deletions(-)

--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -48,6 +48,7 @@ struct inet_frag_queue {
 	struct sk_buff		*fragments;  /* Used in IPv6. */
 	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
+	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
 	int			len;
 	int			meat;
@@ -118,6 +119,9 @@ struct inet_frag_queue *inet_frag_find(s
 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix);
 
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
 static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
 {
 	if (atomic_dec_and_test(&q->refcnt))
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -58,13 +58,57 @@
 static int sysctl_ipfrag_max_dist __read_mostly = 64;
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-struct ipfrag_skb_cb
-{
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
 	struct inet_skb_parm	h;
-	int			offset;
+	int                     offset;
+	struct sk_buff		*next_frag;
+	int			frag_run_len;
 };
 
-#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))
+#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+	FRAG_CB(skb)->next_frag = NULL;
+	FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+					struct sk_buff *skb)
+{
+	RB_CLEAR_NODE(&skb->rbnode);
+	FRAG_CB(skb)->next_frag = NULL;
+
+	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+	FRAG_CB(q->fragments_tail)->next_frag = skb;
+	q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+	if (q->last_run_head)
+		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+			     &q->last_run_head->rbnode.rb_right);
+	else
+		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+	rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+	ip4_frag_init_run(skb);
+	q->fragments_tail = skb;
+	q->last_run_head = skb;
+}
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
@@ -721,6 +765,28 @@ struct sk_buff *ip_check_defrag(struct n
 }
 EXPORT_SYMBOL(ip_check_defrag);
 
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
+
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		while (skb) {
+			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+			sum += skb->truesize;
+			kfree_skb(skb);
+			skb = next;
+		}
+	}
+	return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 #ifdef CONFIG_SYSCTL
 static int zero;
 



  parent reply	other threads:[~2019-02-04 11:07 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 10:35 [PATCH 4.4 00/65] 4.4.173-stable review Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 01/65] net: Fix usage of pskb_trim_rcsum Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 02/65] openvswitch: Avoid OOB read when parsing flow nlattrs Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 03/65] net: ipv4: Fix memory leak in network namespace dismantle Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 04/65] net_sched: refetch skb protocol for each filter Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 05/65] net: bridge: Fix ethernet header pointer before check skb forwardable Greg Kroah-Hartman
2019-02-04 10:35 ` [PATCH 4.4 06/65] mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS Greg Kroah-Hartman
2019-02-04 11:05   ` Georgi Djakov
2019-02-04 11:13     ` Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 07/65] USB: serial: simple: add Motorola Tetra TPG2200 device id Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 08/65] USB: serial: pl2303: add new PID to support PL2303TB Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 09/65] ASoC: atom: fix a missing check of snd_pcm_lib_malloc_pages Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 10/65] ARC: perf: map generic branches to correct hardware condition Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 11/65] s390/early: improve machine detection Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 12/65] s390/smp: fix CPU hotplug deadlock with CPU rescan Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 13/65] char/mwave: fix potential Spectre v1 vulnerability Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 14/65] staging: rtl8188eu: Add device code for D-Link DWA-121 rev B1 Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 15/65] tty: Handle problem if line discipline does not have receive_buf Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 16/65] tty/n_hdlc: fix __might_sleep warning Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 17/65] CIFS: Fix possible hang during async MTU reads and writes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 18/65] Input: xpad - add support for SteelSeries Stratus Duo Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 19/65] KVM: x86: Fix single-step debugging Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 20/65] x86/kaslr: Fix incorrect i8254 outb() parameters Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 21/65] can: dev: __can_get_echo_skb(): fix bogous check for non-existing skb by removing it Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 22/65] can: bcm: check timer values before ktime conversion Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 23/65] vt: invoke notifier on screen size change Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 24/65] perf unwind: Unwind with libdw doesnt take symfs into account Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 25/65] perf unwind: Take pgoff into account when reporting elf to libdwfl Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 26/65] irqchip/gic-v3-its: Align PCI Multi-MSI allocation on their size Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 27/65] arm64: mm: remove page_mapping check in __sync_icache_dcache Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 28/65] f2fs: read page index before freeing Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 29/65] Revert "loop: Fix double mutex_unlock(&loop_ctl_mutex) in loop_control_ioctl()" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 30/65] Revert "loop: Get rid of loop_index_mutex" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 31/65] Revert "loop: Fold __loop_release into loop_release" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 32/65] s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 33/65] fs: add the fsnotify call to vfs_iter_write Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 34/65] ipv6: Consider sk_bound_dev_if when binding a socket to an address Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 35/65] l2tp: copy 4 more bytes to linear part if necessary Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 36/65] net/mlx4_core: Add masking for a few queries on HCA caps Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 37/65] netrom: switch to sock timer API Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 38/65] net/rose: fix NULL ax25_cb kernel panic Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 39/65] ucc_geth: Reset BQL queue when stopping device Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 40/65] l2tp: remove l2specific_len dependency in l2tp_core Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 41/65] l2tp: fix reading optional fields of L2TPv3 Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 42/65] CIFS: Do not count -ENODATA as failure for query directory Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 43/65] fs/dcache: Fix incorrect nr_dentry_unused accounting in shrink_dcache_sb() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 44/65] ARM: cns3xxx: Fix writing to wrong PCI config registers after alignment Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 45/65] arm64: hyp-stub: Forbid kprobing of the hyp-stub Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 46/65] gfs2: Revert "Fix loop in gfs2_rbm_find" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 47/65] platform/x86: asus-nb-wmi: Map 0x35 to KEY_SCREENLOCK Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 48/65] platform/x86: asus-nb-wmi: Drop mapping of 0x33 and 0x34 scan codes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 49/65] mmc: sdhci-iproc: handle mmc_of_parse() errors during probe Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 50/65] kernel/exit.c: release ptraced tasks before zap_pid_ns_processes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 51/65] mm, oom: fix use-after-free in oom_kill_process Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 52/65] cifs: Always resolve hostname before reconnecting Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 53/65] drivers: core: Remove glue dirs from sysfs earlier Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 54/65] mm: migrate: dont rely on __PageMovable() of newpage after unlocking it Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 55/65] fs: dont scan the inode cache before SB_BORN is set Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 56/65] ip: discard IPv4 datagrams with overlapping segments Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 57/65] net: modify skb_rbtree_purge to return the truesize of all purged skbs Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 58/65] inet: frags: get rif of inet_frag_evicting() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 59/65] ip: use rb trees for IP frag queue Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 60/65] ipv6: defrag: drop non-last frags smaller than min mtu Greg Kroah-Hartman
2019-02-04 10:36 ` Greg Kroah-Hartman [this message]
2019-02-04 10:36 ` [PATCH 4.4 62/65] ip: process in-order fragments efficiently Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 63/65] net: ipv4: do not handle duplicate fragments as overlapping Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 64/65] ip: frags: fix crash in ip_do_fragment() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.4 65/65] ipv4: frags: precedence bug in ip_expire() Greg Kroah-Hartman
2019-02-04 22:48 ` [PATCH 4.4 00/65] 4.4.173-stable review Guenter Roeck
2019-02-05 14:42   ` Greg Kroah-Hartman
2019-02-05 15:12     ` Guenter Roeck
2019-02-05  6:24 ` Naresh Kamboju
2019-02-05 10:17 ` Jon Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204103620.364941364@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maowenan@huawei.com \
    --cc=posk@google.com \
    --cc=stable@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).