From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Willem de Bruijn <willemb@google.com>,
Peter Oskolkov <posk@google.com>,
Eric Dumazet <edumazet@google.com>,
Florian Westphal <fw@strlen.de>,
"David S. Miller" <davem@davemloft.net>,
Mao Wenan <maowenan@huawei.com>,
Ben Hutchings <ben.hutchings@codethink.co.uk>
Subject: [PATCH 4.4 27/34] ip: add helpers to process in-order fragments faster.
Date: Thu, 7 Feb 2019 12:42:09 +0100 [thread overview]
Message-ID: <20190207113026.617948238@linuxfoundation.org> (raw)
In-Reply-To: <20190207113025.552605181@linuxfoundation.org>
4.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Peter Oskolkov <posk@google.com>
commit 353c9cb360874e737fb000545f783df756c06f9a upstream.
This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.
The new logic (fully implemented in the second patch) is as follows:
* Nodes in the rb-tree will now contain not single fragments, but lists
of consecutive fragments ("runs").
* At each point in time, the current "active" run at the tail is
maintained/tracked. Fragments that arrive in-order, adjacent
to the previous tail fragment, are added to this tail run without
triggering the re-balancing of the rb-tree.
* If a fragment arrives out of order with the offset _before_ the tail run,
it is inserted into the rb-tree as a single fragment.
* If a fragment arrives after the current tail fragment (with a gap),
it starts a new "tail" run, as is inserted into the rb-tree
at the end as the head of the new run.
skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/inet_frag.h | 6 +++
net/ipv4/ip_fragment.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -55,7 +55,9 @@ struct frag_v6_compare_key {
* @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
@@ -76,6 +78,7 @@ struct inet_frag_queue {
struct sk_buff *fragments; /* Used in IPv6. */
struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
@@ -112,6 +115,9 @@ void inet_frag_kill(struct inet_frag_que
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -58,6 +58,57 @@
static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ struct inet_skb_parm h;
+ struct sk_buff *next_frag;
+ int frag_run_len;
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ ip4_frag_init_run(skb);
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
+
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -658,6 +709,28 @@ struct sk_buff *ip_check_defrag(struct n
}
EXPORT_SYMBOL(ip_check_defrag);
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
#ifdef CONFIG_SYSCTL
static int dist_min;
next prev parent reply other threads:[~2019-02-07 11:43 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-07 11:41 [PATCH 4.4 00/34] 4.4.174-stable review Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 01/34] inet: frags: change inet_frags_init_net() return value Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 02/34] inet: frags: add a pointer to struct netns_frags Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 03/34] inet: frags: refactor ipfrag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 04/34] inet: frags: refactor ipv6_frag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 05/34] inet: frags: refactor lowpan_net_frag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 06/34] rhashtable: add rhashtable_lookup_get_insert_key() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 07/34] rhashtable: Add rhashtable_lookup() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 08/34] rhashtable: add schedule points Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 09/34] inet: frags: use rhashtables for reassembly units Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 10/34] net: ieee802154: 6lowpan: fix frag reassembly Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 11/34] ipfrag: really prevent allocation on netns exit Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 12/34] inet: frags: remove some helpers Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 13/34] inet: frags: get rif of inet_frag_evicting() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 14/34] inet: frags: remove inet_frag_maybe_warn_overflow() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 15/34] inet: frags: break the 2GB limit for frags storage Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 16/34] inet: frags: do not clone skb in ip_expire() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 17/34] ipv6: frags: rewrite ip6_expire_frag_queue() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 18/34] rhashtable: reorganize struct rhashtable layout Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 19/34] inet: frags: reorganize struct netns_frags Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 20/34] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 21/34] inet: frags: fix ip6frag_low_thresh boundary Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 22/34] ip: discard IPv4 datagrams with overlapping segments Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 23/34] net: modify skb_rbtree_purge to return the truesize of all purged skbs Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 24/34] ipv6: defrag: drop non-last frags smaller than min mtu Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 25/34] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 26/34] ip: use rb trees for IP frag queue Greg Kroah-Hartman
2019-02-07 11:42 ` Greg Kroah-Hartman [this message]
2019-02-07 11:42 ` [PATCH 4.4 28/34] ip: process in-order fragments efficiently Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 29/34] ip: frags: fix crash in ip_do_fragment() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 30/34] ipv4: frags: precedence bug in ip_expire() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 31/34] inet: frags: better deal with smp races Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 32/34] net: fix pskb_trim_rcsum_slow() with odd trim offset Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 33/34] net: ipv4: do not handle duplicate fragments as overlapping Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 34/34] rcu: Force boolean subscript for expedited stall warnings Greg Kroah-Hartman
2019-02-07 14:20 ` [PATCH 4.4 00/34] 4.4.174-stable review Guenter Roeck
2019-02-07 14:41 ` Guenter Roeck
2019-02-07 15:46 ` Greg Kroah-Hartman
2019-02-07 18:57 ` Guenter Roeck
2019-02-07 15:47 ` Greg Kroah-Hartman
2019-02-07 19:16 ` Guenter Roeck
2019-02-07 18:18 ` kernelci.org bot
2019-02-08 6:13 ` Naresh Kamboju
2019-02-08 6:46 ` Greg Kroah-Hartman
2019-02-08 10:03 ` Jon Hunter
2019-02-08 10:03 ` Jon Hunter
2019-02-08 10:28 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190207113026.617948238@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=ben.hutchings@codethink.co.uk \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=maowenan@huawei.com \
--cc=posk@google.com \
--cc=stable@vger.kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.