From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Peter Oskolkov <posk@google.com>,
Eric Dumazet <edumazet@google.com>,
Florian Westphal <fw@strlen.de>,
Tom Herbert <tom@herbertland.com>,
"David S. Miller" <davem@davemloft.net>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.19 25/96] net: IP defrag: encapsulate rbtree defrag code into callable functions
Date: Wed, 24 Apr 2019 19:09:30 +0200 [thread overview]
Message-ID: <20190424170921.542276375@linuxfoundation.org> (raw)
In-Reply-To: <20190424170919.829037226@linuxfoundation.org>
[ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ]
This is a refactoring patch: without changing runtime behavior,
it moves rbtree-related code from IPv4-specific files/functions
into .h/.c defrag files shared with IPv6 defragmentation code.
v2: make handling of overlapping packets match upstream.
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
include/net/inet_frag.h | 16 ++-
net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++
net/ipv4/ip_fragment.c | 302 +++++----------------------------------
3 files changed, 342 insertions(+), 269 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 1662cbc0b46b..b02bf737d019 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -77,8 +77,8 @@ struct inet_frag_queue {
struct timer_list timer;
spinlock_t lock;
refcount_t refcnt;
- struct sk_buff *fragments; /* Used in IPv6. */
- struct rb_root rb_fragments; /* Used in IPv4. */
+ struct sk_buff *fragments; /* used in 6lopwpan IPv6. */
+ struct rb_root rb_fragments; /* Used in IPv4/IPv6. */
struct sk_buff *fragments_tail;
struct sk_buff *last_run_head;
ktime_t stamp;
@@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
extern const u8 ip_frag_ecn_table[16];
+/* Return values of inet_frag_queue_insert() */
+#define IPFRAG_OK 0
+#define IPFRAG_DUP 1
+#define IPFRAG_OVERLAP 2
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ int offset, int end);
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent);
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data);
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
+
#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 760a9e52e02b..9f69411251d0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,62 @@
#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ union {
+ struct inet_skb_parm h4;
+ struct inet6_skb_parm h6;
+ };
+ struct sk_buff *next_frag;
+ int frag_run_len;
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ fragcb_clear(skb);
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+ fragcb_clear(skb);
+
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
@@ -123,6 +179,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
kmem_cache_free(f->frags_cachep, q);
}
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
return fq;
}
EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ int offset, int end)
+{
+ struct sk_buff *last = q->fragments_tail;
+
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+ * Duplicates, however, should be ignored (i.e. skb dropped, but the
+ * queue/fragments kept for later reassembly).
+ */
+ if (!last)
+ fragrun_create(q, skb); /* First fragment. */
+ else if (last->ip_defrag_offset + last->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+ if (offset < last->ip_defrag_offset + last->len)
+ return IPFRAG_OVERLAP;
+ if (offset == last->ip_defrag_offset + last->len)
+ fragrun_append_to_last(q, skb);
+ else
+ fragrun_create(q, skb);
+ } else {
+ /* Binary search. Note that skb can become the first fragment,
+ * but not the last (covered above).
+ */
+ struct rb_node **rbn, *parent;
+
+ rbn = &q->rb_fragments.rb_node;
+ do {
+ struct sk_buff *curr;
+ int curr_run_end;
+
+ parent = *rbn;
+ curr = rb_to_skb(parent);
+ curr_run_end = curr->ip_defrag_offset +
+ FRAG_CB(curr)->frag_run_len;
+ if (end <= curr->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= curr_run_end)
+ rbn = &parent->rb_right;
+ else if (offset >= curr->ip_defrag_offset &&
+ end <= curr_run_end)
+ return IPFRAG_DUP;
+ else
+ return IPFRAG_OVERLAP;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+ */
+ fragcb_clear(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+ }
+
+ skb->ip_defrag_offset = offset;
+
+ return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent)
+{
+ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+ struct sk_buff **nextp;
+ int delta;
+
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
+ if (!fp)
+ return NULL;
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(parent)->next_frag = fp;
+ else
+ rb_replace_node(&skb->rbnode, &fp->rbnode,
+ &q->rb_fragments);
+ if (q->fragments_tail == skb)
+ q->fragments_tail = fp;
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ consume_skb(head);
+ head = skb;
+ }
+ WARN_ON(head->ip_defrag_offset != 0);
+
+ delta = -head->truesize;
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ return NULL;
+
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(q->net, delta);
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments.
+ */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ return NULL;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->data_len = head->data_len - plen;
+ clone->len = clone->data_len;
+ head->truesize += clone->truesize;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(q->net, clone->truesize);
+ skb_shinfo(head)->frag_list = clone;
+ nextp = &clone->next;
+ } else {
+ nextp = &skb_shinfo(head)->frag_list;
+ }
+
+ return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data)
+{
+ struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct rb_node *rbn;
+ struct sk_buff *fp;
+
+ skb_push(head, head->data - skb_network_header(head));
+
+ /* Traverse the tree in order, to build frag_list. */
+ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ while (rbn || fp) {
+ /* fp points to the next sk_buff in the current run;
+ * rbn points to the next run.
+ */
+ /* Go through the current run. */
+ while (fp) {
+ *nextp = fp;
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ fp = FRAG_CB(fp)->next_frag;
+ }
+ /* Move to the next run. */
+ if (rbn) {
+ struct rb_node *rbnext = rb_next(rbn);
+
+ fp = rb_to_skb(rbn);
+ rb_erase(rbn, &q->rb_fragments);
+ rbn = rbnext;
+ }
+ }
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ *nextp = NULL;
+ skb_mark_not_on_list(head);
+ head->prev = NULL;
+ head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+ struct sk_buff *head;
+
+ if (q->fragments) {
+ head = q->fragments;
+ q->fragments = head->next;
+ } else {
+ struct sk_buff *skb;
+
+ head = skb_rb_first(&q->rb_fragments);
+ if (!head)
+ return NULL;
+ skb = FRAG_CB(head)->next_frag;
+ if (skb)
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ else
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+ if (head == q->fragments_tail)
+ q->fragments_tail = NULL;
+
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d95b32af4a0e..5a1d39e32196 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
- struct inet_skb_parm h;
- struct sk_buff *next_frag;
- int frag_run_len;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
- FRAG_CB(skb)->next_frag = NULL;
- FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
- struct sk_buff *skb)
-{
- RB_CLEAR_NODE(&skb->rbnode);
- FRAG_CB(skb)->next_frag = NULL;
-
- FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
- FRAG_CB(q->fragments_tail)->next_frag = skb;
- q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
- if (q->last_run_head)
- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
- &q->last_run_head->rbnode.rb_right);
- else
- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
- rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
- ip4_frag_init_run(skb);
- q->fragments_tail = skb;
- q->last_run_head = skb;
-}
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
* pull the head out of the tree in order to be able to
* deal with head->dev.
*/
- if (qp->q.fragments) {
- head = qp->q.fragments;
- qp->q.fragments = head->next;
- } else {
- head = skb_rb_first(&qp->q.rb_fragments);
- if (!head)
- goto out;
- if (FRAG_CB(head)->next_frag)
- rb_replace_node(&head->rbnode,
- &FRAG_CB(head)->next_frag->rbnode,
- &qp->q.rb_fragments);
- else
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- memset(&head->rbnode, 0, sizeof(head->rbnode));
- barrier();
- }
- if (head == qp->q.fragments_tail)
- qp->q.fragments_tail = NULL;
-
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
+ head = inet_frag_pull_head(&qp->q);
+ if (!head)
+ goto out;
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
goto out;
@@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
- struct rb_node **rbn, *parent;
- struct sk_buff *skb1, *prev_tail;
- int ihl, end, skb1_run_end;
+ int ihl, end, flags, offset;
+ struct sk_buff *prev_tail;
struct net_device *dev;
unsigned int fragsize;
- int flags, offset;
int err = -ENOENT;
u8 ecn;
@@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
*/
if (end < qp->q.len ||
((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
- goto err;
+ goto discard_qp;
qp->q.flags |= INET_FRAG_LAST_IN;
qp->q.len = end;
} else {
@@ -394,82 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
if (qp->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_qp;
qp->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_qp;
err = -ENOMEM;
if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
- goto err;
+ goto discard_qp;
err = pskb_trim_rcsum(skb, end - offset);
if (err)
- goto err;
+ goto discard_qp;
/* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- *
- * We do the same here for IPv4 (and increment an snmp counter) but
- * we do not want to drop the whole queue in response to a duplicate
- * fragment.
- */
-
- err = -EINVAL;
- /* Find out where to put this fragment. */
prev_tail = qp->q.fragments_tail;
- if (!prev_tail)
- ip4_frag_create_run(&qp->q, skb); /* First fragment. */
- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
- /* This is the common case: skb goes to the end. */
- /* Detect and discard overlaps. */
- if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
- goto discard_qp;
- if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
- ip4_frag_append_to_last_run(&qp->q, skb);
- else
- ip4_frag_create_run(&qp->q, skb);
- } else {
- /* Binary search. Note that skb can become the first fragment,
- * but not the last (covered above).
- */
- rbn = &qp->q.rb_fragments.rb_node;
- do {
- parent = *rbn;
- skb1 = rb_to_skb(parent);
- skb1_run_end = skb1->ip_defrag_offset +
- FRAG_CB(skb1)->frag_run_len;
- if (end <= skb1->ip_defrag_offset)
- rbn = &parent->rb_left;
- else if (offset >= skb1_run_end)
- rbn = &parent->rb_right;
- else if (offset >= skb1->ip_defrag_offset &&
- end <= skb1_run_end)
- goto err; /* No new data, potential duplicate */
- else
- goto discard_qp; /* Found an overlap */
- } while (*rbn);
- /* Here we have parent properly set, and rbn pointing to
- * one of its NULL left/right children. Insert skb.
- */
- ip4_frag_init_run(skb);
- rb_link_node(&skb->rbnode, parent, rbn);
- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
- }
+ err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+ if (err)
+ goto insert_error;
if (dev)
qp->iif = dev->ifindex;
- skb->ip_defrag_offset = offset;
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
@@ -494,15 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
skb->_skb_refdst = 0UL;
err = ip_frag_reasm(qp, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
+ if (err)
+ inet_frag_kill(&qp->q);
return err;
}
skb_dst_drop(skb);
return -EINPROGRESS;
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
inet_frag_kill(&qp->q);
- __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
kfree_skb(skb);
return err;
@@ -514,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct iphdr *iph;
- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
- struct sk_buff **nextp; /* To build frag_list. */
- struct rb_node *rbn;
- int len;
- int ihlen;
- int delta;
- int err;
+ void *reasm_data;
+ int len, err;
u8 ecn;
ipq_kill(qp);
@@ -530,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
err = -EINVAL;
goto out_fail;
}
- /* Make the one we just received the head. */
- if (head != skb) {
- fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- goto out_nomem;
- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
- if (RB_EMPTY_NODE(&skb->rbnode))
- FRAG_CB(prev_tail)->next_frag = fp;
- else
- rb_replace_node(&skb->rbnode, &fp->rbnode,
- &qp->q.rb_fragments);
- if (qp->q.fragments_tail == skb)
- qp->q.fragments_tail = fp;
- skb_morph(skb, head);
- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
- rb_replace_node(&head->rbnode, &skb->rbnode,
- &qp->q.rb_fragments);
- consume_skb(head);
- head = skb;
- }
- WARN_ON(head->ip_defrag_offset != 0);
-
- /* Allocate a new buffer for the datagram. */
- ihlen = ip_hdrlen(head);
- len = ihlen + qp->q.len;
+ /* Make the one we just received the head. */
+ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_nomem;
+ len = ip_hdrlen(skb) + qp->q.len;
err = -E2BIG;
if (len > 65535)
goto out_oversize;
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_nomem;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(qp->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_nomem;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->truesize += clone->truesize;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(qp->q.net, clone->truesize);
- skb_shinfo(head)->frag_list = clone;
- nextp = &clone->next;
- } else {
- nextp = &skb_shinfo(head)->frag_list;
- }
+ inet_frag_reasm_finish(&qp->q, skb, reasm_data);
- skb_push(head, head->data - skb_network_header(head));
+ skb->dev = dev;
+ IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
- /* Traverse the tree in order, to build frag_list. */
- fp = FRAG_CB(head)->next_frag;
- rbn = rb_next(&head->rbnode);
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- while (rbn || fp) {
- /* fp points to the next sk_buff in the current run;
- * rbn points to the next run.
- */
- /* Go through the current run. */
- while (fp) {
- *nextp = fp;
- nextp = &fp->next;
- fp->prev = NULL;
- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
- fp->sk = NULL;
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp = FRAG_CB(fp)->next_frag;
- }
- /* Move to the next run. */
- if (rbn) {
- struct rb_node *rbnext = rb_next(rbn);
-
- fp = rb_to_skb(rbn);
- rb_erase(rbn, &qp->q.rb_fragments);
- rbn = rbnext;
- }
- }
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
- *nextp = NULL;
- head->next = NULL;
- head->prev = NULL;
- head->dev = dev;
- head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
- iph = ip_hdr(head);
+ iph = ip_hdr(skb);
iph->tot_len = htons(len);
iph->tos |= ecn;
@@ -653,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
* from one very small df-fragment and one large non-df frag.
*/
if (qp->max_df_size == qp->q.max_size) {
- IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+ IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
iph->frag_off = htons(IP_DF);
} else {
iph->frag_off = 0;
@@ -751,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);
-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
- struct rb_node *p = rb_first(root);
- unsigned int sum = 0;
-
- while (p) {
- struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
- p = rb_next(p);
- rb_erase(&skb->rbnode, root);
- while (skb) {
- struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
- sum += skb->truesize;
- kfree_skb(skb);
- skb = next;
- }
- }
- return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
#ifdef CONFIG_SYSCTL
static int dist_min;
--
2.19.1
next prev parent reply other threads:[~2019-04-24 17:49 UTC|newest]
Thread overview: 105+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-24 17:09 [PATCH 4.19 00/96] 4.19.37-stable review Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 01/96] bonding: fix event handling for stacked bonds Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 02/96] failover: allow name change on IFF_UP slave interfaces Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 03/96] net: atm: Fix potential Spectre v1 vulnerabilities Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 04/96] net: bridge: fix per-port af_packet sockets Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 05/96] net: bridge: multicast: use rcu to access port list from br_multicast_start_querier Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 06/96] net: Fix missing meta data in skb with vlan packet Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 07/96] net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 08/96] tcp: tcp_grow_window() needs to respect tcp_space() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 09/96] team: set slave to promisc if team is already in promisc mode Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 10/96] tipc: missing entries in name table of publications Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 11/96] vhost: reject zero size iova range Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 12/96] ipv4: recompile ip options in ipv4_link_failure Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 13/96] ipv4: ensure rcu_read_lock() in ipv4_link_failure() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 14/96] net: thunderx: raise XDP MTU to 1508 Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 15/96] net: thunderx: dont allow jumbo frames with XDP Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 16/96] net/mlx5: FPGA, tls, hold rcu read lock a bit longer Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 17/96] net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 18/96] net/mlx5: FPGA, tls, idr remove on flow delete Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 19/96] route: Avoid crash from dereferencing NULL rt->from Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 20/96] sch_cake: Use tc_skb_protocol() helper for getting packet protocol Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 21/96] sch_cake: Make sure we can write the IP header before changing DSCP bits Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 22/96] nfp: flower: replace CFI with vlan present Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 23/96] nfp: flower: remove vlan CFI bit from push vlan action Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 24/96] sch_cake: Simplify logic in cake_select_tin() Greg Kroah-Hartman
2019-04-24 17:09 ` Greg Kroah-Hartman [this message]
2019-04-24 17:09 ` [PATCH 4.19 26/96] net: IP6 defrag: use rbtrees for IPv6 defrag Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 27/96] net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 28/96] CIFS: keep FileInfo handle live during oplock break Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 29/96] cifs: Fix use-after-free in SMB2_write Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 30/96] cifs: Fix use-after-free in SMB2_read Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 31/96] cifs: fix handle leak in smb2_query_symlink() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 32/96] KVM: x86: Dont clear EFER during SMM transitions for 32-bit vCPU Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 33/96] KVM: x86: svm: make sure NMI is injected after nmi_singlestep Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 34/96] Staging: iio: meter: fixed typo Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 35/96] staging: iio: ad7192: Fix ad7193 channel address Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 36/96] iio: gyro: mpu3050: fix chip ID reading Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 37/96] iio/gyro/bmg160: Use millidegrees for temperature scale Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 38/96] iio:chemical:bme680: Fix, report temperature in millidegrees Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 39/96] iio:chemical:bme680: Fix SPI read interface Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 40/96] iio: cros_ec: Fix the maths for gyro scale calculation Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 41/96] iio: ad_sigma_delta: select channel when reading register Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 42/96] iio: dac: mcp4725: add missing powerdown bits in store eeprom Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 43/96] iio: Fix scan mask selection Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 44/96] iio: adc: at91: disable adc channel interrupt in timeout case Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 45/96] iio: core: fix a possible circular locking dependency Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 46/96] io: accel: kxcjk1013: restore the range after resume Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 47/96] staging: most: core: use device description as name Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 48/96] staging: comedi: vmk80xx: Fix use of uninitialized semaphore Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 49/96] staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 50/96] staging: comedi: ni_usb6501: Fix use of uninitialized mutex Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 51/96] staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 52/96] ALSA: hda/realtek - add two more pin configuration sets to quirk table Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 53/96] ALSA: core: Fix card races between register and disconnect Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.19 54/96] Input: elan_i2c - add hardware ID for multiple Lenovo laptops Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 55/96] serial: sh-sci: Fix HSCIF RX sampling point adjustment Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 56/96] serial: sh-sci: Fix HSCIF RX sampling point calculation Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 57/96] vt: fix cursor when clearing the screen Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 58/96] scsi: core: set result when the command cannot be dispatched Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 59/96] Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO" Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 60/96] Revert "svm: Fix AVIC incomplete IPI emulation" Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 61/96] coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 62/96] ipmi: fix sleep-in-atomic in free_user at cleanup SRCU user->release_barrier Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 63/96] crypto: x86/poly1305 - fix overflow during partial reduction Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 64/96] drm/ttm: fix out-of-bounds read in ttm_put_pages() v2 Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 65/96] arm64: futex: Restore oldval initialization to work around buggy compilers Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 66/96] x86/kprobes: Verify stack frame on kretprobe Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 67/96] kprobes: Mark ftrace mcount handler functions nokprobe Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 68/96] kprobes: Fix error check when reusing optimized probes Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 69/96] rt2x00: do not increment sequence number while re-transmitting Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 70/96] mac80211: do not call driver wake_tx_queue op during reconfig Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 71/96] drm/amdgpu/gmc9: fix VM_L2_CNTL3 programming Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 72/96] perf/x86/amd: Add event map for AMD Family 17h Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 73/96] x86/cpu/bugs: Use __initconst for const init data Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 74/96] perf/x86: Fix incorrect PEBS_REGS Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 75/96] x86/speculation: Prevent deadlock on ssb_state::lock Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 76/96] timers/sched_clock: Prevent generic sched_clock wrap caused by tick_freeze() Greg Kroah-Hartman
2019-04-24 17:10 ` Greg Kroah-Hartman
2019-04-24 17:10 ` Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 77/96] nfit/ars: Remove ars_start_flags Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 78/96] nfit/ars: Introduce scrub_flags Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 79/96] nfit/ars: Allow root to busy-poll the ARS state machine Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 80/96] nfit/ars: Avoid stale ARS results Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 81/96] mmc: sdhci: Fix data command CRC error handling Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 82/96] mmc: sdhci: Rename SDHCI_ACMD12_ERR and SDHCI_INT_ACMD12ERR Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 83/96] mmc: sdhci: Handle auto-command errors Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 84/96] modpost: file2alias: go back to simple devtable lookup Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 85/96] modpost: file2alias: check prototype of handler Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 86/96] tpm/tpm_i2c_atmel: Return -E2BIG when the transfer is incomplete Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 87/96] tpm: Fix the type of the return value in calc_tpm2_event_size() Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 88/96] Revert "kbuild: use -Oz instead of -Os when using clang" Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 89/96] sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 90/96] device_cgroup: fix RCU imbalance in error case Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 91/96] mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 92/96] ALSA: info: Fix racy addition/deletion of nodes Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 93/96] percpu: stop printing kernel addresses Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 94/96] tools include: Adopt linux/bits.h Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 95/96] ASoC: rockchip: add missing INTERLEAVED PCM attribute Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.19 96/96] i2c-hid: properly terminate i2c_hid_dmi_desc_override_table[] array Greg Kroah-Hartman
2019-04-24 22:25 ` [PATCH 4.19 00/96] 4.19.37-stable review kernelci.org bot
2019-04-25 6:05 ` Naresh Kamboju
2019-04-25 11:56 ` Jon Hunter
2019-04-25 11:56 ` Jon Hunter
2019-04-25 16:24 ` shuah
2019-04-25 19:38 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190424170921.542276375@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=posk@google.com \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
--cc=tom@herbertland.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.