From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Peter Oskolkov <posk@google.com>,
Tom Herbert <tom@herbertland.com>,
Eric Dumazet <edumazet@google.com>,
Florian Westphal <fw@strlen.de>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.9 41/41] net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c
Date: Tue, 30 Apr 2019 13:38:52 +0200 [thread overview]
Message-ID: <20190430113535.261302059@linuxfoundation.org> (raw)
In-Reply-To: <20190430113524.451237916@linuxfoundation.org>
From: Peter Oskolkov <posk@google.com>
[ Upstream commit 997dd96471641e147cb2c33ad54284000d0f5e35 ]
Currently, IPv6 defragmentation code drops non-last fragments that
are smaller than 1280 bytes: see
commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
This behavior is not specified in IPv6 RFCs and appears to break
compatibility with some IPv6 implemenations, as reported here:
https://www.spinics.net/lists/netdev/msg543846.html
This patch re-uses common IP defragmentation queueing and reassembly
code in IP6 defragmentation in nf_conntrack, removing the 1280 byte
restriction.
Signed-off-by: Peter Oskolkov <posk@google.com>
Reported-by: Tom Herbert <tom@herbertland.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/ipv6/netfilter/nf_conntrack_reasm.c | 260 +++++++++-----------------------
1 file changed, 74 insertions(+), 186 deletions(-)
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -51,14 +51,6 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
@@ -144,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysc
}
#endif
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -184,9 +179,10 @@ static struct frag_queue *fq_find(struct
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
- struct sk_buff *prev, *next;
unsigned int payload_len;
- int offset, end;
+ struct net_device *dev;
+ struct sk_buff *prev;
+ int offset, end, err;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE) {
@@ -261,55 +257,19 @@ static int nf_ct_frag6_queue(struct frag
goto err;
}
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
- */
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
- goto discard_fq;
-
- NFCT_FRAG6_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
-
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -325,11 +285,25 @@ found:
fq->q.flags |= INET_FRAG_FIRST_IN;
}
- return 0;
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
-discard_fq:
+insert_error:
+ if (err == IPFRAG_DUP)
+ goto err;
inet_frag_kill(&fq->q);
err:
+ skb_dst_drop(skb);
return -EINVAL;
}
@@ -339,141 +313,67 @@ err:
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
- *
- * returns true if *prev skb has been transformed into the reassembled
- * skb, false otherwise.
*/
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
- WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
-
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- return false;
+ goto err;
+
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto err;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
- return false;
- }
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- return false;
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (clone == NULL)
- return false;
-
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- /* morph head into last received skb: prev.
- *
- * This allows callers of ipv6 conntrack defrag to continue
- * to use the last skb(frag) passed into the reasm engine.
- * The last skb frag 'silently' turns into the full reassembled skb.
- *
- * Since prev is also part of q->fragments we have to clone it first.
- */
- if (head != prev) {
- struct sk_buff *iter;
-
- fp = skb_clone(prev, GFP_ATOMIC);
- if (!fp)
- return false;
-
- fp->next = prev->next;
-
- iter = head;
- while (iter) {
- if (iter->next == prev) {
- iter->next = fp;
- break;
- }
- iter = iter->next;
- }
-
- skb_morph(prev, head);
- prev->next = head->next;
- consume_skb(head);
- head = prev;
+ goto err;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_shinfo(head)->frag_list = head->next;
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp = head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp->sk = NULL;
- }
- sub_frag_mem_limit(fq->q.net, head->truesize);
-
- head->ignore_df = 1;
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+
+ skb->ignore_df = 1;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_partial(skb_network_header(skb),
+ skb_network_header_len(skb),
+ skb->csum);
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
+
+ return 0;
- return true;
+err:
+ inet_frag_kill(&fq->q);
+ return -EINVAL;
}
/*
@@ -542,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
u16 savethdr = skb->transport_header;
- struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
@@ -565,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net,
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- return -EINVAL;
-
skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
@@ -580,24 +475,17 @@ int nf_ct_frag6_gather(struct net *net,
spin_lock_bh(&fq->q.lock);
ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
- if (ret < 0) {
- if (ret == -EPROTO) {
- skb->transport_header = savethdr;
- ret = 0;
- }
- goto out_unlock;
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
- ret = -EINPROGRESS;
- if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
+ if (ret)
+ ret = -EINPROGRESS;
-out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
next prev parent reply other threads:[~2019-04-30 11:41 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-30 11:38 [PATCH 4.9 00/41] 4.9.172-stable review Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 01/41] kbuild: simplify ld-option implementation Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 02/41] cifs: do not attempt cifs operation on smb2+ rename error Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 03/41] tracing: Fix a memory leak by early error exit in trace_pid_write() Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 04/41] MIPS: scall64-o32: Fix indirect syscall number load Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 05/41] trace: Fix preempt_enable_no_resched() abuse Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 06/41] IB/rdmavt: Fix frwr memory registration Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 07/41] sched/numa: Fix a possible divide-by-zero Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 08/41] ceph: ensure d_name stability in ceph_dentry_hash() Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 09/41] ceph: fix ci->i_head_snapc leak Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 10/41] nfsd: Dont release the callback slot unless it was actually held Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 11/41] sunrpc: dont mark uninitialised items as VALID Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 12/41] Input: synaptics-rmi4 - write config register values to the right offset Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 13/41] dmaengine: sh: rcar-dmac: With cyclic DMA residue 0 is valid Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 14/41] ARM: 8857/1: efi: enable CP15 DMB instructions before cleaning the cache Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 15/41] drm/vc4: Fix memory leak during gpu reset Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 16/41] drm/vc4: Fix compilation error reported by kbuild test bot Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 17/41] USB: Add new USB LPM helpers Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 18/41] USB: Consolidate LPM checks to avoid enabling LPM twice Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 19/41] vsock/virtio: fix kernel panic from virtio_transport_reset_no_sock Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 20/41] tipc: handle the err returned from cmd header function Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 21/41] slip: make slhc_free() silently accept an error pointer Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 22/41] intel_th: gth: Fix an off-by-one in output unassigning Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 23/41] fs/proc/proc_sysctl.c: Fix a NULL pointer dereference Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 24/41] NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 25/41] netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 26/41] fm10k: Fix a potential NULL pointer dereference Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 27/41] tipc: check bearer name with right length in tipc_nl_compat_bearer_enable Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 28/41] tipc: check link name with right length in tipc_nl_compat_link_set Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 29/41] Revert "block/loop: Use global lock for ioctl() operation." Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 30/41] ipv4: add sanity checks in ipv4_link_failure() Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 31/41] mlxsw: spectrum: Fix autoneg status in ethtool Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 32/41] net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 33/41] net: rds: exchange of 8K and 1M pool Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 34/41] team: fix possible recursive locking when add slaves Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 35/41] net: stmmac: move stmmac_check_ether_addr() to driver probe Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 36/41] ipv4: set the tcp_min_rtt_wlen range from 0 to one day Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 37/41] ipv6: frags: fix a lockdep false positive Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 38/41] net: IP defrag: encapsulate rbtree defrag code into callable functions Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 39/41] ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module Greg Kroah-Hartman
2019-04-30 11:38 ` [PATCH 4.9 40/41] net: IP6 defrag: use rbtrees for IPv6 defrag Greg Kroah-Hartman
2019-04-30 11:38 ` Greg Kroah-Hartman [this message]
2019-04-30 22:26 ` [PATCH 4.9 00/41] 4.9.172-stable review kernelci.org bot
2019-04-30 22:30 ` shuah
2019-05-01 6:01 ` Naresh Kamboju
2019-05-01 8:24 ` Jon Hunter
2019-05-01 8:24 ` Jon Hunter
2019-05-01 16:43 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190430113535.261302059@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=posk@google.com \
--cc=stable@vger.kernel.org \
--cc=tom@herbertland.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.