From: Pablo Neira Ayuso <pablo@netfilter.org>
To: David Miller <davem@davemloft.net>
Cc: Florian Westphal <fw@strlen.de>,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
Andy Zhou <azhou@nicira.com>
Subject: Re: [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter
Date: Mon, 16 Mar 2015 23:55:45 +0100 [thread overview]
Message-ID: <20150316225545.GA4454@salvia> (raw)
In-Reply-To: <1426179925-18220-2-git-send-email-fw@strlen.de>
Hi David,
This patch changes the interface ip_fragment(). If this sounds
reasonable to you, please ack it and I'll route it to you in the next
nf-next batch.
Thanks.
On Thu, Mar 12, 2015 at 06:05:20PM +0100, Florian Westphal wrote:
> Long time ago it was possible for the netfilter ip_conntrack
> core to call ip_fragment in POST_ROUTING hook.
>
> This is no longer the case, so the only case where bridge netfilter
> ends up calling ip_fragment is the direct call site in br_netfilter.c.
>
> Add ll and mtu arguments for ip_fragment and then get rid of the bridge
> netfilter specific helpers from ip_fragment.
>
> Cc: Andy Zhou <azhou@nicira.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> include/linux/netfilter_bridge.h | 17 -----------------
> include/net/ip.h | 4 ++--
> net/bridge/br_netfilter.c | 23 ++++++++++++++++++++---
> net/ipv4/ip_output.c | 37 +++++++++++++++++++++----------------
> 4 files changed, 43 insertions(+), 38 deletions(-)
>
> diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
> index ed0d3bf..fbbd5de 100644
> --- a/include/linux/netfilter_bridge.h
> +++ b/include/linux/netfilter_bridge.h
> @@ -35,24 +35,8 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
> }
> }
>
> -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
> -{
> - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
> - return PPPOE_SES_HLEN;
> - return 0;
> -}
> -
> int br_handle_frame_finish(struct sk_buff *skb);
>
> -/* This is called by the IP fragmenting code and it ensures there is
> - * enough room for the encapsulating header (if there is one). */
> -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
> -{
> - if (skb->nf_bridge)
> - return nf_bridge_encap_header_len(skb);
> - return 0;
> -}
> -
> static inline void br_drop_fake_rtable(struct sk_buff *skb)
> {
> struct dst_entry *dst = skb_dst(skb);
> @@ -62,7 +46,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
> }
>
> #else
> -#define nf_bridge_pad(skb) (0)
> #define br_drop_fake_rtable(skb) do { } while (0)
> #endif /* CONFIG_BRIDGE_NETFILTER */
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 025c61c..2905a4b 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
> int ip_mr_input(struct sk_buff *skb);
> int ip_output(struct sock *sk, struct sk_buff *skb);
> int ip_mc_output(struct sock *sk, struct sk_buff *skb);
> -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
> -int ip_do_nat(struct sk_buff *skb);
> +int ip_fragment(struct sk_buff *skb, unsigned int mtu,
> + unsigned int ll_rs, int (*output)(struct sk_buff *));
> void ip_send_check(struct iphdr *ip);
> int __ip_local_out(struct sk_buff *skb);
> int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index bd2d24d..550ee19 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -812,26 +812,43 @@ static int br_nf_push_frag_xmit(struct sk_buff *skb)
> return br_dev_queue_push_xmit(skb);
> }
>
> +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
> +{
> + if (skb->nf_bridge->mask & BRNF_PPPoE)
> + return PPPOE_SES_HLEN;
> + return 0;
> +}
> +
> static int br_nf_dev_queue_xmit(struct sk_buff *skb)
> {
> int ret;
> int frag_max_size;
> - unsigned int mtu_reserved;
> + unsigned int mtu_reserved, mtu;
>
> if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
> return br_dev_queue_push_xmit(skb);
>
> mtu_reserved = nf_bridge_mtu_reduction(skb);
> + mtu = min(skb->dev->mtu, IP_MAX_MTU);
> /* This is wrong! We should preserve the original fragment
> * boundaries by preserving frag_list rather than refragmenting.
> */
> - if (skb->len + mtu_reserved > skb->dev->mtu) {
> + if (skb->len + mtu_reserved > mtu) {
> + unsigned int llrs;
> +
> frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
> if (br_parse_ip_options(skb))
> /* Drop invalid packet */
> return NF_DROP;
> IPCB(skb)->frag_max_size = frag_max_size;
> - ret = ip_fragment(skb, br_nf_push_frag_xmit);
> +
> + /* for bridged IP traffic encapsulated inside f.e. a vlan header,
> + * we need to make room for the encapsulating header
> + */
> + llrs = nf_bridge_encap_header_len(skb);
> +
> + mtu -= mtu_reserved;
> + ret = ip_fragment(skb, mtu, llrs, br_nf_push_frag_xmit);
> } else
> ret = br_dev_queue_push_xmit(skb);
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index a7aea20..fe5ec3f 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -216,6 +216,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> netdev_features_t features;
> struct sk_buff *segs;
> int ret = 0;
> + unsigned int mtu;
>
> /* common case: locally created skb or seglen is <= mtu */
> if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
> @@ -236,6 +237,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> return -ENOMEM;
> }
>
> + mtu = ip_skb_dst_mtu(skb);
> consume_skb(skb);
>
> do {
> @@ -243,7 +245,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> int err;
>
> segs->next = NULL;
> - err = ip_fragment(segs, ip_finish_output2);
> + err = ip_fragment(segs, mtu, 0, ip_finish_output2);
>
> if (err && ret == 0)
> ret = err;
> @@ -255,6 +257,8 @@ static int ip_finish_output_gso(struct sk_buff *skb)
>
> static int ip_finish_output(struct sk_buff *skb)
> {
> + unsigned int mtu;
> +
> #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
> /* Policy lookup after SNAT yielded a new policy */
> if (skb_dst(skb)->xfrm != NULL) {
> @@ -265,8 +269,9 @@ static int ip_finish_output(struct sk_buff *skb)
> if (skb_is_gso(skb))
> return ip_finish_output_gso(skb);
>
> - if (skb->len > ip_skb_dst_mtu(skb))
> - return ip_fragment(skb, ip_finish_output2);
> + mtu = ip_skb_dst_mtu(skb);
> + if (skb->len > mtu)
> + return ip_fragment(skb, mtu, 0, ip_finish_output2);
>
> return ip_finish_output2(skb);
> }
> @@ -472,20 +477,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
> skb_copy_secmark(to, from);
> }
>
> -/*
> +/**
> + * ip_fragment - fragment IP datagram or send ICMP error
> + *
> + * @skb: the skb to fragment
> + * @mtu: mtu to use for fragmentation
> + * @ll_rs: extra linklayer space required
> + * @output: transmit function used to send fragments
> + *
> * This IP datagram is too large to be sent in one piece. Break it up into
> * smaller pieces (each of size equal to IP header plus
> * a block of the data of the original IP data part) that will yet fit in a
> * single device frame, and queue such a frame for sending.
> */
> -
> -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
> +int ip_fragment(struct sk_buff *skb,
> + unsigned int mtu, unsigned int ll_rs,
> + int (*output)(struct sk_buff *))
> {
> struct iphdr *iph;
> int ptr;
> struct net_device *dev;
> struct sk_buff *skb2;
> - unsigned int mtu, hlen, left, len, ll_rs;
> + unsigned int hlen, left, len;
> int offset;
> __be16 not_last_frag;
> struct rtable *rt = skb_rtable(skb);
> @@ -499,7 +512,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
>
> iph = ip_hdr(skb);
>
> - mtu = ip_skb_dst_mtu(skb);
> if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
> (IPCB(skb)->frag_max_size &&
> IPCB(skb)->frag_max_size > mtu))) {
> @@ -516,10 +528,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
>
> hlen = iph->ihl * 4;
> mtu = mtu - hlen; /* Size of data space */
> -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> - if (skb->nf_bridge)
> - mtu -= nf_bridge_mtu_reduction(skb);
> -#endif
> IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
>
> /* When frag_list is given, use it. First, check its validity:
> @@ -636,10 +644,7 @@ slow_path:
> left = skb->len - hlen; /* Space per frame */
> ptr = hlen; /* Where to start from */
>
> - /* for bridged IP traffic encapsulated inside f.e. a vlan header,
> - * we need to make room for the encapsulating header
> - */
> - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
> + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, ll_rs);
>
> /*
> * Fragment the datagram.
> --
> 2.0.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2015-03-16 22:55 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-03-12 17:05 [PATCH v2 nf-next 0/6] more bridge netfilter refactoring Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter Florian Westphal
2015-03-13 0:38 ` Andy Zhou
2015-03-16 22:55 ` Pablo Neira Ayuso [this message]
2015-03-17 4:42 ` David Miller
2015-03-17 10:11 ` Florian Westphal
2015-03-17 17:12 ` David Miller
2015-03-17 20:40 ` Florian Westphal
2015-03-17 21:38 ` David Miller
2015-03-12 17:05 ` [PATCH v2 nf-next 2/6] netfilter: bridge: don't use nf_bridge_info to store mac header Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 3/6] netfilter: bridge: use skb->cb to track otherhost mangling Florian Westphal
2015-03-12 18:02 ` Oliver Hartkopp
2015-03-12 18:31 ` Florian Westphal
2015-03-12 18:35 ` Florian Westphal
2015-03-12 18:40 ` Oliver Hartkopp
2015-03-12 17:05 ` [PATCH v2 nf-next 4/6] netfilter: bridge: don't use nf_bridge_info to store proto value Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 5/6] netfilter: bridge: replace remaining flags with state enum Florian Westphal
2015-03-12 17:05 ` [PATCH nf-next 6/6] netfilter: bridge: don't use nf_bridge storage during neigh resolution Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150316225545.GA4454@salvia \
--to=pablo@netfilter.org \
--cc=azhou@nicira.com \
--cc=davem@davemloft.net \
--cc=fw@strlen.de \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.