Re: [PATCH net] vrf: Fix fast path output packet handling with async Netfilter rules

netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Martin Willi <martin@strongswan.org>
Cc: David Ahern <dsahern@kernel.org>,
	Shrijeet Mukherjee <shrijeet@gmail.com>,
	Jakub Kicinski <kuba@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, netfilter-devel@vger.kernel.org
Subject: Re: [PATCH net] vrf: Fix fast path output packet handling with async Netfilter rules
Date: Tue, 10 Nov 2020 14:35:06 +0100	[thread overview]
Message-ID: <20201110133506.GA1777@salvia> (raw)
In-Reply-To: <20201106073030.3974927-1-martin@strongswan.org>

Hi Martin,

Just a few nitpicks, see below.

On Fri, Nov 06, 2020 at 08:30:30AM +0100, Martin Willi wrote:
> VRF devices use an optimized direct path on output if a default qdisc
> is involved, calling Netfilter hooks directly. This path, however, does
> not consider Netfilter rules completing asynchronously, such as with
> NFQUEUE. The Netfilter okfn() is called for asynchronously accepted
> packets, but the VRF never passes that packet down the stack to send
> it out over the slave device. Using the slower redirect path for this
> seems not feasible, as we do not know beforehand if a Netfilter hook
> has asynchronously completing rules.
> 
> Fix the use of asynchronously completing Netfilter rules in OUTPUT and
> POSTROUTING by using a special completion function that additionally
> calls dst_output() to pass the packet down the stack. Also, slightly
> adjust the use of nf_reset_ct() so that is called in the asynchronous
> case, too.
> 
> Fixes: dcdd43c41e60 ("net: vrf: performance improvements for IPv4")
> Fixes: a9ec54d1b0cd ("net: vrf: performance improvements for IPv6")
> Signed-off-by: Martin Willi <martin@strongswan.org>
> ---
>  drivers/net/vrf.c | 92 +++++++++++++++++++++++++++++++++++------------
>  1 file changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index 60c1aadece89..f2793ffde191 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -608,8 +608,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
>  	return ret;
>  }
>  
> -static int vrf_finish_direct(struct net *net, struct sock *sk,
> -			     struct sk_buff *skb)
> +static void vrf_finish_direct(struct sk_buff *skb)
>  {
>  	struct net_device *vrf_dev = skb->dev;
>  
> @@ -628,7 +627,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
>  		skb_pull(skb, ETH_HLEN);
>  	}
>  
> -	return 1;
> +	/* reset skb device */
> +	nf_reset_ct(skb);
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> @@ -707,15 +707,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
>  	return skb;
>  }
>  
> +static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
> +				     struct sk_buff *skb)
> +{
> +	vrf_finish_direct(skb);
> +
> +	return vrf_ip6_local_out(net, sk, skb);
> +}
> +
>  static int vrf_output6_direct(struct net *net, struct sock *sk,
>  			      struct sk_buff *skb)
>  {
> +	int err = 1;
> +
>  	skb->protocol = htons(ETH_P_IPV6);
>  
> -	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
> -			    net, sk, skb, NULL, skb->dev,
> -			    vrf_finish_direct,
> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
> +		err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
> +			      NULL, skb->dev, vrf_output6_direct_finish);

I might missing something... this looks very similar to NF_HOOK_COND
but it's open-coded.

My question, could you still use NF_HOOK_COND?

        ret = NF_HOOK_COND(NFPROTO_IPV6, ..., vrf_output6_direct_finish);

just update the okfn.

> +
> +	if (likely(err == 1))

I'd suggest you remove likely() here and elsewhere in this patch.
Just let the branch predictor make its work instead of assuming that
the ruleset accepts traffic.

> +		vrf_finish_direct(skb);
> +
> +	return err;
> +}
> +
> +static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
> +				     struct sk_buff *skb)
> +{
> +	int err;
> +
> +	err = vrf_output6_direct(net, sk, skb);
> +	if (likely(err == 1))
> +		err = vrf_ip6_local_out(net, sk, skb);
> +
> +	return err;
>  }
>  
>  static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
> @@ -728,18 +754,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
>  	skb->dev = vrf_dev;
>  
>  	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
> -		      skb, NULL, vrf_dev, vrf_output6_direct);
> +		      skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
>  
>  	if (likely(err == 1))
>  		err = vrf_output6_direct(net, sk, skb);
>  
> -	/* reset skb device */
>  	if (likely(err == 1))
> -		nf_reset_ct(skb);
> -	else
> -		skb = NULL;
> +		return skb;
>  
> -	return skb;
> +	return NULL;
>  }
>  
>  static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
> @@ -919,15 +942,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
>  	return skb;
>  }
>  
> +static int vrf_output_direct_finish(struct net *net, struct sock *sk,
> +				    struct sk_buff *skb)
> +{
> +	vrf_finish_direct(skb);
> +
> +	return vrf_ip_local_out(net, sk, skb);
> +}
> +
>  static int vrf_output_direct(struct net *net, struct sock *sk,
>  			     struct sk_buff *skb)
>  {
> +	int err = 1;
> +
>  	skb->protocol = htons(ETH_P_IP);
>  
> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> -			    net, sk, skb, NULL, skb->dev,
> -			    vrf_finish_direct,
> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
> +		err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
> +			      NULL, skb->dev, vrf_output_direct_finish);
> +
> +	if (likely(err == 1))
> +		vrf_finish_direct(skb);
> +
> +	return err;
> +}
> +
> +static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
> +				    struct sk_buff *skb)
> +{
> +	int err;
> +
> +	err = vrf_output_direct(net, sk, skb);
> +	if (likely(err == 1))
> +		err = vrf_ip_local_out(net, sk, skb);
> +
> +	return err;
>  }
>  
>  static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
> @@ -940,18 +989,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
>  	skb->dev = vrf_dev;
>  
>  	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
> -		      skb, NULL, vrf_dev, vrf_output_direct);
> +		      skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
>  
>  	if (likely(err == 1))
>  		err = vrf_output_direct(net, sk, skb);

Could you use NF_HOOK() here instead?

Thanks.

next prev parent reply	other threads:[~2020-11-10 13:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-06  7:30 [PATCH net] vrf: Fix fast path output packet handling with async Netfilter rules Martin Willi
2020-11-09 23:44 ` Jakub Kicinski
2020-11-10  4:04   ` David Ahern
2020-11-10 13:35 ` Pablo Neira Ayuso [this message]
2020-11-10 15:02   ` Martin Willi
2020-11-11 23:43     ` Pablo Neira Ayuso
2020-11-12 15:49       ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201110133506.GA1777@salvia \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=kuba@kernel.org \
    --cc=martin@strongswan.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=shrijeet@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).