All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Martin Willi <martin@strongswan.org>
Cc: David Ahern <dsahern@kernel.org>,
	Shrijeet Mukherjee <shrijeet@gmail.com>,
	Jakub Kicinski <kuba@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, netfilter-devel@vger.kernel.org
Subject: Re: [PATCH net] vrf: Fix fast path output packet handling with async Netfilter rules
Date: Tue, 10 Nov 2020 14:35:06 +0100	[thread overview]
Message-ID: <20201110133506.GA1777@salvia> (raw)
In-Reply-To: <20201106073030.3974927-1-martin@strongswan.org>

Hi Martin,

Just a few nitpicks, see below.

On Fri, Nov 06, 2020 at 08:30:30AM +0100, Martin Willi wrote:
> VRF devices use an optimized direct path on output if a default qdisc
> is involved, calling Netfilter hooks directly. This path, however, does
> not consider Netfilter rules completing asynchronously, such as with
> NFQUEUE. The Netfilter okfn() is called for asynchronously accepted
> packets, but the VRF never passes that packet down the stack to send
> it out over the slave device. Using the slower redirect path for this
> seems not feasible, as we do not know beforehand if a Netfilter hook
> has asynchronously completing rules.
> 
> Fix the use of asynchronously completing Netfilter rules in OUTPUT and
> POSTROUTING by using a special completion function that additionally
> calls dst_output() to pass the packet down the stack. Also, slightly
> adjust the use of nf_reset_ct() so that is called in the asynchronous
> case, too.
> 
> Fixes: dcdd43c41e60 ("net: vrf: performance improvements for IPv4")
> Fixes: a9ec54d1b0cd ("net: vrf: performance improvements for IPv6")
> Signed-off-by: Martin Willi <martin@strongswan.org>
> ---
>  drivers/net/vrf.c | 92 +++++++++++++++++++++++++++++++++++------------
>  1 file changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index 60c1aadece89..f2793ffde191 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -608,8 +608,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
>  	return ret;
>  }
>  
> -static int vrf_finish_direct(struct net *net, struct sock *sk,
> -			     struct sk_buff *skb)
> +static void vrf_finish_direct(struct sk_buff *skb)
>  {
>  	struct net_device *vrf_dev = skb->dev;
>  
> @@ -628,7 +627,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
>  		skb_pull(skb, ETH_HLEN);
>  	}
>  
> -	return 1;
> +	/* reset skb device */
> +	nf_reset_ct(skb);
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> @@ -707,15 +707,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
>  	return skb;
>  }
>  
> +static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
> +				     struct sk_buff *skb)
> +{
> +	vrf_finish_direct(skb);
> +
> +	return vrf_ip6_local_out(net, sk, skb);
> +}
> +
>  static int vrf_output6_direct(struct net *net, struct sock *sk,
>  			      struct sk_buff *skb)
>  {
> +	int err = 1;
> +
>  	skb->protocol = htons(ETH_P_IPV6);
>  
> -	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
> -			    net, sk, skb, NULL, skb->dev,
> -			    vrf_finish_direct,
> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
> +		err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
> +			      NULL, skb->dev, vrf_output6_direct_finish);

I might missing something... this looks very similar to NF_HOOK_COND
but it's open-coded.

My question, could you still use NF_HOOK_COND?

        ret = NF_HOOK_COND(NFPROTO_IPV6, ..., vrf_output6_direct_finish);

just update the okfn.

> +
> +	if (likely(err == 1))

I'd suggest you remove likely() here and elsewhere in this patch.
Just let the branch predictor make its work instead of assuming that
the ruleset accepts traffic.

> +		vrf_finish_direct(skb);
> +
> +	return err;
> +}
> +
> +static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
> +				     struct sk_buff *skb)
> +{
> +	int err;
> +
> +	err = vrf_output6_direct(net, sk, skb);
> +	if (likely(err == 1))
> +		err = vrf_ip6_local_out(net, sk, skb);
> +
> +	return err;
>  }
>  
>  static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
> @@ -728,18 +754,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
>  	skb->dev = vrf_dev;
>  
>  	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
> -		      skb, NULL, vrf_dev, vrf_output6_direct);
> +		      skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
>  
>  	if (likely(err == 1))
>  		err = vrf_output6_direct(net, sk, skb);
>  
> -	/* reset skb device */
>  	if (likely(err == 1))
> -		nf_reset_ct(skb);
> -	else
> -		skb = NULL;
> +		return skb;
>  
> -	return skb;
> +	return NULL;
>  }
>  
>  static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
> @@ -919,15 +942,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
>  	return skb;
>  }
>  
> +static int vrf_output_direct_finish(struct net *net, struct sock *sk,
> +				    struct sk_buff *skb)
> +{
> +	vrf_finish_direct(skb);
> +
> +	return vrf_ip_local_out(net, sk, skb);
> +}
> +
>  static int vrf_output_direct(struct net *net, struct sock *sk,
>  			     struct sk_buff *skb)
>  {
> +	int err = 1;
> +
>  	skb->protocol = htons(ETH_P_IP);
>  
> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> -			    net, sk, skb, NULL, skb->dev,
> -			    vrf_finish_direct,
> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
> +		err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
> +			      NULL, skb->dev, vrf_output_direct_finish);
> +
> +	if (likely(err == 1))
> +		vrf_finish_direct(skb);
> +
> +	return err;
> +}
> +
> +static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
> +				    struct sk_buff *skb)
> +{
> +	int err;
> +
> +	err = vrf_output_direct(net, sk, skb);
> +	if (likely(err == 1))
> +		err = vrf_ip_local_out(net, sk, skb);
> +
> +	return err;
>  }
>  
>  static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
> @@ -940,18 +989,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
>  	skb->dev = vrf_dev;
>  
>  	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
> -		      skb, NULL, vrf_dev, vrf_output_direct);
> +		      skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
>  
>  	if (likely(err == 1))
>  		err = vrf_output_direct(net, sk, skb);

Could you use NF_HOOK() here instead?

Thanks.

  parent reply	other threads:[~2020-11-10 13:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-06  7:30 [PATCH net] vrf: Fix fast path output packet handling with async Netfilter rules Martin Willi
2020-11-09 23:44 ` Jakub Kicinski
2020-11-10  4:04   ` David Ahern
2020-11-10 13:35 ` Pablo Neira Ayuso [this message]
2020-11-10 15:02   ` Martin Willi
2020-11-11 23:43     ` Pablo Neira Ayuso
2020-11-12 15:49       ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201110133506.GA1777@salvia \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=kuba@kernel.org \
    --cc=martin@strongswan.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=shrijeet@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.