All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Ahern <dsahern@kernel.org>
To: Mike Manning <mvrmanning@gmail.com>,
	netdev@vger.kernel.org, jluebbe@lasnet.de,
	Andy Roulin <aroulin@nvidia.com>
Subject: Re: [PATCH] net: allow unbound socket for packets in VRF when tcp_l3mdev_accept set
Date: Mon, 25 Jul 2022 12:27:16 -0600	[thread overview]
Message-ID: <33db44a5-cbbb-0a3f-3487-9ceef35fd57e@kernel.org> (raw)
In-Reply-To: <20220725181442.18041-1-mvrmanning@gmail.com>

[ cc Andy who reported a similar issue ]

On 7/25/22 12:14 PM, Mike Manning wrote:
> The commit 3c82a21f4320 ("net: allow binding socket in a VRF when
> there's an unbound socket") changed the inet socket lookup to avoid
> packets in a VRF from matching an unbound socket. This is to ensure the
> necessary isolation between the default and other VRFs for routing and
> forwarding. VRF-unaware processes running in the default VRF cannot
> access another VRF and have to be run with 'ip vrf exec <vrf>'. This is
> to be expected with tcp_l3mdev_accept disabled, but could be reallowed
> when this sysctl option is enabled. So instead of directly checking dif
> and sdif in inet[6]_match, here call inet_sk_bound_dev_eq(). This
> allows a match on unbound socket for non-zero sdif i.e. for packets in
> a VRF, if tcp_l3mdev_accept is enabled.
> 
> Fixes: 3c82a21f4320 ("net: allow binding socket in a VRF when there's an unbound socket")
> Signed-off-by: Mike Manning <mvrmanning@gmail.com>
> Link: https://lore.kernel.org/netdev/a54c149aed38fded2d3b5fdb1a6c89e36a083b74.camel@lasnet.de/
> ---
> 
> Nettest results for VRF testing remain unchanged:
> 
> $ diff nettest-baseline-502c6f8cedcc.txt nettest-fix.txt
> $ tail -3 nettest-fix.txt
> Tests passed: 869
> Tests failed:   5
> 
> Disclaimer: While I do not think that there should be any noticeable
> socket throughput degradation due to these changes, I am unable to
> carry out any performance tests for this, nor provide any follow-up
> support if there is any such degradation.
> 
> ---
>  include/net/inet6_hashtables.h |  7 +++----
>  include/net/inet_hashtables.h  | 19 +++----------------
>  include/net/inet_sock.h        | 11 +++++++++++
>  3 files changed, 17 insertions(+), 20 deletions(-)
> 
> diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
> index f259e1ae14ba..56f1286583d3 100644
> --- a/include/net/inet6_hashtables.h
> +++ b/include/net/inet6_hashtables.h
> @@ -110,8 +110,6 @@ static inline bool inet6_match(struct net *net, const struct sock *sk,
>  			       const __portpair ports,
>  			       const int dif, const int sdif)
>  {
> -	int bound_dev_if;
> -
>  	if (!net_eq(sock_net(sk), net) ||
>  	    sk->sk_family != AF_INET6 ||
>  	    sk->sk_portpair != ports ||
> @@ -119,8 +117,9 @@ static inline bool inet6_match(struct net *net, const struct sock *sk,
>  	    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
>  		return false;
>  
> -	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
> -	return bound_dev_if == dif || bound_dev_if == sdif;
> +	/* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
> +	return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
> +				    sdif);
>  }
>  #endif /* IS_ENABLED(CONFIG_IPV6) */
>  
> diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
> index fd6b510d114b..e9cf2157ed8a 100644
> --- a/include/net/inet_hashtables.h
> +++ b/include/net/inet_hashtables.h
> @@ -175,17 +175,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
>  	hashinfo->ehash_locks = NULL;
>  }
>  
> -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
> -					int dif, int sdif)
> -{
> -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
> -	return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
> -				 bound_dev_if, dif, sdif);
> -#else
> -	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
> -#endif
> -}
> -
>  struct inet_bind_bucket *
>  inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
>  			struct inet_bind_hashbucket *head,
> @@ -271,16 +260,14 @@ static inline bool inet_match(struct net *net, const struct sock *sk,
>  			      const __addrpair cookie, const __portpair ports,
>  			      int dif, int sdif)
>  {
> -	int bound_dev_if;
> -
>  	if (!net_eq(sock_net(sk), net) ||
>  	    sk->sk_portpair != ports ||
>  	    sk->sk_addrpair != cookie)
>  	        return false;
>  
> -	/* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */
> -	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
> -	return bound_dev_if == dif || bound_dev_if == sdif;
> +	/* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
> +	return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
> +				    sdif);
>  }
>  
>  /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 6395f6b9a5d2..bf5654ce711e 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -149,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
>  	return bound_dev_if == dif || bound_dev_if == sdif;
>  }
>  
> +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
> +					int dif, int sdif)
> +{
> +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
> +	return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
> +				 bound_dev_if, dif, sdif);
> +#else
> +	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
> +#endif
> +}
> +
>  struct inet_cork {
>  	unsigned int		flags;
>  	__be32			addr;


  reply	other threads:[~2022-07-25 18:27 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-25 18:14 [PATCH] net: allow unbound socket for packets in VRF when tcp_l3mdev_accept set Mike Manning
2022-07-25 18:27 ` David Ahern [this message]
2022-07-29  2:56 ` David Ahern
2022-07-29 11:10 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=33db44a5-cbbb-0a3f-3487-9ceef35fd57e@kernel.org \
    --to=dsahern@kernel.org \
    --cc=aroulin@nvidia.com \
    --cc=jluebbe@lasnet.de \
    --cc=mvrmanning@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.