netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Zhu Yanjun <yanjun.zhu@intel.com>,
	jgg@ziepe.ca, leon@kernel.org, zyjzyj2000@gmail.com,
	linux-rdma@vger.kernel.org, parav@nvidia.com,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Subject: Re: [PATCHv3 7/8] RDMA/rxe: Add the support of net namespace notifier
Date: Thu, 23 Feb 2023 21:14:57 +0800	[thread overview]
Message-ID: <d7b7b79b-27c5-02fe-9d8d-ead68a353d22@linux.dev> (raw)
In-Reply-To: <20230214060634.427162-8-yanjun.zhu@intel.com>

在 2023/2/14 14:06, Zhu Yanjun 写道:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> 
> The functions register_pernet_subsys/unregister_pernet_subsys register a
> notifier of net namespace. When a new net namespace is created, the init
> function of rxe will be called to initialize sk4 and sk6 socks. When a
> net namespace is destroyed, the exit function will be called to handle
> sk4 and sk6 socks.
> 
> The functions rxe_ns_pernet_sk4 and rxe_ns_pernet_sk6 are used to get
> sk4 and sk6 socks.
> 
> The functions rxe_ns_pernet_set_sk4 and rxe_ns_pernet_set_sk6 are used
> to set sk4 and sk6 socks.
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>

Add netdev@vger.kernel.org.

Zhu Yanjun
> ---
>   drivers/infiniband/sw/rxe/Makefile  |   3 +-
>   drivers/infiniband/sw/rxe/rxe.c     |   9 ++
>   drivers/infiniband/sw/rxe/rxe_net.c |  50 +++++------
>   drivers/infiniband/sw/rxe/rxe_ns.c  | 134 ++++++++++++++++++++++++++++
>   drivers/infiniband/sw/rxe/rxe_ns.h  |  17 ++++
>   5 files changed, 187 insertions(+), 26 deletions(-)
>   create mode 100644 drivers/infiniband/sw/rxe/rxe_ns.c
>   create mode 100644 drivers/infiniband/sw/rxe/rxe_ns.h
> 
> diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
> index 5395a581f4bb..8380f97674cb 100644
> --- a/drivers/infiniband/sw/rxe/Makefile
> +++ b/drivers/infiniband/sw/rxe/Makefile
> @@ -22,4 +22,5 @@ rdma_rxe-y := \
>   	rxe_mcast.o \
>   	rxe_task.o \
>   	rxe_net.o \
> -	rxe_hw_counters.o
> +	rxe_hw_counters.o \
> +	rxe_ns.o
> diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
> index 4a17e4a003f5..c297677bf06a 100644
> --- a/drivers/infiniband/sw/rxe/rxe.c
> +++ b/drivers/infiniband/sw/rxe/rxe.c
> @@ -9,6 +9,7 @@
>   #include "rxe.h"
>   #include "rxe_loc.h"
>   #include "rxe_net.h"
> +#include "rxe_ns.h"
>   
>   MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
>   MODULE_DESCRIPTION("Soft RDMA transport");
> @@ -234,6 +235,12 @@ static int __init rxe_module_init(void)
>   		return -1;
>   	}
>   
> +	err = rxe_namespace_init();
> +	if (err) {
> +		pr_err("Failed to register net namespace notifier\n");
> +		return -1;
> +	}
> +
>   	pr_info("loaded\n");
>   	return 0;
>   }
> @@ -244,6 +251,8 @@ static void __exit rxe_module_exit(void)
>   	ib_unregister_driver(RDMA_DRIVER_RXE);
>   	rxe_net_exit();
>   
> +	rxe_namespace_exit();
> +
>   	pr_info("unloaded\n");
>   }
>   
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 9af90587642a..8135876b11f6 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -17,6 +17,7 @@
>   #include "rxe.h"
>   #include "rxe_net.h"
>   #include "rxe_loc.h"
> +#include "rxe_ns.h"
>   
>   static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>   					 struct net_device *ndev,
> @@ -554,33 +555,30 @@ void rxe_net_del(struct ib_device *dev)
>   
>   	rdev = container_of(dev, struct rxe_dev, ib_dev);
>   
> -	rcu_read_lock();
> -	sk = udp4_lib_lookup(dev_net(rdev->ndev), 0, 0, htonl(INADDR_ANY),
> -			     htons(ROCE_V2_UDP_DPORT), 0);
> -	rcu_read_unlock();
> +	sk = rxe_ns_pernet_sk4(dev_net(rdev->ndev));
>   	if (!sk)
>   		return;
>   
> -	__sock_put(sk);
>   
> -	if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL)
> +	if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL) {
>   		__sock_put(sk);
> -	else
> +	} else {
>   		rxe_release_udp_tunnel(sk->sk_socket);
> +		sk = NULL;
> +		rxe_ns_pernet_set_sk4(dev_net(rdev->ndev), sk);
> +	}
>   
> -	rcu_read_lock();
> -	sk = udp6_lib_lookup(dev_net(rdev->ndev), NULL, 0, &in6addr_any,
> -			     htons(ROCE_V2_UDP_DPORT), 0);
> -	rcu_read_unlock();
> +	sk = rxe_ns_pernet_sk6(dev_net(rdev->ndev));
>   	if (!sk)
>   		return;
>   
> -	__sock_put(sk);
> -
> -	if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL)
> +	if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL) {
>   		__sock_put(sk);
> -	else
> +	} else {
>   		rxe_release_udp_tunnel(sk->sk_socket);
> +		sk = NULL;
> +		rxe_ns_pernet_set_sk6(dev_net(rdev->ndev), sk);
> +	}
>   }
>   #undef SK_REF_FOR_TUNNEL
>   
> @@ -681,18 +679,18 @@ static int rxe_net_ipv4_init(struct net_device *ndev)
>   	struct sock *sk;
>   	struct socket *sock;
>   
> -	rcu_read_lock();
> -	sk = udp4_lib_lookup(dev_net(ndev), 0, 0, htonl(INADDR_ANY),
> -			     htons(ROCE_V2_UDP_DPORT), 0);
> -	rcu_read_unlock();
> -	if (sk)
> +	sk = rxe_ns_pernet_sk4(dev_net(ndev));
> +	if (sk) {
> +		sock_hold(sk);
>   		return 0;
> +	}
>   
>   	sock = rxe_setup_udp_tunnel(dev_net(ndev), htons(ROCE_V2_UDP_DPORT), false);
>   	if (IS_ERR(sock)) {
>   		pr_err("Failed to create IPv4 UDP tunnel\n");
>   		return -1;
>   	}
> +	rxe_ns_pernet_set_sk4(dev_net(ndev), sock->sk);
>   
>   	return 0;
>   }
> @@ -703,12 +701,11 @@ static int rxe_net_ipv6_init(struct net_device *ndev)
>   	struct sock *sk;
>   	struct socket *sock;
>   
> -	rcu_read_lock();
> -	sk = udp6_lib_lookup(dev_net(ndev), NULL, 0, &in6addr_any,
> -			     htons(ROCE_V2_UDP_DPORT), 0);
> -	rcu_read_unlock();
> -	if (sk)
> +	sk = rxe_ns_pernet_sk6(dev_net(ndev));
> +	if (sk) {
> +		sock_hold(sk);
>   		return 0;
> +	}
>   
>   	sock = rxe_setup_udp_tunnel(dev_net(ndev), htons(ROCE_V2_UDP_DPORT), true);
>   	if (PTR_ERR(sock) == -EAFNOSUPPORT) {
> @@ -720,6 +717,9 @@ static int rxe_net_ipv6_init(struct net_device *ndev)
>   		pr_err("Failed to create IPv6 UDP tunnel\n");
>   		return -1;
>   	}
> +
> +	rxe_ns_pernet_set_sk6(dev_net(ndev), sock->sk);
> +
>   #endif
>   	return 0;
>   }
> diff --git a/drivers/infiniband/sw/rxe/rxe_ns.c b/drivers/infiniband/sw/rxe/rxe_ns.c
> new file mode 100644
> index 000000000000..29d08899dcda
> --- /dev/null
> +++ b/drivers/infiniband/sw/rxe/rxe_ns.c
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/*
> + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
> + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
> + */
> +
> +#include <net/sock.h>
> +#include <net/netns/generic.h>
> +#include <net/net_namespace.h>
> +#include <linux/module.h>
> +#include <linux/skbuff.h>
> +#include <linux/pid_namespace.h>
> +#include <net/udp_tunnel.h>
> +
> +#include "rxe_ns.h"
> +
> +/*
> + * Per network namespace data
> + */
> +struct rxe_ns_sock {
> +	struct sock __rcu *rxe_sk4;
> +	struct sock __rcu *rxe_sk6;
> +};
> +
> +/*
> + * Index to store custom data for each network namespace.
> + */
> +static unsigned int rxe_pernet_id;
> +
> +/*
> + * Called for every existing and added network namespaces
> + */
> +static int __net_init rxe_ns_init(struct net *net)
> +{
> +	/*
> +	 * create (if not present) and access data item in network namespace
> +	 * (net) using the id (net_id)
> +	 */
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +
> +	rcu_assign_pointer(ns_sk->rxe_sk4, NULL); /* initialize sock 4 socket */
> +	rcu_assign_pointer(ns_sk->rxe_sk6, NULL); /* initialize sock 6 socket */
> +	synchronize_rcu();
> +
> +	return 0;
> +}
> +
> +static void __net_exit rxe_ns_exit(struct net *net)
> +{
> +	/*
> +	 * called when the network namespace is removed
> +	 */
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +	struct sock *rxe_sk4 = NULL;
> +	struct sock *rxe_sk6 = NULL;
> +
> +	rcu_read_lock();
> +	rxe_sk4 = rcu_dereference(ns_sk->rxe_sk4);
> +	rxe_sk6 = rcu_dereference(ns_sk->rxe_sk6);
> +	rcu_read_unlock();
> +
> +	/* close socket */
> +	if (rxe_sk4 && rxe_sk4->sk_socket) {
> +		udp_tunnel_sock_release(rxe_sk4->sk_socket);
> +		rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
> +		synchronize_rcu();
> +	}
> +
> +	if (rxe_sk6 && rxe_sk6->sk_socket) {
> +		udp_tunnel_sock_release(rxe_sk6->sk_socket);
> +		rcu_assign_pointer(ns_sk->rxe_sk6, NULL);
> +		synchronize_rcu();
> +	}
> +}
> +
> +/*
> + * callback to make the module network namespace aware
> + */
> +static struct pernet_operations rxe_net_ops __net_initdata = {
> +	.init = rxe_ns_init,
> +	.exit = rxe_ns_exit,
> +	.id = &rxe_pernet_id,
> +	.size = sizeof(struct rxe_ns_sock),
> +};
> +
> +struct sock *rxe_ns_pernet_sk4(struct net *net)
> +{
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +	struct sock *sk;
> +
> +	rcu_read_lock();
> +	sk = rcu_dereference(ns_sk->rxe_sk4);
> +	rcu_read_unlock();
> +
> +	return sk;
> +}
> +
> +void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk)
> +{
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +
> +	rcu_assign_pointer(ns_sk->rxe_sk4, sk);
> +	synchronize_rcu();
> +}
> +
> +struct sock *rxe_ns_pernet_sk6(struct net *net)
> +{
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +	struct sock *sk;
> +
> +	rcu_read_lock();
> +	sk = rcu_dereference(ns_sk->rxe_sk6);
> +	rcu_read_unlock();
> +
> +	return sk;
> +}
> +
> +void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk)
> +{
> +	struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
> +
> +	rcu_assign_pointer(ns_sk->rxe_sk6, sk);
> +	synchronize_rcu();
> +}
> +
> +int __init rxe_namespace_init(void)
> +{
> +	return register_pernet_subsys(&rxe_net_ops);
> +}
> +
> +void __exit rxe_namespace_exit(void)
> +{
> +	unregister_pernet_subsys(&rxe_net_ops);
> +}
> diff --git a/drivers/infiniband/sw/rxe/rxe_ns.h b/drivers/infiniband/sw/rxe/rxe_ns.h
> new file mode 100644
> index 000000000000..a3eac9558889
> --- /dev/null
> +++ b/drivers/infiniband/sw/rxe/rxe_ns.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/*
> + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
> + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
> + */
> +
> +#ifndef RXE_NS_H
> +#define RXE_NS_H
> +
> +struct sock *rxe_ns_pernet_sk4(struct net *net);
> +struct sock *rxe_ns_pernet_sk6(struct net *net);
> +void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk);
> +void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk);
> +int __init rxe_namespace_init(void);
> +void __exit rxe_namespace_exit(void);
> +
> +#endif /* RXE_NS_H */


  parent reply	other threads:[~2023-02-23 13:15 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20230214060634.427162-1-yanjun.zhu@intel.com>
2023-02-23  0:31 ` [PATCHv3 0/8] Fix the problem that rxe can not work in net namespace Zhu Yanjun
2023-02-23  4:56   ` Jakub Kicinski
2023-02-23 11:42     ` Zhu Yanjun
2023-02-25  8:43   ` Rain River
     [not found] ` <20230214060634.427162-2-yanjun.zhu@intel.com>
2023-02-23 13:10   ` [PATCHv3 1/8] RDMA/rxe: Creating listening sock in newlink function Zhu Yanjun
     [not found] ` <20230214060634.427162-3-yanjun.zhu@intel.com>
2023-02-23 13:10   ` [PATCHv3 2/8] RDMA/rxe: Support more rdma links in init_net Zhu Yanjun
     [not found] ` <20230214060634.427162-4-yanjun.zhu@intel.com>
2023-02-23 13:11   ` [PATCHv3 3/8] RDMA/nldev: Add dellink function pointer Zhu Yanjun
     [not found] ` <20230214060634.427162-5-yanjun.zhu@intel.com>
2023-02-23 13:12   ` [PATCHv3 4/8] RDMA/rxe: Implement dellink in rxe Zhu Yanjun
     [not found] ` <20230214060634.427162-6-yanjun.zhu@intel.com>
2023-02-23 13:13   ` [PATCHv3 5/8] RDMA/rxe: Replace global variable with sock lookup functions Zhu Yanjun
     [not found] ` <20230214060634.427162-7-yanjun.zhu@intel.com>
2023-02-23 13:14   ` [PATCHv3 6/8] RDMA/rxe: add the support of net namespace Zhu Yanjun
     [not found] ` <20230214060634.427162-8-yanjun.zhu@intel.com>
2023-02-23 13:14   ` Zhu Yanjun [this message]
     [not found] ` <20230214060634.427162-9-yanjun.zhu@intel.com>
2023-02-23 13:15   ` [PATCHv3 8/8] RDMA/rxe: Replace l_sk6 with sk6 in " Zhu Yanjun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d7b7b79b-27c5-02fe-9d8d-ead68a353d22@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=parav@nvidia.com \
    --cc=yanjun.zhu@intel.com \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).