* [PATCH] RDMA/rxe: Add network namespace support @ 2026-02-25 17:26 David Ahern 2026-02-25 18:14 ` yanjun.zhu 2026-02-26 14:08 ` kernel test robot 0 siblings, 2 replies; 12+ messages in thread From: David Ahern @ 2026-02-25 17:26 UTC (permalink / raw) To: zyjzyj2000, jgg, leon; +Cc: linux-rdma, David Ahern Allow rxe to work across network namespaces by making the sockets per namespace using net_generic. Defer socket initialization until a device is created in the namespace. Signed-off-by: David Ahern <dsahern@kernel.org> --- drivers/infiniband/sw/rxe/rxe_net.c | 123 ++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0bd0902b11f7..f51afc38c9df 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -18,7 +18,10 @@ #include "rxe_net.h" #include "rxe_loc.h" -static struct rxe_recv_sockets recv_sockets; +static int __rxe_netns_init(struct net *net, + struct rxe_recv_sockets *sockets); + +static unsigned int rxe_net_id; #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -105,6 +108,7 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, struct in_addr *saddr, struct in_addr *daddr) { + struct net *net = dev_net(ndev); struct rtable *rt; struct flowi4 fl = { { 0 } }; @@ -114,7 +118,7 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, memcpy(&fl.daddr, daddr, sizeof(*daddr)); fl.flowi4_proto = IPPROTO_UDP; - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) { rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; @@ -129,6 +133,8 @@ static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, struct in6_addr *saddr, struct in6_addr *daddr) { + struct net *net = dev_net(ndev); + struct rxe_recv_sockets *recv_socket = net_generic(net, rxe_net_id); struct dst_entry *ndst; struct flowi6 fl6 = { { 0 } }; @@ -138,9 +144,8 @@ static struct dst_entry 
*rxe_find_route6(struct rxe_qp *qp, memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; - ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), - recv_sockets.sk6->sk, &fl6, - NULL); + ndst = ipv6_stub->ipv6_dst_lookup_flow(net, recv_socket->sk6->sk, + &fl6, NULL); if (IS_ERR(ndst)) { rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; @@ -606,8 +611,16 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) int rxe_net_add(const char *ibdev_name, struct net_device *ndev) { - int err; + struct net *net = dev_net(ndev); + struct rxe_recv_sockets *sockets = net_generic(net, rxe_net_id); struct rxe_dev *rxe = NULL; + int err; + + if (!sockets->sk4) { + err = __rxe_netns_init(net, sockets); + if (err) + return err; + } rxe = ib_alloc_device(rxe_dev, ib_dev); if (!rxe) @@ -709,12 +722,13 @@ static struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -static int rxe_net_ipv4_init(void) +static int rxe_net_ipv4_init(struct net *net, + struct rxe_recv_sockets *sockets) { - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); - if (IS_ERR(recv_sockets.sk4)) { - recv_sockets.sk4 = NULL; + sockets->sk4 = rxe_setup_udp_tunnel(net, htons(ROCE_V2_UDP_DPORT), + false); + if (IS_ERR(sockets->sk4)) { + sockets->sk4 = NULL; pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } @@ -722,31 +736,74 @@ static int rxe_net_ipv4_init(void) return 0; } -static int rxe_net_ipv6_init(void) -{ #if IS_ENABLED(CONFIG_IPV6) - - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { - recv_sockets.sk6 = NULL; - pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n"); - return 0; - } - - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; +static int rxe_net_ipv6_init(struct net *net, + struct rxe_recv_sockets *sockets) +{ + sockets->sk6 = rxe_setup_udp_tunnel(net, 
htons(ROCE_V2_UDP_DPORT), + true); + if (IS_ERR(sockets->sk6)) { + sockets->sk6 = NULL; pr_err("Failed to create IPv6 UDP tunnel\n"); return -1; } + return 0; +} +#endif + +/* Initialize per network namespace state */ +static int __rxe_netns_init(struct net *net, + struct rxe_recv_sockets *sockets) +{ + int err; + + err = rxe_net_ipv4_init(net, sockets); + if (err) + return err; + +#if IS_ENABLED(CONFIG_IPV6) + err = rxe_net_ipv6_init(net, sockets); + if (err) { + rxe_release_udp_tunnel(sockets->sk4); + return err; + } #endif + + return 0; +} + +static int __net_init rxe_netns_init(struct net *net) +{ + /* defer socket create in the namespace to the first + * device create. + */ return 0; } +static void __net_exit rxe_netns_exit(struct net *net) +{ + struct rxe_recv_sockets *sockets; + + sockets = net_generic(net, rxe_net_id); + +#if IS_ENABLED(CONFIG_IPV6) + if (sockets->sk6) + rxe_release_udp_tunnel(sockets->sk6); +#endif + if (sockets->sk4) + rxe_release_udp_tunnel(sockets->sk4); +} + +static struct pernet_operations rxe_net_ops __net_initdata = { + .init = rxe_netns_init, + .exit = rxe_netns_exit, + .id = &rxe_net_id, + .size = sizeof(struct rxe_recv_sockets), +}; + void rxe_net_exit(void) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); + unregister_pernet_device(&rxe_net_ops); unregister_netdevice_notifier(&rxe_net_notifier); } @@ -754,21 +811,17 @@ int rxe_net_init(void) { int err; - recv_sockets.sk6 = NULL; - - err = rxe_net_ipv4_init(); - if (err) - return err; - err = rxe_net_ipv6_init(); - if (err) - goto err_out; err = register_netdevice_notifier(&rxe_net_notifier); if (err) { pr_err("Failed to register netdev notifier\n"); goto err_out; } + err = register_pernet_device(&rxe_net_ops); + if (err) + goto err_out; + return 0; err_out: - rxe_net_exit(); + unregister_netdevice_notifier(&rxe_net_notifier); return err; } -- 2.43.0 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-25 17:26 [PATCH] RDMA/rxe: Add network namespace support David Ahern @ 2026-02-25 18:14 ` yanjun.zhu 2026-02-25 18:50 ` David Ahern 2026-02-26 14:08 ` kernel test robot 1 sibling, 1 reply; 12+ messages in thread From: yanjun.zhu @ 2026-02-25 18:14 UTC (permalink / raw) To: David Ahern, zyjzyj2000, jgg, leon; +Cc: linux-rdma On 2/25/26 9:26 AM, David Ahern wrote: > Allow rxe to work across network namespaces by making the sockets > per namespace using net_generic. Defer socket initialization until > a device is created in the namespace. https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace Do you make tests with the above link? Compared with the net namespace in the above link, what is the difference between this commit and the above link? Thanks a lot. Yanjun.Zhu > > Signed-off-by: David Ahern <dsahern@kernel.org> > --- > drivers/infiniband/sw/rxe/rxe_net.c | 123 ++++++++++++++++++++-------- > 1 file changed, 88 insertions(+), 35 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 0bd0902b11f7..f51afc38c9df 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -18,7 +18,10 @@ > #include "rxe_net.h" > #include "rxe_loc.h" > > -static struct rxe_recv_sockets recv_sockets; > +static int __rxe_netns_init(struct net *net, > + struct rxe_recv_sockets *sockets); > + > +static unsigned int rxe_net_id; > > #ifdef CONFIG_DEBUG_LOCK_ALLOC > /* > @@ -105,6 +108,7 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > struct in_addr *saddr, > struct in_addr *daddr) > { > + struct net *net = dev_net(ndev); > struct rtable *rt; > struct flowi4 fl = { { 0 } }; > > @@ -114,7 +118,7 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > memcpy(&fl.daddr, daddr, sizeof(*daddr)); > fl.flowi4_proto = IPPROTO_UDP; > > - rt = ip_route_output_key(&init_net, &fl); > + rt = 
ip_route_output_key(net, &fl); > if (IS_ERR(rt)) { > rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); > return NULL; > @@ -129,6 +133,8 @@ static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, > struct in6_addr *saddr, > struct in6_addr *daddr) > { > + struct net *net = dev_net(ndev); > + struct rxe_recv_sockets *recv_socket = net_generic(net, rxe_net_id); > struct dst_entry *ndst; > struct flowi6 fl6 = { { 0 } }; > > @@ -138,9 +144,8 @@ static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, > memcpy(&fl6.daddr, daddr, sizeof(*daddr)); > fl6.flowi6_proto = IPPROTO_UDP; > > - ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), > - recv_sockets.sk6->sk, &fl6, > - NULL); > + ndst = ipv6_stub->ipv6_dst_lookup_flow(net, recv_socket->sk6->sk, > + &fl6, NULL); > if (IS_ERR(ndst)) { > rxe_dbg_qp(qp, "no route to %pI6\n", daddr); > return NULL; > @@ -606,8 +611,16 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) > > int rxe_net_add(const char *ibdev_name, struct net_device *ndev) > { > - int err; > + struct net *net = dev_net(ndev); > + struct rxe_recv_sockets *sockets = net_generic(net, rxe_net_id); > struct rxe_dev *rxe = NULL; > + int err; > + > + if (!sockets->sk4) { > + err = __rxe_netns_init(net, sockets); > + if (err) > + return err; > + } > > rxe = ib_alloc_device(rxe_dev, ib_dev); > if (!rxe) > @@ -709,12 +722,13 @@ static struct notifier_block rxe_net_notifier = { > .notifier_call = rxe_notify, > }; > > -static int rxe_net_ipv4_init(void) > +static int rxe_net_ipv4_init(struct net *net, > + struct rxe_recv_sockets *sockets) > { > - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, > - htons(ROCE_V2_UDP_DPORT), false); > - if (IS_ERR(recv_sockets.sk4)) { > - recv_sockets.sk4 = NULL; > + sockets->sk4 = rxe_setup_udp_tunnel(net, htons(ROCE_V2_UDP_DPORT), > + false); > + if (IS_ERR(sockets->sk4)) { > + sockets->sk4 = NULL; > pr_err("Failed to create IPv4 UDP tunnel\n"); > return -1; > } > @@ -722,31 +736,74 
@@ static int rxe_net_ipv4_init(void) > return 0; > } > > -static int rxe_net_ipv6_init(void) > -{ > #if IS_ENABLED(CONFIG_IPV6) > - > - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, > - htons(ROCE_V2_UDP_DPORT), true); > - if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { > - recv_sockets.sk6 = NULL; > - pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n"); > - return 0; > - } > - > - if (IS_ERR(recv_sockets.sk6)) { > - recv_sockets.sk6 = NULL; > +static int rxe_net_ipv6_init(struct net *net, > + struct rxe_recv_sockets *sockets) > +{ > + sockets->sk6 = rxe_setup_udp_tunnel(net, htons(ROCE_V2_UDP_DPORT), > + true); > + if (IS_ERR(sockets->sk6)) { > + sockets->sk6 = NULL; > pr_err("Failed to create IPv6 UDP tunnel\n"); > return -1; > } > + return 0; > +} > +#endif > + > +/* Initialize per network namespace state */ > +static int __rxe_netns_init(struct net *net, > + struct rxe_recv_sockets *sockets) > +{ > + int err; > + > + err = rxe_net_ipv4_init(net, sockets); > + if (err) > + return err; > + > +#if IS_ENABLED(CONFIG_IPV6) > + err = rxe_net_ipv6_init(net, sockets); > + if (err) { > + rxe_release_udp_tunnel(sockets->sk4); > + return err; > + } > #endif > + > + return 0; > +} > + > +static int __net_init rxe_netns_init(struct net *net) > +{ > + /* defer socket create in the namespace to the first > + * device create. 
> + */ > return 0; > } > > +static void __net_exit rxe_netns_exit(struct net *net) > +{ > + struct rxe_recv_sockets *sockets; > + > + sockets = net_generic(net, rxe_net_id); > + > +#if IS_ENABLED(CONFIG_IPV6) > + if (sockets->sk6) > + rxe_release_udp_tunnel(sockets->sk6); > +#endif > + if (sockets->sk4) > + rxe_release_udp_tunnel(sockets->sk4); > +} > + > +static struct pernet_operations rxe_net_ops __net_initdata = { > + .init = rxe_netns_init, > + .exit = rxe_netns_exit, > + .id = &rxe_net_id, > + .size = sizeof(struct rxe_recv_sockets), > +}; > + > void rxe_net_exit(void) > { > - rxe_release_udp_tunnel(recv_sockets.sk6); > - rxe_release_udp_tunnel(recv_sockets.sk4); > + unregister_pernet_device(&rxe_net_ops); > unregister_netdevice_notifier(&rxe_net_notifier); > } > > @@ -754,21 +811,17 @@ int rxe_net_init(void) > { > int err; > > - recv_sockets.sk6 = NULL; > - > - err = rxe_net_ipv4_init(); > - if (err) > - return err; > - err = rxe_net_ipv6_init(); > - if (err) > - goto err_out; > err = register_netdevice_notifier(&rxe_net_notifier); > if (err) { > pr_err("Failed to register netdev notifier\n"); > goto err_out; > } > + err = register_pernet_device(&rxe_net_ops); > + if (err) > + goto err_out; > + > return 0; > err_out: > - rxe_net_exit(); > + unregister_netdevice_notifier(&rxe_net_notifier); > return err; > } ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-25 18:14 ` yanjun.zhu @ 2026-02-25 18:50 ` David Ahern 2026-02-25 21:07 ` yanjun.zhu 0 siblings, 1 reply; 12+ messages in thread From: David Ahern @ 2026-02-25 18:50 UTC (permalink / raw) To: yanjun.zhu, zyjzyj2000, jgg, leon; +Cc: linux-rdma On 2/25/26 11:14 AM, yanjun.zhu wrote: > On 2/25/26 9:26 AM, David Ahern wrote: >> Allow rxe to work across network namespaces by making the sockets >> per namespace using net_generic. Defer socket initialization until >> a device is created in the namespace. > > https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace > > Do you make tests with the above link? no. I had no knowledge of that branch until this moment. It is almost 12 months old, so not sure the relevance if it is not being actively fixed on top of tree. > > Compared with the net namespace in the above link, what is the > difference between this commit and the above link? > no idea. This patch was in our tree at enfabrica dating back to 2021. Someone started looking into automated tests with rxe, so I pulled it from the tree, rebased to 7.0 and sent it out. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-25 18:50 ` David Ahern @ 2026-02-25 21:07 ` yanjun.zhu 2026-02-26 6:47 ` Leon Romanovsky 0 siblings, 1 reply; 12+ messages in thread From: yanjun.zhu @ 2026-02-25 21:07 UTC (permalink / raw) To: David Ahern, zyjzyj2000, jgg, leon; +Cc: linux-rdma On 2/25/26 10:50 AM, David Ahern wrote: > On 2/25/26 11:14 AM, yanjun.zhu wrote: >> On 2/25/26 9:26 AM, David Ahern wrote: >>> Allow rxe to work across network namespaces by making the sockets >>> per namespace using net_generic. Defer socket initialization until >>> a device is created in the namespace. >> >> https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace >> >> Do you make tests with the above link? > > no. I had no knowledge of that branch until this moment. It is almost 12 > months old, so not sure the relevance if it is not being actively fixed > on top of tree. > >> >> Compared with the net namespace in the above link, what is the >> difference between this commit and the above link? >> > > no idea. This patch was in our tree at enfabrica dating back to 2021. > Someone started looking into automated tests with rxe, so I pulled it > from the tree, rebased to 7.0 and sent it out. > https://patchwork.kernel.org/project/linux-rdma/cover/20230624073927.707915-1-yanjun.zhu@intel.com/ In the above link, there is some testcases for the link https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace. I am wondering if this commit can pass all the testcases or not. Thanks a lot. Zhu Yanjun ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-25 21:07 ` yanjun.zhu @ 2026-02-26 6:47 ` Leon Romanovsky 2026-02-26 15:51 ` Zhu Yanjun 0 siblings, 1 reply; 12+ messages in thread From: Leon Romanovsky @ 2026-02-26 6:47 UTC (permalink / raw) To: yanjun.zhu; +Cc: David Ahern, zyjzyj2000, jgg, linux-rdma On Wed, Feb 25, 2026 at 01:07:12PM -0800, yanjun.zhu wrote: > On 2/25/26 10:50 AM, David Ahern wrote: > > On 2/25/26 11:14 AM, yanjun.zhu wrote: > > > On 2/25/26 9:26 AM, David Ahern wrote: > > > > Allow rxe to work across network namespaces by making the sockets > > > > per namespace using net_generic. Defer socket initialization until > > > > a device is created in the namespace. > > > > > > https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace > > > > > > Do you make tests with the above link? > > > > no. I had no knowledge of that branch until this moment. It is almost 12 > > months old, so not sure the relevance if it is not being actively fixed > > on top of tree. > > > > > > > > Compared with the net namespace in the above link, what is the > > > difference between this commit and the above link? > > > > > > > no idea. This patch was in our tree at enfabrica dating back to 2021. > > Someone started looking into automated tests with rxe, so I pulled it > > from the tree, rebased to 7.0 and sent it out. > > > > https://patchwork.kernel.org/project/linux-rdma/cover/20230624073927.707915-1-yanjun.zhu@intel.com/ > > In the above link, there is some testcases for the link > https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace. > > I am wondering if this commit can pass all the testcases or not. Zhu, It is a bit unreasonable to expect a random RXE contributor to compare against something that lives out‑of‑tree. Please feel free to pick up that patch and run it through your tests. Thanks > > Thanks a lot. > Zhu Yanjun > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-26 6:47 ` Leon Romanovsky @ 2026-02-26 15:51 ` Zhu Yanjun 2026-02-26 16:06 ` David Ahern 0 siblings, 1 reply; 12+ messages in thread From: Zhu Yanjun @ 2026-02-26 15:51 UTC (permalink / raw) To: Leon Romanovsky; +Cc: David Ahern, zyjzyj2000, jgg, linux-rdma 在 2026/2/25 22:47, Leon Romanovsky 写道: > On Wed, Feb 25, 2026 at 01:07:12PM -0800, yanjun.zhu wrote: >> On 2/25/26 10:50 AM, David Ahern wrote: >>> On 2/25/26 11:14 AM, yanjun.zhu wrote: >>>> On 2/25/26 9:26 AM, David Ahern wrote: >>>>> Allow rxe to work across network namespaces by making the sockets >>>>> per namespace using net_generic. Defer socket initialization until >>>>> a device is created in the namespace. >>>> https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace >>>> >>>> Do you make tests with the above link? >>> no. I had no knowledge of that branch until this moment. It is almost 12 >>> months old, so not sure the relevance if it is not being actively fixed >>> on top of tree. >>> >>>> Compared with the net namespace in the above link, what is the >>>> difference between this commit and the above link? >>>> >>> no idea. This patch was in our tree at enfabrica dating back to 2021. >>> Someone started looking into automated tests with rxe, so I pulled it >>> from the tree, rebased to 7.0 and sent it out. >>> >> https://patchwork.kernel.org/project/linux-rdma/cover/20230624073927.707915-1-yanjun.zhu@intel.com/ >> >> In the above link, there is some testcases for the link >> https://github.com/zhuyj/linux/tree/upstream/6.14-net-namespace. >> >> I am wondering if this commit can pass all the testcases or not. > Zhu, > > It is a bit unreasonable to expect a random RXE contributor to compare > against something that lives out‑of‑tree. Please feel free to pick up that > patch and run it through your tests. Hi, Leon Thanks a lot for your reply. I’ve already submitted a similar patch earlier. 
About this commit, after running it through my test cases, I found that some of them do not pass. I’ve replied on the thread to let the developer check it. At the moment, I’m working on addressing a few related problems and re-validating the behavior. Once things are in better shape and if time permits, I plan to resend my version of the patch for further review and testing. Thanks for your understanding. Zhu Yanjun > > Thanks > >> Thanks a lot. >> Zhu Yanjun >> >> -- Best Regards, Yanjun.Zhu ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-26 15:51 ` Zhu Yanjun @ 2026-02-26 16:06 ` David Ahern 2026-02-27 0:06 ` yanjun.zhu 0 siblings, 1 reply; 12+ messages in thread From: David Ahern @ 2026-02-26 16:06 UTC (permalink / raw) To: Zhu Yanjun, Leon Romanovsky; +Cc: zyjzyj2000, jgg, linux-rdma On 2/26/26 8:51 AM, Zhu Yanjun wrote: > Thanks a lot for your reply. I’ve already submitted a similar patch > earlier. In Jan 2021, rxe had no network namespace support. We fixed that and carried a patch (in stealth mode at the time). In Feb 2026, rxe in Linus' master and rdma-next do not have network namespace support. I sent our well tested solution that works out of the box with behavior similar to how init_net works. If you are interested in your design approach getting merged, then make it happen for 7.0-next. If you do not have the time to commit to it now, then step back and let this patch move forward. That is how Linux works - post ready-to-merge patches, not intentions. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-26 16:06 ` David Ahern @ 2026-02-27 0:06 ` yanjun.zhu 2026-02-27 2:05 ` David Ahern 0 siblings, 1 reply; 12+ messages in thread From: yanjun.zhu @ 2026-02-27 0:06 UTC (permalink / raw) To: David Ahern, Leon Romanovsky; +Cc: zyjzyj2000, jgg, linux-rdma On 2/26/26 8:06 AM, David Ahern wrote: > On 2/26/26 8:51 AM, Zhu Yanjun wrote: >> Thanks a lot for your reply. I’ve already submitted a similar patch >> earlier. > > In Jan 2021, rxe had no network namespace support. We fixed that and > carried a patch (in stealth mode at the time). > > In Feb 2026, rxe in Linus' master and rdma-next do not have network > namespace support. I sent our well tested solution that works out of the > box with behavior similar to how init_net works. > > If you are interested in your design approach getting merged, then make > it happen for 7.0-next. If you do not have the time to commit to it now, > then step back and let this patch move forward. That is how Linux works > - post ready-to-merge patches, not intentions. Hi, David Thank you for your feedback and for pushing this forward. I completely agree that "ready-to-merge patches" are what drive the kernel forward. To that end, I have just finished rebasing and updating my implementation from 6.14 to the current 6.19-rc (and it’s ready for 7.0-next). I have full respect for your long-standing work on this. Since we both have functional solutions now, I suggest we quickly compare the design. My goal is the same as yours: to finally get netns support into rxe for 7.0. I will post my updated patch set shortly so the maintainers can evaluate both. The patch link is: https://github.com/zhuyj/linux/tree/6.19-net-namespace Please code review. Best regards, Zhu Yanjun > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-27 0:06 ` yanjun.zhu @ 2026-02-27 2:05 ` David Ahern 2026-02-27 6:28 ` Zhu Yanjun 2026-02-27 22:13 ` Yanjun.Zhu 0 siblings, 2 replies; 12+ messages in thread From: David Ahern @ 2026-02-27 2:05 UTC (permalink / raw) To: yanjun.zhu, Leon Romanovsky; +Cc: zyjzyj2000, jgg, linux-rdma On 2/26/26 5:06 PM, yanjun.zhu wrote: > > The patch link is: https://github.com/zhuyj/linux/tree/6.19-net-namespace please send the patches; I cannot give comments to a github tree. Scanning the patches, I think you have over complicated what needs to be done. 1. socket lookups are not free. If the rxe module is going to own the socket, let it own the socket. See my patch with the net_generic way of retrieving the socket per namespace. <several patches later> Oh, you also bring in net_generic, so why make this so complicated? 2. current code creates the socket for init_net at module load time. My patch changes it to first rxe link create and then leaves it enabled until the namespace is deleted. Why? Well, any solution trying to track how many devices are in the namespace is overly complicated. If an rxe is created once what are the odds it will be created again? This is a very specific type of workload. Besides, it makes the code very simple. I bet this is why my patch fails any test cases you have. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-27 2:05 ` David Ahern @ 2026-02-27 6:28 ` Zhu Yanjun 2026-02-27 22:13 ` Yanjun.Zhu 1 sibling, 0 replies; 12+ messages in thread From: Zhu Yanjun @ 2026-02-27 6:28 UTC (permalink / raw) To: David Ahern, Leon Romanovsky; +Cc: zyjzyj2000, jgg, linux-rdma 在 2026/2/26 18:05, David Ahern 写道: > On 2/26/26 5:06 PM, yanjun.zhu wrote: >> The patch link is: https://github.com/zhuyj/linux/tree/6.19-net-namespace > please send the patches; I cannot give comments to a github tree. > > Scanning the patches, I think you have over complicated what needs to be > done. > > 1. socket lookups are not free. If the rxe module is going to own the > socket, let it own the socket. See my patch with the net_generic way of > retrieving the socket per namespace. <several patches later> Oh, you > also bring in net_generic, so why make this so complicated? Thanks, I will use net_generic later. > 2. current code creates the socket for init_net at module load time. My > patch changes it to first rxe link create and then leaves it enabled > until the namespace is deleted. Why? Well, any solution trying to track > how many devices are in the namespace is overly complicated. If an rxe > is created once what are the odds it will be created again? This is a > very specific type of workload. Besides, it makes the code very simple. > I bet this is why my patch fails any test cases you have. Yes. I will send out my commit very soon. Thanks a lot. Zhu Yanjun > -- Best Regards, Yanjun.Zhu ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-27 2:05 ` David Ahern 2026-02-27 6:28 ` Zhu Yanjun @ 2026-02-27 22:13 ` Yanjun.Zhu 1 sibling, 0 replies; 12+ messages in thread From: Yanjun.Zhu @ 2026-02-27 22:13 UTC (permalink / raw) To: David Ahern, Leon Romanovsky; +Cc: zyjzyj2000, jgg, linux-rdma On 2/26/26 6:05 PM, David Ahern wrote: > On 2/26/26 5:06 PM, yanjun.zhu wrote: >> The patch link is: https://github.com/zhuyj/linux/tree/6.19-net-namespace > please send the patches; I cannot give comments to a github tree. From c986e6fe7fde0166544bebb30a2e6268df6f2146 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun <yanjun.zhu@linux.dev> Date: Thu, 5 Oct 2023 12:05:10 +0800 Subject: [PATCH 1/1] RDMA/rxe: Add the support that rxe can work in net namespace When run "ip link add" command to add a rxe rdma link in a net namespace, normally this rxe rdma link can not work in a net name space. The root cause is that a sock listening on udp port 4791 is created in init_net when the rdma_rxe module is loaded into kernel. That is, the sock listening on udp port 4791 is created in init_net. Other net namespace is difficult to use this sock. The following commits will solve this problem. In the first commit, move the creating sock listening on udp port 4791 from module_init function to rdma link creating functions. That is, after the module rdma_rxe is loaded, the sock will not be created. When run "rdma link add ..." command, the sock will be created. So when creating a rdma link in the net namespace, the sock will be created in this net namespace. In the second commit, the functions udp4_lib_lookup and udp6_lib_lookup will check the sock exists in the net namespace or not. If yes, rdma link will increase the reference count of this sock, then continue other jobs instead of creating a new sock to listen on udp port 4791. Since the network notifier is global, when the module rdma_rxe is loaded, this notifier will be registered. 
After the rdma link is created, the command "rdma link del" deletes the rdma link and, at the same time, the sock is checked. If the reference count of this sock is greater than the sock reference count needed by udp tunnel, the sock reference count is decreased by one. If equal, it indicates that this rdma link is the last one. As such, the udp tunnel is shut down and the sock is closed. The above work should be implemented in the dellink function. But currently there is no dellink function in rxe. So the 3rd commit adds the dellink function pointer. And the 4th commit implements the dellink function in rxe. Up to now, it is not necessary to keep a global variable to store the sock listening on udp port 4791. This global variable can be replaced by the functions udp4_lib_lookup and udp6_lib_lookup totally. Because the function udp6_lib_lookup is in the fast path, a member variable l_sk6 is added to store the sock. If l_sk6 is NULL, udp6_lib_lookup is called to look up the sock, then the sock is stored in l_sk6; in the future, it can be used directly. All the above work has been done in init_net. And it can also work in the net namespace. So the init_net is replaced by the individual net namespace. This is what the 6th commit does. Because rxe device is dependent on the net device and the sock listening on udp port 4791, every rxe device is in exclusive mode in the individual net namespace. Other rdma netns operations will be considered in the future. In the 7th commit, the register_pernet_subsys/unregister_pernet_subsys functions are added. When a new net namespace is created, the init function will initialize the sk4 and sk6 socks. Then the 2 socks will be released when the net namespace is destroyed. The functions rxe_ns_pernet_sk4/rxe_ns_pernet_set_sk4 will get and set sk4 in the net namespace. The functions rxe_ns_pernet_sk6/rxe_ns_pernet_set_sk6 will handle sk6. Then sk4 and sk6 are used in the previous commits. 
As the sk4 and sk6 in pernet namespace can be accessed, it is not necessary to add a new l_sk6. As such, in the 8th commit, the l_sk6 is replaced with the sk6 in pernet namespace. Test steps: 1) Suppose that 2 NICs are in 2 different net namespaces. # ip netns exec net0 ip link 3: eno2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP link/ether 00:1e:67:a0:22:3f brd ff:ff:ff:ff:ff:ff altname enp5s0 # ip netns exec net1 ip link 4: eno3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel link/ether f8:e4:3b:3b:e4:10 brd ff:ff:ff:ff:ff:ff 2) Add rdma link in the different net namespace net0: # ip netns exec net0 rdma link add rxe0 type rxe netdev eno2 net1: # ip netns exec net1 rdma link add rxe1 type rxe netdev eno3 3) Run rping test. net0 # ip netns exec net0 rping -s -a 192.168.2.1 -C 1& [1] 1737 # ip netns exec net1 rping -c -a 192.168.2.1 -d -v -C 1 verbose count 1 ... ping data: rdma-ping-0: ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqr ... 4) Remove the rdma links from the net namespaces. 
net0: # ip netns exec net0 ss -lu State Recv-Q Send-Q Local Address:Port Peer Address:Port Process UNCONN 0 0 0.0.0.0:4791 0.0.0.0:* UNCONN 0 0 [::]:4791 [::]:* # ip netns exec net0 rdma link del rxe0 # ip netns exec net0 ss -lu State Recv-Q Send-Q Local Address:Port Peer Address:Port Process net1: # ip netns exec net0 ss -lu State Recv-Q Send-Q Local Address:Port Peer Address:Port Process UNCONN 0 0 0.0.0.0:4791 0.0.0.0:* UNCONN 0 0 [::]:4791 [::]:* # ip netns exec net1 rdma link del rxe1 # ip netns exec net0 ss -lu State Recv-Q Send-Q Local Address:Port Peer Address:Port Process Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> --- drivers/infiniband/core/nldev.c | 6 ++ drivers/infiniband/sw/rxe/Makefile | 3 +- drivers/infiniband/sw/rxe/rxe.c | 32 ++++++- drivers/infiniband/sw/rxe/rxe_net.c | 127 ++++++++++++++++++++------ drivers/infiniband/sw/rxe/rxe_net.h | 9 +- drivers/infiniband/sw/rxe/rxe_ns.c | 134 ++++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_ns.h | 17 ++++ include/rdma/rdma_netlink.h | 2 + 8 files changed, 291 insertions(+), 39 deletions(-) create mode 100644 drivers/infiniband/sw/rxe/rxe_ns.c create mode 100644 drivers/infiniband/sw/rxe/rxe_ns.h diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2220a2dfab24..48684930660a 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1824,6 +1824,12 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (device->link_ops) { + err = device->link_ops->dellink(device); + if (err) + return err; + } + ib_unregister_device_and_put(device); return 0; } diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile index 93134f1d1d0c..3977f4f13258 100644 --- a/drivers/infiniband/sw/rxe/Makefile +++ b/drivers/infiniband/sw/rxe/Makefile @@ -22,6 +22,7 @@ rdma_rxe-y := \ rxe_mcast.o \ rxe_task.o \ rxe_net.o \ - rxe_hw_counters.o + rxe_hw_counters.o \ + rxe_ns.o 
rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index e891199cbdef..165155f9be6d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -8,6 +8,8 @@ #include <net/addrconf.h> #include "rxe.h" #include "rxe_loc.h" +#include "rxe_net.h" +#include "rxe_ns.h" MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); @@ -200,6 +202,8 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) port->mtu_cap = ib_mtu_enum_to_int(mtu); } +static struct rdma_link_ops rxe_link_ops; + /* called by ifc layer to create new rxe device. * The caller should allocate memory for rxe by calling ib_alloc_device. */ @@ -208,6 +212,7 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, { rxe_init(rxe, ndev); rxe_set_mtu(rxe, mtu); + rxe->ib_dev.link_ops = &rxe_link_ops; return rxe_register_device(rxe, ibdev_name, ndev); } @@ -231,6 +236,10 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) goto err; } + err = rxe_net_init(ndev); + if (err) + return err; + err = rxe_net_add(ibdev_name, ndev); if (err) { rxe_err("failed to add %s\n", ndev->name); @@ -240,9 +249,17 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) return err; } +static int rxe_dellink(struct ib_device *dev) +{ + rxe_net_del(dev); + + return 0; +} + static struct rdma_link_ops rxe_link_ops = { .type = "rxe", .newlink = rxe_newlink, + .dellink = rxe_dellink, }; static int __init rxe_module_init(void) @@ -253,13 +270,20 @@ static int __init rxe_module_init(void) if (err) return err; - err = rxe_net_init(); + rdma_link_register(&rxe_link_ops); + err = rxe_register_notifier(); if (err) { + pr_err("Failed to register netdev notifier\n"); rxe_destroy_wq(); - return err; + return -1; + } + + err = rxe_namespace_init(); + if (err) { + pr_err("Failed to register net namespace 
notifier\n"); + return -1; } - rdma_link_register(&rxe_link_ops); pr_info("loaded\n"); return 0; } @@ -271,6 +295,8 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_destroy_wq(); + rxe_namespace_exit(); + pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0bd0902b11f7..a9e5a60b6d02 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -17,8 +17,7 @@ #include "rxe.h" #include "rxe_net.h" #include "rxe_loc.h" - -static struct rxe_recv_sockets recv_sockets; +#include "rxe_ns.h" #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -114,7 +113,7 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, memcpy(&fl.daddr, daddr, sizeof(*daddr)); fl.flowi4_proto = IPPROTO_UDP; - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_key(dev_net(ndev), &fl); if (IS_ERR(rt)) { rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; @@ -138,8 +137,8 @@ static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; - ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), - recv_sockets.sk6->sk, &fl6, + ndst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(ndev), + rxe_ns_pernet_sk6(dev_net(ndev)), &fl6, NULL); if (IS_ERR(ndst)) { rxe_dbg_qp(qp, "no route to %pI6\n", daddr); @@ -624,6 +623,49 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev) return 0; } +#define SK_REF_FOR_TUNNEL 2 +void rxe_net_del(struct ib_device *dev) +{ + struct sock *sk; + struct rxe_dev *rxe; + struct net_device *ndev; + + rxe = container_of(dev, struct rxe_dev, ib_dev); + + ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); + if (!ndev) + return; + + sk = rxe_ns_pernet_sk4(dev_net(ndev)); + if (!sk) + goto err_out; + + + if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL) { + __sock_put(sk); + } else { + rxe_release_udp_tunnel(sk->sk_socket); + sk = NULL; + 
rxe_ns_pernet_set_sk4(dev_net(ndev), sk); + } + + sk = rxe_ns_pernet_sk6(dev_net(ndev)); + if (!sk) + goto err_out; + + if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL) { + __sock_put(sk); + } else { + rxe_release_udp_tunnel(sk->sk_socket); + sk = NULL; + rxe_ns_pernet_set_sk6(dev_net(ndev), sk); + } + +err_out: + dev_put(ndev); +} +#undef SK_REF_FOR_TUNNEL + static void rxe_port_event(struct rxe_dev *rxe, enum ib_event_type event) { @@ -680,6 +722,7 @@ static int rxe_notify(struct notifier_block *not_blk, switch (event) { case NETDEV_UNREGISTER: ib_unregister_device_queued(&rxe->ib_dev); + rxe_net_del(&rxe->ib_dev); break; case NETDEV_CHANGEMTU: rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); @@ -709,66 +752,92 @@ static struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -static int rxe_net_ipv4_init(void) +static int rxe_net_ipv4_init(struct net_device *ndev) { - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); - if (IS_ERR(recv_sockets.sk4)) { - recv_sockets.sk4 = NULL; + struct sock *sk; + struct socket *sock; + + sk = rxe_ns_pernet_sk4(dev_net(ndev)); + if (sk) { + sock_hold(sk); + return 0; + } + + sock = rxe_setup_udp_tunnel(dev_net(ndev), htons(ROCE_V2_UDP_DPORT), false); + if (IS_ERR(sock)) { pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } + rxe_ns_pernet_set_sk4(dev_net(ndev), sock->sk); return 0; } -static int rxe_net_ipv6_init(void) +static int rxe_net_ipv6_init(struct net_device *ndev) { #if IS_ENABLED(CONFIG_IPV6) + struct sock *sk; + struct socket *sock; - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { - recv_sockets.sk6 = NULL; + sk = rxe_ns_pernet_sk6(dev_net(ndev)); + if (sk) { + sock_hold(sk); + return 0; + } + + sock = rxe_setup_udp_tunnel(dev_net(ndev), htons(ROCE_V2_UDP_DPORT), true); + if (PTR_ERR(sock) == -EAFNOSUPPORT) { pr_warn("IPv6 is not supported, 
can not create a UDPv6 socket\n"); return 0; } - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; + if (IS_ERR(sock)) { pr_err("Failed to create IPv6 UDP tunnel\n"); return -1; } + + rxe_ns_pernet_set_sk6(dev_net(ndev), sock->sk); + #endif return 0; } +int rxe_register_notifier(void) +{ + int err; + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("Failed to register netdev notifier\n"); + return -1; + } + + return 0; +} + void rxe_net_exit(void) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } -int rxe_net_init(void) +int rxe_net_init(struct net_device *ndev) { int err; - recv_sockets.sk6 = NULL; - - err = rxe_net_ipv4_init(); + err = rxe_net_ipv4_init(ndev); if (err) return err; - err = rxe_net_ipv6_init(); + + err = rxe_net_ipv6_init(ndev); if (err) goto err_out; - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - pr_err("Failed to register netdev notifier\n"); - goto err_out; - } + return 0; + err_out: + /* If ipv6 error, release ipv4 resource */ + udp_tunnel_sock_release(rxe_ns_pernet_sk4(dev_net(ndev))->sk_socket); + rxe_ns_pernet_set_sk4(dev_net(ndev), NULL); rxe_net_exit(); return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 45d80d00f86b..56249677d692 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -11,14 +11,11 @@ #include <net/if_inet6.h> #include <linux/module.h> -struct rxe_recv_sockets { - struct socket *sk4; - struct socket *sk6; -}; - int rxe_net_add(const char *ibdev_name, struct net_device *ndev); +void rxe_net_del(struct ib_device *dev); -int rxe_net_init(void); +int rxe_register_notifier(void); +int rxe_net_init(struct net_device *ndev); void rxe_net_exit(void); #endif /* RXE_NET_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_ns.c b/drivers/infiniband/sw/rxe/rxe_ns.c new file mode 100644 index 
000000000000..29d08899dcda --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_ns.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +#include <net/sock.h> +#include <net/netns/generic.h> +#include <net/net_namespace.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/pid_namespace.h> +#include <net/udp_tunnel.h> + +#include "rxe_ns.h" + +/* + * Per network namespace data + */ +struct rxe_ns_sock { + struct sock __rcu *rxe_sk4; + struct sock __rcu *rxe_sk6; +}; + +/* + * Index to store custom data for each network namespace. + */ +static unsigned int rxe_pernet_id; + +/* + * Called for every existing and added network namespaces + */ +static int __net_init rxe_ns_init(struct net *net) +{ + /* + * create (if not present) and access data item in network namespace + * (net) using the id (net_id) + */ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + + rcu_assign_pointer(ns_sk->rxe_sk4, NULL); /* initialize sock 4 socket */ + rcu_assign_pointer(ns_sk->rxe_sk6, NULL); /* initialize sock 6 socket */ + synchronize_rcu(); + + return 0; +} + +static void __net_exit rxe_ns_exit(struct net *net) +{ + /* + * called when the network namespace is removed + */ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + struct sock *rxe_sk4 = NULL; + struct sock *rxe_sk6 = NULL; + + rcu_read_lock(); + rxe_sk4 = rcu_dereference(ns_sk->rxe_sk4); + rxe_sk6 = rcu_dereference(ns_sk->rxe_sk6); + rcu_read_unlock(); + + /* close socket */ + if (rxe_sk4 && rxe_sk4->sk_socket) { + udp_tunnel_sock_release(rxe_sk4->sk_socket); + rcu_assign_pointer(ns_sk->rxe_sk4, NULL); + synchronize_rcu(); + } + + if (rxe_sk6 && rxe_sk6->sk_socket) { + udp_tunnel_sock_release(rxe_sk6->sk_socket); + rcu_assign_pointer(ns_sk->rxe_sk6, NULL); + synchronize_rcu(); + } +} + +/* + * callback to make the 
module network namespace aware + */ +static struct pernet_operations rxe_net_ops __net_initdata = { + .init = rxe_ns_init, + .exit = rxe_ns_exit, + .id = &rxe_pernet_id, + .size = sizeof(struct rxe_ns_sock), +}; + +struct sock *rxe_ns_pernet_sk4(struct net *net) +{ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + struct sock *sk; + + rcu_read_lock(); + sk = rcu_dereference(ns_sk->rxe_sk4); + rcu_read_unlock(); + + return sk; +} + +void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk) +{ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + + rcu_assign_pointer(ns_sk->rxe_sk4, sk); + synchronize_rcu(); +} + +struct sock *rxe_ns_pernet_sk6(struct net *net) +{ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + struct sock *sk; + + rcu_read_lock(); + sk = rcu_dereference(ns_sk->rxe_sk6); + rcu_read_unlock(); + + return sk; +} + +void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk) +{ + struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id); + + rcu_assign_pointer(ns_sk->rxe_sk6, sk); + synchronize_rcu(); +} + +int __init rxe_namespace_init(void) +{ + return register_pernet_subsys(&rxe_net_ops); +} + +void __exit rxe_namespace_exit(void) +{ + unregister_pernet_subsys(&rxe_net_ops); +} diff --git a/drivers/infiniband/sw/rxe/rxe_ns.h b/drivers/infiniband/sw/rxe/rxe_ns.h new file mode 100644 index 000000000000..da5bfcea1274 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_ns.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
+ */ + +#ifndef RXE_NS_H +#define RXE_NS_H + +struct sock *rxe_ns_pernet_sk4(struct net *net); +struct sock *rxe_ns_pernet_sk6(struct net *net); +void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk); +void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk); +int __init rxe_namespace_init(void); +void __exit rxe_namespace_exit(void); + +#endif /* RXE_NS_H */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 326deaf56d5d..2fd1358ea57d 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -5,6 +5,7 @@ #include <linux/netlink.h> #include <uapi/rdma/rdma_netlink.h> +#include <rdma/ib_verbs.h> struct ib_device; @@ -126,6 +127,7 @@ struct rdma_link_ops { struct list_head list; const char *type; int (*newlink)(const char *ibdev_name, struct net_device *ndev); + int (*dellink)(struct ib_device *dev); }; void rdma_link_register(struct rdma_link_ops *ops); -- 2.43.0 > > Scanning the patches, I think you have over complicated what needs to be > done. > > 1. socket lookups are not free. If the rxe module is going to own the > socket, let it own the socket. See my patch with the net_generic way of > retrieving the socket per namespace. <several patches later> Oh, you > also bring in net_generic, so why make this so complicated? > > 2. current code creates the socket for init_net at module load time. My > patch changes it to first rxe link create and then leaves it enabled > until the namespace is deleted. Why? Well, any solution trying to track > how many devices are in the namespace is overly complicated. If an rxe > is created once what are the odds it will be created again? This is a > very specific type of workload. Besides, it makes the code very simple. > I bet this is why my patch fails any test cases you have. > ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] RDMA/rxe: Add network namespace support 2026-02-25 17:26 [PATCH] RDMA/rxe: Add network namespace support David Ahern 2026-02-25 18:14 ` yanjun.zhu @ 2026-02-26 14:08 ` kernel test robot 1 sibling, 0 replies; 12+ messages in thread From: kernel test robot @ 2026-02-26 14:08 UTC (permalink / raw) To: David Ahern, zyjzyj2000, jgg, leon; +Cc: oe-kbuild-all, linux-rdma, David Ahern Hi David, kernel test robot noticed the following build warnings: [auto build test WARNING on rdma/for-next] [also build test WARNING on linus/master v7.0-rc1 next-20260225] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/David-Ahern/RDMA-rxe-Add-network-namespace-support/20260226-012818 base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next patch link: https://lore.kernel.org/r/20260225172622.7589-1-dsahern%40kernel.org patch subject: [PATCH] RDMA/rxe: Add network namespace support config: x86_64-randconfig-r064-20260226 (https://download.01.org/0day-ci/archive/20260226/202602262248.Yp5zxM1F-lkp@intel.com/config) compiler: gcc-14 (Debian 14.2.0-19) 14.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260226/202602262248.Yp5zxM1F-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202602262248.Yp5zxM1F-lkp@intel.com/ All warnings (new ones prefixed by >>, old ones prefixed by <<): >> WARNING: modpost: vmlinux: section mismatch in reference: rxe_net_exit+0x3 (section: .text) -> rxe_net_ops (section: .init.data) >> WARNING: modpost: vmlinux: section mismatch in reference: rxe_net_init+0x18 (section: .text) -> rxe_net_ops (section: .init.data) -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads: [~2026-02-27 22:13 UTC | newest] Thread overview: 12+ messages (download: mbox.gz, follow: Atom feed) -- links below jump to the message on this page -- 2026-02-25 17:26 [PATCH] RDMA/rxe: Add network namespace support David Ahern 2026-02-25 18:14 ` yanjun.zhu 2026-02-25 18:50 ` David Ahern 2026-02-25 21:07 ` yanjun.zhu 2026-02-26 6:47 ` Leon Romanovsky 2026-02-26 15:51 ` Zhu Yanjun 2026-02-26 16:06 ` David Ahern 2026-02-27 0:06 ` yanjun.zhu 2026-02-27 2:05 ` David Ahern 2026-02-27 6:28 ` Zhu Yanjun 2026-02-27 22:13 ` Yanjun.Zhu 2026-02-26 14:08 ` kernel test robot
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.