* Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address [not found] <20231205002613.10219-1-rpearsonhpe@gmail.com> @ 2023-12-05 5:55 ` Zhu Yanjun 2023-12-05 10:29 ` Zhu Yanjun [not found] ` <20231205002613.10219-2-rpearsonhpe@gmail.com> ` (3 subsequent siblings) 4 siblings, 1 reply; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 5:55 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Currently the rdma_rxe driver does not receive mcast packets at all. > > Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > the IP mcast address. This is required for mcast traffic to reach the > rxe driver when coming from an external source. > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > drivers/infiniband/sw/rxe/rxe_net.h | 1 + > drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > 4 files changed, 102 insertions(+), 21 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index 86cc2e18a7fd..54735d07cee5 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -19,38 +19,116 @@ > * mcast packets in the rxe receive path. 
> */ > > +#include <linux/igmp.h> > + > #include "rxe.h" > > -/** > - * rxe_mcast_add - add multicast address to rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > { > + struct in6_addr *addr6 = (struct in6_addr *)mgid; > + struct sock *sk = recv_sockets.sk6->sk; > unsigned char ll_addr[ETH_ALEN]; > + int err; > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + if (err && err != -EADDRINUSE) > + goto err_out; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_drop; > + > + return 0; > > - return dev_mc_add(rxe->ndev, ll_addr); > +err_drop: > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > +err_out: > + return err; > } > > -/** > - * rxe_mcast_del - delete multicast address from rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add(struct rxe_mcg *mcg) > { > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err; > + > + if (mcg->is_ipv6) > + return rxe_mcast_add6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + rtnl_lock(); > + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > + if (err && err != -EADDRINUSE) > + goto err_out; > + > + 
ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_leave; > + > + return 0; > + > +err_leave: > + rtnl_lock(); > + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > +err_out: > + return err; > +} > + > +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > +{ > + struct sock *sk = recv_sockets.sk6->sk; > + unsigned char ll_addr[ETH_ALEN]; > + int err, err2; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > + (struct in6_addr *)mgid); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + > + return err ?: err2; > +} > + > +static int rxe_mcast_del(struct rxe_mcg *mcg) > +{ > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > + unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err, err2; > + > + if (mcg->is_ipv6) > + return rxe_mcast_del6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + rtnl_lock(); > + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > > - return dev_mc_del(rxe->ndev, ll_addr); > + return err ?: err2; > } > > /** > @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, > { > kref_init(&mcg->ref_cnt); > memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > INIT_LIST_HEAD(&mcg->qp_list); > mcg->rxe = rxe; > > @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > spin_unlock_bh(&rxe->mcg_lock); > > /* add mcast address outside of lock */ > - err = rxe_mcast_add(rxe, mgid); > + err = rxe_mcast_add(mcg); > if (!err) 
> return mcg; > > @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > static void rxe_destroy_mcg(struct rxe_mcg *mcg) > { > /* delete mcast address outside of lock */ > - rxe_mcast_del(mcg->rxe, &mcg->mgid); > + rxe_mcast_del(mcg); > > spin_lock_bh(&mcg->rxe->mcg_lock); > __rxe_destroy_mcg(mcg); > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 58c3f3759bf0..b481f8da2002 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -18,7 +18,7 @@ > #include "rxe_net.h" > #include "rxe_loc.h" > > -static struct rxe_recv_sockets recv_sockets; > +struct rxe_recv_sockets recv_sockets; > > static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > struct net_device *ndev, > diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h > index 45d80d00f86b..89cee7d5340f 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.h > +++ b/drivers/infiniband/sw/rxe/rxe_net.h > @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > struct socket *sk4; > struct socket *sk6; > }; > +extern struct rxe_recv_sockets recv_sockets; > > int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h > index ccb9d19ffe8a..7be9e6232dd9 100644 > --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > @@ -352,6 +352,7 @@ struct rxe_mcg { > atomic_t qp_num; > u32 qkey; > u16 pkey; > + bool is_ipv6; > }; > > struct rxe_mca { ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address 2023-12-05 5:55 ` [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Zhu Yanjun @ 2023-12-05 10:29 ` Zhu Yanjun 2023-12-07 1:47 ` Rain River 0 siblings, 1 reply; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 10:29 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev 在 2023/12/5 13:55, Zhu Yanjun 写道: > Add David S. Miller and David Ahern. > > They are the maintainers in netdev and very familiar with mcast. > > Zhu Yanjun > > 在 2023/12/5 8:26, Bob Pearson 写道: >> Currently the rdma_rxe driver does not receive mcast packets at all. >> >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister >> the IP mcast address. This is required for mcast traffic to reach the >> rxe driver when coming from an external source. >> >> Fixes: 8700e3e7c485 ("Soft RoCE driver") >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >> --- >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- >> drivers/infiniband/sw/rxe/rxe_net.h | 1 + >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + >> 4 files changed, 102 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c >> b/drivers/infiniband/sw/rxe/rxe_mcast.c >> index 86cc2e18a7fd..54735d07cee5 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c >> @@ -19,38 +19,116 @@ >> * mcast packets in the rxe receive path. 
>> */ >> +#include <linux/igmp.h> >> + >> #include "rxe.h" >> -/** >> - * rxe_mcast_add - add multicast address to rxe device >> - * @rxe: rxe device object >> - * @mgid: multicast address as a gid >> - * >> - * Returns 0 on success else an error >> - */ >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) >> { >> + struct in6_addr *addr6 = (struct in6_addr *)mgid; >> + struct sock *sk = recv_sockets.sk6->sk; >> unsigned char ll_addr[ETH_ALEN]; >> + int err; >> + >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. Please Jason or experts from netdev comment on this. Thanks, Zhu Yanjun >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> + if (err && err != -EADDRINUSE) >> + goto err_out; >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >> + err = dev_mc_add(rxe->ndev, ll_addr); >> + if (err) >> + goto err_drop; >> + >> + return 0; >> - return dev_mc_add(rxe->ndev, ll_addr); >> +err_drop: >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> +err_out: >> + return err; >> } >> -/** >> - * rxe_mcast_del - delete multicast address from rxe device >> - * @rxe: rxe device object >> - * @mgid: multicast address as a gid >> - * >> - * Returns 0 on success else an error >> - */ >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) >> +static int rxe_mcast_add(struct rxe_mcg *mcg) >> { >> + struct rxe_dev *rxe = mcg->rxe; >> + union ib_gid *mgid = &mcg->mgid; >> unsigned char ll_addr[ETH_ALEN]; >> + struct ip_mreqn imr = {}; >> + int err; >> + >> + if (mcg->is_ipv6) >> + return rxe_mcast_add6(rxe, mgid); >> + >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >> + imr.imr_ifindex = 
rxe->ndev->ifindex; >> + rtnl_lock(); >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> + if (err && err != -EADDRINUSE) >> + goto err_out; >> + >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >> + err = dev_mc_add(rxe->ndev, ll_addr); >> + if (err) >> + goto err_leave; >> + >> + return 0; >> + >> +err_leave: >> + rtnl_lock(); >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> +err_out: >> + return err; >> +} >> + >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) >> +{ >> + struct sock *sk = recv_sockets.sk6->sk; >> + unsigned char ll_addr[ETH_ALEN]; >> + int err, err2; >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >> + err = dev_mc_del(rxe->ndev, ll_addr); >> + >> + spin_lock_bh(&sk->sk_lock.slock); >> + rtnl_lock(); >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, >> + (struct in6_addr *)mgid); >> + rtnl_unlock(); >> + spin_unlock_bh(&sk->sk_lock.slock); >> + >> + return err ?: err2; >> +} >> + >> +static int rxe_mcast_del(struct rxe_mcg *mcg) >> +{ >> + struct rxe_dev *rxe = mcg->rxe; >> + union ib_gid *mgid = &mcg->mgid; >> + unsigned char ll_addr[ETH_ALEN]; >> + struct ip_mreqn imr = {}; >> + int err, err2; >> + >> + if (mcg->is_ipv6) >> + return rxe_mcast_del6(rxe, mgid); >> + >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >> + imr.imr_ifindex = rxe->ndev->ifindex; >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >> + err = dev_mc_del(rxe->ndev, ll_addr); >> + >> + rtnl_lock(); >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >> + rtnl_unlock(); >> - return dev_mc_del(rxe->ndev, ll_addr); >> + return err ?: err2; >> } >> /** >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, >> union ib_gid *mgid, >> { >> kref_init(&mcg->ref_cnt); >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); >> INIT_LIST_HEAD(&mcg->qp_list); >> mcg->rxe = rxe; >> @@ -225,7 
+304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct >> rxe_dev *rxe, union ib_gid *mgid) >> spin_unlock_bh(&rxe->mcg_lock); >> /* add mcast address outside of lock */ >> - err = rxe_mcast_add(rxe, mgid); >> + err = rxe_mcast_add(mcg); >> if (!err) >> return mcg; >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) >> static void rxe_destroy_mcg(struct rxe_mcg *mcg) >> { >> /* delete mcast address outside of lock */ >> - rxe_mcast_del(mcg->rxe, &mcg->mgid); >> + rxe_mcast_del(mcg); >> spin_lock_bh(&mcg->rxe->mcg_lock); >> __rxe_destroy_mcg(mcg); >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c >> b/drivers/infiniband/sw/rxe/rxe_net.c >> index 58c3f3759bf0..b481f8da2002 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_net.c >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c >> @@ -18,7 +18,7 @@ >> #include "rxe_net.h" >> #include "rxe_loc.h" >> -static struct rxe_recv_sockets recv_sockets; >> +struct rxe_recv_sockets recv_sockets; >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, >> struct net_device *ndev, >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h >> b/drivers/infiniband/sw/rxe/rxe_net.h >> index 45d80d00f86b..89cee7d5340f 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_net.h >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { >> struct socket *sk4; >> struct socket *sk6; >> }; >> +extern struct rxe_recv_sockets recv_sockets; >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h >> b/drivers/infiniband/sw/rxe/rxe_verbs.h >> index ccb9d19ffe8a..7be9e6232dd9 100644 >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h >> @@ -352,6 +352,7 @@ struct rxe_mcg { >> atomic_t qp_num; >> u32 qkey; >> u16 pkey; >> + bool is_ipv6; >> }; >> struct rxe_mca { ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address 2023-12-05 10:29 ` Zhu Yanjun @ 2023-12-07 1:47 ` Rain River 2023-12-07 19:07 ` Bob Pearson 0 siblings, 1 reply; 9+ messages in thread From: Rain River @ 2023-12-07 1:47 UTC (permalink / raw) To: Zhu Yanjun; +Cc: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: > > > 在 2023/12/5 13:55, Zhu Yanjun 写道: > > Add David S. Miller and David Ahern. > > > > They are the maintainers in netdev and very familiar with mcast. > > > > Zhu Yanjun > > > > 在 2023/12/5 8:26, Bob Pearson 写道: > >> Currently the rdma_rxe driver does not receive mcast packets at all. > >> > >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > >> the IP mcast address. This is required for mcast traffic to reach the > >> rxe driver when coming from an external source. > >> > >> Fixes: 8700e3e7c485 ("Soft RoCE driver") > >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >> --- > >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > >> drivers/infiniband/sw/rxe/rxe_net.h | 1 + > >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > >> 4 files changed, 102 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> index 86cc2e18a7fd..54735d07cee5 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> @@ -19,38 +19,116 @@ > >> * mcast packets in the rxe receive path. 
> >> */ > >> +#include <linux/igmp.h> > >> + > >> #include "rxe.h" > >> -/** > >> - * rxe_mcast_add - add multicast address to rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > >> { > >> + struct in6_addr *addr6 = (struct in6_addr *)mgid; > >> + struct sock *sk = recv_sockets.sk6->sk; > >> unsigned char ll_addr[ETH_ALEN]; > >> + int err; > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > > > Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. ./net/ipv6/addrconf.c-2915- lock_sock(sk); ./net/ipv6/addrconf.c-2916- if (join) ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, ifindex, addr); ./net/ipv6/addrconf.c-2918- else ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, ifindex, addr); ./net/ipv6/addrconf.c-2920- release_sock(sk); Should be lock_sock? > > Please Jason or experts from netdev comment on this. 
> > Thanks, > > Zhu Yanjun > > > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_drop; > >> + > >> + return 0; > >> - return dev_mc_add(rxe->ndev, ll_addr); > >> +err_drop: > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> +err_out: > >> + return err; > >> } > >> -/** > >> - * rxe_mcast_del - delete multicast address from rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add(struct rxe_mcg *mcg) > >> { > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_add6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + rtnl_lock(); > >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> + > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_leave; > >> + > >> + return 0; > >> + > >> +err_leave: > >> + rtnl_lock(); > >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> +err_out: > >> + return err; > >> +} > >> + > >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > >> +{ > >> + struct sock *sk = recv_sockets.sk6->sk; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + int err, 
err2; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > >> + (struct in6_addr *)mgid); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + > >> + return err ?: err2; > >> +} > >> + > >> +static int rxe_mcast_del(struct rxe_mcg *mcg) > >> +{ > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err, err2; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_del6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + rtnl_lock(); > >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> - return dev_mc_del(rxe->ndev, ll_addr); > >> + return err ?: err2; > >> } > >> /** > >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, > >> union ib_gid *mgid, > >> { > >> kref_init(&mcg->ref_cnt); > >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > >> INIT_LIST_HEAD(&mcg->qp_list); > >> mcg->rxe = rxe; > >> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct > >> rxe_dev *rxe, union ib_gid *mgid) > >> spin_unlock_bh(&rxe->mcg_lock); > >> /* add mcast address outside of lock */ > >> - err = rxe_mcast_add(rxe, mgid); > >> + err = rxe_mcast_add(mcg); > >> if (!err) > >> return mcg; > >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > >> static void rxe_destroy_mcg(struct rxe_mcg *mcg) > >> { > >> /* delete mcast address outside of lock */ > >> - rxe_mcast_del(mcg->rxe, &mcg->mgid); > >> + rxe_mcast_del(mcg); > >> 
spin_lock_bh(&mcg->rxe->mcg_lock); > >> __rxe_destroy_mcg(mcg); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c > >> b/drivers/infiniband/sw/rxe/rxe_net.c > >> index 58c3f3759bf0..b481f8da2002 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c > >> @@ -18,7 +18,7 @@ > >> #include "rxe_net.h" > >> #include "rxe_loc.h" > >> -static struct rxe_recv_sockets recv_sockets; > >> +struct rxe_recv_sockets recv_sockets; > >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > >> struct net_device *ndev, > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h > >> b/drivers/infiniband/sw/rxe/rxe_net.h > >> index 45d80d00f86b..89cee7d5340f 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h > >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > >> struct socket *sk4; > >> struct socket *sk6; > >> }; > >> +extern struct rxe_recv_sockets recv_sockets; > >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> index ccb9d19ffe8a..7be9e6232dd9 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> @@ -352,6 +352,7 @@ struct rxe_mcg { > >> atomic_t qp_num; > >> u32 qkey; > >> u16 pkey; > >> + bool is_ipv6; > >> }; > >> struct rxe_mca { > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address 2023-12-07 1:47 ` Rain River @ 2023-12-07 19:07 ` Bob Pearson 2023-12-08 1:24 ` Greg Sword 0 siblings, 1 reply; 9+ messages in thread From: Bob Pearson @ 2023-12-07 19:07 UTC (permalink / raw) To: Rain River, Zhu Yanjun; +Cc: jgg, linux-rdma, dsahern, davem, netdev On 12/6/23 19:47, Rain River wrote: > On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: >> >> >> 在 2023/12/5 13:55, Zhu Yanjun 写道: >>> Add David S. Miller and David Ahern. >>> >>> They are the maintainers in netdev and very familiar with mcast. >>> >>> Zhu Yanjun >>> >>> 在 2023/12/5 8:26, Bob Pearson 写道: >>>> Currently the rdma_rxe driver does not receive mcast packets at all. >>>> >>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister >>>> the IP mcast address. This is required for mcast traffic to reach the >>>> rxe driver when coming from an external source. >>>> >>>> Fixes: 8700e3e7c485 ("Soft RoCE driver") >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> >>>> --- >>>> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- >>>> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- >>>> drivers/infiniband/sw/rxe/rxe_net.h | 1 + >>>> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + >>>> 4 files changed, 102 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> index 86cc2e18a7fd..54735d07cee5 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c >>>> @@ -19,38 +19,116 @@ >>>> * mcast packets in the rxe receive path. 
>>>> */ >>>> +#include <linux/igmp.h> >>>> + >>>> #include "rxe.h" >>>> -/** >>>> - * rxe_mcast_add - add multicast address to rxe device >>>> - * @rxe: rxe device object >>>> - * @mgid: multicast address as a gid >>>> - * >>>> - * Returns 0 on success else an error >>>> - */ >>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) >>>> { >>>> + struct in6_addr *addr6 = (struct in6_addr *)mgid; >>>> + struct sock *sk = recv_sockets.sk6->sk; >>>> unsigned char ll_addr[ETH_ALEN]; >>>> + int err; >>>> + >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); >> >> >> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. > > ./net/ipv6/addrconf.c-2915- lock_sock(sk); > ./net/ipv6/addrconf.c-2916- if (join) > ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, > ifindex, addr); > ./net/ipv6/addrconf.c-2918- else > ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, > ifindex, addr); > ./net/ipv6/addrconf.c-2920- release_sock(sk); > > Should be lock_sock? It works as well as spin_lock_bh() in preventing the RCU splat and looks like the preferred way. I'll make this change. Bob > >> >> Please Jason or experts from netdev comment on this. 
>> >> Thanks, >> >> Zhu Yanjun >> >> >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> + if (err && err != -EADDRINUSE) >>>> + goto err_out; >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >>>> + err = dev_mc_add(rxe->ndev, ll_addr); >>>> + if (err) >>>> + goto err_drop; >>>> + >>>> + return 0; >>>> - return dev_mc_add(rxe->ndev, ll_addr); >>>> +err_drop: >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> +err_out: >>>> + return err; >>>> } >>>> -/** >>>> - * rxe_mcast_del - delete multicast address from rxe device >>>> - * @rxe: rxe device object >>>> - * @mgid: multicast address as a gid >>>> - * >>>> - * Returns 0 on success else an error >>>> - */ >>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +static int rxe_mcast_add(struct rxe_mcg *mcg) >>>> { >>>> + struct rxe_dev *rxe = mcg->rxe; >>>> + union ib_gid *mgid = &mcg->mgid; >>>> unsigned char ll_addr[ETH_ALEN]; >>>> + struct ip_mreqn imr = {}; >>>> + int err; >>>> + >>>> + if (mcg->is_ipv6) >>>> + return rxe_mcast_add6(rxe, mgid); >>>> + >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >>>> + imr.imr_ifindex = rxe->ndev->ifindex; >>>> + rtnl_lock(); >>>> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> + if (err && err != -EADDRINUSE) >>>> + goto err_out; >>>> + >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >>>> + err = dev_mc_add(rxe->ndev, ll_addr); >>>> + if (err) >>>> + goto err_leave; >>>> + >>>> + return 0; >>>> + >>>> +err_leave: >>>> + rtnl_lock(); >>>> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> +err_out: >>>> + return err; >>>> +} >>>> + >>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) >>>> +{ >>>> + struct sock *sk = recv_sockets.sk6->sk; >>>> + unsigned char ll_addr[ETH_ALEN]; >>>> + int 
err, err2; >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); >>>> + err = dev_mc_del(rxe->ndev, ll_addr); >>>> + >>>> + spin_lock_bh(&sk->sk_lock.slock); >>>> + rtnl_lock(); >>>> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, >>>> + (struct in6_addr *)mgid); >>>> + rtnl_unlock(); >>>> + spin_unlock_bh(&sk->sk_lock.slock); >>>> + >>>> + return err ?: err2; >>>> +} >>>> + >>>> +static int rxe_mcast_del(struct rxe_mcg *mcg) >>>> +{ >>>> + struct rxe_dev *rxe = mcg->rxe; >>>> + union ib_gid *mgid = &mcg->mgid; >>>> + unsigned char ll_addr[ETH_ALEN]; >>>> + struct ip_mreqn imr = {}; >>>> + int err, err2; >>>> + >>>> + if (mcg->is_ipv6) >>>> + return rxe_mcast_del6(rxe, mgid); >>>> + >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); >>>> + imr.imr_ifindex = rxe->ndev->ifindex; >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); >>>> + err = dev_mc_del(rxe->ndev, ll_addr); >>>> + >>>> + rtnl_lock(); >>>> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); >>>> + rtnl_unlock(); >>>> - return dev_mc_del(rxe->ndev, ll_addr); >>>> + return err ?: err2; >>>> } >>>> /** >>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, >>>> union ib_gid *mgid, >>>> { >>>> kref_init(&mcg->ref_cnt); >>>> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); >>>> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); >>>> INIT_LIST_HEAD(&mcg->qp_list); >>>> mcg->rxe = rxe; >>>> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct >>>> rxe_dev *rxe, union ib_gid *mgid) >>>> spin_unlock_bh(&rxe->mcg_lock); >>>> /* add mcast address outside of lock */ >>>> - err = rxe_mcast_add(rxe, mgid); >>>> + err = rxe_mcast_add(mcg); >>>> if (!err) >>>> return mcg; >>>> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) >>>> static void rxe_destroy_mcg(struct rxe_mcg *mcg) >>>> { >>>> /* delete mcast address outside of lock */ >>>> - rxe_mcast_del(mcg->rxe, &mcg->mgid); >>>> + rxe_mcast_del(mcg); >>>> 
spin_lock_bh(&mcg->rxe->mcg_lock); >>>> __rxe_destroy_mcg(mcg); >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c >>>> b/drivers/infiniband/sw/rxe/rxe_net.c >>>> index 58c3f3759bf0..b481f8da2002 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c >>>> @@ -18,7 +18,7 @@ >>>> #include "rxe_net.h" >>>> #include "rxe_loc.h" >>>> -static struct rxe_recv_sockets recv_sockets; >>>> +struct rxe_recv_sockets recv_sockets; >>>> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, >>>> struct net_device *ndev, >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h >>>> b/drivers/infiniband/sw/rxe/rxe_net.h >>>> index 45d80d00f86b..89cee7d5340f 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h >>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { >>>> struct socket *sk4; >>>> struct socket *sk6; >>>> }; >>>> +extern struct rxe_recv_sockets recv_sockets; >>>> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> index ccb9d19ffe8a..7be9e6232dd9 100644 >>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h >>>> @@ -352,6 +352,7 @@ struct rxe_mcg { >>>> atomic_t qp_num; >>>> u32 qkey; >>>> u16 pkey; >>>> + bool is_ipv6; >>>> }; >>>> struct rxe_mca { >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address 2023-12-07 19:07 ` Bob Pearson @ 2023-12-08 1:24 ` Greg Sword 0 siblings, 0 replies; 9+ messages in thread From: Greg Sword @ 2023-12-08 1:24 UTC (permalink / raw) To: Bob Pearson Cc: Rain River, Zhu Yanjun, jgg, linux-rdma, dsahern, davem, netdev On Fri, Dec 8, 2023 at 3:07 AM Bob Pearson <rpearsonhpe@gmail.com> wrote: > > > > On 12/6/23 19:47, Rain River wrote: > > On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: > >> > >> > >> 在 2023/12/5 13:55, Zhu Yanjun 写道: > >>> Add David S. Miller and David Ahern. > >>> > >>> They are the maintainers in netdev and very familiar with mcast. > >>> > >>> Zhu Yanjun > >>> > >>> 在 2023/12/5 8:26, Bob Pearson 写道: > >>>> Currently the rdma_rxe driver does not receive mcast packets at all. > >>>> > >>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > >>>> the IP mcast address. This is required for mcast traffic to reach the > >>>> rxe driver when coming from an external source. > >>>> > >>>> Fixes: 8700e3e7c485 ("Soft RoCE driver") > >>>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > >>>> --- > >>>> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > >>>> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > >>>> drivers/infiniband/sw/rxe/rxe_net.h | 1 + > >>>> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > >>>> 4 files changed, 102 insertions(+), 21 deletions(-) > >>>> > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> index 86cc2e18a7fd..54735d07cee5 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > >>>> @@ -19,38 +19,116 @@ > >>>> * mcast packets in the rxe receive path. 
> >>>> */ > >>>> +#include <linux/igmp.h> > >>>> + > >>>> #include "rxe.h" > >>>> -/** > >>>> - * rxe_mcast_add - add multicast address to rxe device > >>>> - * @rxe: rxe device object > >>>> - * @mgid: multicast address as a gid > >>>> - * > >>>> - * Returns 0 on success else an error > >>>> - */ > >>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> { > >>>> + struct in6_addr *addr6 = (struct in6_addr *)mgid; > >>>> + struct sock *sk = recv_sockets.sk6->sk; > >>>> unsigned char ll_addr[ETH_ALEN]; > >>>> + int err; > >>>> + > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > >> > >> > >> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not. > > > > ./net/ipv6/addrconf.c-2915- lock_sock(sk); > > ./net/ipv6/addrconf.c-2916- if (join) > > ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, > > ifindex, addr); > > ./net/ipv6/addrconf.c-2918- else > > ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, > > ifindex, addr); > > ./net/ipv6/addrconf.c-2920- release_sock(sk); > > > > Should be lock_sock? > > It works as well as spin_lock_bh() in preventing the RCU splat and > looks like the preferred way. I'll make this change. This is the implementation of lock_sock. lock_sock has not only spin_lock_bh, but also other protections for sk. You should use lock_sock, instead of spin_lock_bh. void lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. */ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sock_owned_by_user_nocheck(sk)) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); > > Bob > > > >> > >> Please Jason or experts from netdev comment on this. 
> >> > >> Thanks, > >> > >> Zhu Yanjun > >> > >> > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> + if (err && err != -EADDRINUSE) > >>>> + goto err_out; > >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >>>> + err = dev_mc_add(rxe->ndev, ll_addr); > >>>> + if (err) > >>>> + goto err_drop; > >>>> + > >>>> + return 0; > >>>> - return dev_mc_add(rxe->ndev, ll_addr); > >>>> +err_drop: > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> +err_out: > >>>> + return err; > >>>> } > >>>> -/** > >>>> - * rxe_mcast_del - delete multicast address from rxe device > >>>> - * @rxe: rxe device object > >>>> - * @mgid: multicast address as a gid > >>>> - * > >>>> - * Returns 0 on success else an error > >>>> - */ > >>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > >>>> +static int rxe_mcast_add(struct rxe_mcg *mcg) > >>>> { > >>>> + struct rxe_dev *rxe = mcg->rxe; > >>>> + union ib_gid *mgid = &mcg->mgid; > >>>> unsigned char ll_addr[ETH_ALEN]; > >>>> + struct ip_mreqn imr = {}; > >>>> + int err; > >>>> + > >>>> + if (mcg->is_ipv6) > >>>> + return rxe_mcast_add6(rxe, mgid); > >>>> + > >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >>>> + imr.imr_ifindex = rxe->ndev->ifindex; > >>>> + rtnl_lock(); > >>>> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> + if (err && err != -EADDRINUSE) > >>>> + goto err_out; > >>>> + > >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >>>> + err = dev_mc_add(rxe->ndev, ll_addr); > >>>> + if (err) > >>>> + goto err_leave; > >>>> + > >>>> + return 0; > >>>> + > >>>> +err_leave: > >>>> + rtnl_lock(); > >>>> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> +err_out: > >>>> + return err; > >>>> +} > >>>> + > >>>> +static int rxe_mcast_del6(struct 
rxe_dev *rxe, union ib_gid *mgid) > >>>> +{ > >>>> + struct sock *sk = recv_sockets.sk6->sk; > >>>> + unsigned char ll_addr[ETH_ALEN]; > >>>> + int err, err2; > >>>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >>>> + err = dev_mc_del(rxe->ndev, ll_addr); > >>>> + > >>>> + spin_lock_bh(&sk->sk_lock.slock); > >>>> + rtnl_lock(); > >>>> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > >>>> + (struct in6_addr *)mgid); > >>>> + rtnl_unlock(); > >>>> + spin_unlock_bh(&sk->sk_lock.slock); > >>>> + > >>>> + return err ?: err2; > >>>> +} > >>>> + > >>>> +static int rxe_mcast_del(struct rxe_mcg *mcg) > >>>> +{ > >>>> + struct rxe_dev *rxe = mcg->rxe; > >>>> + union ib_gid *mgid = &mcg->mgid; > >>>> + unsigned char ll_addr[ETH_ALEN]; > >>>> + struct ip_mreqn imr = {}; > >>>> + int err, err2; > >>>> + > >>>> + if (mcg->is_ipv6) > >>>> + return rxe_mcast_del6(rxe, mgid); > >>>> + > >>>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >>>> + imr.imr_ifindex = rxe->ndev->ifindex; > >>>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >>>> + err = dev_mc_del(rxe->ndev, ll_addr); > >>>> + > >>>> + rtnl_lock(); > >>>> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >>>> + rtnl_unlock(); > >>>> - return dev_mc_del(rxe->ndev, ll_addr); > >>>> + return err ?: err2; > >>>> } > >>>> /** > >>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, > >>>> union ib_gid *mgid, > >>>> { > >>>> kref_init(&mcg->ref_cnt); > >>>> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > >>>> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > >>>> INIT_LIST_HEAD(&mcg->qp_list); > >>>> mcg->rxe = rxe; > >>>> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct > >>>> rxe_dev *rxe, union ib_gid *mgid) > >>>> spin_unlock_bh(&rxe->mcg_lock); > >>>> /* add mcast address outside of lock */ > >>>> - err = rxe_mcast_add(rxe, mgid); > >>>> + err = rxe_mcast_add(mcg); > >>>> if (!err) > >>>> return mcg; > >>>> @@ -273,7 +352,7 @@ 
static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > >>>> static void rxe_destroy_mcg(struct rxe_mcg *mcg) > >>>> { > >>>> /* delete mcast address outside of lock */ > >>>> - rxe_mcast_del(mcg->rxe, &mcg->mgid); > >>>> + rxe_mcast_del(mcg); > >>>> spin_lock_bh(&mcg->rxe->mcg_lock); > >>>> __rxe_destroy_mcg(mcg); > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c > >>>> b/drivers/infiniband/sw/rxe/rxe_net.c > >>>> index 58c3f3759bf0..b481f8da2002 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c > >>>> @@ -18,7 +18,7 @@ > >>>> #include "rxe_net.h" > >>>> #include "rxe_loc.h" > >>>> -static struct rxe_recv_sockets recv_sockets; > >>>> +struct rxe_recv_sockets recv_sockets; > >>>> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > >>>> struct net_device *ndev, > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h > >>>> b/drivers/infiniband/sw/rxe/rxe_net.h > >>>> index 45d80d00f86b..89cee7d5340f 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h > >>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > >>>> struct socket *sk4; > >>>> struct socket *sk6; > >>>> }; > >>>> +extern struct rxe_recv_sockets recv_sockets; > >>>> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> index ccb9d19ffe8a..7be9e6232dd9 100644 > >>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > >>>> @@ -352,6 +352,7 @@ struct rxe_mcg { > >>>> atomic_t qp_num; > >>>> u32 qkey; > >>>> u16 pkey; > >>>> + bool is_ipv6; > >>>> }; > >>>> struct rxe_mca { > >> > ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20231205002613.10219-2-rpearsonhpe@gmail.com>]
* Re: [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock [not found] ` <20231205002613.10219-2-rpearsonhpe@gmail.com> @ 2023-12-05 5:56 ` Zhu Yanjun 0 siblings, 0 replies; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 5:56 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Change locking of read side operations of the mcast group > red-black tree to use rcu read locking. This will allow changing > the mcast lock in the next patch to be a mutex without > breaking rxe_recv.c which runs in an atomic state. It is also a > better implementation than the current use of a spin-lock per > rdma device since receiving mcast packets will be much more > common than registering/deregistering mcast groups. > > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe_mcast.c | 59 +++++++++------------------ > drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > 2 files changed, 21 insertions(+), 39 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index 54735d07cee5..44948f9cb02b 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -151,13 +151,18 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg) > tmp = rb_entry(node, struct rxe_mcg, node); > > cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); > - if (cmp > 0) > + if (cmp > 0) { > link = &(*link)->rb_left; > - else > + } else if (cmp < 0) { > link = &(*link)->rb_right; > + } else { > + /* we must delete the old mcg before adding one */ > + WARN_ON_ONCE(1); > + return; > + } > } > > - rb_link_node(&mcg->node, node, link); > + rb_link_node_rcu(&mcg->node, node, link); > rb_insert_color(&mcg->node, tree); > } > > @@ -172,15 +177,11 @@ static void __rxe_remove_mcg(struct rxe_mcg *mcg) > 
rb_erase(&mcg->node, &mcg->rxe->mcg_tree); > } > > -/** > - * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock > - * @rxe: rxe device object > - * @mgid: multicast IP address > - * > - * Context: caller must hold rxe->mcg_lock > - * Returns: mcg on success and takes a ref to mcg else NULL > +/* > + * Lookup mgid in the multicast group red-black tree and try to > + * get a ref on it. Return mcg on success else NULL. > */ > -static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, > +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, > union ib_gid *mgid) > { > struct rb_root *tree = &rxe->mcg_tree; > @@ -188,7 +189,8 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, > struct rb_node *node; > int cmp; > > - node = tree->rb_node; > + rcu_read_lock(); > + node = rcu_dereference_raw(tree->rb_node); > > while (node) { > mcg = rb_entry(node, struct rxe_mcg, node); > @@ -196,35 +198,14 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, > cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); > > if (cmp > 0) > - node = node->rb_left; > + node = rcu_dereference_raw(node->rb_left); > else if (cmp < 0) > - node = node->rb_right; > + node = rcu_dereference_raw(node->rb_right); > else > break; > } > - > - if (node) { > - kref_get(&mcg->ref_cnt); > - return mcg; > - } > - > - return NULL; > -} > - > -/** > - * rxe_lookup_mcg - lookup up mcg in red-back tree > - * @rxe: rxe device object > - * @mgid: multicast IP address > - * > - * Returns: mcg if found else NULL > - */ > -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > -{ > - struct rxe_mcg *mcg; > - > - spin_lock_bh(&rxe->mcg_lock); > - mcg = __rxe_lookup_mcg(rxe, mgid); > - spin_unlock_bh(&rxe->mcg_lock); > + mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? 
mcg : NULL; > + rcu_read_unlock(); > > return mcg; > } > @@ -292,7 +273,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > > spin_lock_bh(&rxe->mcg_lock); > /* re-check to see if someone else just added it */ > - tmp = __rxe_lookup_mcg(rxe, mgid); > + tmp = rxe_lookup_mcg(rxe, mgid); > if (tmp) { > spin_unlock_bh(&rxe->mcg_lock); > atomic_dec(&rxe->mcg_num); > @@ -322,7 +303,7 @@ void rxe_cleanup_mcg(struct kref *kref) > { > struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); > > - kfree(mcg); > + kfree_rcu(mcg, rcu); > } > > /** > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h > index 7be9e6232dd9..8058e5039322 100644 > --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > @@ -345,6 +345,7 @@ struct rxe_mw { > > struct rxe_mcg { > struct rb_node node; > + struct rcu_head rcu; > struct kref ref_cnt; > struct rxe_dev *rxe; > struct list_head qp_list; ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20231205002613.10219-3-rpearsonhpe@gmail.com>]
* Re: [PATCH for-next v5 5/7] RDMA/rxe: Split multicast lock [not found] ` <20231205002613.10219-3-rpearsonhpe@gmail.com> @ 2023-12-05 5:57 ` Zhu Yanjun 0 siblings, 0 replies; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 5:57 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Split rxe->mcg_lock into two locks. One to protect mcg->qp_list > and one to protect rxe->mcg_tree (red-black tree) write side > operations and provide serialization between rxe_attach_mcast > and rxe_detach_mcast. > > Make the qp_list lock a spin_lock_irqsave lock and move to the > mcg struct. It protects the qp_list from simultaneous access > from rxe_mcast.c and rxe_recv.c when processing incoming multi- > cast packets. In theory some ethernet driver could bypass NAPI > so an irq lock is better than a bh lock. > > Make the mcg_tree lock a mutex since the attach/detach APIs are > not called in atomic context. This allows some significant cleanup > since we can call kzalloc while holding the mutex so some recheck > code can be eliminated. 
> > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe.c | 2 +- > drivers/infiniband/sw/rxe/rxe_mcast.c | 254 ++++++++++---------------- > drivers/infiniband/sw/rxe/rxe_recv.c | 5 +- > drivers/infiniband/sw/rxe/rxe_verbs.h | 3 +- > 4 files changed, 105 insertions(+), 159 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c > index 54c723a6edda..147cb16e937d 100644 > --- a/drivers/infiniband/sw/rxe/rxe.c > +++ b/drivers/infiniband/sw/rxe/rxe.c > @@ -142,7 +142,7 @@ static void rxe_init(struct rxe_dev *rxe) > INIT_LIST_HEAD(&rxe->pending_mmaps); > > /* init multicast support */ > - spin_lock_init(&rxe->mcg_lock); > + mutex_init(&rxe->mcg_mutex); > rxe->mcg_tree = RB_ROOT; > > mutex_init(&rxe->usdev_lock); > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index 44948f9cb02b..ac8da0bc8428 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -135,7 +135,7 @@ static int rxe_mcast_del(struct rxe_mcg *mcg) > * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) > * @mcg: mcg object with an embedded red-black tree node > * > - * Context: caller must hold a reference to mcg and rxe->mcg_lock and > + * Context: caller must hold a reference to mcg and rxe->mcg_mutex and > * is responsible to avoid adding the same mcg twice to the tree. 
> */ > static void __rxe_insert_mcg(struct rxe_mcg *mcg) > @@ -170,7 +170,7 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg) > * __rxe_remove_mcg - remove an mcg from red-black tree holding lock > * @mcg: mcast group object with an embedded red-black tree node > * > - * Context: caller must hold a reference to mcg and rxe->mcg_lock > + * Context: caller must hold a reference to mcg and rxe->mcg_mutex > */ > static void __rxe_remove_mcg(struct rxe_mcg *mcg) > { > @@ -210,34 +210,6 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, > return mcg; > } > > -/** > - * __rxe_init_mcg - initialize a new mcg > - * @rxe: rxe device > - * @mgid: multicast address as a gid > - * @mcg: new mcg object > - * > - * Context: caller should hold rxe->mcg lock > - */ > -static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, > - struct rxe_mcg *mcg) > -{ > - kref_init(&mcg->ref_cnt); > - memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > - mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > - INIT_LIST_HEAD(&mcg->qp_list); > - mcg->rxe = rxe; > - > - /* caller holds a ref on mcg but that will be > - * dropped when mcg goes out of scope. We need to take a ref > - * on the pointer that will be saved in the red-black tree > - * by __rxe_insert_mcg and used to lookup mcg from mgid later. > - * Inserting mcg makes it visible to outside so this should > - * be done last after the object is ready. 
> - */ > - kref_get(&mcg->ref_cnt); > - __rxe_insert_mcg(mcg); > -} > - > /** > * rxe_get_mcg - lookup or allocate a mcg > * @rxe: rxe device object > @@ -247,51 +219,48 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, > */ > static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > { > - struct rxe_mcg *mcg, *tmp; > + struct rxe_mcg *mcg; > int err; > > - if (rxe->attr.max_mcast_grp == 0) > - return ERR_PTR(-EINVAL); > - > - /* check to see if mcg already exists */ > + mutex_lock(&rxe->mcg_mutex); > mcg = rxe_lookup_mcg(rxe, mgid); > if (mcg) > - return mcg; > + goto out; /* nothing to do */ > > - /* check to see if we have reached limit */ > if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { > err = -ENOMEM; > goto err_dec; > } > > - /* speculative alloc of new mcg */ > mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); > if (!mcg) { > err = -ENOMEM; > goto err_dec; > } > > - spin_lock_bh(&rxe->mcg_lock); > - /* re-check to see if someone else just added it */ > - tmp = rxe_lookup_mcg(rxe, mgid); > - if (tmp) { > - spin_unlock_bh(&rxe->mcg_lock); > - atomic_dec(&rxe->mcg_num); > - kfree(mcg); > - return tmp; > - } > - > - __rxe_init_mcg(rxe, mgid, mcg); > - spin_unlock_bh(&rxe->mcg_lock); > + memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > + mcg->rxe = rxe; > + kref_init(&mcg->ref_cnt); > + INIT_LIST_HEAD(&mcg->qp_list); > + spin_lock_init(&mcg->lock); > + kref_get(&mcg->ref_cnt); > + __rxe_insert_mcg(mcg); > > - /* add mcast address outside of lock */ > err = rxe_mcast_add(mcg); > - if (!err) > - return mcg; > + if (err) > + goto err_free; > + > +out: > + mutex_unlock(&rxe->mcg_mutex); > + return mcg; > > +err_free: > + __rxe_remove_mcg(mcg); > kfree(mcg); > err_dec: > atomic_dec(&rxe->mcg_num); > + mutex_unlock(&rxe->mcg_mutex); > return ERR_PTR(err); > } > > @@ -307,10 +276,10 @@ void rxe_cleanup_mcg(struct kref *kref) > } > > /** > - * 
__rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock > + * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_mutex > * @mcg: the mcg object > * > - * Context: caller is holding rxe->mcg_lock > + * Context: caller is holding rxe->mcg_mutex > * no qp's are attached to mcg > */ > static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > @@ -335,151 +304,123 @@ static void rxe_destroy_mcg(struct rxe_mcg *mcg) > /* delete mcast address outside of lock */ > rxe_mcast_del(mcg); > > - spin_lock_bh(&mcg->rxe->mcg_lock); > + mutex_lock(&mcg->rxe->mcg_mutex); > __rxe_destroy_mcg(mcg); > - spin_unlock_bh(&mcg->rxe->mcg_lock); > + mutex_unlock(&mcg->rxe->mcg_mutex); > } > > /** > - * __rxe_init_mca - initialize a new mca holding lock > + * rxe_attach_mcg - attach qp to mcg if not already attached > * @qp: qp object > * @mcg: mcg object > - * @mca: empty space for new mca > - * > - * Context: caller must hold references on qp and mcg, rxe->mcg_lock > - * and pass memory for new mca > * > * Returns: 0 on success else an error > */ > -static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, > - struct rxe_mca *mca) > +static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > { > - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); > - int n; > + struct rxe_dev *rxe = mcg->rxe; > + struct rxe_mca *mca; > + unsigned long flags; > + int err; > > - n = atomic_inc_return(&rxe->mcg_attach); > - if (n > rxe->attr.max_total_mcast_qp_attach) { > - atomic_dec(&rxe->mcg_attach); > - return -ENOMEM; > + mutex_lock(&rxe->mcg_mutex); > + spin_lock_irqsave(&mcg->lock, flags); > + list_for_each_entry(mca, &mcg->qp_list, qp_list) { > + if (mca->qp == qp) { > + spin_unlock_irqrestore(&mcg->lock, flags); > + goto out; /* nothing to do */ > + } > } > + spin_unlock_irqrestore(&mcg->lock, flags); > > - n = atomic_inc_return(&mcg->qp_num); > - if (n > rxe->attr.max_mcast_qp_attach) { > - atomic_dec(&mcg->qp_num); > - atomic_dec(&rxe->mcg_attach); > - return -ENOMEM; > + if 
(atomic_inc_return(&rxe->mcg_attach) > > + rxe->attr.max_total_mcast_qp_attach) { > + err = -EINVAL; > + goto err_dec_attach; > } > > - atomic_inc(&qp->mcg_num); > + if (atomic_inc_return(&mcg->qp_num) > > + rxe->attr.max_mcast_qp_attach) { > + err = -EINVAL; > + goto err_dec_qp_num; > + } > + > + mca = kzalloc(sizeof(*mca), GFP_KERNEL); > + if (!mca) { > + err = -ENOMEM; > + goto err_dec_qp_num; > + } > > + atomic_inc(&qp->mcg_num); > rxe_get(qp); > mca->qp = qp; > > + spin_lock_irqsave(&mcg->lock, flags); > list_add_tail(&mca->qp_list, &mcg->qp_list); > - > + spin_unlock_irqrestore(&mcg->lock, flags); > +out: > + mutex_unlock(&rxe->mcg_mutex); > return 0; > + > +err_dec_qp_num: > + atomic_dec(&mcg->qp_num); > +err_dec_attach: > + atomic_dec(&rxe->mcg_attach); > + mutex_unlock(&rxe->mcg_mutex); > + return err; > } > > /** > - * rxe_attach_mcg - attach qp to mcg if not already attached > - * @qp: qp object > + * rxe_detach_mcg - detach qp from mcg > * @mcg: mcg object > + * @qp: qp object > * > - * Context: caller must hold reference on qp and mcg. > - * Returns: 0 on success else an error > + * Returns: 0 on success else an error if qp is not attached. 
> */ > -static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > +static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > { > struct rxe_dev *rxe = mcg->rxe; > - struct rxe_mca *mca, *tmp; > - int err; > + struct rxe_mca *mca; > + unsigned long flags; > + int err = 0; > > - /* check to see if the qp is already a member of the group */ > - spin_lock_bh(&rxe->mcg_lock); > + mutex_lock(&rxe->mcg_mutex); > + spin_lock_irqsave(&mcg->lock, flags); > list_for_each_entry(mca, &mcg->qp_list, qp_list) { > if (mca->qp == qp) { > - spin_unlock_bh(&rxe->mcg_lock); > - return 0; > + spin_unlock_irqrestore(&mcg->lock, flags); > + goto found; > } > } > - spin_unlock_bh(&rxe->mcg_lock); > + spin_unlock_irqrestore(&mcg->lock, flags); > > - /* speculative alloc new mca without using GFP_ATOMIC */ > - mca = kzalloc(sizeof(*mca), GFP_KERNEL); > - if (!mca) > - return -ENOMEM; > - > - spin_lock_bh(&rxe->mcg_lock); > - /* re-check to see if someone else just attached qp */ > - list_for_each_entry(tmp, &mcg->qp_list, qp_list) { > - if (tmp->qp == qp) { > - kfree(mca); > - err = 0; > - goto out; > - } > - } > - > - err = __rxe_init_mca(qp, mcg, mca); > - if (err) > - kfree(mca); > -out: > - spin_unlock_bh(&rxe->mcg_lock); > - return err; > -} > + /* we didn't find the qp on the list */ > + err = -EINVAL; > + goto err_out; > > -/** > - * __rxe_cleanup_mca - cleanup mca object holding lock > - * @mca: mca object > - * @mcg: mcg object > - * > - * Context: caller must hold a reference to mcg and rxe->mcg_lock > - */ > -static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) > -{ > +found: > + spin_lock_irqsave(&mcg->lock, flags); > list_del(&mca->qp_list); > + spin_unlock_irqrestore(&mcg->lock, flags); > > atomic_dec(&mcg->qp_num); > atomic_dec(&mcg->rxe->mcg_attach); > atomic_dec(&mca->qp->mcg_num); > rxe_put(mca->qp); > - > kfree(mca); > -} > - > -/** > - * rxe_detach_mcg - detach qp from mcg > - * @mcg: mcg object > - * @qp: qp object > - * > - * 
Returns: 0 on success else an error if qp is not attached. > - */ > -static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > -{ > - struct rxe_dev *rxe = mcg->rxe; > - struct rxe_mca *mca, *tmp; > > - spin_lock_bh(&rxe->mcg_lock); > - list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { > - if (mca->qp == qp) { > - __rxe_cleanup_mca(mca, mcg); > - > - /* if the number of qp's attached to the > - * mcast group falls to zero go ahead and > - * tear it down. This will not free the > - * object since we are still holding a ref > - * from the caller > - */ > - if (atomic_read(&mcg->qp_num) <= 0) > - __rxe_destroy_mcg(mcg); > - > - spin_unlock_bh(&rxe->mcg_lock); > - return 0; > - } > - } > + /* if the number of qp's attached to the > + * mcast group falls to zero go ahead and > + * tear it down. This will not free the > + * object since we are still holding a ref > + * from the caller > + */ > + if (atomic_read(&mcg->qp_num) <= 0) > + __rxe_destroy_mcg(mcg); > > - /* we didn't find the qp on the list */ > - spin_unlock_bh(&rxe->mcg_lock); > - return -EINVAL; > +err_out: > + mutex_unlock(&rxe->mcg_mutex); > + return err; > } > > /** > @@ -497,6 +438,9 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) > struct rxe_qp *qp = to_rqp(ibqp); > struct rxe_mcg *mcg; > > + if (rxe->attr.max_mcast_grp == 0) > + return -EINVAL; > + > /* takes a ref on mcg if successful */ > mcg = rxe_get_mcg(rxe, mgid); > if (IS_ERR(mcg)) > diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c > index 7153de0799fc..6cf0da958864 100644 > --- a/drivers/infiniband/sw/rxe/rxe_recv.c > +++ b/drivers/infiniband/sw/rxe/rxe_recv.c > @@ -194,6 +194,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > struct rxe_mca *mca; > struct rxe_qp *qp; > union ib_gid dgid; > + unsigned long flags; > int err; > > if (skb->protocol == htons(ETH_P_IP)) > @@ -207,7 +208,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev 
*rxe, struct sk_buff *skb) > if (!mcg) > goto drop; /* mcast group not registered */ > > - spin_lock_bh(&rxe->mcg_lock); > + spin_lock_irqsave(&mcg->lock, flags); > > /* this is unreliable datagram service so we let > * failures to deliver a multicast packet to a > @@ -259,7 +260,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > } > } > > - spin_unlock_bh(&rxe->mcg_lock); > + spin_unlock_irqrestore(&mcg->lock, flags); > > kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h > index 8058e5039322..f21963dcb2c8 100644 > --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > @@ -351,6 +351,7 @@ struct rxe_mcg { > struct list_head qp_list; > union ib_gid mgid; > atomic_t qp_num; > + spinlock_t lock; /* protect qp_list */ > u32 qkey; > u16 pkey; > bool is_ipv6; > @@ -390,7 +391,7 @@ struct rxe_dev { > struct rxe_pool mw_pool; > > /* multicast support */ > - spinlock_t mcg_lock; > + struct mutex mcg_mutex; > struct rb_root mcg_tree; > atomic_t mcg_num; > atomic_t mcg_attach; ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20231205002613.10219-4-rpearsonhpe@gmail.com>]
* Re: [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime [not found] ` <20231205002613.10219-4-rpearsonhpe@gmail.com> @ 2023-12-05 5:57 ` Zhu Yanjun 0 siblings, 0 replies; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 5:57 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Currently the rdma_rxe driver has two different and not really > compatible ways of managing the lifetime of an mcast group, > by ref counting the mcg struct and counting the number of > attached qp's. They are each doing part of the job of cleaning > up an mcg when the last qp is detached and are racy in the > process. This patch removes using the use of the number of > qp's. > > Fix up mcg reference counting so the ref count will drop > to zero correctly and move code from rxe_destroy_mcg to > rxe_cleanup_mcg since rxe_destroy is no longer needed. > > This set of fixes scrambles the code in rxe_mast.c and as > a result a lot of cleanup has been done as well. 
> > Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c") > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe_loc.h | 2 +- > drivers/infiniband/sw/rxe/rxe_mcast.c | 170 +++++++------------------- > drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- > 3 files changed, 46 insertions(+), 128 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h > index 62b2b25903fc..0509ccdaa2f2 100644 > --- a/drivers/infiniband/sw/rxe/rxe_loc.h > +++ b/drivers/infiniband/sw/rxe/rxe_loc.h > @@ -37,7 +37,7 @@ void rxe_cq_cleanup(struct rxe_pool_elem *elem); > struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); > int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); > int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); > -void rxe_cleanup_mcg(struct kref *kref); > +int rxe_put_mcg(struct rxe_mcg *mcg); > > /* rxe_mmap.c */ > struct rxe_mmap_info { > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index ac8da0bc8428..c2a28aed9d34 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -131,13 +131,31 @@ static int rxe_mcast_del(struct rxe_mcg *mcg) > return err ?: err2; > } > > -/** > - * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) > - * @mcg: mcg object with an embedded red-black tree node > - * > - * Context: caller must hold a reference to mcg and rxe->mcg_mutex and > - * is responsible to avoid adding the same mcg twice to the tree. 
> - */ > +static void __rxe_remove_mcg(struct rxe_mcg *mcg) > +{ > + rb_erase(&mcg->node, &mcg->rxe->mcg_tree); > +} > + > +static void rxe_cleanup_mcg(struct kref *kref) > +{ > + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); > + > + __rxe_remove_mcg(mcg); > + rxe_mcast_del(mcg); > + atomic_dec(&mcg->rxe->mcg_num); > + kfree_rcu(mcg, rcu); > +} > + > +static int rxe_get_mcg(struct rxe_mcg *mcg) > +{ > + return kref_get_unless_zero(&mcg->ref_cnt); > +} > + > +int rxe_put_mcg(struct rxe_mcg *mcg) > +{ > + return kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > +} > + > static void __rxe_insert_mcg(struct rxe_mcg *mcg) > { > struct rb_root *tree = &mcg->rxe->mcg_tree; > @@ -166,23 +184,11 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg) > rb_insert_color(&mcg->node, tree); > } > > -/** > - * __rxe_remove_mcg - remove an mcg from red-black tree holding lock > - * @mcg: mcast group object with an embedded red-black tree node > - * > - * Context: caller must hold a reference to mcg and rxe->mcg_mutex > - */ > -static void __rxe_remove_mcg(struct rxe_mcg *mcg) > -{ > - rb_erase(&mcg->node, &mcg->rxe->mcg_tree); > -} > - > /* > * Lookup mgid in the multicast group red-black tree and try to > * get a ref on it. Return mcg on success else NULL. > */ > -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, > - union ib_gid *mgid) > +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > { > struct rb_root *tree = &rxe->mcg_tree; > struct rxe_mcg *mcg; > @@ -204,20 +210,14 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, > else > break; > } > - mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL; > + mcg = (node && rxe_get_mcg(mcg)) ? 
mcg : NULL; > rcu_read_unlock(); > > return mcg; > } > > -/** > - * rxe_get_mcg - lookup or allocate a mcg > - * @rxe: rxe device object > - * @mgid: multicast IP address as a gid > - * > - * Returns: mcg on success else ERR_PTR(error) > - */ > -static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > +/* find an existing mcg or allocate a new one */ > +static struct rxe_mcg *rxe_alloc_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > { > struct rxe_mcg *mcg; > int err; > @@ -228,7 +228,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > goto out; /* nothing to do */ > > if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { > - err = -ENOMEM; > + err = -EINVAL; > goto err_dec; > } > > @@ -244,19 +244,17 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > kref_init(&mcg->ref_cnt); > INIT_LIST_HEAD(&mcg->qp_list); > spin_lock_init(&mcg->lock); > - kref_get(&mcg->ref_cnt); > - __rxe_insert_mcg(mcg); > > err = rxe_mcast_add(mcg); > if (err) > goto err_free; > > + __rxe_insert_mcg(mcg); > out: > mutex_unlock(&rxe->mcg_mutex); > return mcg; > > err_free: > - __rxe_remove_mcg(mcg); > kfree(mcg); > err_dec: > atomic_dec(&rxe->mcg_num); > @@ -264,64 +262,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > return ERR_PTR(err); > } > > -/** > - * rxe_cleanup_mcg - cleanup mcg for kref_put > - * @kref: struct kref embnedded in mcg > - */ > -void rxe_cleanup_mcg(struct kref *kref) > -{ > - struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); > - > - kfree_rcu(mcg, rcu); > -} > - > -/** > - * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_mutex > - * @mcg: the mcg object > - * > - * Context: caller is holding rxe->mcg_mutex > - * no qp's are attached to mcg > - */ > -static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > -{ > - struct rxe_dev *rxe = mcg->rxe; > - > - /* remove mcg from red-black tree then drop ref */ > - 
__rxe_remove_mcg(mcg); > - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > - > - atomic_dec(&rxe->mcg_num); > -} > - > -/** > - * rxe_destroy_mcg - destroy mcg object > - * @mcg: the mcg object > - * > - * Context: no qp's are attached to mcg > - */ > -static void rxe_destroy_mcg(struct rxe_mcg *mcg) > -{ > - /* delete mcast address outside of lock */ > - rxe_mcast_del(mcg); > - > - mutex_lock(&mcg->rxe->mcg_mutex); > - __rxe_destroy_mcg(mcg); > - mutex_unlock(&mcg->rxe->mcg_mutex); > -} > - > -/** > - * rxe_attach_mcg - attach qp to mcg if not already attached > - * @qp: qp object > - * @mcg: mcg object > - * > - * Returns: 0 on success else an error > - */ > -static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > +static int rxe_attach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg) > { > struct rxe_dev *rxe = mcg->rxe; > struct rxe_mca *mca; > unsigned long flags; > - int err; > + int err = 0; > > mutex_lock(&rxe->mcg_mutex); > spin_lock_irqsave(&mcg->lock, flags); > @@ -355,29 +301,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > rxe_get(qp); > mca->qp = qp; > > + rxe_get_mcg(mcg); > + > spin_lock_irqsave(&mcg->lock, flags); > list_add_tail(&mca->qp_list, &mcg->qp_list); > spin_unlock_irqrestore(&mcg->lock, flags); > -out: > - mutex_unlock(&rxe->mcg_mutex); > - return 0; > + goto out; > > err_dec_qp_num: > atomic_dec(&mcg->qp_num); > err_dec_attach: > atomic_dec(&rxe->mcg_attach); > +out: > + rxe_put_mcg(mcg); > mutex_unlock(&rxe->mcg_mutex); > return err; > } > > -/** > - * rxe_detach_mcg - detach qp from mcg > - * @mcg: mcg object > - * @qp: qp object > - * > - * Returns: 0 on success else an error if qp is not attached. 
> - */ > -static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > +static int rxe_detach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg) > { > struct rxe_dev *rxe = mcg->rxe; > struct rxe_mca *mca; > @@ -394,7 +335,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > } > spin_unlock_irqrestore(&mcg->lock, flags); > > - /* we didn't find the qp on the list */ > err = -EINVAL; > goto err_out; > > @@ -402,23 +342,15 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > spin_lock_irqsave(&mcg->lock, flags); > list_del(&mca->qp_list); > spin_unlock_irqrestore(&mcg->lock, flags); > + rxe_put_mcg(mcg); > > atomic_dec(&mcg->qp_num); > atomic_dec(&mcg->rxe->mcg_attach); > atomic_dec(&mca->qp->mcg_num); > rxe_put(mca->qp); > kfree(mca); > - > - /* if the number of qp's attached to the > - * mcast group falls to zero go ahead and > - * tear it down. This will not free the > - * object since we are still holding a ref > - * from the caller > - */ > - if (atomic_read(&mcg->qp_num) <= 0) > - __rxe_destroy_mcg(mcg); > - > err_out: > + rxe_put_mcg(mcg); > mutex_unlock(&rxe->mcg_mutex); > return err; > } > @@ -433,7 +365,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) > */ > int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) > { > - int err; > struct rxe_dev *rxe = to_rdev(ibqp->device); > struct rxe_qp *qp = to_rqp(ibqp); > struct rxe_mcg *mcg; > @@ -441,20 +372,11 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) > if (rxe->attr.max_mcast_grp == 0) > return -EINVAL; > > - /* takes a ref on mcg if successful */ > - mcg = rxe_get_mcg(rxe, mgid); > + mcg = rxe_alloc_mcg(rxe, mgid); > if (IS_ERR(mcg)) > return PTR_ERR(mcg); > > - err = rxe_attach_mcg(mcg, qp); > - > - /* if we failed to attach the first qp to mcg tear it down */ > - if (atomic_read(&mcg->qp_num) == 0) > - rxe_destroy_mcg(mcg); > - > - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > - > - return err; > + return 
rxe_attach_mcg(qp, mcg); > } > > /** > @@ -470,14 +392,10 @@ int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) > struct rxe_dev *rxe = to_rdev(ibqp->device); > struct rxe_qp *qp = to_rqp(ibqp); > struct rxe_mcg *mcg; > - int err; > > mcg = rxe_lookup_mcg(rxe, mgid); > if (!mcg) > return -EINVAL; > > - err = rxe_detach_mcg(mcg, qp); > - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > - > - return err; > + return rxe_detach_mcg(qp, mcg); > } > diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c > index 6cf0da958864..e3ec3dfc57f4 100644 > --- a/drivers/infiniband/sw/rxe/rxe_recv.c > +++ b/drivers/infiniband/sw/rxe/rxe_recv.c > @@ -262,7 +262,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > > spin_unlock_irqrestore(&mcg->lock, flags); > > - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); > + rxe_put_mcg(mcg); > > if (likely(!skb)) > return; ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20231205002613.10219-5-rpearsonhpe@gmail.com>]
* Re: [PATCH for-next v5 7/7] RDMA/rxe: Add module parameters for mcast limits [not found] ` <20231205002613.10219-5-rpearsonhpe@gmail.com> @ 2023-12-05 5:58 ` Zhu Yanjun 0 siblings, 0 replies; 9+ messages in thread From: Zhu Yanjun @ 2023-12-05 5:58 UTC (permalink / raw) To: Bob Pearson, jgg, linux-rdma, dsahern, davem, netdev Add David S. Miller and David Ahern. They are the maintainers in netdev and very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Add module parameters for max_mcast_grp, max_mcast_qp_attach, > and tot_mcast_qp_attach to allow setting these parameters to > small values when the driver is loaded to support testing these > limits. > > Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> > --- > drivers/infiniband/sw/rxe/Makefile | 3 ++- > drivers/infiniband/sw/rxe/rxe.c | 6 +++--- > drivers/infiniband/sw/rxe/rxe_param.c | 23 +++++++++++++++++++++++ > drivers/infiniband/sw/rxe/rxe_param.h | 4 ++++ > 4 files changed, 32 insertions(+), 4 deletions(-) > create mode 100644 drivers/infiniband/sw/rxe/rxe_param.c > > diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile > index 5395a581f4bb..b183924ea01d 100644 > --- a/drivers/infiniband/sw/rxe/Makefile > +++ b/drivers/infiniband/sw/rxe/Makefile > @@ -22,4 +22,5 @@ rdma_rxe-y := \ > rxe_mcast.o \ > rxe_task.o \ > rxe_net.o \ > - rxe_hw_counters.o > + rxe_hw_counters.o \ > + rxe_param.o > diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c > index 147cb16e937d..599fbfdeb426 100644 > --- a/drivers/infiniband/sw/rxe/rxe.c > +++ b/drivers/infiniband/sw/rxe/rxe.c > @@ -59,9 +59,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe) > rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; > rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; > rxe->attr.atomic_cap = IB_ATOMIC_HCA; > - rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; > - rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; > - rxe->attr.max_total_mcast_qp_attach = 
RXE_MAX_TOT_MCAST_QP_ATTACH; > + rxe->attr.max_mcast_grp = rxe_max_mcast_grp; > + rxe->attr.max_mcast_qp_attach = rxe_max_mcast_qp_attach; > + rxe->attr.max_total_mcast_qp_attach = rxe_max_tot_mcast_qp_attach; > rxe->attr.max_ah = RXE_MAX_AH; > rxe->attr.max_srq = RXE_MAX_SRQ; > rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; > diff --git a/drivers/infiniband/sw/rxe/rxe_param.c b/drivers/infiniband/sw/rxe/rxe_param.c > new file mode 100644 > index 000000000000..27873e7de753 > --- /dev/null > +++ b/drivers/infiniband/sw/rxe/rxe_param.c > @@ -0,0 +1,23 @@ > +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > +/* > + * Copyright (c) 2023 Hewlett Packard Enterprise, Inc. All rights reserved. > + */ > + > +#include "rxe.h" > + > +int rxe_max_mcast_grp = RXE_MAX_MCAST_GRP; > +module_param_named(max_mcast_grp, rxe_max_mcast_grp, int, 0444); > +MODULE_PARM_DESC(max_mcast_grp, > + "Maximum number of multicast groups per device"); > + > +int rxe_max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; > +module_param_named(max_mcast_qp_attach, rxe_max_mcast_qp_attach, > + int, 0444); > +MODULE_PARM_DESC(max_mcast_qp_attach, > + "Maximum number of QPs attached to a multicast group"); > + > +int rxe_max_tot_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; > +module_param_named(max_tot_mcast_qp_attach, rxe_max_tot_mcast_qp_attach, > + int, 0444); > +MODULE_PARM_DESC(max_tot_mcast_qp_attach, > + "Maximum total number of QPs attached to multicast groups per device"); > diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h > index d2f57ead78ad..d6fe50f5f483 100644 > --- a/drivers/infiniband/sw/rxe/rxe_param.h > +++ b/drivers/infiniband/sw/rxe/rxe_param.h > @@ -125,6 +125,10 @@ enum rxe_device_param { > RXE_VENDOR_ID = 0XFFFFFF, > }; > > +extern int rxe_max_mcast_grp; > +extern int rxe_max_mcast_qp_attach; > +extern int rxe_max_tot_mcast_qp_attach; > + > /* default/initial rxe port parameters */ > enum rxe_port_param { > RXE_PORT_GID_TBL_LEN = 1024, ^ permalink 
raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-12-08 1:24 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20231205002613.10219-1-rpearsonhpe@gmail.com>
2023-12-05 5:55 ` [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Zhu Yanjun
2023-12-05 10:29 ` Zhu Yanjun
2023-12-07 1:47 ` Rain River
2023-12-07 19:07 ` Bob Pearson
2023-12-08 1:24 ` Greg Sword
[not found] ` <20231205002613.10219-2-rpearsonhpe@gmail.com>
2023-12-05 5:56 ` [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock Zhu Yanjun
[not found] ` <20231205002613.10219-3-rpearsonhpe@gmail.com>
2023-12-05 5:57 ` [PATCH for-next v5 5/7] RDMA/rxe: Split multicast lock Zhu Yanjun
[not found] ` <20231205002613.10219-4-rpearsonhpe@gmail.com>
2023-12-05 5:57 ` [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime Zhu Yanjun
[not found] ` <20231205002613.10219-5-rpearsonhpe@gmail.com>
2023-12-05 5:58 ` [PATCH for-next v5 7/7] RDMA/rxe: Add module parameters for mcast limits Zhu Yanjun
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).