From mboxrd@z Thu Jan 1 00:00:00 1970 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="mULoh7XE" X-Greylist: delayed 324 seconds by postgrey-1.37 at lindbergh.monkeyblade.net; Mon, 04 Dec 2023 21:55:56 PST Received: from out-178.mta1.migadu.com (out-178.mta1.migadu.com [95.215.58.178]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BC579111 for ; Mon, 4 Dec 2023 21:55:56 -0800 (PST) Message-ID: <763abeeb-64b2-496f-9249-b588d1d47e60@linux.dev> DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1701755755; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=2IUa23eY76FUwZSvCojiUW38oAeM2qvFBRI4OIJq4Rc=; b=mULoh7XEuWVsBbcFMOoRranVEhTJWIb3NXpJrYMKQhNU0cQlF7zaGS6+ZxlS+rec47JrFt cAm8/0qXGShigKw38r9PBfoJTxcQIJjt3fRQSJNuSrWsiZcb/jZU/4HZsmp8XCNzQ4bHIa qZQCt0QYMWBK5zrqq9X68FsxzO2tdFg= Date: Tue, 5 Dec 2023 13:55:50 +0800 Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Subject: Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address To: Bob Pearson , jgg@nvidia.com, linux-rdma@vger.kernel.org, dsahern@kernel.org, davem@davemloft.net, netdev@vger.kernel.org References: <20231205002613.10219-1-rpearsonhpe@gmail.com> X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Zhu Yanjun In-Reply-To: <20231205002613.10219-1-rpearsonhpe@gmail.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-Migadu-Flow: FLOW_OUT Adding David S. Miller and David Ahern. They are the netdev maintainers and are very familiar with mcast. Zhu Yanjun 在 2023/12/5 8:26, Bob Pearson 写道: > Currently the rdma_rxe driver does not receive mcast packets at all. 
> > Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > the IP mcast address. This is required for mcast traffic to reach the > rxe driver when coming from an external source. > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > Signed-off-by: Bob Pearson > --- > drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > drivers/infiniband/sw/rxe/rxe_net.h | 1 + > drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > 4 files changed, 102 insertions(+), 21 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c > index 86cc2e18a7fd..54735d07cee5 100644 > --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > @@ -19,38 +19,116 @@ > * mcast packets in the rxe receive path. > */ > > +#include > + > #include "rxe.h" > > -/** > - * rxe_mcast_add - add multicast address to rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > { > + struct in6_addr *addr6 = (struct in6_addr *)mgid; > + struct sock *sk = recv_sockets.sk6->sk; > unsigned char ll_addr[ETH_ALEN]; > + int err; > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + if (err && err != -EADDRINUSE) > + goto err_out; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_drop; > + > + return 0; > > - return dev_mc_add(rxe->ndev, ll_addr); > +err_drop: > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > +err_out: > + return err; > } > 
> -/** > - * rxe_mcast_del - delete multicast address from rxe device > - * @rxe: rxe device object > - * @mgid: multicast address as a gid > - * > - * Returns 0 on success else an error > - */ > -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > +static int rxe_mcast_add(struct rxe_mcg *mcg) > { > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err; > + > + if (mcg->is_ipv6) > + return rxe_mcast_add6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + rtnl_lock(); > + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > + if (err && err != -EADDRINUSE) > + goto err_out; > + > + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_add(rxe->ndev, ll_addr); > + if (err) > + goto err_leave; > + > + return 0; > + > +err_leave: > + rtnl_lock(); > + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > +err_out: > + return err; > +} > + > +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > +{ > + struct sock *sk = recv_sockets.sk6->sk; > + unsigned char ll_addr[ETH_ALEN]; > + int err, err2; > > ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + spin_lock_bh(&sk->sk_lock.slock); > + rtnl_lock(); > + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > + (struct in6_addr *)mgid); > + rtnl_unlock(); > + spin_unlock_bh(&sk->sk_lock.slock); > + > + return err ?: err2; > +} > + > +static int rxe_mcast_del(struct rxe_mcg *mcg) > +{ > + struct rxe_dev *rxe = mcg->rxe; > + union ib_gid *mgid = &mcg->mgid; > + unsigned char ll_addr[ETH_ALEN]; > + struct ip_mreqn imr = {}; > + int err, err2; > + > + if (mcg->is_ipv6) > + return rxe_mcast_del6(rxe, mgid); > + > + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > + imr.imr_ifindex = rxe->ndev->ifindex; > + 
ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > + err = dev_mc_del(rxe->ndev, ll_addr); > + > + rtnl_lock(); > + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > + rtnl_unlock(); > > - return dev_mc_del(rxe->ndev, ll_addr); > + return err ?: err2; > } > > /** > @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, > { > kref_init(&mcg->ref_cnt); > memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > INIT_LIST_HEAD(&mcg->qp_list); > mcg->rxe = rxe; > > @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) > spin_unlock_bh(&rxe->mcg_lock); > > /* add mcast address outside of lock */ > - err = rxe_mcast_add(rxe, mgid); > + err = rxe_mcast_add(mcg); > if (!err) > return mcg; > > @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > static void rxe_destroy_mcg(struct rxe_mcg *mcg) > { > /* delete mcast address outside of lock */ > - rxe_mcast_del(mcg->rxe, &mcg->mgid); > + rxe_mcast_del(mcg); > > spin_lock_bh(&mcg->rxe->mcg_lock); > __rxe_destroy_mcg(mcg); > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 58c3f3759bf0..b481f8da2002 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -18,7 +18,7 @@ > #include "rxe_net.h" > #include "rxe_loc.h" > > -static struct rxe_recv_sockets recv_sockets; > +struct rxe_recv_sockets recv_sockets; > > static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > struct net_device *ndev, > diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h > index 45d80d00f86b..89cee7d5340f 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.h > +++ b/drivers/infiniband/sw/rxe/rxe_net.h > @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > struct socket *sk4; > struct socket *sk6; > }; > +extern struct rxe_recv_sockets recv_sockets; > > int rxe_net_add(const char 
*ibdev_name, struct net_device *ndev); > > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h > index ccb9d19ffe8a..7be9e6232dd9 100644 > --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > @@ -352,6 +352,7 @@ struct rxe_mcg { > atomic_t qp_num; > u32 qkey; > u16 pkey; > + bool is_ipv6; > }; > > struct rxe_mca {