From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Bob Pearson <rpearsonhpe@gmail.com>,
jgg@nvidia.com, linux-rdma@vger.kernel.org, dsahern@kernel.org,
davem@davemloft.net, netdev@vger.kernel.org
Subject: Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address
Date: Tue, 5 Dec 2023 18:29:52 +0800 [thread overview]
Message-ID: <7f5d614e-dc1a-47ce-b573-60ba8c5a21fa@linux.dev> (raw)
In-Reply-To: <763abeeb-64b2-496f-9249-b588d1d47e60@linux.dev>
在 2023/12/5 13:55, Zhu Yanjun 写道:
> Add David S. Miller and David Ahern.
>
> They are maintainers in netdev and are very familiar with mcast.
>
> Zhu Yanjun
>
> 在 2023/12/5 8:26, Bob Pearson 写道:
>> Currently the rdma_rxe driver does not receive mcast packets at all.
>>
>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
>> the IP mcast address. This is required for mcast traffic to reach the
>> rxe driver when coming from an external source.
>>
>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>> ---
>> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
>> drivers/infiniband/sw/rxe/rxe_net.c | 2 +-
>> drivers/infiniband/sw/rxe/rxe_net.h | 1 +
>> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 +
>> 4 files changed, 102 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
>> b/drivers/infiniband/sw/rxe/rxe_mcast.c
>> index 86cc2e18a7fd..54735d07cee5 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>> @@ -19,38 +19,116 @@
>> * mcast packets in the rxe receive path.
>> */
>> +#include <linux/igmp.h>
>> +
>> #include "rxe.h"
>> -/**
>> - * rxe_mcast_add - add multicast address to rxe device
>> - * @rxe: rxe device object
>> - * @mgid: multicast address as a gid
>> - *
>> - * Returns 0 on success else an error
>> - */
>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
>> {
>> + struct in6_addr *addr6 = (struct in6_addr *)mgid;
>> + struct sock *sk = recv_sockets.sk6->sk;
>> unsigned char ll_addr[ETH_ALEN];
>> + int err;
>> +
>> + spin_lock_bh(&sk->sk_lock.slock);
>> + rtnl_lock();
>> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
Normally sk_lock is used. I am not sure whether spin_lock_bh is correct here.
Jason, or experts from netdev, please comment on this.
Thanks,
Zhu Yanjun
>> + rtnl_unlock();
>> + spin_unlock_bh(&sk->sk_lock.slock);
>> + if (err && err != -EADDRINUSE)
>> + goto err_out;
>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>> + err = dev_mc_add(rxe->ndev, ll_addr);
>> + if (err)
>> + goto err_drop;
>> +
>> + return 0;
>> - return dev_mc_add(rxe->ndev, ll_addr);
>> +err_drop:
>> + spin_lock_bh(&sk->sk_lock.slock);
>> + rtnl_lock();
>> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
>> + rtnl_unlock();
>> + spin_unlock_bh(&sk->sk_lock.slock);
>> +err_out:
>> + return err;
>> }
>> -/**
>> - * rxe_mcast_del - delete multicast address from rxe device
>> - * @rxe: rxe device object
>> - * @mgid: multicast address as a gid
>> - *
>> - * Returns 0 on success else an error
>> - */
>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
>> +static int rxe_mcast_add(struct rxe_mcg *mcg)
>> {
>> + struct rxe_dev *rxe = mcg->rxe;
>> + union ib_gid *mgid = &mcg->mgid;
>> unsigned char ll_addr[ETH_ALEN];
>> + struct ip_mreqn imr = {};
>> + int err;
>> +
>> + if (mcg->is_ipv6)
>> + return rxe_mcast_add6(rxe, mgid);
>> +
>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>> + imr.imr_ifindex = rxe->ndev->ifindex;
>> + rtnl_lock();
>> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
>> + rtnl_unlock();
>> + if (err && err != -EADDRINUSE)
>> + goto err_out;
>> +
>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>> + err = dev_mc_add(rxe->ndev, ll_addr);
>> + if (err)
>> + goto err_leave;
>> +
>> + return 0;
>> +
>> +err_leave:
>> + rtnl_lock();
>> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>> + rtnl_unlock();
>> +err_out:
>> + return err;
>> +}
>> +
>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
>> +{
>> + struct sock *sk = recv_sockets.sk6->sk;
>> + unsigned char ll_addr[ETH_ALEN];
>> + int err, err2;
>> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>> + err = dev_mc_del(rxe->ndev, ll_addr);
>> +
>> + spin_lock_bh(&sk->sk_lock.slock);
>> + rtnl_lock();
>> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
>> + (struct in6_addr *)mgid);
>> + rtnl_unlock();
>> + spin_unlock_bh(&sk->sk_lock.slock);
>> +
>> + return err ?: err2;
>> +}
>> +
>> +static int rxe_mcast_del(struct rxe_mcg *mcg)
>> +{
>> + struct rxe_dev *rxe = mcg->rxe;
>> + union ib_gid *mgid = &mcg->mgid;
>> + unsigned char ll_addr[ETH_ALEN];
>> + struct ip_mreqn imr = {};
>> + int err, err2;
>> +
>> + if (mcg->is_ipv6)
>> + return rxe_mcast_del6(rxe, mgid);
>> +
>> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>> + imr.imr_ifindex = rxe->ndev->ifindex;
>> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>> + err = dev_mc_del(rxe->ndev, ll_addr);
>> +
>> + rtnl_lock();
>> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>> + rtnl_unlock();
>> - return dev_mc_del(rxe->ndev, ll_addr);
>> + return err ?: err2;
>> }
>> /**
>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
>> union ib_gid *mgid,
>> {
>> kref_init(&mcg->ref_cnt);
>> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
>> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
>> INIT_LIST_HEAD(&mcg->qp_list);
>> mcg->rxe = rxe;
>> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
>> rxe_dev *rxe, union ib_gid *mgid)
>> spin_unlock_bh(&rxe->mcg_lock);
>> /* add mcast address outside of lock */
>> - err = rxe_mcast_add(rxe, mgid);
>> + err = rxe_mcast_add(mcg);
>> if (!err)
>> return mcg;
>> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>> static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>> {
>> /* delete mcast address outside of lock */
>> - rxe_mcast_del(mcg->rxe, &mcg->mgid);
>> + rxe_mcast_del(mcg);
>> spin_lock_bh(&mcg->rxe->mcg_lock);
>> __rxe_destroy_mcg(mcg);
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
>> b/drivers/infiniband/sw/rxe/rxe_net.c
>> index 58c3f3759bf0..b481f8da2002 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>> @@ -18,7 +18,7 @@
>> #include "rxe_net.h"
>> #include "rxe_loc.h"
>> -static struct rxe_recv_sockets recv_sockets;
>> +struct rxe_recv_sockets recv_sockets;
>> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>> struct net_device *ndev,
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
>> b/drivers/infiniband/sw/rxe/rxe_net.h
>> index 45d80d00f86b..89cee7d5340f 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
>> struct socket *sk4;
>> struct socket *sk6;
>> };
>> +extern struct rxe_recv_sockets recv_sockets;
>> int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> index ccb9d19ffe8a..7be9e6232dd9 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> @@ -352,6 +352,7 @@ struct rxe_mcg {
>> atomic_t qp_num;
>> u32 qkey;
>> u16 pkey;
>> + bool is_ipv6;
>> };
>> struct rxe_mca {
next prev parent reply other threads:[~2023-12-05 10:30 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20231205002613.10219-1-rpearsonhpe@gmail.com>
2023-12-05 5:55 ` [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Zhu Yanjun
2023-12-05 10:29 ` Zhu Yanjun [this message]
2023-12-07 1:47 ` Rain River
2023-12-07 19:07 ` Bob Pearson
2023-12-08 1:24 ` Greg Sword
[not found] ` <20231205002613.10219-2-rpearsonhpe@gmail.com>
2023-12-05 5:56 ` [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock Zhu Yanjun
[not found] ` <20231205002613.10219-3-rpearsonhpe@gmail.com>
2023-12-05 5:57 ` [PATCH for-next v5 5/7] RDMA/rxe: Split multicast lock Zhu Yanjun
[not found] ` <20231205002613.10219-4-rpearsonhpe@gmail.com>
2023-12-05 5:57 ` [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime Zhu Yanjun
[not found] ` <20231205002613.10219-5-rpearsonhpe@gmail.com>
2023-12-05 5:58 ` [PATCH for-next v5 7/7] RDMA/rxe: Add module parameters for mcast limits Zhu Yanjun
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7f5d614e-dc1a-47ce-b573-60ba8c5a21fa@linux.dev \
--to=yanjun.zhu@linux.dev \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=rpearsonhpe@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).