All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: "D. Wythe" <alibuda@linux.alibaba.com>
Cc: mjambigi@linux.ibm.com, wenjia@linux.ibm.com,
	wintera@linux.ibm.com, dust.li@linux.alibaba.com,
	tonylu@linux.alibaba.com, guwen@linux.alibaba.com,
	kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org,
	linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org,
	pabeni@redhat.com, edumazet@google.com, sidraya@linux.ibm.com,
	jaka@linux.ibm.com
Subject: Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
Date: Mon, 27 Oct 2025 15:42:27 +0200	[thread overview]
Message-ID: <20251027134227.GL12554@unreal> (raw)
In-Reply-To: <20251022032309.66386-1-alibuda@linux.alibaba.com>

On Wed, Oct 22, 2025 at 11:23:09AM +0800, D. Wythe wrote:
> The current SMC implementation is IPv4-centric. While it contains a
> workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
> for native IPv6, preventing its use in modern dual-stack or IPv6-only
> networks.
> 
> This patch introduces full, native IPv6 support by refactoring the
> address handling mechanism to be IP-version agnostic, which is
> achieved by:
> 
> - Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
> - Implementing an IPv6-specific route lookup function.
> - Extending the GID matching logic to handle both IPv4 and IPv6 addresses.
> 
> With these changes, SMC can now discover RDMA devices and establish
> connections over both native IPv4 and IPv6 networks.

Why can't you use the rdma-cm in-kernel API like any other in-kernel RDMA consumer?

Thanks

> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
> v2: Fix build failure with CONFIG_IPV6 disabled
> ---
>  net/smc/af_smc.c   |  48 +++++++++++----
>  net/smc/smc_core.h |  40 ++++++++++++-
>  net/smc/smc_ib.c   | 145 ++++++++++++++++++++++++++++++++++++++-------
>  net/smc/smc_ib.h   |   9 +++
>  net/smc/smc_llc.c  |   6 +-
>  5 files changed, 209 insertions(+), 39 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 77b99e8ef35a..b435c8ba95f5 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1132,12 +1132,15 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
>  
>  	/* check if there is an rdma v2 device available */
>  	ini->check_smcrv2 = true;
> -	ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
> -	if (!(ini->smcr_version & SMC_V2) ||
> +
> +	if (smc->clcsock->sk->sk_family == AF_INET)
> +		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, smc->clcsock->sk->sk_rcv_saddr);
>  #if IS_ENABLED(CONFIG_IPV6)
> -	    (smc->clcsock->sk->sk_family == AF_INET6 &&
> -	     !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
> -#endif
> +	else
> +		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &smc->clcsock->sk->sk_v6_rcv_saddr);
> +#endif /* CONFIG_IPV6 */
> +
> +	if (!(ini->smcr_version & SMC_V2) ||
>  	    !smc_clc_ueid_count() ||
>  	    smc_find_rdma_device(smc, ini))
>  		ini->smcr_version &= ~SMC_V2;
> @@ -1230,11 +1233,27 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
>  		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
>  		ini->smcrv2.uses_gateway = false;
>  	} else {
> -		if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> -				      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> -				      ini->smcrv2.nexthop_mac,
> -				      &ini->smcrv2.uses_gateway))
> +		struct smc_ipaddr peer_gid;
> +
> +		smc_ipaddr_from_gid(&peer_gid, aclc->r0.lcl.gid);
> +		if (peer_gid.family == AF_INET) {
> +			/* v4-mapped v6 address should also be treated as v4 address. */
> +			if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> +					      peer_gid.addr,
> +					      ini->smcrv2.nexthop_mac,
> +					      &ini->smcrv2.uses_gateway))
> +				return SMC_CLC_DECL_NOROUTE;
> +		} else {
> +#if IS_ENABLED(CONFIG_IPV6)
> +			if (smc_ib_find_route_v6(net, &smc->clcsock->sk->sk_v6_rcv_saddr,
> +						 &peer_gid.addr_v6,
> +						 ini->smcrv2.nexthop_mac,
> +						 &ini->smcrv2.uses_gateway))
> +				return SMC_CLC_DECL_NOROUTE;
> +#else
>  			return SMC_CLC_DECL_NOROUTE;
> +#endif /* CONFIG_IPV6 */
> +		}
>  		if (!ini->smcrv2.uses_gateway) {
>  			/* mismatch: peer claims indirect, but its direct */
>  			return SMC_CLC_DECL_NOINDIRECT;
> @@ -2307,8 +2326,15 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
>  	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
>  	ini->check_smcrv2 = true;
>  	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
> -	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
> -	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
> +
> +	if (new_smc->clcsock->sk->sk_family == AF_INET)
> +		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, new_smc->clcsock->sk->sk_rcv_saddr);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	else
> +		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &new_smc->clcsock->sk->sk_v6_rcv_saddr);
> +#endif /* CONFIG_IPV6 */
> +	smc_ipaddr_from_gid(&ini->smcrv2.daddr, smc_v2_ext->roce);
> +
>  	rc = smc_find_rdma_device(new_smc, ini);
>  	if (rc) {
>  		smc_find_ism_store_rc(rc, ini);
> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> index a5a78cbff341..6b07785f9874 100644
> --- a/net/smc/smc_core.h
> +++ b/net/smc/smc_core.h
> @@ -279,6 +279,14 @@ struct smc_llc_flow {
>  	struct smc_llc_qentry *qentry;
>  };
>  
> +struct smc_ipaddr {
> +	sa_family_t family;
> +	union {
> +		__be32          addr;
> +		struct in6_addr addr_v6;
> +	};
> +};
> +
>  struct smc_link_group {
>  	struct list_head	list;
>  	struct rb_root		conns_all;	/* connection tree */
> @@ -359,7 +367,7 @@ struct smc_link_group {
>  						/* rsn code for termination */
>  			u8			nexthop_mac[ETH_ALEN];
>  			u8			uses_gateway;
> -			__be32			saddr;
> +			struct smc_ipaddr saddr;
>  						/* net namespace */
>  			struct net		*net;
>  			u8			max_conns;
> @@ -389,9 +397,9 @@ struct smc_gidlist {
>  
>  struct smc_init_info_smcrv2 {
>  	/* Input fields */
> -	__be32			saddr;
> +	struct smc_ipaddr saddr;
>  	struct sock		*clc_sk;
> -	__be32			daddr;
> +	struct smc_ipaddr daddr;
>  
>  	/* Output fields when saddr is set */
>  	struct smc_ib_device	*ib_dev_v2;
> @@ -618,4 +626,30 @@ static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
>  {
>  	return link->lgr;
>  }
> +
> +static inline void smc_ipaddr_from_v4addr(struct smc_ipaddr *ipaddr, __be32 v4_addr)
> +{
> +	ipaddr->family = AF_INET;
> +	ipaddr->addr = v4_addr;
> +}
> +
> +static inline void smc_ipaddr_from_v6addr(struct smc_ipaddr *ipaddr, const struct in6_addr *v6_addr)
> +{
> +	ipaddr->family = AF_INET6;
> +	ipaddr->addr_v6 = *v6_addr;
> +}
> +
> +static inline void smc_ipaddr_from_gid(struct smc_ipaddr *ipaddr, u8 gid[SMC_GID_SIZE])
> +{
> +	__be32 gid_v4 = smc_ib_gid_to_ipv4(gid);
> +
> +	if (gid_v4 != cpu_to_be32(INADDR_NONE)) {
> +		ipaddr->family = AF_INET;
> +		ipaddr->addr = gid_v4;
> +	} else {
> +		ipaddr->family = AF_INET6;
> +		ipaddr->addr_v6 = *smc_ib_gid_to_ipv6(gid);
> +	}
> +}
> +
>  #endif
> diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
> index 0052f02756eb..9c4c53be7bfc 100644
> --- a/net/smc/smc_ib.c
> +++ b/net/smc/smc_ib.c
> @@ -22,6 +22,7 @@
>  #include <linux/inetdevice.h>
>  #include <rdma/ib_verbs.h>
>  #include <rdma/ib_cache.h>
> +#include <net/ip6_route.h>
>  
>  #include "smc_pnet.h"
>  #include "smc_ib.h"
> @@ -225,48 +226,148 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>  	return -ENOENT;
>  }
>  
> -static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> +#if IS_ENABLED(CONFIG_IPV6)
> +int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
> +			 struct in6_addr *daddr, u8 nexthop_mac[],
> +			 u8 *uses_gateway)
> +{
> +	struct dst_entry *dst;
> +	struct rt6_info *rt;
> +	struct neighbour *neigh;
> +	struct in6_addr *nexthop_addr;
> +	int rc = -ENOENT;
> +
> +	struct flowi6 fl6 = {
> +		.daddr = *daddr,
> +		.saddr = *saddr,
> +	};
> +
> +	if (ipv6_addr_any(daddr))
> +		return -EINVAL;
> +
> +	dst = ip6_route_output(net, NULL, &fl6);
> +	if (!dst || dst->error) {
> +		rc = dst ? dst->error : -EINVAL;
> +		goto out;
> +	}
> +	rt = (struct rt6_info *)dst;
> +
> +	if (ipv6_addr_type(&rt->rt6i_gateway) != IPV6_ADDR_ANY) {
> +		*uses_gateway = 1;
> +		nexthop_addr = &rt->rt6i_gateway;
> +	} else {
> +		*uses_gateway = 0;
> +		nexthop_addr = daddr;
> +	}
> +
> +	neigh = dst_neigh_lookup(dst, nexthop_addr);
> +	if (!neigh)
> +		goto out;
> +
> +	read_lock_bh(&neigh->lock);
> +	if (neigh->nud_state & NUD_VALID) {
> +		memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
> +		rc = 0;
> +	}
> +	read_unlock_bh(&neigh->lock);
> +
> +	neigh_release(neigh);
> +out:
> +	dst_release(dst);
> +	return rc;
> +}
> +#endif /* CONFIG_IPV6 */
> +
> +static bool smc_ib_match_gid_rocev2(const struct net_device *ndev,
>  				    const struct ib_gid_attr *attr,
> -				    u8 gid[], u8 *sgid_index,
>  				    struct smc_init_info_smcrv2 *smcrv2)
>  {
> -	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
> -		if (gid)
> -			memcpy(gid, &attr->gid, SMC_GID_SIZE);
> -		if (sgid_index)
> -			*sgid_index = attr->index;
> -		return 0;
> -	}
> -	if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
> -	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> +	struct net *net = dev_net(ndev);
> +	bool subnet_match = false;
> +
> +	if (smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
>  		struct in_device *in_dev = __in_dev_get_rcu(ndev);
> -		struct net *net = dev_net(ndev);
>  		const struct in_ifaddr *ifa;
> -		bool subnet_match = false;
>  
>  		if (!in_dev)
> -			goto out;
> +			return false;
> +
> +		if (smcrv2->saddr.family != AF_INET)
> +			return false;
> +
>  		in_dev_for_each_ifa_rcu(ifa, in_dev) {
> -			if (!inet_ifa_match(smcrv2->saddr, ifa))
> +			if (!inet_ifa_match(smcrv2->saddr.addr, ifa))
>  				continue;
>  			subnet_match = true;
>  			break;
>  		}
> +
>  		if (!subnet_match)
> -			goto out;
> -		if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
> -						       smcrv2->daddr,
> -						       smcrv2->nexthop_mac,
> -						       &smcrv2->uses_gateway))
> -			goto out;
> +			return false;
> +
> +		if (smcrv2->daddr.addr &&
> +		    smc_ib_find_route(net, smcrv2->saddr.addr,
> +				      smcrv2->daddr.addr,
> +				      smcrv2->nexthop_mac,
> +				      &smcrv2->uses_gateway))
> +			return false;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	} else if (!(ipv6_addr_type(smc_ib_gid_to_ipv6((u8 *)&attr->gid)) & IPV6_ADDR_LINKLOCAL)) {
> +		struct inet6_dev *in6_dev = __in6_dev_get(ndev);
> +		const struct inet6_ifaddr *if6;
> +
> +		if (!in6_dev)
> +			return false;
> +
> +		if (smcrv2->saddr.family != AF_INET6)
> +			return false;
> +
> +		list_for_each_entry_rcu(if6, &in6_dev->addr_list, if_list) {
> +			if (ipv6_addr_type(&if6->addr) & IPV6_ADDR_LINKLOCAL)
> +				continue;
> +			if (!ipv6_prefix_equal(&if6->addr, &smcrv2->saddr.addr_v6, if6->prefix_len))
> +				continue;
> +			subnet_match = true;
> +			break;
> +		}
>  
> +		if (!subnet_match)
> +			return false;
> +
> +		if ((ipv6_addr_type(&smcrv2->daddr.addr_v6) != IPV6_ADDR_ANY) &&
> +		    smc_ib_find_route_v6(net, &smcrv2->saddr.addr_v6,
> +					 &smcrv2->daddr.addr_v6,
> +					 smcrv2->nexthop_mac,
> +					 &smcrv2->uses_gateway))
> +			return false;
> +#endif /* CONFIG_IPV6 */
> +	} else {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> +				    const struct ib_gid_attr *attr,
> +				    u8 gid[], u8 *sgid_index,
> +				    struct smc_init_info_smcrv2 *smcrv2)
> +{
> +	bool gid_match = false;
> +
> +	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE)
> +		gid_match = true;
> +	else if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
> +		gid_match = smc_ib_match_gid_rocev2(ndev, attr, smcrv2);
> +
> +	if (gid_match) {
>  		if (gid)
>  			memcpy(gid, &attr->gid, SMC_GID_SIZE);
>  		if (sgid_index)
>  			*sgid_index = attr->index;
>  		return 0;
>  	}
> -out:
> +
>  	return -ENODEV;
>  }
>  
> diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
> index ef8ac2b7546d..7cbeb7350478 100644
> --- a/net/smc/smc_ib.h
> +++ b/net/smc/smc_ib.h
> @@ -69,6 +69,12 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
>  	return cpu_to_be32(INADDR_NONE);
>  }
>  
> +static inline struct in6_addr *smc_ib_gid_to_ipv6(u8 gid[SMC_GID_SIZE])
> +{
> +	struct in6_addr *addr6 = (struct in6_addr *)gid;
> +	return addr6;
> +}
> +
>  static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
>  {
>  	if (smcibdev && smcibdev->ibdev)
> @@ -114,6 +120,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
>  			 struct smc_init_info_smcrv2 *smcrv2);
>  int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>  		      u8 nexthop_mac[], u8 *uses_gateway);
> +int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
> +			 struct in6_addr *daddr, u8 nexthop_mac[],
> +			 u8 *uses_gateway);
>  bool smc_ib_is_valid_local_systemid(void);
>  int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
>  #endif
> diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
> index f865c58c3aa7..f2a02611ab25 100644
> --- a/net/smc/smc_llc.c
> +++ b/net/smc/smc_llc.c
> @@ -1055,8 +1055,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
>  	if (lgr->smc_version == SMC_V2) {
>  		ini->check_smcrv2 = true;
>  		ini->smcrv2.saddr = lgr->saddr;
> -		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
> +		smc_ipaddr_from_gid(&ini->smcrv2.daddr, llc->sender_gid);
>  	}
> +
>  	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
>  	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
>  	    (lgr->smc_version == SMC_V2 ||
> @@ -1438,8 +1439,7 @@ int smc_llc_srv_add_link(struct smc_link *link,
>  		if (send_req_add_link_resp) {
>  			struct smc_llc_msg_req_add_link_v2 *req_add =
>  				&req_qentry->msg.req_add_link;
> -
> -			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
> +			smc_ipaddr_from_gid(&ini->smcrv2.daddr, req_add->gid[0]);
>  		}
>  	}
>  	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
> -- 
> 2.45.0
> 
> 

  parent reply	other threads:[~2025-10-27 13:42 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-22  3:23 [PATCH net-next v2] net/smc: add full IPv6 support for SMC D. Wythe
2025-10-23  1:34 ` Dust Li
2025-10-28 10:18   ` D. Wythe
2025-10-27 13:42 ` Leon Romanovsky [this message]
2025-10-28  9:54   ` D. Wythe
2025-10-28 12:31     ` Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027134227.GL12554@unreal \
    --to=leon@kernel.org \
    --cc=alibuda@linux.alibaba.com \
    --cc=davem@davemloft.net \
    --cc=dust.li@linux.alibaba.com \
    --cc=edumazet@google.com \
    --cc=guwen@linux.alibaba.com \
    --cc=jaka@linux.ibm.com \
    --cc=kuba@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mjambigi@linux.ibm.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sidraya@linux.ibm.com \
    --cc=tonylu@linux.alibaba.com \
    --cc=wenjia@linux.ibm.com \
    --cc=wintera@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.