linux-s390.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2] net/smc: add full IPv6 support for SMC
@ 2025-10-22  3:23 D. Wythe
  2025-10-23  1:34 ` Dust Li
  2025-10-27 13:42 ` Leon Romanovsky
  0 siblings, 2 replies; 6+ messages in thread
From: D. Wythe @ 2025-10-22  3:23 UTC (permalink / raw)
  To: mjambigi, wenjia, wintera, dust.li, tonylu, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, pabeni, edumazet,
	sidraya, jaka

The current SMC implementation is IPv4-centric. While it contains a
workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
for native IPv6, preventing its use in modern dual-stack or IPv6-only
networks.

This patch introduces full, native IPv6 support by refactoring the
address handling mechanism to be IP-version agnostic, which is
achieved by:

- Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
- Implementing an IPv6-specific route lookup function.
- Extend GID matching logic for both IPv4 and IPv6 addresses

With these changes, SMC can now discover RDMA devices and establish
connections over both native IPv4 and IPv6 networks.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
v2: Fix build failure with CONFIG_IPV6 disabled
---
 net/smc/af_smc.c   |  48 +++++++++++----
 net/smc/smc_core.h |  40 ++++++++++++-
 net/smc/smc_ib.c   | 145 ++++++++++++++++++++++++++++++++++++++-------
 net/smc/smc_ib.h   |   9 +++
 net/smc/smc_llc.c  |   6 +-
 5 files changed, 209 insertions(+), 39 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 77b99e8ef35a..b435c8ba95f5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1132,12 +1132,15 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 
 	/* check if there is an rdma v2 device available */
 	ini->check_smcrv2 = true;
-	ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
-	if (!(ini->smcr_version & SMC_V2) ||
+
+	if (smc->clcsock->sk->sk_family == AF_INET)
+		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, smc->clcsock->sk->sk_rcv_saddr);
 #if IS_ENABLED(CONFIG_IPV6)
-	    (smc->clcsock->sk->sk_family == AF_INET6 &&
-	     !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
-#endif
+	else
+		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &smc->clcsock->sk->sk_v6_rcv_saddr);
+#endif /* CONFIG_IPV6 */
+
+	if (!(ini->smcr_version & SMC_V2) ||
 	    !smc_clc_ueid_count() ||
 	    smc_find_rdma_device(smc, ini))
 		ini->smcr_version &= ~SMC_V2;
@@ -1230,11 +1233,27 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
 		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
 		ini->smcrv2.uses_gateway = false;
 	} else {
-		if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
-				      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
-				      ini->smcrv2.nexthop_mac,
-				      &ini->smcrv2.uses_gateway))
+		struct smc_ipaddr peer_gid;
+
+		smc_ipaddr_from_gid(&peer_gid, aclc->r0.lcl.gid);
+		if (peer_gid.family == AF_INET) {
+			/* v4-mapped v6 address should also be treated as v4 address. */
+			if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
+					      peer_gid.addr,
+					      ini->smcrv2.nexthop_mac,
+					      &ini->smcrv2.uses_gateway))
+				return SMC_CLC_DECL_NOROUTE;
+		} else {
+#if IS_ENABLED(CONFIG_IPV6)
+			if (smc_ib_find_route_v6(net, &smc->clcsock->sk->sk_v6_rcv_saddr,
+						 &peer_gid.addr_v6,
+						 ini->smcrv2.nexthop_mac,
+						 &ini->smcrv2.uses_gateway))
+				return SMC_CLC_DECL_NOROUTE;
+#else
 			return SMC_CLC_DECL_NOROUTE;
+#endif /* CONFIG_IPV6 */
+		}
 		if (!ini->smcrv2.uses_gateway) {
 			/* mismatch: peer claims indirect, but its direct */
 			return SMC_CLC_DECL_NOINDIRECT;
@@ -2307,8 +2326,15 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
 	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
 	ini->check_smcrv2 = true;
 	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
-	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
-	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
+
+	if (new_smc->clcsock->sk->sk_family == AF_INET)
+		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, new_smc->clcsock->sk->sk_rcv_saddr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &new_smc->clcsock->sk->sk_v6_rcv_saddr);
+#endif /* CONFIG_IPV6 */
+	smc_ipaddr_from_gid(&ini->smcrv2.daddr, smc_v2_ext->roce);
+
 	rc = smc_find_rdma_device(new_smc, ini);
 	if (rc) {
 		smc_find_ism_store_rc(rc, ini);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index a5a78cbff341..6b07785f9874 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -279,6 +279,14 @@ struct smc_llc_flow {
 	struct smc_llc_qentry *qentry;
 };
 
+struct smc_ipaddr {
+	sa_family_t family;
+	union {
+		__be32          addr;
+		struct in6_addr addr_v6;
+	};
+};
+
 struct smc_link_group {
 	struct list_head	list;
 	struct rb_root		conns_all;	/* connection tree */
@@ -359,7 +367,7 @@ struct smc_link_group {
 						/* rsn code for termination */
 			u8			nexthop_mac[ETH_ALEN];
 			u8			uses_gateway;
-			__be32			saddr;
+			struct smc_ipaddr saddr;
 						/* net namespace */
 			struct net		*net;
 			u8			max_conns;
@@ -389,9 +397,9 @@ struct smc_gidlist {
 
 struct smc_init_info_smcrv2 {
 	/* Input fields */
-	__be32			saddr;
+	struct smc_ipaddr saddr;
 	struct sock		*clc_sk;
-	__be32			daddr;
+	struct smc_ipaddr daddr;
 
 	/* Output fields when saddr is set */
 	struct smc_ib_device	*ib_dev_v2;
@@ -618,4 +626,30 @@ static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
 	return link->lgr;
 }
+
+static inline void smc_ipaddr_from_v4addr(struct smc_ipaddr *ipaddr, __be32 v4_addr)
+{
+	ipaddr->family = AF_INET;
+	ipaddr->addr = v4_addr;
+}
+
+static inline void smc_ipaddr_from_v6addr(struct smc_ipaddr *ipaddr, const struct in6_addr *v6_addr)
+{
+	ipaddr->family = AF_INET6;
+	ipaddr->addr_v6 = *v6_addr;
+}
+
+static inline void smc_ipaddr_from_gid(struct smc_ipaddr *ipaddr, u8 gid[SMC_GID_SIZE])
+{
+	__be32 gid_v4 = smc_ib_gid_to_ipv4(gid);
+
+	if (gid_v4 != cpu_to_be32(INADDR_NONE)) {
+		ipaddr->family = AF_INET;
+		ipaddr->addr = gid_v4;
+	} else {
+		ipaddr->family = AF_INET6;
+		ipaddr->addr_v6 = *smc_ib_gid_to_ipv6(gid);
+	}
+}
+
 #endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 0052f02756eb..9c4c53be7bfc 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -22,6 +22,7 @@
 #include <linux/inetdevice.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
+#include <net/ip6_route.h>
 
 #include "smc_pnet.h"
 #include "smc_ib.h"
@@ -225,48 +226,148 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
 	return -ENOENT;
 }
 
-static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+#if IS_ENABLED(CONFIG_IPV6)
+int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
+			 struct in6_addr *daddr, u8 nexthop_mac[],
+			 u8 *uses_gateway)
+{
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	struct neighbour *neigh;
+	struct in6_addr *nexthop_addr;
+	int rc = -ENOENT;
+
+	struct flowi6 fl6 = {
+		.daddr = *daddr,
+		.saddr = *saddr,
+	};
+
+	if (ipv6_addr_any(daddr))
+		return -EINVAL;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst || dst->error) {
+		rc = dst ? dst->error : -EINVAL;
+		goto out;
+	}
+	rt = (struct rt6_info *)dst;
+
+	if (ipv6_addr_type(&rt->rt6i_gateway) != IPV6_ADDR_ANY) {
+		*uses_gateway = 1;
+		nexthop_addr = &rt->rt6i_gateway;
+	} else {
+		*uses_gateway = 0;
+		nexthop_addr = daddr;
+	}
+
+	neigh = dst_neigh_lookup(dst, nexthop_addr);
+	if (!neigh)
+		goto out;
+
+	read_lock_bh(&neigh->lock);
+	if (neigh->nud_state & NUD_VALID) {
+		memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+		rc = 0;
+	}
+	read_unlock_bh(&neigh->lock);
+
+	neigh_release(neigh);
+out:
+	dst_release(dst);
+	return rc;
+}
+#endif /* CONFIG_IPV6 */
+
+static bool smc_ib_match_gid_rocev2(const struct net_device *ndev,
 				    const struct ib_gid_attr *attr,
-				    u8 gid[], u8 *sgid_index,
 				    struct smc_init_info_smcrv2 *smcrv2)
 {
-	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
-		if (gid)
-			memcpy(gid, &attr->gid, SMC_GID_SIZE);
-		if (sgid_index)
-			*sgid_index = attr->index;
-		return 0;
-	}
-	if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
-	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
+	struct net *net = dev_net(ndev);
+	bool subnet_match = false;
+
+	if (smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
 		struct in_device *in_dev = __in_dev_get_rcu(ndev);
-		struct net *net = dev_net(ndev);
 		const struct in_ifaddr *ifa;
-		bool subnet_match = false;
 
 		if (!in_dev)
-			goto out;
+			return false;
+
+		if (smcrv2->saddr.family != AF_INET)
+			return false;
+
 		in_dev_for_each_ifa_rcu(ifa, in_dev) {
-			if (!inet_ifa_match(smcrv2->saddr, ifa))
+			if (!inet_ifa_match(smcrv2->saddr.addr, ifa))
 				continue;
 			subnet_match = true;
 			break;
 		}
+
 		if (!subnet_match)
-			goto out;
-		if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
-						       smcrv2->daddr,
-						       smcrv2->nexthop_mac,
-						       &smcrv2->uses_gateway))
-			goto out;
+			return false;
+
+		if (smcrv2->daddr.addr &&
+		    smc_ib_find_route(net, smcrv2->saddr.addr,
+				      smcrv2->daddr.addr,
+				      smcrv2->nexthop_mac,
+				      &smcrv2->uses_gateway))
+			return false;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (!(ipv6_addr_type(smc_ib_gid_to_ipv6((u8 *)&attr->gid)) & IPV6_ADDR_LINKLOCAL)) {
+		struct inet6_dev *in6_dev = __in6_dev_get(ndev);
+		const struct inet6_ifaddr *if6;
+
+		if (!in6_dev)
+			return false;
+
+		if (smcrv2->saddr.family != AF_INET6)
+			return false;
+
+		list_for_each_entry_rcu(if6, &in6_dev->addr_list, if_list) {
+			if (ipv6_addr_type(&if6->addr) & IPV6_ADDR_LINKLOCAL)
+				continue;
+			if (!ipv6_prefix_equal(&if6->addr, &smcrv2->saddr.addr_v6, if6->prefix_len))
+				continue;
+			subnet_match = true;
+			break;
+		}
 
+		if (!subnet_match)
+			return false;
+
+		if ((ipv6_addr_type(&smcrv2->daddr.addr_v6) != IPV6_ADDR_ANY) &&
+		    smc_ib_find_route_v6(net, &smcrv2->saddr.addr_v6,
+					 &smcrv2->daddr.addr_v6,
+					 smcrv2->nexthop_mac,
+					 &smcrv2->uses_gateway))
+			return false;
+#endif /* CONFIG_IPV6 */
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
+				    const struct ib_gid_attr *attr,
+				    u8 gid[], u8 *sgid_index,
+				    struct smc_init_info_smcrv2 *smcrv2)
+{
+	bool gid_match = false;
+
+	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE)
+		gid_match = true;
+	else if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		gid_match = smc_ib_match_gid_rocev2(ndev, attr, smcrv2);
+
+	if (gid_match) {
 		if (gid)
 			memcpy(gid, &attr->gid, SMC_GID_SIZE);
 		if (sgid_index)
 			*sgid_index = attr->index;
 		return 0;
 	}
-out:
+
 	return -ENODEV;
 }
 
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index ef8ac2b7546d..7cbeb7350478 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -69,6 +69,12 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
 	return cpu_to_be32(INADDR_NONE);
 }
 
+static inline struct in6_addr *smc_ib_gid_to_ipv6(u8 gid[SMC_GID_SIZE])
+{
+	struct in6_addr *addr6 = (struct in6_addr *)gid;
+	return addr6;
+}
+
 static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
 {
 	if (smcibdev && smcibdev->ibdev)
@@ -114,6 +120,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 			 struct smc_init_info_smcrv2 *smcrv2);
 int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
 		      u8 nexthop_mac[], u8 *uses_gateway);
+int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
+			 struct in6_addr *daddr, u8 nexthop_mac[],
+			 u8 *uses_gateway);
 bool smc_ib_is_valid_local_systemid(void);
 int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
 #endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index f865c58c3aa7..f2a02611ab25 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1055,8 +1055,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 	if (lgr->smc_version == SMC_V2) {
 		ini->check_smcrv2 = true;
 		ini->smcrv2.saddr = lgr->saddr;
-		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
+		smc_ipaddr_from_gid(&ini->smcrv2.daddr, llc->sender_gid);
 	}
+
 	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
 	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
 	    (lgr->smc_version == SMC_V2 ||
@@ -1438,8 +1439,7 @@ int smc_llc_srv_add_link(struct smc_link *link,
 		if (send_req_add_link_resp) {
 			struct smc_llc_msg_req_add_link_v2 *req_add =
 				&req_qentry->msg.req_add_link;
-
-			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
+			smc_ipaddr_from_gid(&ini->smcrv2.daddr, req_add->gid[0]);
 		}
 	}
 	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
  2025-10-22  3:23 [PATCH net-next v2] net/smc: add full IPv6 support for SMC D. Wythe
@ 2025-10-23  1:34 ` Dust Li
  2025-10-28 10:18   ` D. Wythe
  2025-10-27 13:42 ` Leon Romanovsky
  1 sibling, 1 reply; 6+ messages in thread
From: Dust Li @ 2025-10-23  1:34 UTC (permalink / raw)
  To: D. Wythe, mjambigi, wenjia, wintera, tonylu, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, pabeni, edumazet,
	sidraya, jaka

On 2025-10-22 11:23:09, D. Wythe wrote:
>The current SMC implementation is IPv4-centric. While it contains a
>workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
>for native IPv6, preventing its use in modern dual-stack or IPv6-only
>networks.
>
>This patch introduces full, native IPv6 support by refactoring the
>address handling mechanism to be IP-version agnostic, which is
>achieved by:
>
>- Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
>- Implementing an IPv6-specific route lookup function.
>- Extend GID matching logic for both IPv4 and IPv6 addresses
>
>With these changes, SMC can now discover RDMA devices and establish
>connections over both native IPv4 and IPv6 networks.

Tested it with link local ipv6 address, it still doesn't work as
expected, while TCP works fine.

#smc_run ./sockperf tp --tcp -i fe80::c679:7b0:4a4b:d5cc%eth2
sockperf: == version #3.10-23.gited92afb185e6.dirty ==
sockperf: ERROR: Can`t connect socket (errno=104 Connection reset by peer)

Best regards,
Dust

>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
>v2: Fix build failure with CONFIG_IPV6 disabled
>---
> net/smc/af_smc.c   |  48 +++++++++++----
> net/smc/smc_core.h |  40 ++++++++++++-
> net/smc/smc_ib.c   | 145 ++++++++++++++++++++++++++++++++++++++-------
> net/smc/smc_ib.h   |   9 +++
> net/smc/smc_llc.c  |   6 +-
> 5 files changed, 209 insertions(+), 39 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 77b99e8ef35a..b435c8ba95f5 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -1132,12 +1132,15 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
> 
> 	/* check if there is an rdma v2 device available */
> 	ini->check_smcrv2 = true;
>-	ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
>-	if (!(ini->smcr_version & SMC_V2) ||
>+
>+	if (smc->clcsock->sk->sk_family == AF_INET)
>+		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, smc->clcsock->sk->sk_rcv_saddr);
> #if IS_ENABLED(CONFIG_IPV6)
>-	    (smc->clcsock->sk->sk_family == AF_INET6 &&
>-	     !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
>-#endif
>+	else
>+		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &smc->clcsock->sk->sk_v6_rcv_saddr);
>+#endif /* CONFIG_IPV6 */
>+
>+	if (!(ini->smcr_version & SMC_V2) ||
> 	    !smc_clc_ueid_count() ||
> 	    smc_find_rdma_device(smc, ini))
> 		ini->smcr_version &= ~SMC_V2;
>@@ -1230,11 +1233,27 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
> 		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
> 		ini->smcrv2.uses_gateway = false;
> 	} else {
>-		if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
>-				      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
>-				      ini->smcrv2.nexthop_mac,
>-				      &ini->smcrv2.uses_gateway))
>+		struct smc_ipaddr peer_gid;
>+
>+		smc_ipaddr_from_gid(&peer_gid, aclc->r0.lcl.gid);
>+		if (peer_gid.family == AF_INET) {
>+			/* v4-mapped v6 address should also be treated as v4 address. */
>+			if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
>+					      peer_gid.addr,
>+					      ini->smcrv2.nexthop_mac,
>+					      &ini->smcrv2.uses_gateway))
>+				return SMC_CLC_DECL_NOROUTE;
>+		} else {
>+#if IS_ENABLED(CONFIG_IPV6)
>+			if (smc_ib_find_route_v6(net, &smc->clcsock->sk->sk_v6_rcv_saddr,
>+						 &peer_gid.addr_v6,
>+						 ini->smcrv2.nexthop_mac,
>+						 &ini->smcrv2.uses_gateway))
>+				return SMC_CLC_DECL_NOROUTE;
>+#else
> 			return SMC_CLC_DECL_NOROUTE;
>+#endif /* CONFIG_IPV6 */
>+		}
> 		if (!ini->smcrv2.uses_gateway) {
> 			/* mismatch: peer claims indirect, but its direct */
> 			return SMC_CLC_DECL_NOINDIRECT;
>@@ -2307,8 +2326,15 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
> 	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
> 	ini->check_smcrv2 = true;
> 	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
>-	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
>-	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
>+
>+	if (new_smc->clcsock->sk->sk_family == AF_INET)
>+		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, new_smc->clcsock->sk->sk_rcv_saddr);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	else
>+		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &new_smc->clcsock->sk->sk_v6_rcv_saddr);
>+#endif /* CONFIG_IPV6 */
>+	smc_ipaddr_from_gid(&ini->smcrv2.daddr, smc_v2_ext->roce);
>+
> 	rc = smc_find_rdma_device(new_smc, ini);
> 	if (rc) {
> 		smc_find_ism_store_rc(rc, ini);
>diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>index a5a78cbff341..6b07785f9874 100644
>--- a/net/smc/smc_core.h
>+++ b/net/smc/smc_core.h
>@@ -279,6 +279,14 @@ struct smc_llc_flow {
> 	struct smc_llc_qentry *qentry;
> };
> 
>+struct smc_ipaddr {
>+	sa_family_t family;
>+	union {
>+		__be32          addr;
>+		struct in6_addr addr_v6;
>+	};
>+};
>+
> struct smc_link_group {
> 	struct list_head	list;
> 	struct rb_root		conns_all;	/* connection tree */
>@@ -359,7 +367,7 @@ struct smc_link_group {
> 						/* rsn code for termination */
> 			u8			nexthop_mac[ETH_ALEN];
> 			u8			uses_gateway;
>-			__be32			saddr;
>+			struct smc_ipaddr saddr;
> 						/* net namespace */
> 			struct net		*net;
> 			u8			max_conns;
>@@ -389,9 +397,9 @@ struct smc_gidlist {
> 
> struct smc_init_info_smcrv2 {
> 	/* Input fields */
>-	__be32			saddr;
>+	struct smc_ipaddr saddr;
> 	struct sock		*clc_sk;
>-	__be32			daddr;
>+	struct smc_ipaddr daddr;
> 
> 	/* Output fields when saddr is set */
> 	struct smc_ib_device	*ib_dev_v2;
>@@ -618,4 +626,30 @@ static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
> {
> 	return link->lgr;
> }
>+
>+static inline void smc_ipaddr_from_v4addr(struct smc_ipaddr *ipaddr, __be32 v4_addr)
>+{
>+	ipaddr->family = AF_INET;
>+	ipaddr->addr = v4_addr;
>+}
>+
>+static inline void smc_ipaddr_from_v6addr(struct smc_ipaddr *ipaddr, const struct in6_addr *v6_addr)
>+{
>+	ipaddr->family = AF_INET6;
>+	ipaddr->addr_v6 = *v6_addr;
>+}
>+
>+static inline void smc_ipaddr_from_gid(struct smc_ipaddr *ipaddr, u8 gid[SMC_GID_SIZE])
>+{
>+	__be32 gid_v4 = smc_ib_gid_to_ipv4(gid);
>+
>+	if (gid_v4 != cpu_to_be32(INADDR_NONE)) {
>+		ipaddr->family = AF_INET;
>+		ipaddr->addr = gid_v4;
>+	} else {
>+		ipaddr->family = AF_INET6;
>+		ipaddr->addr_v6 = *smc_ib_gid_to_ipv6(gid);
>+	}
>+}
>+
> #endif
>diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
>index 0052f02756eb..9c4c53be7bfc 100644
>--- a/net/smc/smc_ib.c
>+++ b/net/smc/smc_ib.c
>@@ -22,6 +22,7 @@
> #include <linux/inetdevice.h>
> #include <rdma/ib_verbs.h>
> #include <rdma/ib_cache.h>
>+#include <net/ip6_route.h>
> 
> #include "smc_pnet.h"
> #include "smc_ib.h"
>@@ -225,48 +226,148 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> 	return -ENOENT;
> }
> 
>-static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
>+#if IS_ENABLED(CONFIG_IPV6)
>+int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
>+			 struct in6_addr *daddr, u8 nexthop_mac[],
>+			 u8 *uses_gateway)
>+{
>+	struct dst_entry *dst;
>+	struct rt6_info *rt;
>+	struct neighbour *neigh;
>+	struct in6_addr *nexthop_addr;
>+	int rc = -ENOENT;
>+
>+	struct flowi6 fl6 = {
>+		.daddr = *daddr,
>+		.saddr = *saddr,
>+	};
>+
>+	if (ipv6_addr_any(daddr))
>+		return -EINVAL;
>+
>+	dst = ip6_route_output(net, NULL, &fl6);
>+	if (!dst || dst->error) {
>+		rc = dst ? dst->error : -EINVAL;
>+		goto out;
>+	}
>+	rt = (struct rt6_info *)dst;
>+
>+	if (ipv6_addr_type(&rt->rt6i_gateway) != IPV6_ADDR_ANY) {
>+		*uses_gateway = 1;
>+		nexthop_addr = &rt->rt6i_gateway;
>+	} else {
>+		*uses_gateway = 0;
>+		nexthop_addr = daddr;
>+	}
>+
>+	neigh = dst_neigh_lookup(dst, nexthop_addr);
>+	if (!neigh)
>+		goto out;
>+
>+	read_lock_bh(&neigh->lock);
>+	if (neigh->nud_state & NUD_VALID) {
>+		memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
>+		rc = 0;
>+	}
>+	read_unlock_bh(&neigh->lock);
>+
>+	neigh_release(neigh);
>+out:
>+	dst_release(dst);
>+	return rc;
>+}
>+#endif /* CONFIG_IPV6 */
>+
>+static bool smc_ib_match_gid_rocev2(const struct net_device *ndev,
> 				    const struct ib_gid_attr *attr,
>-				    u8 gid[], u8 *sgid_index,
> 				    struct smc_init_info_smcrv2 *smcrv2)
> {
>-	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
>-		if (gid)
>-			memcpy(gid, &attr->gid, SMC_GID_SIZE);
>-		if (sgid_index)
>-			*sgid_index = attr->index;
>-		return 0;
>-	}
>-	if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
>-	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
>+	struct net *net = dev_net(ndev);
>+	bool subnet_match = false;
>+
>+	if (smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> 		struct in_device *in_dev = __in_dev_get_rcu(ndev);
>-		struct net *net = dev_net(ndev);
> 		const struct in_ifaddr *ifa;
>-		bool subnet_match = false;
> 
> 		if (!in_dev)
>-			goto out;
>+			return false;
>+
>+		if (smcrv2->saddr.family != AF_INET)
>+			return false;
>+
> 		in_dev_for_each_ifa_rcu(ifa, in_dev) {
>-			if (!inet_ifa_match(smcrv2->saddr, ifa))
>+			if (!inet_ifa_match(smcrv2->saddr.addr, ifa))
> 				continue;
> 			subnet_match = true;
> 			break;
> 		}
>+
> 		if (!subnet_match)
>-			goto out;
>-		if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
>-						       smcrv2->daddr,
>-						       smcrv2->nexthop_mac,
>-						       &smcrv2->uses_gateway))
>-			goto out;
>+			return false;
>+
>+		if (smcrv2->daddr.addr &&
>+		    smc_ib_find_route(net, smcrv2->saddr.addr,
>+				      smcrv2->daddr.addr,
>+				      smcrv2->nexthop_mac,
>+				      &smcrv2->uses_gateway))
>+			return false;
>+#if IS_ENABLED(CONFIG_IPV6)
>+	} else if (!(ipv6_addr_type(smc_ib_gid_to_ipv6((u8 *)&attr->gid)) & IPV6_ADDR_LINKLOCAL)) {
>+		struct inet6_dev *in6_dev = __in6_dev_get(ndev);
>+		const struct inet6_ifaddr *if6;
>+
>+		if (!in6_dev)
>+			return false;
>+
>+		if (smcrv2->saddr.family != AF_INET6)
>+			return false;
>+
>+		list_for_each_entry_rcu(if6, &in6_dev->addr_list, if_list) {
>+			if (ipv6_addr_type(&if6->addr) & IPV6_ADDR_LINKLOCAL)
>+				continue;
>+			if (!ipv6_prefix_equal(&if6->addr, &smcrv2->saddr.addr_v6, if6->prefix_len))
>+				continue;
>+			subnet_match = true;
>+			break;
>+		}
> 
>+		if (!subnet_match)
>+			return false;
>+
>+		if ((ipv6_addr_type(&smcrv2->daddr.addr_v6) != IPV6_ADDR_ANY) &&
>+		    smc_ib_find_route_v6(net, &smcrv2->saddr.addr_v6,
>+					 &smcrv2->daddr.addr_v6,
>+					 smcrv2->nexthop_mac,
>+					 &smcrv2->uses_gateway))
>+			return false;
>+#endif /* CONFIG_IPV6 */
>+	} else {
>+		return false;
>+	}
>+
>+	return true;
>+}
>+
>+static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
>+				    const struct ib_gid_attr *attr,
>+				    u8 gid[], u8 *sgid_index,
>+				    struct smc_init_info_smcrv2 *smcrv2)
>+{
>+	bool gid_match = false;
>+
>+	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE)
>+		gid_match = true;
>+	else if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
>+		gid_match = smc_ib_match_gid_rocev2(ndev, attr, smcrv2);
>+
>+	if (gid_match) {
> 		if (gid)
> 			memcpy(gid, &attr->gid, SMC_GID_SIZE);
> 		if (sgid_index)
> 			*sgid_index = attr->index;
> 		return 0;
> 	}
>-out:
>+
> 	return -ENODEV;
> }
> 
>diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
>index ef8ac2b7546d..7cbeb7350478 100644
>--- a/net/smc/smc_ib.h
>+++ b/net/smc/smc_ib.h
>@@ -69,6 +69,12 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
> 	return cpu_to_be32(INADDR_NONE);
> }
> 
>+static inline struct in6_addr *smc_ib_gid_to_ipv6(u8 gid[SMC_GID_SIZE])
>+{
>+	struct in6_addr *addr6 = (struct in6_addr *)gid;
>+	return addr6;
>+}
>+
> static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
> {
> 	if (smcibdev && smcibdev->ibdev)
>@@ -114,6 +120,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
> 			 struct smc_init_info_smcrv2 *smcrv2);
> int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
> 		      u8 nexthop_mac[], u8 *uses_gateway);
>+int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
>+			 struct in6_addr *daddr, u8 nexthop_mac[],
>+			 u8 *uses_gateway);
> bool smc_ib_is_valid_local_systemid(void);
> int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
> #endif
>diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
>index f865c58c3aa7..f2a02611ab25 100644
>--- a/net/smc/smc_llc.c
>+++ b/net/smc/smc_llc.c
>@@ -1055,8 +1055,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
> 	if (lgr->smc_version == SMC_V2) {
> 		ini->check_smcrv2 = true;
> 		ini->smcrv2.saddr = lgr->saddr;
>-		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
>+		smc_ipaddr_from_gid(&ini->smcrv2.daddr, llc->sender_gid);
> 	}
>+
> 	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
> 	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
> 	    (lgr->smc_version == SMC_V2 ||
>@@ -1438,8 +1439,7 @@ int smc_llc_srv_add_link(struct smc_link *link,
> 		if (send_req_add_link_resp) {
> 			struct smc_llc_msg_req_add_link_v2 *req_add =
> 				&req_qentry->msg.req_add_link;
>-
>-			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
>+			smc_ipaddr_from_gid(&ini->smcrv2.daddr, req_add->gid[0]);
> 		}
> 	}
> 	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
>-- 
>2.45.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
  2025-10-22  3:23 [PATCH net-next v2] net/smc: add full IPv6 support for SMC D. Wythe
  2025-10-23  1:34 ` Dust Li
@ 2025-10-27 13:42 ` Leon Romanovsky
  2025-10-28  9:54   ` D. Wythe
  1 sibling, 1 reply; 6+ messages in thread
From: Leon Romanovsky @ 2025-10-27 13:42 UTC (permalink / raw)
  To: D. Wythe
  Cc: mjambigi, wenjia, wintera, dust.li, tonylu, guwen, kuba, davem,
	netdev, linux-s390, linux-rdma, pabeni, edumazet, sidraya, jaka

On Wed, Oct 22, 2025 at 11:23:09AM +0800, D. Wythe wrote:
> The current SMC implementation is IPv4-centric. While it contains a
> workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
> for native IPv6, preventing its use in modern dual-stack or IPv6-only
> networks.
> 
> This patch introduces full, native IPv6 support by refactoring the
> address handling mechanism to be IP-version agnostic, which is
> achieved by:
> 
> - Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
> - Implementing an IPv6-specific route lookup function.
> - Extend GID matching logic for both IPv4 and IPv6 addresses
> 
> With these changes, SMC can now discover RDMA devices and establish
> connections over both native IPv4 and IPv6 networks.

Why can't you use rdma-cm in-kernel API like any other in-kernel RDMA consumers?

Thanks

> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
> v2: Fix build failure with CONFIG_IPV6 disabled
> ---
>  net/smc/af_smc.c   |  48 +++++++++++----
>  net/smc/smc_core.h |  40 ++++++++++++-
>  net/smc/smc_ib.c   | 145 ++++++++++++++++++++++++++++++++++++++-------
>  net/smc/smc_ib.h   |   9 +++
>  net/smc/smc_llc.c  |   6 +-
>  5 files changed, 209 insertions(+), 39 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 77b99e8ef35a..b435c8ba95f5 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1132,12 +1132,15 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
>  
>  	/* check if there is an rdma v2 device available */
>  	ini->check_smcrv2 = true;
> -	ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
> -	if (!(ini->smcr_version & SMC_V2) ||
> +
> +	if (smc->clcsock->sk->sk_family == AF_INET)
> +		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, smc->clcsock->sk->sk_rcv_saddr);
>  #if IS_ENABLED(CONFIG_IPV6)
> -	    (smc->clcsock->sk->sk_family == AF_INET6 &&
> -	     !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
> -#endif
> +	else
> +		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &smc->clcsock->sk->sk_v6_rcv_saddr);
> +#endif /* CONFIG_IPV6 */
> +
> +	if (!(ini->smcr_version & SMC_V2) ||
>  	    !smc_clc_ueid_count() ||
>  	    smc_find_rdma_device(smc, ini))
>  		ini->smcr_version &= ~SMC_V2;
> @@ -1230,11 +1233,27 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
>  		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
>  		ini->smcrv2.uses_gateway = false;
>  	} else {
> -		if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> -				      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
> -				      ini->smcrv2.nexthop_mac,
> -				      &ini->smcrv2.uses_gateway))
> +		struct smc_ipaddr peer_gid;
> +
> +		smc_ipaddr_from_gid(&peer_gid, aclc->r0.lcl.gid);
> +		if (peer_gid.family == AF_INET) {
> +			/* v4-mapped v6 address should also be treated as v4 address. */
> +			if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
> +					      peer_gid.addr,
> +					      ini->smcrv2.nexthop_mac,
> +					      &ini->smcrv2.uses_gateway))
> +				return SMC_CLC_DECL_NOROUTE;
> +		} else {
> +#if IS_ENABLED(CONFIG_IPV6)
> +			if (smc_ib_find_route_v6(net, &smc->clcsock->sk->sk_v6_rcv_saddr,
> +						 &peer_gid.addr_v6,
> +						 ini->smcrv2.nexthop_mac,
> +						 &ini->smcrv2.uses_gateway))
> +				return SMC_CLC_DECL_NOROUTE;
> +#else
>  			return SMC_CLC_DECL_NOROUTE;
> +#endif /* CONFIG_IPV6 */
> +		}
>  		if (!ini->smcrv2.uses_gateway) {
>  			/* mismatch: peer claims indirect, but its direct */
>  			return SMC_CLC_DECL_NOINDIRECT;
> @@ -2307,8 +2326,15 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
>  	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
>  	ini->check_smcrv2 = true;
>  	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
> -	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
> -	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
> +
> +	if (new_smc->clcsock->sk->sk_family == AF_INET)
> +		smc_ipaddr_from_v4addr(&ini->smcrv2.saddr, new_smc->clcsock->sk->sk_rcv_saddr);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	else
> +		smc_ipaddr_from_v6addr(&ini->smcrv2.saddr, &new_smc->clcsock->sk->sk_v6_rcv_saddr);
> +#endif /* CONFIG_IPV6 */
> +	smc_ipaddr_from_gid(&ini->smcrv2.daddr, smc_v2_ext->roce);
> +
>  	rc = smc_find_rdma_device(new_smc, ini);
>  	if (rc) {
>  		smc_find_ism_store_rc(rc, ini);
> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> index a5a78cbff341..6b07785f9874 100644
> --- a/net/smc/smc_core.h
> +++ b/net/smc/smc_core.h
> @@ -279,6 +279,14 @@ struct smc_llc_flow {
>  	struct smc_llc_qentry *qentry;
>  };
>  
> +struct smc_ipaddr {
> +	sa_family_t family;
> +	union {
> +		__be32          addr;
> +		struct in6_addr addr_v6;
> +	};
> +};
> +
>  struct smc_link_group {
>  	struct list_head	list;
>  	struct rb_root		conns_all;	/* connection tree */
> @@ -359,7 +367,7 @@ struct smc_link_group {
>  						/* rsn code for termination */
>  			u8			nexthop_mac[ETH_ALEN];
>  			u8			uses_gateway;
> -			__be32			saddr;
> +			struct smc_ipaddr saddr;
>  						/* net namespace */
>  			struct net		*net;
>  			u8			max_conns;
> @@ -389,9 +397,9 @@ struct smc_gidlist {
>  
>  struct smc_init_info_smcrv2 {
>  	/* Input fields */
> -	__be32			saddr;
> +	struct smc_ipaddr saddr;
>  	struct sock		*clc_sk;
> -	__be32			daddr;
> +	struct smc_ipaddr daddr;
>  
>  	/* Output fields when saddr is set */
>  	struct smc_ib_device	*ib_dev_v2;
> @@ -618,4 +626,30 @@ static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
>  {
>  	return link->lgr;
>  }
> +
> +static inline void smc_ipaddr_from_v4addr(struct smc_ipaddr *ipaddr, __be32 v4_addr)
> +{
> +	ipaddr->family = AF_INET;
> +	ipaddr->addr = v4_addr;
> +}
> +
> +static inline void smc_ipaddr_from_v6addr(struct smc_ipaddr *ipaddr, const struct in6_addr *v6_addr)
> +{
> +	ipaddr->family = AF_INET6;
> +	ipaddr->addr_v6 = *v6_addr;
> +}
> +
> +static inline void smc_ipaddr_from_gid(struct smc_ipaddr *ipaddr, u8 gid[SMC_GID_SIZE])
> +{
> +	__be32 gid_v4 = smc_ib_gid_to_ipv4(gid);
> +
> +	if (gid_v4 != cpu_to_be32(INADDR_NONE)) {
> +		ipaddr->family = AF_INET;
> +		ipaddr->addr = gid_v4;
> +	} else {
> +		ipaddr->family = AF_INET6;
> +		ipaddr->addr_v6 = *smc_ib_gid_to_ipv6(gid);
> +	}
> +}
> +
>  #endif
> diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
> index 0052f02756eb..9c4c53be7bfc 100644
> --- a/net/smc/smc_ib.c
> +++ b/net/smc/smc_ib.c
> @@ -22,6 +22,7 @@
>  #include <linux/inetdevice.h>
>  #include <rdma/ib_verbs.h>
>  #include <rdma/ib_cache.h>
> +#include <net/ip6_route.h>
>  
>  #include "smc_pnet.h"
>  #include "smc_ib.h"
> @@ -225,48 +226,148 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>  	return -ENOENT;
>  }
>  
> -static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> +#if IS_ENABLED(CONFIG_IPV6)
> +int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
> +			 struct in6_addr *daddr, u8 nexthop_mac[],
> +			 u8 *uses_gateway)
> +{
> +	struct dst_entry *dst;
> +	struct rt6_info *rt;
> +	struct neighbour *neigh;
> +	struct in6_addr *nexthop_addr;
> +	int rc = -ENOENT;
> +
> +	struct flowi6 fl6 = {
> +		.daddr = *daddr,
> +		.saddr = *saddr,
> +	};
> +
> +	if (ipv6_addr_any(daddr))
> +		return -EINVAL;
> +
> +	dst = ip6_route_output(net, NULL, &fl6);
> +	if (!dst || dst->error) {
> +		rc = dst ? dst->error : -EINVAL;
> +		goto out;
> +	}
> +	rt = (struct rt6_info *)dst;
> +
> +	if (ipv6_addr_type(&rt->rt6i_gateway) != IPV6_ADDR_ANY) {
> +		*uses_gateway = 1;
> +		nexthop_addr = &rt->rt6i_gateway;
> +	} else {
> +		*uses_gateway = 0;
> +		nexthop_addr = daddr;
> +	}
> +
> +	neigh = dst_neigh_lookup(dst, nexthop_addr);
> +	if (!neigh)
> +		goto out;
> +
> +	read_lock_bh(&neigh->lock);
> +	if (neigh->nud_state & NUD_VALID) {
> +		memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
> +		rc = 0;
> +	}
> +	read_unlock_bh(&neigh->lock);
> +
> +	neigh_release(neigh);
> +out:
> +	dst_release(dst);
> +	return rc;
> +}
> +#endif /* CONFIG_IPV6 */
> +
> +static bool smc_ib_match_gid_rocev2(const struct net_device *ndev,
>  				    const struct ib_gid_attr *attr,
> -				    u8 gid[], u8 *sgid_index,
>  				    struct smc_init_info_smcrv2 *smcrv2)
>  {
> -	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
> -		if (gid)
> -			memcpy(gid, &attr->gid, SMC_GID_SIZE);
> -		if (sgid_index)
> -			*sgid_index = attr->index;
> -		return 0;
> -	}
> -	if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
> -	    smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
> +	struct net *net = dev_net(ndev);
> +	bool subnet_match = false;
> +
> +	if (smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
>  		struct in_device *in_dev = __in_dev_get_rcu(ndev);
> -		struct net *net = dev_net(ndev);
>  		const struct in_ifaddr *ifa;
> -		bool subnet_match = false;
>  
>  		if (!in_dev)
> -			goto out;
> +			return false;
> +
> +		if (smcrv2->saddr.family != AF_INET)
> +			return false;
> +
>  		in_dev_for_each_ifa_rcu(ifa, in_dev) {
> -			if (!inet_ifa_match(smcrv2->saddr, ifa))
> +			if (!inet_ifa_match(smcrv2->saddr.addr, ifa))
>  				continue;
>  			subnet_match = true;
>  			break;
>  		}
> +
>  		if (!subnet_match)
> -			goto out;
> -		if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
> -						       smcrv2->daddr,
> -						       smcrv2->nexthop_mac,
> -						       &smcrv2->uses_gateway))
> -			goto out;
> +			return false;
> +
> +		if (smcrv2->daddr.addr &&
> +		    smc_ib_find_route(net, smcrv2->saddr.addr,
> +				      smcrv2->daddr.addr,
> +				      smcrv2->nexthop_mac,
> +				      &smcrv2->uses_gateway))
> +			return false;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	} else if (!(ipv6_addr_type(smc_ib_gid_to_ipv6((u8 *)&attr->gid)) & IPV6_ADDR_LINKLOCAL)) {
> +		struct inet6_dev *in6_dev = __in6_dev_get(ndev);
> +		const struct inet6_ifaddr *if6;
> +
> +		if (!in6_dev)
> +			return false;
> +
> +		if (smcrv2->saddr.family != AF_INET6)
> +			return false;
> +
> +		list_for_each_entry_rcu(if6, &in6_dev->addr_list, if_list) {
> +			if (ipv6_addr_type(&if6->addr) & IPV6_ADDR_LINKLOCAL)
> +				continue;
> +			if (!ipv6_prefix_equal(&if6->addr, &smcrv2->saddr.addr_v6, if6->prefix_len))
> +				continue;
> +			subnet_match = true;
> +			break;
> +		}
>  
> +		if (!subnet_match)
> +			return false;
> +
> +		if ((ipv6_addr_type(&smcrv2->daddr.addr_v6) != IPV6_ADDR_ANY) &&
> +		    smc_ib_find_route_v6(net, &smcrv2->saddr.addr_v6,
> +					 &smcrv2->daddr.addr_v6,
> +					 smcrv2->nexthop_mac,
> +					 &smcrv2->uses_gateway))
> +			return false;
> +#endif /* CONFIG_IPV6 */
> +	} else {
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
> +				    const struct ib_gid_attr *attr,
> +				    u8 gid[], u8 *sgid_index,
> +				    struct smc_init_info_smcrv2 *smcrv2)
> +{
> +	bool gid_match = false;
> +
> +	if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE)
> +		gid_match = true;
> +	else if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
> +		gid_match = smc_ib_match_gid_rocev2(ndev, attr, smcrv2);
> +
> +	if (gid_match) {
>  		if (gid)
>  			memcpy(gid, &attr->gid, SMC_GID_SIZE);
>  		if (sgid_index)
>  			*sgid_index = attr->index;
>  		return 0;
>  	}
> -out:
> +
>  	return -ENODEV;
>  }
>  
> diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
> index ef8ac2b7546d..7cbeb7350478 100644
> --- a/net/smc/smc_ib.h
> +++ b/net/smc/smc_ib.h
> @@ -69,6 +69,12 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
>  	return cpu_to_be32(INADDR_NONE);
>  }
>  
> +static inline struct in6_addr *smc_ib_gid_to_ipv6(u8 gid[SMC_GID_SIZE])
> +{
> +	struct in6_addr *addr6 = (struct in6_addr *)gid;
> +	return addr6;
> +}
> +
>  static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev)
>  {
>  	if (smcibdev && smcibdev->ibdev)
> @@ -114,6 +120,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
>  			 struct smc_init_info_smcrv2 *smcrv2);
>  int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
>  		      u8 nexthop_mac[], u8 *uses_gateway);
> +int smc_ib_find_route_v6(struct net *net, struct in6_addr *saddr,
> +			 struct in6_addr *daddr, u8 nexthop_mac[],
> +			 u8 *uses_gateway);
>  bool smc_ib_is_valid_local_systemid(void);
>  int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
>  #endif
> diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
> index f865c58c3aa7..f2a02611ab25 100644
> --- a/net/smc/smc_llc.c
> +++ b/net/smc/smc_llc.c
> @@ -1055,8 +1055,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
>  	if (lgr->smc_version == SMC_V2) {
>  		ini->check_smcrv2 = true;
>  		ini->smcrv2.saddr = lgr->saddr;
> -		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
> +		smc_ipaddr_from_gid(&ini->smcrv2.daddr, llc->sender_gid);
>  	}
> +
>  	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
>  	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
>  	    (lgr->smc_version == SMC_V2 ||
> @@ -1438,8 +1439,7 @@ int smc_llc_srv_add_link(struct smc_link *link,
>  		if (send_req_add_link_resp) {
>  			struct smc_llc_msg_req_add_link_v2 *req_add =
>  				&req_qentry->msg.req_add_link;
> -
> -			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
> +			smc_ipaddr_from_gid(&ini->smcrv2.daddr, req_add->gid[0]);
>  		}
>  	}
>  	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
> -- 
> 2.45.0
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
  2025-10-27 13:42 ` Leon Romanovsky
@ 2025-10-28  9:54   ` D. Wythe
  2025-10-28 12:31     ` Leon Romanovsky
  0 siblings, 1 reply; 6+ messages in thread
From: D. Wythe @ 2025-10-28  9:54 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: D. Wythe, mjambigi, wenjia, wintera, dust.li, tonylu, guwen, kuba,
	davem, netdev, linux-s390, linux-rdma, pabeni, edumazet, sidraya,
	jaka

On Mon, Oct 27, 2025 at 03:42:27PM +0200, Leon Romanovsky wrote:
> On Wed, Oct 22, 2025 at 11:23:09AM +0800, D. Wythe wrote:
> > The current SMC implementation is IPv4-centric. While it contains a
> > workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
> > for native IPv6, preventing its use in modern dual-stack or IPv6-only
> > networks.
> > 
> > This patch introduces full, native IPv6 support by refactoring the
> > address handling mechanism to be IP-version agnostic, which is
> > achieved by:
> > 
> > - Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
> > - Implementing an IPv6-specific route lookup function.
> > - Extend GID matching logic for both IPv4 and IPv6 addresses
> > 
> > With these changes, SMC can now discover RDMA devices and establish
> > connections over both native IPv4 and IPv6 networks.
> 
> Why can't you use rdma-cm in-kernel API like any other in-kernel RDMA consumers?
> 
> Thanks
> 
> >

Hi Leon,

Regarding RDMA-CM, I’m not sure if I’ve fully grasped your point, but
based on my current understanding, I believe SMC cannot use RDMA-CM.
There are a few reasons for this:

Firstly, SMC is designed to work not only with RDMA devices but also
needs to negotiate with DIBS(DIRECT INTERNAL BUFFER SHARING) devices. This
means we must support scenarios where no RDMA device is present.
Therefore, we require a round of out-of-band negotiation regardless of
the final device choice. In this context, even if we ultimately select
an RDMA device, using rdma-cm to establish the connection would be
redundant.

Additionally, SMC requires multiplexing multiple connections over a
single QP. We need to decide during the out-of-band negotiation which
specific QP to reuse for the connection. From what I know, rdma-cm does
not seem to offer this capability either.

Best regards,
D. Wythe

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
  2025-10-23  1:34 ` Dust Li
@ 2025-10-28 10:18   ` D. Wythe
  0 siblings, 0 replies; 6+ messages in thread
From: D. Wythe @ 2025-10-28 10:18 UTC (permalink / raw)
  To: Dust Li
  Cc: D. Wythe, mjambigi, wenjia, wintera, tonylu, guwen, kuba, davem,
	netdev, linux-s390, linux-rdma, pabeni, edumazet, sidraya, jaka

On Thu, Oct 23, 2025 at 09:34:22AM +0800, Dust Li wrote:
> On 2025-10-22 11:23:09, D. Wythe wrote:
> >The current SMC implementation is IPv4-centric. While it contains a
> >workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
> >for native IPv6, preventing its use in modern dual-stack or IPv6-only
> >networks.
> >
> >This patch introduces full, native IPv6 support by refactoring the
> >address handling mechanism to be IP-version agnostic, which is
> >achieved by:
> >
> >- Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
> >- Implementing an IPv6-specific route lookup function.
> >- Extend GID matching logic for both IPv4 and IPv6 addresses
> >
> >With these changes, SMC can now discover RDMA devices and establish
> >connections over both native IPv4 and IPv6 networks.
> 
> Tested it with link local ipv6 address, it still doesn't work as
> expected, while TCP works fine.
> 
> #smc_run ./sockperf tp --tcp -i fe80::c679:7b0:4a4b:d5cc%eth2
> sockperf: == version #3.10-23.gited92afb185e6.dirty ==
> sockperf: ERROR: Can`t connect socket (errno=104 Connection reset by peer)
> 
> Best regards,
> Dust
>

This is a long-standing bug. When both smcd and smcrv1 devices are
present in the system, if the smc_clc_prfx_set function fails, the
current logic only clears pclc_base->hdr.typev1 = SMC_TYPE_N, but
neglects to clear ini->smc_type_v1.

This leads to a serious issue: when subsequently constructing the smc
proposal message, the message content is organized according to the v1 +
smcd format based on the uncleared ini->smc_type_v1.

However, because pclc_base->hdr.typev1 has been cleared, When the server
receives such a proposal message, where the type in the header does not
match the content, it immediately fails.

Previously, I considered this issue to be logically unrelated to IPv6
support, so I didn't plan to send this bugfix along. However, in an
IPv6-only environment, this function is guaranteed to return an error
when only link-local address is present. This inevitably triggers the
problem in your test cases, which I hadn't anticipated. I will include
its fix in the next release.

Best wishes,
D. Wythe


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next v2] net/smc: add full IPv6 support for SMC
  2025-10-28  9:54   ` D. Wythe
@ 2025-10-28 12:31     ` Leon Romanovsky
  0 siblings, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2025-10-28 12:31 UTC (permalink / raw)
  To: D. Wythe
  Cc: mjambigi, wenjia, wintera, dust.li, tonylu, guwen, kuba, davem,
	netdev, linux-s390, linux-rdma, pabeni, edumazet, sidraya, jaka

On Tue, Oct 28, 2025 at 05:54:50PM +0800, D. Wythe wrote:
> On Mon, Oct 27, 2025 at 03:42:27PM +0200, Leon Romanovsky wrote:
> > On Wed, Oct 22, 2025 at 11:23:09AM +0800, D. Wythe wrote:
> > > The current SMC implementation is IPv4-centric. While it contains a
> > > workaround for IPv4-mapped IPv6 addresses, it lacks a functional path
> > > for native IPv6, preventing its use in modern dual-stack or IPv6-only
> > > networks.
> > > 
> > > This patch introduces full, native IPv6 support by refactoring the
> > > address handling mechanism to be IP-version agnostic, which is
> > > achieved by:
> > > 
> > > - Introducing a generic `struct smc_ipaddr` to abstract IP addresses.
> > > - Implementing an IPv6-specific route lookup function.
> > > - Extend GID matching logic for both IPv4 and IPv6 addresses
> > > 
> > > With these changes, SMC can now discover RDMA devices and establish
> > > connections over both native IPv4 and IPv6 networks.
> > 
> > Why can't you use rdma-cm in-kernel API like any other in-kernel RDMA consumers?
> > 
> > Thanks
> > 
> > >
> 
> Hi Leon,
> 
> Regarding RDMA-CM, I’m not sure if I’ve fully grasped your point, but
> based on my current understanding, I believe SMC cannot use RDMA-CM.
> There are a few reasons for this:
> 
> Firstly, SMC is designed to work not only with RDMA devices but also
> needs to negotiate with DIBS(DIRECT INTERNAL BUFFER SHARING) devices. This
> means we must support scenarios where no RDMA device is present.
> Therefore, we require a round of out-of-band negotiation regardless of
> the final device choice. In this context, even if we ultimately select
> an RDMA device, using rdma-cm to establish the connection would be
> redundant.

Ahh, yes, I always failed to remember this.

Thanks

> 
> Additionally, SMC requires multiplexing multiple connections over a
> single QP. We need to decide during the out-of-band negotiation which
> specific QP to reuse for the connection. From what I know, rdma-cm does
> not seem to offer this capability either.
> 
> Best regards,
> D. Wythe
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-10-28 12:31 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-22  3:23 [PATCH net-next v2] net/smc: add full IPv6 support for SMC D. Wythe
2025-10-23  1:34 ` Dust Li
2025-10-28 10:18   ` D. Wythe
2025-10-27 13:42 ` Leon Romanovsky
2025-10-28  9:54   ` D. Wythe
2025-10-28 12:31     ` Leon Romanovsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).