From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: "D. Wythe" <alibuda@linux.alibaba.com>,
kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
wintera@linux.ibm.com, guwen@linux.alibaba.com
Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org,
linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org,
tonylu@linux.alibaba.com, pabeni@redhat.com, edumazet@google.com
Subject: Re: [PATCH net-next v4 3/3] net/smc: Introduce IPPROTO_SMC
Date: Wed, 29 May 2024 21:55:15 +0200 [thread overview]
Message-ID: <f7ad8072-a173-4d75-bbdd-775f31f6826f@linux.dev> (raw)
In-Reply-To: <1716955147-88923-4-git-send-email-alibuda@linux.alibaba.com>
在 2024/5/29 5:59, D. Wythe 写道:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows an smc socket to be created via AF_INET,
> as in the following code:
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
> There are several reasons why we believe it is appropriate here:
>
> 1. An smc socket actually uses an IPv4 (AF_INET) or IPv6 (AF_INET6)
> address. There is no AF_SMC address at all.
>
> 2. Creating the smc socket in the AF_INET(6) path allows us to reuse
> the infrastructure of the AF_INET(6) path, such as the common ebpf hooks.
> Otherwise, smc would have to implement it again in the AF_SMC path.
>
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
> include/uapi/linux/in.h | 2 +
> net/smc/Makefile | 2 +-
> net/smc/af_smc.c | 36 ++++++++++++++++
> net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
> net/smc/inet_smc.h | 34 +++++++++++++++
> 5 files changed, 181 insertions(+), 1 deletion(-)
> create mode 100644 net/smc/inet_smc.c
> create mode 100644 net/smc/inet_smc.h
>
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index e682ab6..0c6322b 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW IPPROTO_RAW
> IPPROTO_MPTCP = 262, /* Multipath TCP connection */
> #define IPPROTO_MPTCP IPPROTO_MPTCP
> + IPPROTO_SMC = 263, /* Shared Memory Communications */
> +#define IPPROTO_SMC IPPROTO_SMC
> IPPROTO_MAX
> };
> #endif
> diff --git a/net/smc/Makefile b/net/smc/Makefile
> index 2c510d54..472b9ee 100644
> --- a/net/smc/Makefile
> +++ b/net/smc/Makefile
> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o
> obj-$(CONFIG_SMC_DIAG) += smc_diag.o
> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
> smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
> -smc-y += smc_tracepoint.o
> +smc-y += smc_tracepoint.o inet_smc.o
> smc-$(CONFIG_SYSCTL) += smc_sysctl.o
> smc-$(CONFIG_SMC_LO) += smc_loopback.o
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 8e3ce76..320624c 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -54,6 +54,7 @@
> #include "smc_tracepoint.h"
> #include "smc_sysctl.h"
> #include "smc_loopback.h"
> +#include "inet_smc.h"
>
> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
> * creation on server
> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
> goto out_lo;
> }
>
> + rc = proto_register(&smc_inet_prot, 1);
> + if (rc) {
> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> + goto out_ulp;
> + }
> + inet_register_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> + rc = proto_register(&smc_inet6_prot, 1);
> + if (rc) {
> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> + goto out_inet_prot;
> + }
> + inet6_register_protosw(&smc_inet6_protosw);
> +#endif
> +
> static_branch_enable(&tcp_have_smc);
> return 0;
>
> +#if IS_ENABLED(CONFIG_IPV6)
> +out_inet_prot:
> + inet_unregister_protosw(&smc_inet_protosw);
> + proto_unregister(&smc_inet_prot);
> +#endif
> +out_ulp:
> + tcp_unregister_ulp(&smc_ulp_ops);
> out_lo:
> smc_loopback_exit();
> out_ib:
> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
> static void __exit smc_exit(void)
> {
> static_branch_disable(&tcp_have_smc);
> + inet_unregister_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> + inet6_unregister_protosw(&smc_inet6_protosw);
> +#endif
> tcp_unregister_ulp(&smc_ulp_ops);
> sock_unregister(PF_SMC);
> smc_core_exit();
> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
> destroy_workqueue(smc_hs_wq);
> proto_unregister(&smc_proto6);
> proto_unregister(&smc_proto);
> + proto_unregister(&smc_inet_prot);
> +#if IS_ENABLED(CONFIG_IPV6)
> + proto_unregister(&smc_inet6_prot);
> +#endif
> smc_pnet_exit();
> smc_nl_exit();
> smc_clc_exit();
> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
> MODULE_LICENSE("GPL");
> MODULE_ALIAS_NETPROTO(PF_SMC);
> MODULE_ALIAS_TCP_ULP("smc");
> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
> +#if IS_ENABLED(CONFIG_IPV6)
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
> +#endif
> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
> new file mode 100644
> index 00000000..1ba73d7
> --- /dev/null
> +++ b/net/smc/inet_smc.c
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + * Definitions for the IPPROTO_SMC (socket related)
> + *
> + * Copyright IBM Corp. 2016, 2018
> + * Copyright (c) 2024, Alibaba Inc.
> + *
> + * Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +
> +#include "inet_smc.h"
> +#include "smc.h"
> +
> +struct proto smc_inet_prot = {
> + .name = "INET_SMC",
> + .owner = THIS_MODULE,
> + .init = smc_inet_init_sock,
> + .hash = smc_hash_sk,
> + .unhash = smc_unhash_sk,
> + .release_cb = smc_release_cb,
> + .obj_size = sizeof(struct smc_sock),
> + .h.smc_hash = &smc_v4_hashinfo,
> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet_stream_ops = {
> + .family = PF_INET,
> + .owner = THIS_MODULE,
> + .release = smc_release,
> + .bind = smc_bind,
> + .connect = smc_connect,
> + .socketpair = sock_no_socketpair,
> + .accept = smc_accept,
> + .getname = smc_getname,
> + .poll = smc_poll,
> + .ioctl = smc_ioctl,
> + .listen = smc_listen,
> + .shutdown = smc_shutdown,
> + .setsockopt = smc_setsockopt,
> + .getsockopt = smc_getsockopt,
> + .sendmsg = smc_sendmsg,
> + .recvmsg = smc_recvmsg,
> + .mmap = sock_no_mmap,
> + .splice_read = smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_SMC,
> + .prot = &smc_inet_prot,
> + .ops = &smc_inet_stream_ops,
> + .flags = INET_PROTOSW_ICSK,
> +};
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +struct proto smc_inet6_prot = {
> + .name = "INET6_SMC",
> + .owner = THIS_MODULE,
> + .init = smc_inet_init_sock,
> + .hash = smc_hash_sk,
> + .unhash = smc_unhash_sk,
> + .release_cb = smc_release_cb,
> + .obj_size = sizeof(struct smc_sock),
> + .h.smc_hash = &smc_v6_hashinfo,
> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet6_stream_ops = {
> + .family = PF_INET6,
> + .owner = THIS_MODULE,
> + .release = smc_release,
> + .bind = smc_bind,
> + .connect = smc_connect,
> + .socketpair = sock_no_socketpair,
> + .accept = smc_accept,
> + .getname = smc_getname,
> + .poll = smc_poll,
> + .ioctl = smc_ioctl,
> + .listen = smc_listen,
> + .shutdown = smc_shutdown,
> + .setsockopt = smc_setsockopt,
> + .getsockopt = smc_getsockopt,
> + .sendmsg = smc_sendmsg,
> + .recvmsg = smc_recvmsg,
> + .mmap = sock_no_mmap,
> + .splice_read = smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet6_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_SMC,
> + .prot = &smc_inet6_prot,
> + .ops = &smc_inet6_stream_ops,
> + .flags = INET_PROTOSW_ICSK,
> +};
> +#endif
> +
> +int smc_inet_init_sock(struct sock *sk)
> +{
> + struct net *net = sock_net(sk);
> +
> + /* init common smc sock */
> + smc_sk_init(net, sk, IPPROTO_SMC);
> + /* create clcsock */
> + return smc_create_clcsk(net, sk, sk->sk_family);
> +}
> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
> new file mode 100644
> index 00000000..c55345d
> --- /dev/null
> +++ b/net/smc/inet_smc.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + * Definitions for the IPPROTO_SMC (socket related)
> +
> + * Copyright IBM Corp. 2016
> + * Copyright (c) 2024, Alibaba Inc.
> + *
> + * Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +#ifndef __INET_SMC
> +#define __INET_SMC
> +
> +#include <net/protocol.h>
> +#include <net/sock.h>
> +#include <net/tcp.h>
> +
> +extern struct proto smc_inet_prot;
> +extern const struct proto_ops smc_inet_stream_ops;
> +extern struct inet_protosw smc_inet_protosw;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +/* MUST after net/tcp.h or warning */
> +#include <net/transp_v6.h>
> +extern struct proto smc_inet6_prot;
> +extern const struct proto_ops smc_inet6_stream_ops;
> +extern struct inet_protosw smc_inet6_protosw;
> +#endif
It is a good habit to append /* CONFIG_IPV6 */ to this #endif to indicate
that it closes the CONFIG_IPV6 block. When browsing the source code, that
makes it easy to see which conditional ends here.
Just my 2 cents. It is a trivial issue, so feel free to ignore it.
But fixing it would make the source code more readable.
Zhu Yanjun
> +
> +int smc_inet_init_sock(struct sock *sk);
> +
> +#endif /* __INET_SMC */
next prev parent reply other threads:[~2024-05-29 19:55 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 3:59 [PATCH net-next v4 0/3] Introduce IPPROTO_SMC D. Wythe
2024-05-29 3:59 ` [PATCH net-next v4 1/3] net/smc: refactoring initialization of smc sock D. Wythe
2024-05-29 6:14 ` Tony Lu
2024-05-29 3:59 ` [PATCH net-next v4 2/3] net/smc: expose smc proto operations D. Wythe
2024-05-29 17:57 ` Zhu Yanjun
2024-05-30 2:33 ` D. Wythe
2024-05-29 3:59 ` [PATCH net-next v4 3/3] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-29 11:12 ` Dust Li
2024-05-30 3:11 ` D. Wythe
2024-05-29 11:58 ` Wenjia Zhang
2024-05-30 2:51 ` D. Wythe
2024-05-29 19:55 ` Zhu Yanjun [this message]
2024-05-30 2:35 ` D. Wythe
2024-06-01 13:06 ` Simon Horman
2024-06-03 2:57 ` D. Wythe
2024-06-03 7:47 ` Simon Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f7ad8072-a173-4d75-bbdd-775f31f6826f@linux.dev \
--to=yanjun.zhu@linux.dev \
--cc=alibuda@linux.alibaba.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=guwen@linux.alibaba.com \
--cc=jaka@linux.ibm.com \
--cc=kgraul@linux.ibm.com \
--cc=kuba@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=tonylu@linux.alibaba.com \
--cc=wenjia@linux.ibm.com \
--cc=wintera@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).