From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: "D. Wythe" <alibuda@linux.alibaba.com>,
kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
wintera@linux.ibm.com, guwen@linux.alibaba.com
Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org,
linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org,
tonylu@linux.alibaba.com, pabeni@redhat.com, edumazet@google.com
Subject: Re: [PATCH net-next v4 3/3] net/smc: Introduce IPPROTO_SMC
Date: Wed, 29 May 2024 21:55:15 +0200 [thread overview]
Message-ID: <f7ad8072-a173-4d75-bbdd-775f31f6826f@linux.dev> (raw)
In-Reply-To: <1716955147-88923-4-git-send-email-alibuda@linux.alibaba.com>
在 2024/5/29 5:59, D. Wythe 写道:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows creating an SMC socket via AF_INET,
> as in the following code:
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
> There are several reasons why we believe it is appropriate here:
>
> 1. SMC sockets actually use IPv4 (AF_INET) or IPv6 (AF_INET6)
> addresses. There is no AF_SMC address at all.
>
> 2. Creating the SMC socket in the AF_INET(6) path allows us to reuse
> the infrastructure of the AF_INET(6) path, such as the common eBPF hooks.
> Otherwise, SMC would have to implement it again in the AF_SMC path.
>
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
> include/uapi/linux/in.h | 2 +
> net/smc/Makefile | 2 +-
> net/smc/af_smc.c | 36 ++++++++++++++++
> net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
> net/smc/inet_smc.h | 34 +++++++++++++++
> 5 files changed, 181 insertions(+), 1 deletion(-)
> create mode 100644 net/smc/inet_smc.c
> create mode 100644 net/smc/inet_smc.h
>
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index e682ab6..0c6322b 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW IPPROTO_RAW
> IPPROTO_MPTCP = 262, /* Multipath TCP connection */
> #define IPPROTO_MPTCP IPPROTO_MPTCP
> + IPPROTO_SMC = 263, /* Shared Memory Communications */
> +#define IPPROTO_SMC IPPROTO_SMC
> IPPROTO_MAX
> };
> #endif
> diff --git a/net/smc/Makefile b/net/smc/Makefile
> index 2c510d54..472b9ee 100644
> --- a/net/smc/Makefile
> +++ b/net/smc/Makefile
> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o
> obj-$(CONFIG_SMC_DIAG) += smc_diag.o
> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
> smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
> -smc-y += smc_tracepoint.o
> +smc-y += smc_tracepoint.o inet_smc.o
> smc-$(CONFIG_SYSCTL) += smc_sysctl.o
> smc-$(CONFIG_SMC_LO) += smc_loopback.o
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 8e3ce76..320624c 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -54,6 +54,7 @@
> #include "smc_tracepoint.h"
> #include "smc_sysctl.h"
> #include "smc_loopback.h"
> +#include "inet_smc.h"
>
> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
> * creation on server
> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
> goto out_lo;
> }
>
> + rc = proto_register(&smc_inet_prot, 1);
> + if (rc) {
> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> + goto out_ulp;
> + }
> + inet_register_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> + rc = proto_register(&smc_inet6_prot, 1);
> + if (rc) {
> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> + goto out_inet_prot;
> + }
> + inet6_register_protosw(&smc_inet6_protosw);
> +#endif
> +
> static_branch_enable(&tcp_have_smc);
> return 0;
>
> +#if IS_ENABLED(CONFIG_IPV6)
> +out_inet_prot:
> + inet_unregister_protosw(&smc_inet_protosw);
> + proto_unregister(&smc_inet_prot);
> +#endif
> +out_ulp:
> + tcp_unregister_ulp(&smc_ulp_ops);
> out_lo:
> smc_loopback_exit();
> out_ib:
> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
> static void __exit smc_exit(void)
> {
> static_branch_disable(&tcp_have_smc);
> + inet_unregister_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> + inet6_unregister_protosw(&smc_inet6_protosw);
> +#endif
> tcp_unregister_ulp(&smc_ulp_ops);
> sock_unregister(PF_SMC);
> smc_core_exit();
> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
> destroy_workqueue(smc_hs_wq);
> proto_unregister(&smc_proto6);
> proto_unregister(&smc_proto);
> + proto_unregister(&smc_inet_prot);
> +#if IS_ENABLED(CONFIG_IPV6)
> + proto_unregister(&smc_inet6_prot);
> +#endif
> smc_pnet_exit();
> smc_nl_exit();
> smc_clc_exit();
> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
> MODULE_LICENSE("GPL");
> MODULE_ALIAS_NETPROTO(PF_SMC);
> MODULE_ALIAS_TCP_ULP("smc");
> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
> +#if IS_ENABLED(CONFIG_IPV6)
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
> +#endif
> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
> new file mode 100644
> index 00000000..1ba73d7
> --- /dev/null
> +++ b/net/smc/inet_smc.c
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + * Definitions for the IPPROTO_SMC (socket related)
> + *
> + * Copyright IBM Corp. 2016, 2018
> + * Copyright (c) 2024, Alibaba Inc.
> + *
> + * Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +
> +#include "inet_smc.h"
> +#include "smc.h"
> +
> +struct proto smc_inet_prot = {
> + .name = "INET_SMC",
> + .owner = THIS_MODULE,
> + .init = smc_inet_init_sock,
> + .hash = smc_hash_sk,
> + .unhash = smc_unhash_sk,
> + .release_cb = smc_release_cb,
> + .obj_size = sizeof(struct smc_sock),
> + .h.smc_hash = &smc_v4_hashinfo,
> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet_stream_ops = {
> + .family = PF_INET,
> + .owner = THIS_MODULE,
> + .release = smc_release,
> + .bind = smc_bind,
> + .connect = smc_connect,
> + .socketpair = sock_no_socketpair,
> + .accept = smc_accept,
> + .getname = smc_getname,
> + .poll = smc_poll,
> + .ioctl = smc_ioctl,
> + .listen = smc_listen,
> + .shutdown = smc_shutdown,
> + .setsockopt = smc_setsockopt,
> + .getsockopt = smc_getsockopt,
> + .sendmsg = smc_sendmsg,
> + .recvmsg = smc_recvmsg,
> + .mmap = sock_no_mmap,
> + .splice_read = smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_SMC,
> + .prot = &smc_inet_prot,
> + .ops = &smc_inet_stream_ops,
> + .flags = INET_PROTOSW_ICSK,
> +};
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +struct proto smc_inet6_prot = {
> + .name = "INET6_SMC",
> + .owner = THIS_MODULE,
> + .init = smc_inet_init_sock,
> + .hash = smc_hash_sk,
> + .unhash = smc_unhash_sk,
> + .release_cb = smc_release_cb,
> + .obj_size = sizeof(struct smc_sock),
> + .h.smc_hash = &smc_v6_hashinfo,
> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet6_stream_ops = {
> + .family = PF_INET6,
> + .owner = THIS_MODULE,
> + .release = smc_release,
> + .bind = smc_bind,
> + .connect = smc_connect,
> + .socketpair = sock_no_socketpair,
> + .accept = smc_accept,
> + .getname = smc_getname,
> + .poll = smc_poll,
> + .ioctl = smc_ioctl,
> + .listen = smc_listen,
> + .shutdown = smc_shutdown,
> + .setsockopt = smc_setsockopt,
> + .getsockopt = smc_getsockopt,
> + .sendmsg = smc_sendmsg,
> + .recvmsg = smc_recvmsg,
> + .mmap = sock_no_mmap,
> + .splice_read = smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet6_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_SMC,
> + .prot = &smc_inet6_prot,
> + .ops = &smc_inet6_stream_ops,
> + .flags = INET_PROTOSW_ICSK,
> +};
> +#endif
> +
> +int smc_inet_init_sock(struct sock *sk)
> +{
> + struct net *net = sock_net(sk);
> +
> + /* init common smc sock */
> + smc_sk_init(net, sk, IPPROTO_SMC);
> + /* create clcsock */
> + return smc_create_clcsk(net, sk, sk->sk_family);
> +}
> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
> new file mode 100644
> index 00000000..c55345d
> --- /dev/null
> +++ b/net/smc/inet_smc.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + * Definitions for the IPPROTO_SMC (socket related)
> +
> + * Copyright IBM Corp. 2016
> + * Copyright (c) 2024, Alibaba Inc.
> + *
> + * Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +#ifndef __INET_SMC
> +#define __INET_SMC
> +
> +#include <net/protocol.h>
> +#include <net/sock.h>
> +#include <net/tcp.h>
> +
> +extern struct proto smc_inet_prot;
> +extern const struct proto_ops smc_inet_stream_ops;
> +extern struct inet_protosw smc_inet_protosw;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +/* MUST after net/tcp.h or warning */
> +#include <net/transp_v6.h>
> +extern struct proto smc_inet6_prot;
> +extern const struct proto_ops smc_inet6_stream_ops;
> +extern struct inet_protosw smc_inet6_protosw;
> +#endif
It is a good habit to append /* CONFIG_IPV6 */ to this #endif to mark
the end of the CONFIG_IPV6 block. When browsing the source code, that
makes it easy to see which conditional the #endif closes.
Just my 2 cents. It is a trivial issue, so you can ignore it,
but fixing it would make the source code more readable.
Zhu Yanjun
> +
> +int smc_inet_init_sock(struct sock *sk);
> +
> +#endif /* __INET_SMC */
next prev parent reply other threads:[~2024-05-29 19:55 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 3:59 [PATCH net-next v4 0/3] Introduce IPPROTO_SMC D. Wythe
2024-05-29 3:59 ` [PATCH net-next v4 1/3] net/smc: refactoring initialization of smc sock D. Wythe
2024-05-29 6:14 ` Tony Lu
2024-05-29 3:59 ` [PATCH net-next v4 2/3] net/smc: expose smc proto operations D. Wythe
2024-05-29 17:57 ` Zhu Yanjun
2024-05-30 2:33 ` D. Wythe
2024-05-29 3:59 ` [PATCH net-next v4 3/3] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-29 11:12 ` Dust Li
2024-05-30 3:11 ` D. Wythe
2024-05-29 11:58 ` Wenjia Zhang
2024-05-30 2:51 ` D. Wythe
2024-05-29 19:55 ` Zhu Yanjun [this message]
2024-05-30 2:35 ` D. Wythe
2024-06-01 13:06 ` Simon Horman
2024-06-03 2:57 ` D. Wythe
2024-06-03 7:47 ` Simon Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f7ad8072-a173-4d75-bbdd-775f31f6826f@linux.dev \
--to=yanjun.zhu@linux.dev \
--cc=alibuda@linux.alibaba.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=guwen@linux.alibaba.com \
--cc=jaka@linux.ibm.com \
--cc=kgraul@linux.ibm.com \
--cc=kuba@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=tonylu@linux.alibaba.com \
--cc=wenjia@linux.ibm.com \
--cc=wintera@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.