From: Stefan Metzmacher <metze@samba.org>
To: Namjae Jeon <linkinjeon@kernel.org>
Cc: Steve French <smfrench@gmail.com>,
CIFS <linux-cifs@vger.kernel.org>, Tom Talpey <tom@talpey.com>,
Long Li <longli@microsoft.com>, Hyunchul Lee <hyc.lee@gmail.com>,
Meetakshi Setiya <meetakshisetiyaoss@gmail.com>,
samba-technical <samba-technical@lists.samba.org>
Subject: Re: [PATCH v2 01/12] smb: smbdirect: add smbdirect_pdu.h with protocol definitions
Date: Tue, 3 Jun 2025 11:06:09 +0200 [thread overview]
Message-ID: <995fde08-3fed-47a0-b984-876f426e9076@samba.org> (raw)
In-Reply-To: <CAKYAXd9h8LpaOX9JA5Mdduw1CQ4RnYFgkU9dXf6NnNbTFYFJ8g@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 8637 bytes --]
Am 03.06.25 um 08:20 schrieb Namjae Jeon:
> On Tue, Jun 3, 2025 at 7:03 AM Stefan Metzmacher <metze@samba.org> wrote:
>>
>> Am 02.06.25 um 04:19 schrieb Namjae Jeon:
>>> On Mon, Jun 2, 2025 at 10:57 AM Steve French <smfrench@gmail.com> wrote:
>>>>
>>>>> Can you explain why he has split it into smbdirect_socket.h?
>>>>
>>>> The three header names seem plausible, but would be useful to have
>>>> Metze's clarification/explanation:
>>>> - the "protocol" related header info for smbdirect goes in
>>>> smb/common/smbdirect/smbdirect_pdu.h (we use similar name smb2pdu.h
>>>> for the smb2/smb3 protocol related wire definitions)
>>>> - smbdirect.h for internal smbdirect structure definitions
>>>> - smbdirect_socket.h for things related to exporting it as a socket
>>>> (since one of the goals is to make smbdirect useable by Samba
>>>> userspace tools)
>>> There is no need to do things in advance that are not yet concrete and
>>> may change later.
>>
>> The current idea is to merge transport_tcp and transport_rdma into
>> transport_sock, see
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/server/transport_sock.c;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf
I've attached it to this mail, but remember this is just a demo; I need to redo it
based on the current transport_tcp.c and transport_rdma.c.
>> Which uses this interface:
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/common/smbdirect/smbdirect.h;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf
> Hm.. I can not access these links.. Is it just me?
Here's a trimmed down version with only the in kernel related stuff of smbdirect.h:
/*
* userspace:
*
* int socket(int family, int type, int protocol);
*
* kernel:
*
* int sock_create_kern(struct net *net,
* int family, int type, int protocol,
* struct socket **res);
*
* family: PF_SMBDIRECT
* type: SOCK_STREAM, together with SOCK_CLOEXEC SOCK_NONBLOCK
* protocol: address family + protocol flavor
*
* address family: AF_INET or AF_INET6
* protocol flavor: SMBDIRECT_FLAVOR_AUTO
*/
/*
 * Bit layout of the "protocol" argument used with PF_SMBDIRECT sockets.
 * The address family, the protocol flavor and additional flags each
 * occupy their own bit range, selected by the masks below.
 */
/*
 * AF_INET, AF_INET6 and AF_IB are below 0xff
 */
#define SMBDIRECT_FAMILY_MASK 0x000000ff
/*
 * For now just auto use iWarp and ROCE
 */
#define SMBDIRECT_FLAVOR_MASK 0x0000f000
#define SMBDIRECT_FLAVOR_AUTO 0x00000000
/*
 * Additional hints/flags
 */
#define SMBDIRECT_FLAGS_MASK 0x7fff0000
/* No flag bits are defined as valid yet, so this evaluates to 0. */
#define SMBDIRECT_FLAGS_VALID ( \
0)
/*
 * All bits that belong to none of the three masks above.
 * NOTE(review): presumably used to reject protocol values that carry
 * unknown bits - confirm against the socket creation code.
 */
#define SMBDIRECT_PROTOCOL_INVALID_MASK ~((unsigned)( \
SMBDIRECT_FAMILY_MASK | \
SMBDIRECT_FLAVOR_MASK | \
SMBDIRECT_FLAGS_MASK | \
0))
/*
 * Command numbers, grouped by hundreds: 100 for connection parameters,
 * 200-202 for local buffer (un)registration and 300-302 for operations on
 * remote buffers. __SMBDIRECT_BUFFER_REMOTE_INVALIDATE is the number fed
 * into _IOW() to build SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE.
 */
enum {
__SMBDIRECT_CONNECTION_PARAMETERS = 100,
__SMBDIRECT_BUFFER_UNREGISTER = 200,
__SMBDIRECT_BUFFER_REGISTER_READ = 201,
__SMBDIRECT_BUFFER_REGISTER_WRITE = 202,
__SMBDIRECT_BUFFER_REMOTE_INVALIDATE = 300,
__SMBDIRECT_BUFFER_REMOTE_READ = 301,
__SMBDIRECT_BUFFER_REMOTE_WRITE = 302,
};
/*
 * Parameters of an smbdirect connection, filled in by
 * smbdirect_kern_connection_get_parameters().
 * NOTE(review): the fields presumably mirror the credit and size limits
 * negotiated for the connection; units are not visible here - confirm.
 */
struct smbdirect_connection_parameters {
__u16 recv_credit_max;
__u16 send_credit_target;
__u32 max_send_size;
__u32 max_fragmented_send_size;
__u32 max_recv_size;
__u32 max_fragmented_recv_size;
__u32 max_read_write_size;
__u32 keepalive_interval;
} __packed;
/* In-kernel accessor that stores the parameters of @sock into @params. */
int smbdirect_kern_connection_get_parameters(struct socket *sock,
struct smbdirect_connection_parameters *params);
/*
 * One buffer descriptor: 64-bit offset, 32-bit token and 32-bit length,
 * all little-endian, packed to 16 bytes.
 */
struct smbdirect_buffer_descriptor_v1 {
__le64 offset;
__le32 token;
__le32 length;
} __packed;
/*
 * Variable-length list of v1 buffer descriptors: a small header (flags,
 * capacity, used count) followed by a flexible array of descriptors.
 */
struct smbdirect_buffer_descriptors_v1 {
/* Bits allowed in @flags; SMBDIRECT_BUFFER_DEFINED_FLAGS is their union. */
#define SMBDIRECT_BUFFER_ALLOW_READ 0x00000001
#define SMBDIRECT_BUFFER_ALLOW_WRITE 0x00000002
#define SMBDIRECT_BUFFER_INVALIDATE 0x00000004
#define SMBDIRECT_BUFFER_REMOTE 0x00000008
#define SMBDIRECT_BUFFER_DEFINED_FLAGS ( \
SMBDIRECT_BUFFER_ALLOW_READ | \
SMBDIRECT_BUFFER_ALLOW_WRITE | \
SMBDIRECT_BUFFER_INVALIDATE | \
SMBDIRECT_BUFFER_REMOTE | \
0)
__u32 flags;
/*
 * Upper bound for the number of descriptors.
 * NOTE(review): with a 16 byte descriptor, UINT16_MAX / 16 is 4095 in
 * integer arithmetic (4096 * 16 = 65536) - confirm that 4096 is intended.
 */
#define _SMBDIRECT_BUFFER_COUNT_MAX 4096
/* number of entries @array has room for */
__u16 max_count;
/* NOTE(review): presumably the number of entries currently in use - confirm */
__u16 count;
struct smbdirect_buffer_descriptor_v1 array[];
} __packed;
/*
 * Fixed-size variant of struct smbdirect_buffer_descriptors_v1: the header
 * is immediately followed by storage for SMBDIRECT_BUFFER_COUNT_FAST
 * descriptors, so the flexible array in @hdr is backed by @__fast_array.
 * SMBDIRECT_BUFFER_COUNT_FAST defaults to 32 and can be overridden by
 * defining it before this point.
 */
struct smbdirect_buffer_descriptors_v1_fast {
struct smbdirect_buffer_descriptors_v1 hdr;
#ifndef SMBDIRECT_BUFFER_COUNT_FAST
#define SMBDIRECT_BUFFER_COUNT_FAST 32
#endif
struct smbdirect_buffer_descriptor_v1 __fast_array[SMBDIRECT_BUFFER_COUNT_FAST];
} __packed;
/*
 * Initializer for the _fast variant: records the capacity in hdr.max_count
 * and leaves every other field zero.
 */
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_FAST_INIT { \
.hdr = { \
.max_count = SMBDIRECT_BUFFER_COUNT_FAST, \
}, \
}
/*
 * Size in bytes of a struct smbdirect_buffer_descriptors_v1 header followed
 * by @count descriptors. @count is parenthesized so that an expression
 * argument such as "a + b" is multiplied as a whole.
 */
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_SIZE(count) \
	(sizeof(struct smbdirect_buffer_descriptors_v1) + \
	 sizeof(struct smbdirect_buffer_descriptor_v1) * (count))
/*
 * Backing storage for one control message with up to 24 bytes of payload
 * (CMSG_SPACE(24) includes the cmsg header and alignment padding).
 */
struct smbdirect_cmsg_buffer {
uint8_t msg_control[CMSG_SPACE(24)];
};
/*
 * Build a single SOL_SMBDIRECT control message in @cbuffer and attach it
 * to @msg.
 *
 * @msg:        message header that receives msg_control/msg_controllen
 * @cbuffer:    storage for the control message; zeroed before use
 * @cmsg_type:  value stored in cmsg_type
 * @payload:    data copied into the control message
 * @payloadlen: number of bytes to copy from @payload
 */
static inline void __smbdirect_cmsg_prepare(struct msghdr *msg,
struct smbdirect_cmsg_buffer *cbuffer,
int cmsg_type,
const void *payload,
size_t payloadlen)
{
size_t cmsg_space = CMSG_SPACE(payloadlen);
size_t cmsg_len = CMSG_LEN(payloadlen);
struct cmsghdr *cmsg = NULL;
void *dataptr = NULL;
/*
 * NOTE(review): cmsg_space is derived from a function parameter, so the
 * compile-time check can only work when the call is inlined with a
 * constant payloadlen - confirm. The BUG_ON repeats the check at runtime.
 */
BUILD_BUG_ON(cmsg_space > sizeof(cbuffer->msg_control));
BUG_ON(cmsg_space > sizeof(cbuffer->msg_control));
memset(cbuffer, 0, sizeof(*cbuffer));
msg->msg_control = cbuffer->msg_control;
/* set before CMSG_FIRSTHDR(), which is evaluated on @msg right after */
msg->msg_controllen = cmsg_space;
cmsg = CMSG_FIRSTHDR(msg);
cmsg->cmsg_level = SOL_SMBDIRECT;
cmsg->cmsg_type = cmsg_type;
cmsg->cmsg_len = cmsg_len;
dataptr = CMSG_DATA(cmsg);
memcpy(dataptr, payload, payloadlen);
/* the final control length is the unpadded CMSG_LEN() value */
msg->msg_controllen = cmsg->cmsg_len;
}
/* Payload of the remote-invalidate control message: a single descriptor. */
struct smbdirect_buffer_remote_invalidate_args {
struct smbdirect_buffer_descriptor_v1 first_desc;
} __packed;
/*
 * cmsg_type value for the remote-invalidate control message, built with
 * _IOW() from 'S', the command number and the payload type.
 */
#define SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE \
_IOW('S', __SMBDIRECT_BUFFER_REMOTE_INVALIDATE, \
struct smbdirect_buffer_remote_invalidate_args)
static inline void smbdirect_buffer_remote_invalidate_cmsg_prepare(struct msghdr *msg,
struct smbdirect_cmsg_buffer *cbuffer,
const struct smbdirect_buffer_descriptor_v1 *desc)
{
struct smbdirect_buffer_remote_invalidate_args args = {
.first_desc = *desc,
};
__smbdirect_cmsg_prepare(msg, cbuffer,
SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE,
&args, sizeof(args));
}
/*
 * In-kernel RDMA transfer entry points. Both take the socket, the remote
 * buffer descriptors, a byte count and an iov_iter describing local memory.
 * NOTE(review): the names suggest writev pushes local data into the remote
 * buffers and readv pulls remote data into local memory; the return value
 * is negative on error (callers in the demo test "ret < 0") - confirm the
 * meaning of non-negative results.
 */
ssize_t smbdirect_kern_rdma_v1_writev(struct socket *sock,
const struct smbdirect_buffer_descriptors_v1 *remote,
size_t size,
struct iov_iter *iter);
ssize_t smbdirect_kern_rdma_v1_readv(struct socket *sock,
const struct smbdirect_buffer_descriptors_v1 *remote,
size_t size,
struct iov_iter *iter);
>> But note that is just the direction were it goes, that current code has a lot of resolved merge conflicts,
>> which may not work at all currently.
>>
>> Instead of putting my current code I try to take the existing client and server
>> code and merge it, so that we don't have a flag day commit that switches to
>> completely new code. Instead I try to do tiny steps in that direction
>> and may end with an interface that is similar but might be a bit different in
>> some parts.
> Okay.
>
>>
>>> He can just put these changes in his own queue and work on them.
>>> I am pointing out why he is trying to put unfinished things in the public queue.
>>
>> Because I want to base the next steps on something that is already accepted.
>>
>> I really don't want to work on it for weeks and then some review will void
>> that work completely and I can start again.
> It was too tiny a step and unclear.
> i.e. the patch description should not have comments like "It will be
> used in the next commits..."
What should it say if something is introduced but not yet used?
I mean I could explain in more detail how it will be used in
the next commits?
>>> If You want to apply it, Please do it only on cifs.ko. When it is
>>> properly implemented, I want to apply it to ksmbd.
>>
>> I can keep the ksmbd patches rebased on top and send them again
>> each time to get more feedback.
>>
>> Would that work for you?
> Okay, Please re-include the ksmbd patches in the next patch-set and I
> will check them.
>>
>> The key for me is discuss patches first and have them reviewed early
>> so that the following work rely on. Any the tiny steps should
>> make it possible to do easy review and make it possible to test each
>> tiny step.
> Okay. I agreed. But It should not be too tiny.
> As I said above, please don't send it in pieces that I can understand
> by looking at the next commits.
I'll try to keep them tiny; they can always be squashed later,
but splitting them again would be a pain.
You can apply them and do a diff over multiple patches
and tell me which commits I should squash.
Thanks!
metze
[-- Attachment #2: transport_sock.c --]
[-- Type: text/x-csrc, Size: 20708 bytes --]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
* Copyright (C) 2018 Samsung Electronics Co., Ltd.
* Copyright (C) 2021 Stefan Metzmacher <metze@samba.org>
*/
#include <linux/freezer.h>
#include "smb_common.h"
#include "server.h"
#include "auth.h"
#include "connection.h"
#include "transport_sock.h"
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include "../common/smbdirect/smbdirect.h"
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
/* Values stored in ksmbd_sock_interface.state */
#define IFACE_STATE_DOWN BIT(0)
#define IFACE_STATE_CONFIGURED BIT(1)
/*
 * Count of accepted connections; it is only incremented/decremented while
 * server_conf.max_connections is non-zero.
 */
static atomic_t active_num_conn;
/* One listening endpoint (a network interface or the smbdirect listener). */
struct ksmbd_sock_interface {
/* listener thread started by ksmbd_sock_run_kthread() */
struct task_struct *ksmbd_kthread;
/* listening socket; NULL while the interface is not configured */
struct socket *ksmbd_socket;
/* link in iface_list */
struct list_head entry;
/* kstrdup()'ed interface name, freed together with the interface */
char *name;
/* held around kernel_accept() and around destroying ksmbd_socket */
struct mutex sock_release_lock;
/* IFACE_STATE_DOWN or IFACE_STATE_CONFIGURED */
int state;
/* "tcp" or "smbdirect", used in the listener thread name */
const char *type;
/* transport operations handed to every connection accepted here */
const struct ksmbd_transport_ops *ops;
};
/* all known interfaces */
static LIST_HEAD(iface_list);
/* non-zero: NETDEV_UP for an unknown device adds a new TCP interface */
static int bind_additional_ifaces;
/* Per-connection transport state wrapping the generic ksmbd_transport. */
struct ksmbd_sock_transport {
/* generic part; KSMBD_SOCK_TRANS() maps it back to this structure */
struct ksmbd_transport transport;
/* connected client socket */
struct socket *sock;
/* cached kvec array reused by ksmbd_sock_readv(), see get_conn_iovec() */
struct kvec *iov;
/* number of entries @iov has room for */
unsigned int nr_iov;
};
/* Forward declarations; the definitions follow further down in this file. */
static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
static const struct ksmbd_transport_ops ksmbd_smbdirect_transport_ops;
static int create_smbdirect_socket(struct ksmbd_sock_interface *iface);
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
static void ksmbd_sock_stop_kthread(struct task_struct *kthread);
static struct ksmbd_sock_interface *ksmbd_alloc_iface(const char *ifname,
const struct ksmbd_transport_ops *ops);
/* generic transport embedded in a struct ksmbd_sock_transport */
#define KSMBD_TRANS(t) (&(t)->transport)
/* struct ksmbd_sock_transport that embeds the generic transport @t */
#define KSMBD_SOCK_TRANS(t) ((struct ksmbd_sock_transport *)container_of(t, \
struct ksmbd_sock_transport, transport))
/* Turn on the TCP no-delay option for @sock via tcp_sock_set_nodelay(). */
static inline void ksmbd_tcp_nodelay(struct socket *sock)
{
	struct sock *sk = sock->sk;

	tcp_sock_set_nodelay(sk);
}
/* Allow address reuse on @sock via sock_set_reuseaddr(). */
static inline void ksmbd_tcp_reuseaddr(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_set_reuseaddr(sk);
}
/*
 * Set the receive timeout of @sock.
 *
 * @secs is converted to jiffies. A value of 0, or one too large to be
 * converted, selects MAX_SCHEDULE_TIMEOUT instead.
 */
static inline void ksmbd_tcp_rcv_timeout(struct socket *sock, s64 secs)
{
	struct sock *sk = sock->sk;
	bool finite = secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1;

	lock_sock(sk);
	sk->sk_rcvtimeo = finite ? secs * HZ : MAX_SCHEDULE_TIMEOUT;
	release_sock(sk);
}
/* Set the send timeout of @sock to @secs via sock_set_sndtimeo(). */
static inline void ksmbd_tcp_snd_timeout(struct socket *sock, s64 secs)
{
	struct sock *sk = sock->sk;

	sock_set_sndtimeo(sk, secs);
}
/*
 * Allocate a transport plus its ksmbd connection for an accepted socket.
 *
 * @client_sk: accepted client socket, stored in the transport
 * @iface:     interface the connection arrived on; supplies the ops table
 *
 * Return: the new transport, or NULL when either allocation fails. On
 * failure @client_sk is left untouched.
 */
static struct ksmbd_sock_transport *ksmbd_sock_transport_alloc(struct socket *client_sk,
		struct ksmbd_sock_interface *iface)
{
	struct ksmbd_sock_transport *trans = kzalloc(sizeof(*trans), GFP_KERNEL);
	struct ksmbd_conn *conn;

	if (!trans)
		return NULL;

	conn = ksmbd_conn_alloc();
	if (!conn) {
		kfree(trans);
		return NULL;
	}

	/* link connection and transport to each other */
	trans->sock = client_sk;
	trans->transport.conn = conn;
	trans->transport.ops = iface->ops;
	conn->transport = &trans->transport;

	return trans;
}
/*
 * Release everything owned by the socket transport behind @kt: the client
 * socket, the cached kvec array and the transport structure itself.
 */
void ksmbd_free_transport(struct ksmbd_transport *kt)
{
	struct ksmbd_sock_transport *trans = KSMBD_SOCK_TRANS(kt);
	struct kvec *cached_iov = trans->iov;

	sock_release(trans->sock);
	kfree(cached_iov);
	kfree(trans);
}
/*
 * Shut down both directions of the client socket of @t and hand the
 * connection to ksmbd_conn_free().
 */
static void ksmbd_sock_transport_free(struct ksmbd_sock_transport *t)
{
	struct ksmbd_conn *conn = t->transport.conn;

	kernel_sock_shutdown(t->sock, SHUT_RDWR);
	ksmbd_conn_free(conn);
}
/**
 * kvec_array_init() - build the kvec array for the still unread part
 * @new: destination array that receives the remaining segments
 * @iov: base IO vector
 * @nr_segs: number of segments in base iov
 * @bytes: number of bytes already consumed from @iov
 *
 * Skips @bytes bytes (and any zero-length segments) in @iov, copies the
 * remaining segments to @new and advances the first copied segment past
 * the part that was already consumed.
 *
 * Return: Number of IO segments copied to @new
 */
static unsigned int kvec_array_init(struct kvec *new, struct kvec *iov,
unsigned int nr_segs, size_t bytes)
{
/* offset into the current segment */
size_t base = 0;
while (bytes || !iov->iov_len) {
/* NOTE(review): the size_t result of min() is stored in an int - confirm the range */
int copy = min(bytes, iov->iov_len);
bytes -= copy;
base += copy;
/* current segment fully consumed: step to the next one */
if (iov->iov_len == base) {
iov++;
nr_segs--;
base = 0;
}
}
memcpy(new, iov, sizeof(*iov) * nr_segs);
/* skip the consumed part of the first remaining segment */
new->iov_base += base;
new->iov_len -= base;
return nr_segs;
}
/**
* get_conn_iovec() - get connection iovec for reading from socket
* @t: SOCK transport instance
* @nr_segs: number of segments in iov
*
* Return: return existing or newly allocate iovec
*/
static struct kvec *get_conn_iovec(struct ksmbd_sock_transport *t, unsigned int nr_segs)
{
struct kvec *new_iov;
if (t->iov && nr_segs <= t->nr_iov)
return t->iov;
/* not big enough -- allocate a new one and release the old */
new_iov = kmalloc_array(nr_segs, sizeof(*new_iov), GFP_KERNEL);
if (new_iov) {
kfree(t->iov);
t->iov = new_iov;
t->nr_iov = nr_segs;
}
return new_iov;
}
/*
 * Extract the port number, in host byte order, from an IPv4 or IPv6
 * socket address. Any other address family yields 0.
 */
static unsigned short ksmbd_tcp_get_port(const struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *in4 = (const struct sockaddr_in *)sa;

		return ntohs(in4->sin_port);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)sa;

		return ntohs(in6->sin6_port);
	}

	return 0;
}
/**
 * ksmbd_sock_new_connection() - set up a freshly accepted connection
 * @client_sk: socket associated with new connection
 * @iface: ksmbd_sock_interface of the listening socket
 *
 * Allocates the transport/connection pair for @client_sk and starts a
 * connection handler thread (named after the peer port) that serves the
 * incoming SMB requests.
 *
 * This function consumes @client_sk: if no transport could be allocated,
 * the socket is released here, because nothing else holds a reference to
 * it and the caller does not release it either.
 *
 * Return: 0 on success, otherwise error
 */
static int ksmbd_sock_new_connection(struct socket *client_sk,
				     struct ksmbd_sock_interface *iface)
{
	struct ksmbd_sock_transport *t;
	struct task_struct *handler;
	struct sockaddr *csin;
	int rc;

	client_sk->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
	client_sk->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;

	t = ksmbd_sock_transport_alloc(client_sk, iface);
	if (!t) {
		/* no transport took over the socket, so drop it here */
		sock_release(client_sk);
		return -ENOMEM;
	}

	csin = KSMBD_SOCK_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
	if (kernel_getpeername(client_sk, csin) < 0) {
		pr_err("client ip resolution failed\n");
		rc = -EINVAL;
		goto out_error;
	}

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn,
			      "ksmbd:%u",
			      ksmbd_tcp_get_port(csin));
	if (IS_ERR(handler)) {
		pr_err("cannot start conn thread\n");
		rc = PTR_ERR(handler);
		goto out_error;
	}

	return 0;

out_error:
	/* single teardown path for every failure after the allocation */
	ksmbd_sock_transport_free(t);
	return rc;
}
/**
* ksmbd_kthread_fn() - listen to new SMB connections and callback server
* @p: arguments to forker thread
*
* Return: 0 on success, error number otherwise
*/
static int ksmbd_kthread_fn(void *p)
{
struct socket *client_sk = NULL;
struct ksmbd_sock_interface *iface = (struct ksmbd_sock_interface *)p;
int ret;
while (!kthread_should_stop()) {
mutex_lock(&iface->sock_release_lock);
if (!iface->ksmbd_socket) {
mutex_unlock(&iface->sock_release_lock);
break;
}
ret = kernel_accept(iface->ksmbd_socket, &client_sk,
SOCK_NONBLOCK);
mutex_unlock(&iface->sock_release_lock);
if (ret) {
if (ret == -EAGAIN)
/* check for new connections every 100 msecs */
schedule_timeout_interruptible(HZ / 10);
continue;
}
if (server_conf.max_connections &&
atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
atomic_read(&active_num_conn));
atomic_dec(&active_num_conn);
sock_release(client_sk);
continue;
}
ksmbd_debug(CONN, "connect success: accepted new connection\n");
ksmbd_sock_new_connection(client_sk, iface);
}
ksmbd_debug(CONN, "releasing socket\n");
return 0;
}
/**
 * ksmbd_sock_run_kthread() - start the listener thread of an interface
 * @iface: pointer to struct ksmbd_sock_interface
 *
 * Starts ksmbd_kthread_fn() in a kernel thread named
 * "ksmbd-<type>-<name>" and remembers the thread in @iface.
 *
 * Return: 0 on success or error number
 */
static int ksmbd_sock_run_kthread(struct ksmbd_sock_interface *iface)
{
	struct task_struct *listener;

	listener = kthread_run(ksmbd_kthread_fn, (void *)iface, "ksmbd-%s-%s",
			       iface->type, iface->name);
	if (IS_ERR(listener))
		return PTR_ERR(listener);

	iface->ksmbd_kthread = listener;
	return 0;
}
/**
* ksmbd_sock_readv() - read data from socket in given iovec
* @t: SOCK transport instance
* @iov_orig: base IO vector
* @nr_segs: number of segments in base iov
* @to_read: number of bytes to read from socket
* @max_retries: maximum retry count
*
* Return: on success return number of bytes read from socket,
* otherwise return error number
*/
static int ksmbd_sock_readv(struct ksmbd_sock_transport *t, struct kvec *iov_orig,
unsigned int nr_segs, unsigned int to_read,
int max_retries)
{
int length = 0;
int total_read;
unsigned int segs;
struct msghdr ksmbd_msg;
struct kvec *iov;
struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;
iov = get_conn_iovec(t, nr_segs);
if (!iov)
return -ENOMEM;
ksmbd_msg.msg_control = NULL;
ksmbd_msg.msg_controllen = 0;
for (total_read = 0; to_read; total_read += length, to_read -= length) {
try_to_freeze();
if (!ksmbd_conn_alive(conn)) {
total_read = -ESHUTDOWN;
break;
}
segs = kvec_array_init(iov, iov_orig, nr_segs, total_read);
length = kernel_recvmsg(t->sock, &ksmbd_msg,
iov, segs, to_read, 0);
if (length == -EINTR) {
total_read = -ESHUTDOWN;
break;
} else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
total_read = -EAGAIN;
break;
} else if (length == -ERESTARTSYS || length == -EAGAIN) {
/*
* If max_retries is negative, Allow unlimited
* retries to keep connection with inactive sessions.
*/
if (max_retries == 0) {
total_read = length;
break;
} else if (max_retries > 0) {
max_retries--;
}
usleep_range(1000, 2000);
length = 0;
continue;
} else if (length <= 0) {
total_read = length;
break;
}
}
return total_read;
}
/**
* ksmbd_tcp_read() - read data from socket in given buffer
* @t: SOCK transport instance
* @buf: buffer to store read data from socket
* @to_read: number of bytes to read from socket
* @max_retries: number of retries if reading from socket fails
*
* Return: on success return number of bytes read from socket,
* otherwise return error number
*/
static int ksmbd_sock_read(struct ksmbd_transport *t, char *buf,
unsigned int to_read, int max_retries)
{
struct kvec iov;
iov.iov_base = buf;
iov.iov_len = to_read;
return ksmbd_sock_readv(KSMBD_SOCK_TRANS(t), &iov, 1, to_read, max_retries);
}
/*
 * Send @size bytes described by @iov/@nvecs on the socket of @t, with
 * MSG_NOSIGNAL set.
 *
 * With CONFIG_SMB_SERVER_SMBDIRECT and @need_invalidate set, a
 * remote-invalidate control message carrying @remote_key as the descriptor
 * token is attached to the message.
 * NOTE(review): the token field is declared __le32 while @remote_key is
 * assigned without an endianness conversion - confirm.
 *
 * Return: the result of kernel_sendmsg()
 */
static int ksmbd_sock_writev(struct ksmbd_transport *t, struct kvec *iov,
			     int nvecs, int size, bool need_invalidate,
			     unsigned int remote_key)
{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	/* must stay alive until kernel_sendmsg() has returned */
	struct smbdirect_cmsg_buffer cbuffer;

	if (need_invalidate) {
		struct smbdirect_buffer_descriptor_v1 desc = {
			.token = remote_key,
		};

		smbdirect_buffer_remote_invalidate_cmsg_prepare(&msg, &cbuffer, &desc);
	}
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

	return kernel_sendmsg(sock, &msg, iov, nvecs, size);
}
/*
 * Disconnect callback: tear down the socket transport behind @t and, when
 * a connection limit is configured, give back its slot in active_num_conn.
 */
static void ksmbd_sock_disconnect(struct ksmbd_transport *t)
{
	struct ksmbd_sock_transport *trans = KSMBD_SOCK_TRANS(t);

	ksmbd_sock_transport_free(trans);

	if (!server_conf.max_connections)
		return;

	atomic_dec(&active_num_conn);
}
static void ksmbd_destroy_socket(struct socket *ksmbd_socket)
{
int ret;
if (!ksmbd_socket)
return;
/* set zero to timeout */
ksmbd_tcp_rcv_timeout(ksmbd_socket, 0);
ksmbd_tcp_snd_timeout(ksmbd_socket, 0);
ret = kernel_sock_shutdown(ksmbd_socket, SHUT_RDWR);
if (ret)
pr_err("Failed to shutdown socket: %d\n", ret);
sock_release(ksmbd_socket);
}
/**
 * create_socket - create and start the TCP listening socket of an interface
 * @iface: interface to bind the created socket to
 *
 * Creates an IPv6 socket (falling back to IPv4 if that fails), binds it to
 * the device named by @iface and to server_conf.tcp_port, starts listening
 * and launches the listener thread. On success the interface is marked
 * IFACE_STATE_CONFIGURED; on failure iface->ksmbd_socket is reset to NULL.
 *
 * Return: 0 on success, error number otherwise
 */
static int create_socket(struct ksmbd_sock_interface *iface)
{
int ret;
/* NOTE(review): only family, address and port of sin6/sin are set; the remaining fields stay uninitialized - confirm this is harmless for bind */
struct sockaddr_in6 sin6;
struct sockaddr_in sin;
struct socket *ksmbd_socket;
bool ipv4 = false;
ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
if (ret) {
/* no message when IPv6 is simply not supported */
if (ret != -EAFNOSUPPORT)
pr_err("Can't create socket for ipv6, fallback to ipv4: %d\n", ret);
ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
&ksmbd_socket);
if (ret) {
pr_err("Can't create socket for ipv4: %d\n", ret);
/* there is no socket to destroy yet */
goto out_clear;
}
sin.sin_family = PF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = htons(server_conf.tcp_port);
ipv4 = true;
} else {
sin6.sin6_family = PF_INET6;
sin6.sin6_addr = in6addr_any;
sin6.sin6_port = htons(server_conf.tcp_port);
/* clear the ipv6-only flag of the socket */
lock_sock(ksmbd_socket->sk);
ksmbd_socket->sk->sk_ipv6only = false;
release_sock(ksmbd_socket->sk);
}
ksmbd_tcp_nodelay(ksmbd_socket);
ksmbd_tcp_reuseaddr(ksmbd_socket);
ret = sock_setsockopt(ksmbd_socket,
SOL_SOCKET,
SO_BINDTODEVICE,
KERNEL_SOCKPTR(iface->name),
strlen(iface->name));
/* -ENODEV is tolerated here; any other error aborts the setup */
if (ret != -ENODEV && ret < 0) {
pr_err("Failed to set SO_BINDTODEVICE: %d\n", ret);
goto out_error;
}
if (ipv4)
ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
sizeof(sin));
else
ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin6,
sizeof(sin6));
if (ret) {
pr_err("Failed to bind socket: %d\n", ret);
goto out_error;
}
ksmbd_socket->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
ksmbd_socket->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;
ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
if (ret) {
pr_err("Port listen() error: %d\n", ret);
goto out_error;
}
/* publish the socket before the listener thread starts using it */
iface->ksmbd_socket = ksmbd_socket;
ret = ksmbd_sock_run_kthread(iface);
if (ret) {
pr_err("Can't start ksmbd main kthread: %d\n", ret);
goto out_error;
}
iface->state = IFACE_STATE_CONFIGURED;
return 0;
out_error:
ksmbd_destroy_socket(ksmbd_socket);
out_clear:
iface->ksmbd_socket = NULL;
return ret;
}
static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct ksmbd_sock_interface *iface;
int ret, found = 0;
switch (event) {
case NETDEV_UP:
if (netif_is_bridge_port(netdev))
return NOTIFY_OK;
list_for_each_entry(iface, &iface_list, entry) {
if (!strcmp(iface->name, netdev->name)) {
found = 1;
if (iface->state != IFACE_STATE_DOWN)
break;
ret = create_socket(iface);
if (ret)
return NOTIFY_OK;
break;
}
}
if (!found && bind_additional_ifaces) {
iface = ksmbd_alloc_iface(netdev->name,
&ksmbd_tcp_transport_ops);
if (!iface)
return NOTIFY_OK;
ret = create_socket(iface);
if (ret)
break;
}
break;
case NETDEV_DOWN:
list_for_each_entry(iface, &iface_list, entry) {
if (!strcmp(iface->name, netdev->name) &&
iface->state == IFACE_STATE_CONFIGURED) {
ksmbd_sock_stop_kthread(iface->ksmbd_kthread);
iface->ksmbd_kthread = NULL;
mutex_lock(&iface->sock_release_lock);
ksmbd_destroy_socket(iface->ksmbd_socket);
iface->ksmbd_socket = NULL;
mutex_unlock(&iface->sock_release_lock);
iface->state = IFACE_STATE_DOWN;
break;
}
}
break;
}
return NOTIFY_DONE;
}
/* Notifier block that routes netdevice events to ksmbd_netdev_event(). */
static struct notifier_block ksmbd_netdev_notifier = {
.notifier_call = ksmbd_netdev_event,
};
/*
 * Register the netdevice notifier that brings listening sockets up and
 * down together with their network devices.
 *
 * Return: 0 on success, otherwise the error reported by
 * register_netdevice_notifier()
 */
int ksmbd_sock_init(void)
{
	return register_netdevice_notifier(&ksmbd_netdev_notifier);
}
/*
 * Stop a listener thread. A NULL @kthread is ignored; a non-zero result
 * of kthread_stop() is logged.
 */
static void ksmbd_sock_stop_kthread(struct task_struct *kthread)
{
	if (kthread && kthread_stop(kthread))
		pr_err("failed to stop forker thread\n");
}
/*
 * Unregister the netdevice notifier and free all interfaces.
 *
 * An interface that is still CONFIGURED at this point (for example the
 * "smbdirect-sock" listener, whose name matches no network device and
 * which therefore never sees a NETDEV_DOWN event) is shut down first, in
 * the same order as in the NETDEV_DOWN handling: stop the listener
 * thread, then destroy the socket under sock_release_lock. Without this
 * the thread would keep running on freed memory.
 */
void ksmbd_sock_destroy(void)
{
	struct ksmbd_sock_interface *iface, *tmp;

	unregister_netdevice_notifier(&ksmbd_netdev_notifier);

	list_for_each_entry_safe(iface, tmp, &iface_list, entry) {
		list_del(&iface->entry);

		if (iface->state == IFACE_STATE_CONFIGURED) {
			ksmbd_sock_stop_kthread(iface->ksmbd_kthread);
			iface->ksmbd_kthread = NULL;

			mutex_lock(&iface->sock_release_lock);
			ksmbd_destroy_socket(iface->ksmbd_socket);
			iface->ksmbd_socket = NULL;
			mutex_unlock(&iface->sock_release_lock);

			iface->state = IFACE_STATE_DOWN;
		}

		kfree(iface->name);
		kfree(iface);
	}
}
/*
 * Allocate an interface named @ifname that uses the transport operations
 * @ops, and add it to iface_list in state IFACE_STATE_DOWN.
 *
 * @ifname is duplicated. @ops must be one of the transport_ops tables of
 * this file; it also selects the type string ("tcp" or "smbdirect").
 *
 * The interface is completely initialized, including its mutex, before it
 * is linked into iface_list, so it is never reachable through the list in
 * a half-initialized state.
 *
 * Return: the new interface, or NULL on allocation failure or for an
 * unknown @ops
 */
static struct ksmbd_sock_interface *ksmbd_alloc_iface(const char *ifname,
		const struct ksmbd_transport_ops *ops)
{
	struct ksmbd_sock_interface *iface;

	iface = kzalloc(sizeof(*iface), GFP_KERNEL);
	if (!iface)
		return NULL;

	if (ops == &ksmbd_tcp_transport_ops) {
		iface->type = "tcp";
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	} else if (ops == &ksmbd_smbdirect_transport_ops) {
		iface->type = "smbdirect";
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
	} else {
		WARN_ON_ONCE(1);
		kfree(iface);
		return NULL;
	}
	iface->ops = ops;

	iface->name = kstrdup(ifname, GFP_KERNEL);
	if (!iface->name) {
		kfree(iface);
		return NULL;
	}

	iface->state = IFACE_STATE_DOWN;
	mutex_init(&iface->sock_release_lock);
	list_add(&iface->entry, &iface_list);

	return iface;
}
/*
 * Populate iface_list.
 *
 * @ifc_list:    sequence of NUL-terminated interface names
 * @ifc_list_sz: size of @ifc_list in bytes; 0 selects all network devices
 *
 * With CONFIG_SMB_SERVER_SMBDIRECT an additional "smbdirect-sock"
 * interface is allocated and its listening socket is created right away;
 * a failure there is only logged.
 *
 * For @ifc_list_sz == 0 every device of init_net except bridge ports
 * becomes a TCP interface and bind_additional_ifaces is set. Otherwise
 * one TCP interface per name in @ifc_list is allocated and
 * bind_additional_ifaces is cleared.
 *
 * Return: 0 on success, -ENOMEM if a TCP interface cannot be allocated
 */
int ksmbd_sock_set_interfaces(char *ifc_list, int ifc_list_sz)
{
	int name_len = 0;

#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	{
		struct ksmbd_sock_interface *rdma_iface;

		rdma_iface = ksmbd_alloc_iface("smbdirect-sock",
					       &ksmbd_smbdirect_transport_ops);
		if (rdma_iface) {
			int ret = create_smbdirect_socket(rdma_iface);

			if (ret)
				pr_err("create_smbdirect_socket() failed: %d\n", ret);
		}
	}
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

	if (ifc_list_sz == 0) {
		struct net_device *netdev;

		rtnl_lock();
		for_each_netdev(&init_net, netdev) {
			if (netif_is_bridge_port(netdev))
				continue;
			if (!ksmbd_alloc_iface(netdev->name,
					       &ksmbd_tcp_transport_ops)) {
				rtnl_unlock();
				return -ENOMEM;
			}
		}
		rtnl_unlock();

		bind_additional_ifaces = 1;
		return 0;
	}

	while (ifc_list_sz > 0) {
		if (!ksmbd_alloc_iface(ifc_list, &ksmbd_tcp_transport_ops))
			return -ENOMEM;

		name_len = strlen(ifc_list);
		if (name_len == 0)
			break;

		/* step over the name and its terminating NUL */
		ifc_list += name_len + 1;
		ifc_list_sz -= (name_len + 1);
	}

	bind_additional_ifaces = 0;
	return 0;
}
/* Transport operations for plain TCP connections; no RDMA callbacks are set. */
static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
.read = ksmbd_sock_read,
.writev = ksmbd_sock_writev,
.disconnect = ksmbd_sock_disconnect,
};
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
/*
 * Check the device attributes required here: the device must advertise
 * IB_DEVICE_MEM_MGT_EXTENSIONS and report a non-zero
 * max_fast_reg_page_list_len.
 */
static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	return (attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
	       attrs->max_fast_reg_page_list_len != 0;
}
/*
 * Report whether @netdev is backed by an RDMA device that passes
 * rdma_frwr_is_supported(). The device reference obtained for the lookup
 * is dropped again before returning.
 */
bool ksmbd_netdev_rdma_capable(struct net_device *netdev)
{
	struct ib_device *ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
	bool capable;

	if (!ibdev)
		return false;

	capable = rdma_frwr_is_supported(&ibdev->attrs);
	ib_device_put(ibdev);

	return capable;
}
/*
 * Create the smbdirect listening socket for @iface, bind it to
 * INADDR_ANY port 5445, start listening and launch the listener thread.
 * On success the interface is marked IFACE_STATE_CONFIGURED.
 *
 * If the socket cannot be created there is nothing to destroy, so that
 * path skips ksmbd_destroy_socket(); the socket pointer also starts out
 * as NULL so that it is never used uninitialized.
 *
 * Return: 0 on success, error number otherwise. On failure
 * iface->ksmbd_socket is reset to NULL.
 */
static int create_smbdirect_socket(struct ksmbd_sock_interface *iface)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(5445),
	};
	struct socket *ksmbd_socket = NULL;
	int ret;

	ret = sock_create(PF_SMBDIRECT, SOCK_STREAM, AF_INET, &ksmbd_socket);
	if (ret) {
		pr_err("Can't create smbdirect socket for ipv4: %d\n", ret);
		goto out_clear;
	}

	ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
			  sizeof(sin));
	if (ret) {
		pr_err("Failed to bind socket: %d\n", ret);
		goto out_error;
	}

	ksmbd_socket->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
	ksmbd_socket->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;

	ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
	if (ret) {
		pr_err("Port listen() error: %d\n", ret);
		goto out_error;
	}

	/* publish the socket before the listener thread starts using it */
	iface->ksmbd_socket = ksmbd_socket;
	ret = ksmbd_sock_run_kthread(iface);
	if (ret) {
		pr_err("Can't start ksmbd main kthread: %d\n", ret);
		goto out_error;
	}
	iface->state = IFACE_STATE_CONFIGURED;
	return 0;

out_error:
	ksmbd_destroy_socket(ksmbd_socket);
out_clear:
	iface->ksmbd_socket = NULL;
	return ret;
}
/*
 * RDMA write callback: pass @buflen bytes starting at @buf, together with
 * the remote descriptors @remote, to smbdirect_kern_rdma_v1_writev().
 *
 * @buf is described by a single bio_vec whose page is looked up with
 * vmalloc_to_page() or kmap_to_page().
 * NOTE(review): one bio_vec of length @buflen starting in one page assumes
 * the buffer is contiguous in pages - confirm for buffers that cross a
 * page boundary.
 *
 * Return: 0 on success, negative error number otherwise
 */
static int ksmbd_sock_rdma_write(struct ksmbd_transport *t,
				 void *buf, unsigned int buflen,
				 const struct smbdirect_buffer_descriptors_v1 *remote)
{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct page *first_page = is_vmalloc_addr(buf) ?
				  vmalloc_to_page(buf) : kmap_to_page(buf);
	struct bio_vec biov = {
		.bv_page = first_page,
		.bv_offset = offset_in_page(buf),
		.bv_len = buflen,
	};
	struct iov_iter iter;
	ssize_t ret;

	iov_iter_bvec(&iter, WRITE, &biov, 1, buflen);
	ret = smbdirect_kern_rdma_v1_writev(sock, remote, buflen, &iter);

	return ret < 0 ? ret : 0;
}
/*
 * RDMA read callback: pass the @buflen byte buffer at @buf, together with
 * the remote descriptors @remote, to smbdirect_kern_rdma_v1_readv().
 *
 * @buf is described by a single bio_vec whose page is looked up with
 * vmalloc_to_page() or kmap_to_page().
 * NOTE(review): one bio_vec of length @buflen starting in one page assumes
 * the buffer is contiguous in pages - confirm for buffers that cross a
 * page boundary.
 *
 * Return: 0 on success, negative error number otherwise
 */
static int ksmbd_sock_rdma_read(struct ksmbd_transport *t,
				void *buf, unsigned int buflen,
				const struct smbdirect_buffer_descriptors_v1 *remote)
{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct page *first_page = is_vmalloc_addr(buf) ?
				  vmalloc_to_page(buf) : kmap_to_page(buf);
	struct bio_vec biov = {
		.bv_page = first_page,
		.bv_offset = offset_in_page(buf),
		.bv_len = buflen,
	};
	struct iov_iter iter;
	ssize_t ret;

	iov_iter_bvec(&iter, READ, &biov, 1, buflen);
	ret = smbdirect_kern_rdma_v1_readv(sock, remote, buflen, &iter);

	return ret < 0 ? ret : 0;
}
/*
 * Transport operations for smbdirect connections: the same read, writev
 * and disconnect callbacks as the TCP table, plus the RDMA read/write
 * callbacks.
 */
static const struct ksmbd_transport_ops ksmbd_smbdirect_transport_ops = {
.read = ksmbd_sock_read,
.writev = ksmbd_sock_writev,
.disconnect = ksmbd_sock_disconnect,
.rdma_read = ksmbd_sock_rdma_read,
.rdma_write = ksmbd_sock_rdma_write,
};
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
next prev parent reply other threads:[~2025-06-03 9:06 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-28 16:01 [PATCH v2 00/12] smb:common: introduce and use common smbdirect headers/structures (step1) Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 01/12] smb: smbdirect: add smbdirect_pdu.h with protocol definitions Stefan Metzmacher
2025-05-28 23:28 ` Namjae Jeon
2025-05-30 19:03 ` Stefan Metzmacher
2025-05-30 19:27 ` Steve French
2025-05-30 22:29 ` Namjae Jeon
[not found] ` <CAH2r5mvQbL_R9wrFRHF9_3XwM3e-=2vK=i1uaSCk37-FZmJq9g@mail.gmail.com>
2025-06-01 0:01 ` Namjae Jeon
[not found] ` <CAH2r5mso54sXPcoJWDSU4E--XMH44wFY-cdww6_6yx5CxrFtdg@mail.gmail.com>
2025-06-01 4:59 ` Namjae Jeon
2025-06-02 1:56 ` Steve French
2025-06-02 2:19 ` Namjae Jeon
2025-06-02 22:02 ` Stefan Metzmacher
2025-06-03 6:20 ` Namjae Jeon
2025-06-03 9:06 ` Stefan Metzmacher [this message]
2025-06-03 10:55 ` Namjae Jeon
2025-05-28 16:01 ` [PATCH v2 02/12] smb: client: make use of common smbdirect_pdu.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 03/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 04/12] smb: smbdirect: add smbdirect.h with public structures Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 05/12] smb: client: make use of common smbdirect.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 06/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 07/12] smb: smbdirect: add smbdirect_socket.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 08/12] smb: client: make use of common smbdirect_socket Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 09/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 10/12] smb: smbdirect: introduce smbdirect_socket_parameters Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 11/12] smb: client: make use of common smbdirect_socket_parameters Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 12/12] smb: server: " Stefan Metzmacher
2025-05-29 3:33 ` [PATCH v2 00/12] smb:common: introduce and use common smbdirect headers/structures (step1) Steve French
2025-05-30 16:51 ` Steve French
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=995fde08-3fed-47a0-b984-876f426e9076@samba.org \
--to=metze@samba.org \
--cc=hyc.lee@gmail.com \
--cc=linkinjeon@kernel.org \
--cc=linux-cifs@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=meetakshisetiyaoss@gmail.com \
--cc=samba-technical@lists.samba.org \
--cc=smfrench@gmail.com \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox