Linux CIFS filesystem development
 help / color / mirror / Atom feed
From: Stefan Metzmacher <metze@samba.org>
To: Namjae Jeon <linkinjeon@kernel.org>
Cc: Steve French <smfrench@gmail.com>,
	CIFS <linux-cifs@vger.kernel.org>, Tom Talpey <tom@talpey.com>,
	Long Li <longli@microsoft.com>, Hyunchul Lee <hyc.lee@gmail.com>,
	Meetakshi Setiya <meetakshisetiyaoss@gmail.com>,
	samba-technical <samba-technical@lists.samba.org>
Subject: Re: [PATCH v2 01/12] smb: smbdirect: add smbdirect_pdu.h with protocol definitions
Date: Tue, 3 Jun 2025 11:06:09 +0200	[thread overview]
Message-ID: <995fde08-3fed-47a0-b984-876f426e9076@samba.org> (raw)
In-Reply-To: <CAKYAXd9h8LpaOX9JA5Mdduw1CQ4RnYFgkU9dXf6NnNbTFYFJ8g@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 8637 bytes --]

Am 03.06.25 um 08:20 schrieb Namjae Jeon:
> On Tue, Jun 3, 2025 at 7:03 AM Stefan Metzmacher <metze@samba.org> wrote:
>>
>> Am 02.06.25 um 04:19 schrieb Namjae Jeon:
>>> On Mon, Jun 2, 2025 at 10:57 AM Steve French <smfrench@gmail.com> wrote:
>>>>
>>>>> Can you explain why he has split it into smbdirect_socket.h?
>>>>
>>>> The three header names seem plausible, but would be useful to have
>>>> Metze's clarification/explanation:
>>>> - the "protocol" related header info for smbdirect goes in
>>>> smb/common/smbdirect/smbdirect_pdu.h   (we use similar name smb2pdu.h
>>>> for the smb2/smb3 protocol related wire definitions)
>>>> - smbdirect.h for internal smbdirect structure definitions
>>>> - smbdirect_socket.h for things related to exporting it as a socket
>>>> (since one of the goals is to make smbdirect useable by Samba
>>>> userspace tools)
>>> There is no need to do things in advance that are not yet concrete and
>>> may change later.
>>
>> The current idea is to merge transport_tcp and transport_rdma into
>> transport_sock, see
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/server/transport_sock.c;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf

I've attached it to this mail, but remember this is just a demo; I need to redo it
based on the current transport_tcp.c and transport_rdma.c.

>> Which uses this interface:
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/common/smbdirect/smbdirect.h;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf
> Hm.. I can not access these links.. Is it just me?

Here's a trimmed down version with only the in kernel related stuff of smbdirect.h:

/*
  * userspace:
  *
  *   int socket(int family, int type, int protocol);
  *
  * kernel:
  *
  *   int sock_create_kern(struct net *net,
  *                        int family, int type, int protocol,
  *                        struct socket **res);
  *
  * family: PF_SMBDIRECT
  * type: SOCK_STREAM, together with SOCK_CLOEXEC SOCK_NONBLOCK
  * protocol: address family + protocol flavor
  *
  * address family: AF_INET or AF_INET6
  * protocol flavor: SMBDIRECT_FLAVOR_AUTO
  */

/*
  * Layout of the 'protocol' argument passed at socket creation time.
  *
  * Bits 0-7 carry the address family:
  * AF_INET, AF_INET6 and AF_IB are below 0xff
  */
#define SMBDIRECT_FAMILY_MASK                0x000000ff
/*
  * Bits 12-15 select the protocol flavor.
  * For now only automatic selection (iWARP and RoCE) is defined.
  */
#define SMBDIRECT_FLAVOR_MASK                0x0000f000
#define SMBDIRECT_FLAVOR_AUTO                0x00000000
/*
  * Bits 16-30: additional hints/flags.
  * SMBDIRECT_FLAGS_VALID is the set of flags currently defined (none).
  */
#define SMBDIRECT_FLAGS_MASK                 0x7fff0000
#define SMBDIRECT_FLAGS_VALID ( \
	0)

/*
 * Every bit that is not covered by one of the three masks above.
 */
#define SMBDIRECT_PROTOCOL_INVALID_MASK ~((unsigned)( \
	SMBDIRECT_FAMILY_MASK | \
	SMBDIRECT_FLAVOR_MASK | \
	SMBDIRECT_FLAGS_MASK | \
	0))

/*
 * Operation numbers, grouped by hundreds.
 * __SMBDIRECT_BUFFER_REMOTE_INVALIDATE is used below as the 'nr' argument
 * of _IOW() to build a cmsg type; the other values are presumably used the
 * same way by parts of the header not shown here — TODO confirm.
 */
enum {
	__SMBDIRECT_CONNECTION_PARAMETERS = 100,

	__SMBDIRECT_BUFFER_UNREGISTER = 200,
	__SMBDIRECT_BUFFER_REGISTER_READ = 201,
	__SMBDIRECT_BUFFER_REGISTER_WRITE = 202,

	__SMBDIRECT_BUFFER_REMOTE_INVALIDATE = 300,
	__SMBDIRECT_BUFFER_REMOTE_READ = 301,
	__SMBDIRECT_BUFFER_REMOTE_WRITE = 302,
};

/*
 * Per-connection parameters as reported by
 * smbdirect_kern_connection_get_parameters().
 *
 * NOTE(review): units (bytes for the *_size fields, time unit of
 * keepalive_interval) are not visible in this excerpt — confirm.
 */
struct smbdirect_connection_parameters {
	__u16 recv_credit_max;
	__u16 send_credit_target;
	__u32 max_send_size;
	__u32 max_fragmented_send_size;
	__u32 max_recv_size;
	__u32 max_fragmented_recv_size;
	__u32 max_read_write_size;
	__u32 keepalive_interval;
} __packed;

/*
 * In-kernel accessor for the parameters of an smbdirect socket.
 * Presumably fills *params and returns 0 or a negative errno — the
 * implementation is not part of this excerpt, TODO confirm.
 */
int smbdirect_kern_connection_get_parameters(struct socket *sock,
					     struct smbdirect_connection_parameters *params);

/*
 * One buffer descriptor: offset, token and length, all little endian.
 * Packed, 16 bytes in total.
 */
struct smbdirect_buffer_descriptor_v1 {
	__le64 offset;
	__le32 token;
	__le32 length;
} __packed;

/*
 * Variable-length list of buffer descriptors.
 *
 * @flags:     combination of the SMBDIRECT_BUFFER_* bits defined below
 * @max_count: presumably the capacity of @array — see
 *             SMBDIRECT_BUFFER_DESCRIPTORS_V1_FAST_INIT, TODO confirm
 * @count:     presumably the number of valid entries in @array — TODO confirm
 * @array:     flexible array of descriptors
 */
struct smbdirect_buffer_descriptors_v1 {
#define SMBDIRECT_BUFFER_ALLOW_READ  0x00000001
#define SMBDIRECT_BUFFER_ALLOW_WRITE 0x00000002
#define SMBDIRECT_BUFFER_INVALIDATE  0x00000004
#define SMBDIRECT_BUFFER_REMOTE      0x00000008
#define SMBDIRECT_BUFFER_DEFINED_FLAGS ( \
	SMBDIRECT_BUFFER_ALLOW_READ | \
	SMBDIRECT_BUFFER_ALLOW_WRITE | \
	SMBDIRECT_BUFFER_INVALIDATE | \
	SMBDIRECT_BUFFER_REMOTE | \
	0)
	__u32 flags;
/*
 * Upper bound for the number of descriptors.
 * NOTE(review): UINT16_MAX / sizeof(struct smbdirect_buffer_descriptor_v1)
 * is 65535 / 16 = 4095; 4096 is 65536 / 16 — confirm the intended limit.
 */
#define _SMBDIRECT_BUFFER_COUNT_MAX 4096
	__u16 max_count;
	__u16 count;
	struct smbdirect_buffer_descriptor_v1 array[];
} __packed;

/*
 * Fixed-size variant: the list header immediately followed by storage for
 * SMBDIRECT_BUFFER_COUNT_FAST descriptors (default 32, overridable by
 * defining the macro before this point). Because both structures are
 * packed, __fast_array sits where hdr.array[] starts.
 */
struct smbdirect_buffer_descriptors_v1_fast {
	struct smbdirect_buffer_descriptors_v1 hdr;
#ifndef SMBDIRECT_BUFFER_COUNT_FAST
#define SMBDIRECT_BUFFER_COUNT_FAST 32
#endif
	struct smbdirect_buffer_descriptor_v1 __fast_array[SMBDIRECT_BUFFER_COUNT_FAST];
} __packed;

/*
 * Initializer for struct smbdirect_buffer_descriptors_v1_fast:
 * sets hdr.max_count to the size of the embedded array, everything else
 * is zero.
 */
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_FAST_INIT { \
	.hdr = { \
		.max_count = SMBDIRECT_BUFFER_COUNT_FAST, \
	}, \
}

/*
 * Size in bytes of a struct smbdirect_buffer_descriptors_v1 that carries
 * @count descriptors in its flexible array.
 *
 * The argument is parenthesized so that expressions such as (a + b) are
 * multiplied as a whole.
 */
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_SIZE(count) \
	(sizeof(struct smbdirect_buffer_descriptors_v1) + \
	 sizeof(struct smbdirect_buffer_descriptor_v1) * (count))

/*
 * Storage for a single control message with up to 24 bytes of payload,
 * used as msg_control by __smbdirect_cmsg_prepare().
 */
struct smbdirect_cmsg_buffer {
	uint8_t msg_control[CMSG_SPACE(24)];
};

/*
 * Attach one SOL_SMBDIRECT control message to @msg.
 *
 * @msg:        message header that gets msg_control/msg_controllen set
 * @cbuffer:    caller-provided storage for the control message; it must
 *              stay valid for as long as @msg is in use
 * @cmsg_type:  value stored in cmsg_type
 * @payload:    data copied into the control message
 * @payloadlen: number of bytes to copy from @payload
 *
 * The payload must fit into @cbuffer; this is enforced by BUILD_BUG_ON()
 * and BUG_ON().
 */
static inline void __smbdirect_cmsg_prepare(struct msghdr *msg,
					    struct smbdirect_cmsg_buffer *cbuffer,
					    int cmsg_type,
					    const void *payload,
					    size_t payloadlen)
{
	size_t cmsg_space = CMSG_SPACE(payloadlen);
	size_t cmsg_len = CMSG_LEN(payloadlen);
	struct cmsghdr *cmsg = NULL;
	void *dataptr = NULL;

	/*
	 * NOTE(review): BUILD_BUG_ON() on a local variable presumably relies
	 * on this function being inlined with a constant payloadlen — confirm.
	 */
	BUILD_BUG_ON(cmsg_space > sizeof(cbuffer->msg_control));
	BUG_ON(cmsg_space > sizeof(cbuffer->msg_control));

	memset(cbuffer, 0, sizeof(*cbuffer));

	/* the control buffer is set up before CMSG_FIRSTHDR() looks at it */
	msg->msg_control = cbuffer->msg_control;
	msg->msg_controllen = cmsg_space;

	cmsg = CMSG_FIRSTHDR(msg);
	cmsg->cmsg_level = SOL_SMBDIRECT;
	cmsg->cmsg_type = cmsg_type;
	cmsg->cmsg_len = cmsg_len;
	dataptr = CMSG_DATA(cmsg);
	memcpy(dataptr, payload, payloadlen);
	/* final length is the unpadded CMSG_LEN(), not CMSG_SPACE() */
	msg->msg_controllen = cmsg->cmsg_len;
}

/*
 * Payload of the "remote invalidate" control message: a single buffer
 * descriptor.
 */
struct smbdirect_buffer_remote_invalidate_args {
	struct smbdirect_buffer_descriptor_v1 first_desc;
} __packed;
/* cmsg_type value for the message above, built with _IOW() */
#define SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE \
	_IOW('S', __SMBDIRECT_BUFFER_REMOTE_INVALIDATE, \
		struct smbdirect_buffer_remote_invalidate_args)

static inline void smbdirect_buffer_remote_invalidate_cmsg_prepare(struct msghdr *msg,
								   struct smbdirect_cmsg_buffer *cbuffer,
								   const struct smbdirect_buffer_descriptor_v1 *desc)
{
	struct smbdirect_buffer_remote_invalidate_args args = {
		.first_desc = *desc,
	};

	__smbdirect_cmsg_prepare(msg, cbuffer,
				 SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE,
				 &args, sizeof(args));
}


/*
 * In-kernel RDMA transfer helpers operating on an smbdirect socket.
 *
 * @sock:   smbdirect socket
 * @remote: descriptors of the peer's buffer(s)
 * @size:   transfer length in bytes, as passed by the caller
 * @iter:   local data source (writev) or destination (readv)
 *
 * NOTE(review): the implementations are not part of this excerpt; callers
 * in transport_sock.c treat a negative return value as an error.
 */
ssize_t smbdirect_kern_rdma_v1_writev(struct socket *sock,
				      const struct smbdirect_buffer_descriptors_v1 *remote,
				      size_t size,
				      struct iov_iter *iter);

ssize_t smbdirect_kern_rdma_v1_readv(struct socket *sock,
				     const struct smbdirect_buffer_descriptors_v1 *remote,
				     size_t size,
				     struct iov_iter *iter);



>> But note that is just the direction were it goes, that current code has a lot of resolved merge conflicts,
>> which may not work at all currently.
>>
>> Instead of putting my current code I try to take the existing client and server
>> code and merge it, so that we don't have a flag day commit that switches to
>> completely new code. Instead I try to do tiny steps in that direction
>> and may end with an interface that is similar but might be a bit different in
>> some parts.
> Okay.
> 
>>
>>> He can just put these changes in his own queue and work on them.
>>> I am pointing out why he is trying to put unfinished things in the public queue.
>>
>> Because I want to base the next steps on something that is already accepted.
>>
>> I really don't want to work on it for weeks and then some review will void
>> that work completely and I can start again.
> It was too tiny a step and unclear.
> i.e. the patch description should not have comments like "It will be
> used in the next commits..."

What should it say if something is introduced but not yet used?

I mean I could explain in more detail how it will be used in
the next commits?

>>> If You want to apply it, Please do it only on cifs.ko. When it is
>>> properly implemented, I want to apply it to ksmbd.
>>
>> I can keep the ksmbd patches rebased on top and send them again
>> each time to get more feedback.
>>
>> Would that work for you?
> Okay, Please re-include the ksmbd patches in the next patch-set and I
> will check them.
>>
>> The key for me is discuss patches first and have them reviewed early
>> so that the following work rely on. Any the tiny steps should
>> make it possible to do easy review and make it possible to test each
>> tiny step.
> Okay. I agreed. But It should not be too tiny.
> As I said above, please don't send it in pieces that I can understand
> by looking at the next commits.

I'll try to keep them tiny; they can always be squashed later,
whereas splitting them again would be a pain.

You can apply them and do a diff over multiple patches
and tell me which commits I should squash.

Thanks!
metze

[-- Attachment #2: transport_sock.c --]
[-- Type: text/x-csrc, Size: 20708 bytes --]

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
 *   Copyright (C) 2018 Samsung Electronics Co., Ltd.
 *   Copyright (C) 2021 Stefan Metzmacher <metze@samba.org>
 */

#include <linux/freezer.h>

#include "smb_common.h"
#include "server.h"
#include "auth.h"
#include "connection.h"
#include "transport_sock.h"

#ifdef CONFIG_SMB_SERVER_SMBDIRECT
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include "../common/smbdirect/smbdirect.h"
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

/* values of ksmbd_sock_interface::state */
#define IFACE_STATE_DOWN		BIT(0)
#define IFACE_STATE_CONFIGURED		BIT(1)

/*
 * Number of accepted connections; only maintained when
 * server_conf.max_connections is non-zero.
 */
static atomic_t active_num_conn;

/*
 * One listening endpoint.
 *
 * @ksmbd_kthread:     accept thread started by ksmbd_sock_run_kthread()
 * @ksmbd_socket:      listening socket, NULL while the interface is down
 * @entry:             link in iface_list
 * @name:              kstrdup()'ed interface name
 * @sock_release_lock: held around kernel_accept() and around destroying
 *                     @ksmbd_socket
 * @state:             IFACE_STATE_DOWN or IFACE_STATE_CONFIGURED
 * @type:              "tcp" or "smbdirect", used in the kthread name
 * @ops:               transport ops handed to every accepted connection
 */
struct ksmbd_sock_interface {
	struct task_struct	*ksmbd_kthread;
	struct socket		*ksmbd_socket;
	struct list_head	entry;
	char			*name;
	struct mutex		sock_release_lock;
	int			state;
	const char		*type;
	const struct ksmbd_transport_ops *ops;
};

/* all interfaces created by ksmbd_alloc_iface() */
static LIST_HEAD(iface_list);

/*
 * Non-zero when no explicit interface list was configured; NETDEV_UP for
 * an unknown device then creates a new TCP interface on the fly.
 */
static int bind_additional_ifaces;

/*
 * Socket based transport of one connection.
 *
 * @transport: generic transport embedded here (see KSMBD_SOCK_TRANS())
 * @sock:      connected client socket
 * @iov:       scratch kvec array reused by ksmbd_sock_readv()
 * @nr_iov:    number of entries allocated in @iov
 */
struct ksmbd_sock_transport {
	struct ksmbd_transport		transport;
	struct socket			*sock;
	struct kvec			*iov;
	unsigned int			nr_iov;
};

static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
static const struct ksmbd_transport_ops ksmbd_smbdirect_transport_ops;
static int create_smbdirect_socket(struct ksmbd_sock_interface *iface);
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

static void ksmbd_sock_stop_kthread(struct task_struct *kthread);
static struct ksmbd_sock_interface *ksmbd_alloc_iface(const char *ifname,
					const struct ksmbd_transport_ops *ops);

/* socket transport -> embedded generic transport */
#define KSMBD_TRANS(t)	(&(t)->transport)
/* generic transport -> enclosing socket transport */
#define KSMBD_SOCK_TRANS(t)	((struct ksmbd_sock_transport *)container_of(t, \
				struct ksmbd_sock_transport, transport))

/* Enable TCP_NODELAY on @sock via tcp_sock_set_nodelay(). */
static inline void ksmbd_tcp_nodelay(struct socket *sock)
{
	struct sock *sk = sock->sk;

	tcp_sock_set_nodelay(sk);
}

/* Enable address reuse on @sock via sock_set_reuseaddr(). */
static inline void ksmbd_tcp_reuseaddr(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_set_reuseaddr(sk);
}

/*
 * Set the receive timeout of @sock to @secs seconds.
 *
 * A value of 0, or one too large to be expressed in jiffies, selects
 * MAX_SCHEDULE_TIMEOUT.
 */
static inline void ksmbd_tcp_rcv_timeout(struct socket *sock, s64 secs)
{
	struct sock *sk = sock->sk;
	long timeo = MAX_SCHEDULE_TIMEOUT;

	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
		timeo = secs * HZ;

	lock_sock(sk);
	sk->sk_rcvtimeo = timeo;
	release_sock(sk);
}

/* Set the send timeout of @sock to @secs via sock_set_sndtimeo(). */
static inline void ksmbd_tcp_snd_timeout(struct socket *sock, s64 secs)
{
	struct sock *sk = sock->sk;

	sock_set_sndtimeo(sk, secs);
}

static struct ksmbd_sock_transport *ksmbd_sock_transport_alloc(struct socket *client_sk,
							       struct ksmbd_sock_interface *iface)
{
	struct ksmbd_sock_transport *t;
	struct ksmbd_conn *conn;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;
	t->sock = client_sk;

	conn = ksmbd_conn_alloc();
	if (!conn) {
		kfree(t);
		return NULL;
	}

	conn->transport = KSMBD_TRANS(t);
	KSMBD_TRANS(t)->conn = conn;
	KSMBD_TRANS(t)->ops = iface->ops;
	return t;
}

/*
 * Release everything owned by the socket transport behind @kt:
 * the socket, the scratch kvec array and the transport itself.
 */
void ksmbd_free_transport(struct ksmbd_transport *kt)
{
	struct ksmbd_sock_transport *st = KSMBD_SOCK_TRANS(kt);
	struct socket *sock = st->sock;

	sock_release(sock);
	kfree(st->iov);
	kfree(st);
}

/*
 * Shut down the socket of @t in both directions and free its connection.
 * NOTE(review): the transport memory itself is presumably released through
 * ksmbd_conn_free() -> ksmbd_free_transport() — confirm.
 */
static void ksmbd_sock_transport_free(struct ksmbd_sock_transport *t)
{
	struct socket *sock = t->sock;

	kernel_sock_shutdown(sock, SHUT_RDWR);
	ksmbd_conn_free(KSMBD_TRANS(t)->conn);
}

/**
 * kvec_array_init() - build a kvec array that skips already consumed bytes
 * @new:	destination array, receives the remaining segments
 * @iov:	base IO vector
 * @nr_segs:	number of segments in base iov
 * @bytes:	number of bytes at the start of @iov to skip
 *
 * Walks @iov until @bytes bytes have been skipped (zero-length segments are
 * skipped as well), copies the remaining segments to @new and advances the
 * first copied segment past the partially consumed part.
 *
 * Return:	Number of IO segments placed in @new
 */
static unsigned int kvec_array_init(struct kvec *new, struct kvec *iov,
				    unsigned int nr_segs, size_t bytes)
{
	/* offset into the current segment */
	size_t base = 0;

	while (bytes || !iov->iov_len) {
		int copy = min(bytes, iov->iov_len);

		bytes -= copy;
		base += copy;
		/* segment fully consumed: move on to the next one */
		if (iov->iov_len == base) {
			iov++;
			nr_segs--;
			base = 0;
		}
	}

	memcpy(new, iov, sizeof(*iov) * nr_segs);
	/* first remaining segment may be partially consumed */
	new->iov_base += base;
	new->iov_len -= base;
	return nr_segs;
}

/**
 * get_conn_iovec() - get connection iovec for reading from socket
 * @t:		SOCK transport instance
 * @nr_segs:	number of segments in iov
 *
 * Return:	return existing or newly allocate iovec
 */
static struct kvec *get_conn_iovec(struct ksmbd_sock_transport *t, unsigned int nr_segs)
{
	struct kvec *new_iov;

	if (t->iov && nr_segs <= t->nr_iov)
		return t->iov;

	/* not big enough -- allocate a new one and release the old */
	new_iov = kmalloc_array(nr_segs, sizeof(*new_iov), GFP_KERNEL);
	if (new_iov) {
		kfree(t->iov);
		t->iov = new_iov;
		t->nr_iov = nr_segs;
	}
	return new_iov;
}

/*
 * Extract the port, in host byte order, from an IPv4 or IPv6 socket
 * address. Any other address family yields 0.
 */
static unsigned short ksmbd_tcp_get_port(const struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *v4 = (const struct sockaddr_in *)sa;

		return ntohs(v4->sin_port);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *v6 = (const struct sockaddr_in6 *)sa;

		return ntohs(v6->sin6_port);
	}

	return 0;
}

/**
 * ksmbd_sock_new_connection() - set up a connection for an accepted socket
 * @client_sk:	socket associated with new connection; consumed by this
 *		function, also on failure
 * @iface:	ksmbd_sock_interface of the listening socket
 *
 * Allocates the transport/connection pair, records the peer address and
 * starts a "ksmbd:<peer port>" kthread running ksmbd_conn_handler_loop()
 * for the connection.
 *
 * NOTE(review): the accept loop may have incremented active_num_conn before
 * calling this; nothing visible here undoes that on failure — confirm
 * whether the connection teardown path takes care of it.
 *
 * Return:	0 on success, otherwise error
 */
static int ksmbd_sock_new_connection(struct socket *client_sk,
				     struct ksmbd_sock_interface *iface)
{
	struct sockaddr *csin;
	int rc;
	struct ksmbd_sock_transport *t;
	struct task_struct *handler;

	client_sk->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
	client_sk->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;

	t = ksmbd_sock_transport_alloc(client_sk, iface);
	if (!t) {
		/*
		 * No transport took ownership of the socket and the caller
		 * does not release it either, so do it here.
		 */
		sock_release(client_sk);
		return -ENOMEM;
	}

	csin = KSMBD_SOCK_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
	if (kernel_getpeername(client_sk, csin) < 0) {
		pr_err("client ip resolution failed\n");
		rc = -EINVAL;
		goto out_error;
	}

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn,
			      "ksmbd:%u",
			      ksmbd_tcp_get_port(csin));
	if (IS_ERR(handler)) {
		pr_err("cannot start conn thread\n");
		rc = PTR_ERR(handler);
		goto out_error;
	}
	return 0;

out_error:
	ksmbd_sock_transport_free(t);
	return rc;
}

/**
 * ksmbd_kthread_fn() - accept loop of one listening interface
 * @p:		the struct ksmbd_sock_interface to serve
 *
 * Accepts connections in non-blocking mode until the thread is asked to
 * stop or the listening socket of the interface has gone away. Every
 * accepted socket is passed to ksmbd_sock_new_connection() unless the
 * configured connection limit is reached.
 *
 * Return:	always 0
 */
static int ksmbd_kthread_fn(void *p)
{
	struct ksmbd_sock_interface *iface = p;
	struct socket *client_sk = NULL;
	int ret;

	while (!kthread_should_stop()) {
		/* the lock keeps the listening socket alive during accept */
		mutex_lock(&iface->sock_release_lock);
		if (!iface->ksmbd_socket) {
			mutex_unlock(&iface->sock_release_lock);
			break;
		}
		ret = kernel_accept(iface->ksmbd_socket, &client_sk,
				    SOCK_NONBLOCK);
		mutex_unlock(&iface->sock_release_lock);

		if (ret == -EAGAIN) {
			/* check for new connections every 100 msecs */
			schedule_timeout_interruptible(HZ / 10);
			continue;
		}
		if (ret)
			continue;

		if (server_conf.max_connections &&
		    atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
			pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
					    atomic_read(&active_num_conn));
			atomic_dec(&active_num_conn);
			sock_release(client_sk);
			continue;
		}

		ksmbd_debug(CONN, "connect success: accepted new connection\n");
		ksmbd_sock_new_connection(client_sk, iface);
	}

	ksmbd_debug(CONN, "releasing socket\n");
	return 0;
}

/**
 * ksmbd_sock_run_kthread() - start the accept thread of an interface
 * @iface: pointer to struct ksmbd_sock_interface
 *
 * Starts a kthread named "ksmbd-<type>-<name>" running ksmbd_kthread_fn()
 * and stores it in @iface->ksmbd_kthread.
 *
 * Return:	0 on success or error number
 */
static int ksmbd_sock_run_kthread(struct ksmbd_sock_interface *iface)
{
	struct task_struct *worker;

	worker = kthread_run(ksmbd_kthread_fn, (void *)iface, "ksmbd-%s-%s",
			     iface->type, iface->name);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	iface->ksmbd_kthread = worker;
	return 0;
}

/**
 * ksmbd_sock_readv() - read data from socket in given iovec
 * @t:			SOCK transport instance
 * @iov_orig:		base IO vector
 * @nr_segs:		number of segments in base iov
 * @to_read:		number of bytes to read from socket
 * @max_retries:	maximum retry count
 *
 * Return:	on success return number of bytes read from socket,
 *		otherwise return error number
 */
static int ksmbd_sock_readv(struct ksmbd_sock_transport *t, struct kvec *iov_orig,
			    unsigned int nr_segs, unsigned int to_read,
			    int max_retries)
{
	int length = 0;
	int total_read;
	unsigned int segs;
	struct msghdr ksmbd_msg;
	struct kvec *iov;
	struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;

	iov = get_conn_iovec(t, nr_segs);
	if (!iov)
		return -ENOMEM;

	ksmbd_msg.msg_control = NULL;
	ksmbd_msg.msg_controllen = 0;

	for (total_read = 0; to_read; total_read += length, to_read -= length) {
		try_to_freeze();

		if (!ksmbd_conn_alive(conn)) {
			total_read = -ESHUTDOWN;
			break;
		}
		segs = kvec_array_init(iov, iov_orig, nr_segs, total_read);

		length = kernel_recvmsg(t->sock, &ksmbd_msg,
					iov, segs, to_read, 0);

		if (length == -EINTR) {
			total_read = -ESHUTDOWN;
			break;
		} else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
			total_read = -EAGAIN;
			break;
		} else if (length == -ERESTARTSYS || length == -EAGAIN) {
			/*
			 * If max_retries is negative, Allow unlimited
			 * retries to keep connection with inactive sessions.
			 */
			if (max_retries == 0) {
				total_read = length;
				break;
			} else if (max_retries > 0) {
				max_retries--;
			}

			usleep_range(1000, 2000);
			length = 0;
			continue;
		} else if (length <= 0) {
			total_read = length;
			break;
		}
	}
	return total_read;
}

/**
 * ksmbd_tcp_read() - read data from socket in given buffer
 * @t:		SOCK transport instance
 * @buf:	buffer to store read data from socket
 * @to_read:	number of bytes to read from socket
 * @max_retries: number of retries if reading from socket fails
 *
 * Return:	on success return number of bytes read from socket,
 *		otherwise return error number
 */
static int ksmbd_sock_read(struct ksmbd_transport *t, char *buf,
			   unsigned int to_read, int max_retries)
{
	struct kvec iov;

	iov.iov_base = buf;
	iov.iov_len = to_read;

	return ksmbd_sock_readv(KSMBD_SOCK_TRANS(t), &iov, 1, to_read, max_retries);
}

/*
 * Send @size bytes described by @iov/@nvecs on the socket of @t.
 *
 * With CONFIG_SMB_SERVER_SMBDIRECT and @need_invalidate set, a "remote
 * invalidate" control message carrying @remote_key as token is attached
 * to the send. Without that config option both arguments are ignored.
 *
 * NOTE(review): @remote_key is stored into a __le32 field without an
 * explicit byte order conversion — confirm which order the consumer expects.
 *
 * Return: result of kernel_sendmsg().
 */
static int ksmbd_sock_writev(struct ksmbd_transport *t, struct kvec *iov,
			    int nvecs, int size, bool need_invalidate,
			    unsigned int remote_key)

{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	struct smbdirect_cmsg_buffer cbuffer;

	if (need_invalidate) {
		struct smbdirect_buffer_descriptor_v1 desc = {
			.token = remote_key,
		};

		smbdirect_buffer_remote_invalidate_cmsg_prepare(&msg, &cbuffer, &desc);
	}
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

	return kernel_sendmsg(sock, &msg, iov, nvecs, size);
}

/*
 * ->disconnect() handler: tear down the socket transport and, if a
 * connection limit is configured, give the connection slot back.
 */
static void ksmbd_sock_disconnect(struct ksmbd_transport *t)
{
	struct ksmbd_sock_transport *st = KSMBD_SOCK_TRANS(t);

	ksmbd_sock_transport_free(st);

	if (!server_conf.max_connections)
		return;
	atomic_dec(&active_num_conn);
}

/*
 * Shut down and release a listening socket. A NULL socket is ignored.
 * A failing shutdown is only logged; the socket is released regardless.
 */
static void ksmbd_destroy_socket(struct socket *ksmbd_socket)
{
	int err;

	if (!ksmbd_socket)
		return;

	/* set zero to timeout */
	ksmbd_tcp_rcv_timeout(ksmbd_socket, 0);
	ksmbd_tcp_snd_timeout(ksmbd_socket, 0);

	err = kernel_sock_shutdown(ksmbd_socket, SHUT_RDWR);
	if (err)
		pr_err("Failed to shutdown socket: %d\n", err);

	sock_release(ksmbd_socket);
}

/**
 * create_socket - create socket for ksmbd/0
 * @iface:      interface to bind the created socket to
 *
 * Return:	0 on success, error number otherwise
 */
static int create_socket(struct ksmbd_sock_interface *iface)
{
	int ret;
	struct sockaddr_in6 sin6;
	struct sockaddr_in sin;
	struct socket *ksmbd_socket;
	bool ipv4 = false;

	ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
	if (ret) {
		if (ret != -EAFNOSUPPORT)
			pr_err("Can't create socket for ipv6, fallback to ipv4: %d\n", ret);
		ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
				  &ksmbd_socket);
		if (ret) {
			pr_err("Can't create socket for ipv4: %d\n", ret);
			goto out_clear;
		}

		sin.sin_family = PF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_ANY);
		sin.sin_port = htons(server_conf.tcp_port);
		ipv4 = true;
	} else {
		sin6.sin6_family = PF_INET6;
		sin6.sin6_addr = in6addr_any;
		sin6.sin6_port = htons(server_conf.tcp_port);

		lock_sock(ksmbd_socket->sk);
		ksmbd_socket->sk->sk_ipv6only = false;
		release_sock(ksmbd_socket->sk);
	}

	ksmbd_tcp_nodelay(ksmbd_socket);
	ksmbd_tcp_reuseaddr(ksmbd_socket);

	ret = sock_setsockopt(ksmbd_socket,
			      SOL_SOCKET,
			      SO_BINDTODEVICE,
			      KERNEL_SOCKPTR(iface->name),
			      strlen(iface->name));
	if (ret != -ENODEV && ret < 0) {
		pr_err("Failed to set SO_BINDTODEVICE: %d\n", ret);
		goto out_error;
	}

	if (ipv4)
		ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
				  sizeof(sin));
	else
		ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin6,
				  sizeof(sin6));
	if (ret) {
		pr_err("Failed to bind socket: %d\n", ret);
		goto out_error;
	}

	ksmbd_socket->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
	ksmbd_socket->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;

	ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
	if (ret) {
		pr_err("Port listen() error: %d\n", ret);
		goto out_error;
	}

	iface->ksmbd_socket = ksmbd_socket;
	ret = ksmbd_sock_run_kthread(iface);
	if (ret) {
		pr_err("Can't start ksmbd main kthread: %d\n", ret);
		goto out_error;
	}
	iface->state = IFACE_STATE_CONFIGURED;

	return 0;

out_error:
	ksmbd_destroy_socket(ksmbd_socket);
out_clear:
	iface->ksmbd_socket = NULL;
	return ret;
}

static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event,
			      void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct ksmbd_sock_interface *iface;
	int ret, found = 0;

	switch (event) {
	case NETDEV_UP:
		if (netif_is_bridge_port(netdev))
			return NOTIFY_OK;

		list_for_each_entry(iface, &iface_list, entry) {
			if (!strcmp(iface->name, netdev->name)) {
				found = 1;
				if (iface->state != IFACE_STATE_DOWN)
					break;
				ret = create_socket(iface);
				if (ret)
					return NOTIFY_OK;
				break;
			}
		}
		if (!found && bind_additional_ifaces) {
			iface = ksmbd_alloc_iface(netdev->name,
						  &ksmbd_tcp_transport_ops);
			if (!iface)
				return NOTIFY_OK;
			ret = create_socket(iface);
			if (ret)
				break;
		}
		break;
	case NETDEV_DOWN:
		list_for_each_entry(iface, &iface_list, entry) {
			if (!strcmp(iface->name, netdev->name) &&
			    iface->state == IFACE_STATE_CONFIGURED) {
				ksmbd_sock_stop_kthread(iface->ksmbd_kthread);
				iface->ksmbd_kthread = NULL;
				mutex_lock(&iface->sock_release_lock);
				ksmbd_destroy_socket(iface->ksmbd_socket);
				iface->ksmbd_socket = NULL;
				mutex_unlock(&iface->sock_release_lock);

				iface->state = IFACE_STATE_DOWN;
				break;
			}
		}
		break;
	}

	return NOTIFY_DONE;
}

/* registered in ksmbd_sock_init(), unregistered in ksmbd_sock_destroy() */
static struct notifier_block ksmbd_netdev_notifier = {
	.notifier_call = ksmbd_netdev_event,
};

/**
 * ksmbd_sock_init() - hook the socket transport into netdevice events
 *
 * Return:	0 on success, otherwise the error reported by
 *		register_netdevice_notifier()
 */
int ksmbd_sock_init(void)
{
	/* without the notifier no listener would ever be brought up */
	return register_netdevice_notifier(&ksmbd_netdev_notifier);
}

/*
 * Stop an accept thread; NULL is ignored. A non-zero result of
 * kthread_stop() is only logged.
 */
static void ksmbd_sock_stop_kthread(struct task_struct *kthread)
{
	if (kthread && kthread_stop(kthread))
		pr_err("failed to stop forker thread\n");
}

/**
 * ksmbd_sock_destroy() - tear down all listening interfaces
 *
 * Unregisters the netdevice notifier and frees every interface. A listener
 * that is still running at this point is stopped first: the accept thread
 * dereferences its interface, and e.g. the "smbdirect-sock" interface has no
 * net_device of that name, so it never sees a NETDEV_DOWN event that would
 * stop it.
 */
void ksmbd_sock_destroy(void)
{
	struct ksmbd_sock_interface *iface, *tmp;

	unregister_netdevice_notifier(&ksmbd_netdev_notifier);

	list_for_each_entry_safe(iface, tmp, &iface_list, entry) {
		list_del(&iface->entry);

		/* both helpers accept NULL, i.e. an already stopped listener */
		ksmbd_sock_stop_kthread(iface->ksmbd_kthread);
		iface->ksmbd_kthread = NULL;
		mutex_lock(&iface->sock_release_lock);
		ksmbd_destroy_socket(iface->ksmbd_socket);
		iface->ksmbd_socket = NULL;
		mutex_unlock(&iface->sock_release_lock);

		kfree(iface->name);
		kfree(iface);
	}
}

/*
 * Allocate an interface named @ifname using transport ops @ops and add it
 * to iface_list in state IFACE_STATE_DOWN.
 *
 * @ops must be one of the transport ops defined in this file; anything
 * else triggers a one-time warning.
 *
 * Return: the new interface, or NULL on allocation failure or unknown @ops.
 */
static struct ksmbd_sock_interface *ksmbd_alloc_iface(const char *ifname,
					const struct ksmbd_transport_ops *ops)
{
	struct ksmbd_sock_interface *iface;

	iface = kzalloc(sizeof(*iface), GFP_KERNEL);
	if (!iface)
		return NULL;

	if (ops == &ksmbd_tcp_transport_ops) {
		iface->type = "tcp";
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	} else if (ops == &ksmbd_smbdirect_transport_ops) {
		iface->type = "smbdirect";
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */
	} else {
		WARN_ON_ONCE(1);
		goto err_free;
	}

	iface->name = kstrdup(ifname, GFP_KERNEL);
	if (!iface->name)
		goto err_free;

	iface->ops = ops;
	iface->state = IFACE_STATE_DOWN;
	mutex_init(&iface->sock_release_lock);
	list_add(&iface->entry, &iface_list);
	return iface;

err_free:
	kfree(iface);
	return NULL;
}

/**
 * ksmbd_sock_set_interfaces() - create the interfaces to listen on
 * @ifc_list:		sequence of NUL-terminated interface names
 * @ifc_list_sz:	size of @ifc_list in bytes, 0 for "all interfaces"
 *
 * With CONFIG_SMB_SERVER_SMBDIRECT an additional "smbdirect-sock" interface
 * is created and started first; a failure to do so is only logged.
 *
 * Without an explicit list one TCP interface is created for every
 * net_device of init_net that is not a bridge port, and interfaces showing
 * up later are picked up as well. With a list only the named interfaces
 * are created.
 *
 * Return:	0 on success, -ENOMEM if an interface could not be allocated
 */
int ksmbd_sock_set_interfaces(char *ifc_list, int ifc_list_sz)
{
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
	{
		struct ksmbd_sock_interface *rdma_iface;

		rdma_iface = ksmbd_alloc_iface("smbdirect-sock",
					       &ksmbd_smbdirect_transport_ops);
		if (rdma_iface) {
			int ret = create_smbdirect_socket(rdma_iface);

			if (ret)
				pr_err("create_smbdirect_socket() failed: %d\n", ret);
		}
	}
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

	if (!ifc_list_sz) {
		struct net_device *netdev;
		int rc = 0;

		rtnl_lock();
		for_each_netdev(&init_net, netdev) {
			if (netif_is_bridge_port(netdev))
				continue;
			if (!ksmbd_alloc_iface(netdev->name,
					       &ksmbd_tcp_transport_ops)) {
				rc = -ENOMEM;
				break;
			}
		}
		rtnl_unlock();
		if (rc)
			return rc;

		bind_additional_ifaces = 1;
		return 0;
	}

	while (ifc_list_sz > 0) {
		int sz;

		if (!ksmbd_alloc_iface(ifc_list, &ksmbd_tcp_transport_ops))
			return -ENOMEM;

		sz = strlen(ifc_list);
		if (!sz)
			break;

		/* step over the name and its terminating NUL */
		ifc_list += sz + 1;
		ifc_list_sz -= (sz + 1);
	}

	bind_additional_ifaces = 0;

	return 0;
}

/* transport ops of plain TCP listeners; no RDMA read/write handlers */
static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
	.read		= ksmbd_sock_read,
	.writev		= ksmbd_sock_writev,
	.disconnect	= ksmbd_sock_disconnect,
};

#ifdef CONFIG_SMB_SERVER_SMBDIRECT

/*
 * A device qualifies when it advertises IB_DEVICE_MEM_MGT_EXTENSIONS and a
 * non-zero max_fast_reg_page_list_len.
 */
static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	return (attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
	       attrs->max_fast_reg_page_list_len != 0;
}

/*
 * Check whether @netdev is backed by an RDMA device that passes
 * rdma_frwr_is_supported().
 *
 * Return: true if such a device exists, false otherwise.
 */
bool ksmbd_netdev_rdma_capable(struct net_device *netdev)
{
	struct ib_device *ibdev;
	bool capable;

	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
	if (!ibdev)
		return false;

	capable = rdma_frwr_is_supported(&ibdev->attrs);
	/* drop the reference taken by ib_device_get_by_netdev() */
	ib_device_put(ibdev);
	return capable;
}

/**
 * create_smbdirect_socket() - create and start the smbdirect listener
 * @iface:	interface that gets the listening socket and accept thread
 *
 * Creates a PF_SMBDIRECT stream socket for AF_INET, binds it to
 * INADDR_ANY port 5445, switches it to listening and starts the accept
 * kthread for @iface.
 *
 * Return:	0 on success, error number otherwise
 */
static int create_smbdirect_socket(struct ksmbd_sock_interface *iface)
{
	struct sockaddr_in sin = {
		.sin_family	= AF_INET,
		.sin_addr.s_addr= htonl(INADDR_ANY),
		.sin_port	= htons(5445),
	};
	/*
	 * Must start out as NULL: the error path hands it to
	 * ksmbd_destroy_socket(), also when sock_create() itself failed.
	 */
	struct socket *ksmbd_socket = NULL;
	int ret;

	ret = sock_create(PF_SMBDIRECT, SOCK_STREAM, AF_INET, &ksmbd_socket);
	if (ret) {
		pr_err("Can't create smbdirect socket for ipv4: %d\n", ret);
		/* do not rely on what a failed sock_create() left behind */
		ksmbd_socket = NULL;
		goto out_error;
	}

	ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
			  sizeof(sin));
	if (ret) {
		pr_err("Failed to bind socket: %d\n", ret);
		goto out_error;
	}

	ksmbd_socket->sk->sk_rcvtimeo = KSMBD_SOCK_RECV_TIMEOUT;
	ksmbd_socket->sk->sk_sndtimeo = KSMBD_SOCK_SEND_TIMEOUT;

	ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
	if (ret) {
		pr_err("Port listen() error: %d\n", ret);
		goto out_error;
	}

	/* the accept thread reads iface->ksmbd_socket, so publish it first */
	iface->ksmbd_socket = ksmbd_socket;
	ret = ksmbd_sock_run_kthread(iface);
	if (ret) {
		pr_err("Can't start ksmbd main kthread: %d\n", ret);
		goto out_error;
	}
	iface->state = IFACE_STATE_CONFIGURED;

	return 0;

out_error:
	/* ksmbd_destroy_socket() ignores a NULL socket */
	ksmbd_destroy_socket(ksmbd_socket);
	iface->ksmbd_socket = NULL;
	return ret;
}

/*
 * ->rdma_write() handler: transfer @buflen bytes from @buf to the peer
 * buffer(s) described by @remote.
 *
 * NOTE(review): @buf is described by a single bio_vec starting at its first
 * page with length @buflen — confirm this is valid for buffers that span
 * more than one page.
 *
 * Return: 0 on success, negative error from smbdirect_kern_rdma_v1_writev().
 */
static int ksmbd_sock_rdma_write(struct ksmbd_transport *t,
				 void *buf, unsigned int buflen,
				 const struct smbdirect_buffer_descriptors_v1 *remote)
{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct page *first_page = is_vmalloc_addr(buf) ?
				  vmalloc_to_page(buf) : kmap_to_page(buf);
	struct bio_vec biov = {
		.bv_page = first_page,
		.bv_offset = offset_in_page(buf),
		.bv_len = buflen,
	};
	struct iov_iter iter;
	ssize_t ret;

	iov_iter_bvec(&iter, WRITE, &biov, 1, buflen);

	ret = smbdirect_kern_rdma_v1_writev(sock, remote, buflen, &iter);

	/* the number of transferred bytes is not reported to the caller */
	return ret < 0 ? ret : 0;
}

/*
 * ->rdma_read() handler: transfer @buflen bytes from the peer buffer(s)
 * described by @remote into @buf.
 *
 * NOTE(review): @buf is described by a single bio_vec starting at its first
 * page with length @buflen — confirm this is valid for buffers that span
 * more than one page.
 *
 * Return: 0 on success, negative error from smbdirect_kern_rdma_v1_readv().
 */
static int ksmbd_sock_rdma_read(struct ksmbd_transport *t,
				void *buf, unsigned int buflen,
				const struct smbdirect_buffer_descriptors_v1 *remote)
{
	struct socket *sock = KSMBD_SOCK_TRANS(t)->sock;
	struct page *first_page = is_vmalloc_addr(buf) ?
				  vmalloc_to_page(buf) : kmap_to_page(buf);
	struct bio_vec biov = {
		.bv_page = first_page,
		.bv_offset = offset_in_page(buf),
		.bv_len = buflen,
	};
	struct iov_iter iter;
	ssize_t ret;

	iov_iter_bvec(&iter, READ, &biov, 1, buflen);

	ret = smbdirect_kern_rdma_v1_readv(sock, remote, buflen, &iter);

	/* the number of transferred bytes is not reported to the caller */
	return ret < 0 ? ret : 0;
}

/*
 * transport ops of the smbdirect listener: same socket based handlers as
 * TCP plus the RDMA read/write handlers
 */
static const struct ksmbd_transport_ops ksmbd_smbdirect_transport_ops = {
	.read		= ksmbd_sock_read,
	.writev		= ksmbd_sock_writev,
	.disconnect	= ksmbd_sock_disconnect,
	.rdma_read	= ksmbd_sock_rdma_read,
	.rdma_write	= ksmbd_sock_rdma_write,
};
#endif /* CONFIG_SMB_SERVER_SMBDIRECT */

  reply	other threads:[~2025-06-03  9:06 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-28 16:01 [PATCH v2 00/12] smb:common: introduce and use common smbdirect headers/structures (step1) Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 01/12] smb: smbdirect: add smbdirect_pdu.h with protocol definitions Stefan Metzmacher
2025-05-28 23:28   ` Namjae Jeon
2025-05-30 19:03     ` Stefan Metzmacher
2025-05-30 19:27       ` Steve French
2025-05-30 22:29       ` Namjae Jeon
     [not found]         ` <CAH2r5mvQbL_R9wrFRHF9_3XwM3e-=2vK=i1uaSCk37-FZmJq9g@mail.gmail.com>
2025-06-01  0:01           ` Namjae Jeon
     [not found]             ` <CAH2r5mso54sXPcoJWDSU4E--XMH44wFY-cdww6_6yx5CxrFtdg@mail.gmail.com>
2025-06-01  4:59               ` Namjae Jeon
2025-06-02  1:56                 ` Steve French
2025-06-02  2:19                   ` Namjae Jeon
2025-06-02 22:02                     ` Stefan Metzmacher
2025-06-03  6:20                       ` Namjae Jeon
2025-06-03  9:06                         ` Stefan Metzmacher [this message]
2025-06-03 10:55                           ` Namjae Jeon
2025-05-28 16:01 ` [PATCH v2 02/12] smb: client: make use of common smbdirect_pdu.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 03/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 04/12] smb: smbdirect: add smbdirect.h with public structures Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 05/12] smb: client: make use of common smbdirect.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 06/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 07/12] smb: smbdirect: add smbdirect_socket.h Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 08/12] smb: client: make use of common smbdirect_socket Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 09/12] smb: server: " Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 10/12] smb: smbdirect: introduce smbdirect_socket_parameters Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 11/12] smb: client: make use of common smbdirect_socket_parameters Stefan Metzmacher
2025-05-28 16:01 ` [PATCH v2 12/12] smb: server: " Stefan Metzmacher
2025-05-29  3:33 ` [PATCH v2 00/12] smb:common: introduce and use common smbdirect headers/structures (step1) Steve French
2025-05-30 16:51 ` Steve French

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=995fde08-3fed-47a0-b984-876f426e9076@samba.org \
    --to=metze@samba.org \
    --cc=hyc.lee@gmail.com \
    --cc=linkinjeon@kernel.org \
    --cc=linux-cifs@vger.kernel.org \
    --cc=longli@microsoft.com \
    --cc=meetakshisetiyaoss@gmail.com \
    --cc=samba-technical@lists.samba.org \
    --cc=smfrench@gmail.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox