All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] io_uring: Pass whole sqe to commands
  2023-04-06 14:43 ` Breno Leitao
@ 2023-04-06 16:57 ` Breno Leitao
  -1 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 16:57 UTC (permalink / raw)
  To: dccp

Currently uring CMD operation relies on having large SQEs, but future
operations might want to use normal SQE.

The io_uring_cmd currently only saves the payload (cmd) part of the SQE,
but, for commands that use normal SQE size, it might be necessary to
access the initial SQE fields outside of the payload/cmd block.  So,
saves the whole SQE other than just the pdu.

This changes slighlty how the io_uring_cmd works, since the cmd
structures and callbacks are not opaque to io_uring anymore. I.e, the
callbacks can look at the SQE entries, not only, in the cmd structure.

The main advantage is that we don't need to create custom structures for
simple commands.

Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
 drivers/block/ublk_drv.c  | 24 ++++++++++++------------
 drivers/nvme/host/ioctl.c |  2 +-
 include/linux/io_uring.h  |  2 +-
 io_uring/opdef.c          |  2 +-
 io_uring/uring_cmd.c      | 11 ++++++-----
 5 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index d1d1c8d606c8..0e35d82eb070 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1258,7 +1258,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
 
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
-	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
+	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->sqe->cmd;
 	struct ublk_device *ub = cmd->file->private_data;
 	struct ublk_queue *ubq;
 	struct ublk_io *io;
@@ -1562,7 +1562,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
 
 static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	int ublksrv_pid = (int)header->data[0];
 	struct gendisk *disk;
 	int ret = -EINVAL;
@@ -1624,7 +1624,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
 static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	cpumask_var_t cpumask;
 	unsigned long queue;
@@ -1675,7 +1675,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
 
 static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	struct ublksrv_ctrl_dev_info info;
 	struct ublk_device *ub;
@@ -1838,7 +1838,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
 
 static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 
 	pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
 			__func__, cmd->cmd_op, header->dev_id, header->queue_id,
@@ -1857,7 +1857,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
 static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 
 	if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
@@ -1888,7 +1888,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
 static int ublk_ctrl_get_params(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	struct ublk_params_header ph;
 	int ret;
@@ -1919,7 +1919,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
 static int ublk_ctrl_set_params(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	struct ublk_params_header ph;
 	int ret = -EFAULT;
@@ -1977,7 +1977,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 static int ublk_ctrl_start_recovery(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	int ret = -EINVAL;
 	int i;
 
@@ -2019,7 +2019,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
 static int ublk_ctrl_end_recovery(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	int ublksrv_pid = (int)header->data[0];
 	int ret = -EINVAL;
 
@@ -2086,7 +2086,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub,
 static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
 		struct io_uring_cmd *cmd)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	char *dev_path = NULL;
@@ -2165,7 +2165,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
 static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 		unsigned int issue_flags)
 {
-	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->sqe->cmd;
 	struct ublk_device *ub = NULL;
 	int ret = -EINVAL;
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 723e7d5b778f..304da8f3200b 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -550,7 +550,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
 {
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+	const struct nvme_uring_cmd *cmd = (struct nvme_uring_cmd *)ioucmd->sqe->cmd;
 	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
 	struct nvme_uring_data d;
 	struct nvme_command c;
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 934e5dd4ccc0..650e6f12cc18 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
 
 struct io_uring_cmd {
 	struct file	*file;
-	const void	*cmd;
+	const struct io_uring_sqe *sqe;
 	union {
 		/* callback to defer completions to task context */
 		void (*task_work_cb)(struct io_uring_cmd *cmd);
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index cca7c5b55208..3b9c6489b8b6 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = {
 	},
 	[IORING_OP_URING_CMD] = {
 		.name			= "URING_CMD",
-		.async_size		= uring_cmd_pdu_size(1),
+		.async_size		= 2 * sizeof(struct io_uring_sqe),
 		.prep_async		= io_uring_cmd_prep_async,
 	},
 	[IORING_OP_SEND_ZC] = {
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 2e4c483075d3..9648134ccae1 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -63,14 +63,15 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 int io_uring_cmd_prep_async(struct io_kiocb *req)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-	size_t cmd_size;
+	size_t size = sizeof(struct io_uring_sqe);
 
 	BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);
 	BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);
 
-	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
+	if (req->ctx->flags & IORING_SETUP_SQE128)
+		size <<= 1;
 
-	memcpy(req->async_data, ioucmd->cmd, cmd_size);
+	memcpy(req->async_data, ioucmd->sqe, size);
 	return 0;
 }
 
@@ -96,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->imu = ctx->user_bufs[index];
 		io_req_set_rsrc_node(req, ctx, 0);
 	}
-	ioucmd->cmd = sqe->cmd;
+	ioucmd->sqe = sqe;
 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 	return 0;
 }
@@ -128,7 +129,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 	if (req_has_async_data(req))
-		ioucmd->cmd = req->async_data;
+		ioucmd->sqe = req->async_data;
 
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
 	if (ret = -EAGAIN) {
-- 
2.34.1

^ permalink raw reply related	[flat|nested] 108+ messages in thread
* [RFC PATCH 4/4] net: add uring_cmd callback to raw "protocol"
  2023-04-06 14:43 ` Breno Leitao
@ 2023-04-06 14:43 ` Breno Leitao
  -1 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 14:43 UTC (permalink / raw)
  To: dccp

This is the implementation of uring_cmd for the raw "protocol". It
basically encompasses SOCKET_URING_OP_SIOCOUTQ and
SOCKET_URING_OP_SIOCINQ, which is similar to the SIOCOUTQ and SIOCINQ
ioctls.

The return value is exactly the same as the regular ioctl (raw_ioctl()).

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/net/raw.h |  3 +++
 net/ipv4/raw.c    | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/include/net/raw.h b/include/net/raw.h
index 2c004c20ed99..ba7a96dce16b 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -99,4 +99,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
 #endif
 }
 
+int raw_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags);
+
 #endif	/* _RAW_H */
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 94df935ee0c5..3db828bc1224 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -75,6 +75,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/compat.h>
 #include <linux/uio.h>
+#include <linux/io_uring.h>
 
 struct raw_frag_vec {
 	struct msghdr *msg;
@@ -857,6 +858,29 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
 	return do_raw_getsockopt(sk, level, optname, optval, optlen);
 }
 
+int raw_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags)
+{
+	switch (cmd->sqe->cmd_op) {
+	case SOCKET_URING_OP_SIOCOUTQ:
+		return sk_wmem_alloc_get(sk);
+	case SOCKET_URING_OP_SIOCINQ: {
+		struct sk_buff *skb;
+		int amount = 0;
+
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb)
+			amount = skb->len;
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		return amount;
+	}
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+EXPORT_SYMBOL_GPL(raw_uring_cmd);
+
 static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -925,6 +949,7 @@ struct proto raw_prot = {
 	.connect	   = ip4_datagram_connect,
 	.disconnect	   = __udp_disconnect,
 	.ioctl		   = raw_ioctl,
+	.uring_cmd	   = raw_uring_cmd,
 	.init		   = raw_sk_init,
 	.setsockopt	   = raw_setsockopt,
 	.getsockopt	   = raw_getsockopt,
-- 
2.34.1

^ permalink raw reply related	[flat|nested] 108+ messages in thread
* [RFC PATCH 3/4] net: add uring_cmd callback to TCP
  2023-04-06 14:43 ` Breno Leitao
@ 2023-04-06 14:43 ` Breno Leitao
  -1 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 14:43 UTC (permalink / raw)
  To: dccp

This is the implementation of uring_cmd for the TCP protocol. It
basically encompasses SOCKET_URING_OP_SIOCOUTQ and
SOCKET_URING_OP_SIOCINQ, which is similar to the SIOCOUTQ and SIOCINQ
ioctls.

The return value is exactly the same as the regular ioctl (tcp_ioctl()).

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/net/tcp.h   |  2 ++
 net/ipv4/tcp.c      | 32 ++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c |  1 +
 3 files changed, 35 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1e..4dfd6bd63261 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -342,6 +342,8 @@ void tcp_release_cb(struct sock *sk);
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
 void tcp_delack_timer_handler(struct sock *sk);
+int tcp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags);
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 288693981b00..cf2822242e28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
+#include <linux/io_uring.h>
 
 /* Track pending CMSGs. */
 enum {
@@ -596,6 +597,37 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 }
 EXPORT_SYMBOL(tcp_poll);
 
+int tcp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	bool slow;
+	int ret;
+
+	switch (cmd->sqe->cmd_op) {
+	case SOCKET_URING_OP_SIOCINQ:
+		if (sk->sk_state = TCP_LISTEN)
+			return -EINVAL;
+
+		slow = lock_sock_fast(sk);
+		ret = tcp_inq(sk);
+		unlock_sock_fast(sk, slow);
+		return ret;
+	case SOCKET_URING_OP_SIOCOUTQ:
+		if (sk->sk_state = TCP_LISTEN)
+			return -EINVAL;
+
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			ret = 0;
+		else
+			ret = READ_ONCE(tp->write_seq) - tp->snd_una;
+		return ret;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+EXPORT_SYMBOL_GPL(tcp_uring_cmd);
+
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ea370afa70ed..900081fa2e1a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3103,6 +3103,7 @@ struct proto tcp_prot = {
 	.disconnect		= tcp_disconnect,
 	.accept			= inet_csk_accept,
 	.ioctl			= tcp_ioctl,
+	.uring_cmd		= tcp_uring_cmd,
 	.init			= tcp_v4_init_sock,
 	.destroy		= tcp_v4_destroy_sock,
 	.shutdown		= tcp_shutdown,
-- 
2.34.1

^ permalink raw reply related	[flat|nested] 108+ messages in thread
* [RFC PATCH 2/4] net: add uring_cmd callback to UDP
  2023-04-06 14:43 ` Breno Leitao
@ 2023-04-06 14:43 ` Breno Leitao
  -1 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 14:43 UTC (permalink / raw)
  To: dccp

This is the implementation of uring_cmd for the udp protocol. It
basically encompasses SOCKET_URING_OP_SIOCOUTQ and
SOCKET_URING_OP_SIOCINQ, which is similar to the SIOCOUTQ and SIOCINQ
ioctls.

The return value is exactly the same as the regular ioctl (udp_ioctl()).

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/net/udp.h        |  2 ++
 include/uapi/linux/net.h |  5 +++++
 net/ipv4/udp.c           | 16 ++++++++++++++++
 3 files changed, 23 insertions(+)

diff --git a/include/net/udp.h b/include/net/udp.h
index de4b528522bb..c0e829dacc2f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -283,6 +283,8 @@ void udp_flush_pending_frames(struct sock *sk);
 int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
 void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
 int udp_rcv(struct sk_buff *skb);
+int udp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags);
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int udp_init_sock(struct sock *sk);
 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h
index 4dabec6bd957..dd8e7ced7d24 100644
--- a/include/uapi/linux/net.h
+++ b/include/uapi/linux/net.h
@@ -55,4 +55,9 @@ typedef enum {
 
 #define __SO_ACCEPTCON	(1 << 16)	/* performed a listen		*/
 
+enum {
+	SOCKET_URING_OP_SIOCINQ		= 0,
+	SOCKET_URING_OP_SIOCOUTQ,
+};
+
 #endif /* _UAPI_LINUX_NET_H */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c605d171eb2d..d6d60600831b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -113,6 +113,7 @@
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 #include <net/udp_tunnel.h>
+#include <linux/io_uring.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6_stubs.h>
 #endif
@@ -1711,6 +1712,20 @@ static int first_packet_length(struct sock *sk)
 	return res;
 }
 
+int udp_uring_cmd(struct sock *sk, struct io_uring_cmd *cmd,
+		  unsigned int issue_flags)
+{
+	switch (cmd->sqe->cmd_op) {
+	case SOCKET_URING_OP_SIOCOUTQ:
+		return sk_wmem_alloc_get(sk);
+	case SOCKET_URING_OP_SIOCINQ:
+		return max_t(int, 0, first_packet_length(sk));
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+EXPORT_SYMBOL_GPL(udp_uring_cmd);
+
 /*
  *	IOCTL requests applicable to the UDP protocol
  */
@@ -2952,6 +2967,7 @@ struct proto udp_prot = {
 	.connect		= ip4_datagram_connect,
 	.disconnect		= udp_disconnect,
 	.ioctl			= udp_ioctl,
+	.uring_cmd		= udp_uring_cmd,
 	.init			= udp_init_sock,
 	.destroy		= udp_destroy_sock,
 	.setsockopt		= udp_setsockopt,
-- 
2.34.1

^ permalink raw reply related	[flat|nested] 108+ messages in thread
* [RFC PATCH 1/4] net: wire up support for file_operations->uring_cmd()
  2023-04-06 14:43 ` Breno Leitao
@ 2023-04-06 14:43 ` Breno Leitao
  -1 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 14:43 UTC (permalink / raw)
  To: dccp

Create the initial plumbing to call protocol specific uring_cmd
callbacks. These are io_uring specific callbacks that implement
ioctl-like operation types, such as SIOCINQ, SIOCOUTQ and others.

In order to achieve this, create uring_cmd callback placeholders in
file_ops, proto and proto_ops structures.

Create also the functions that does the plumbing from io_uring_cmd() up
to sk_proto->uring_cmd(). If the callback is not implemented,
-EOPNOTSUPP is returned.

That way, the io_uring issue path calls file_operations->uring_cmd
(sock_uring_cmd()).  This function calls proto_ops->uring_cmd
(sock_common_uring_cmd()). sock_common_uring_cmd() is responsible for
calling protocol specific (struct proto_ops) uring_cmd callback
(sock_common_uring_cmd()). sock_common_uring_cmd() then calls the proto
specific (struct proto) uring_cmd function, which are implemented in the
upcoming patch.

By the end, uring_cmd() function has access to  'struct io_uring_cmd'
which points to the whole SQE, and any field could be accessed from the
function pointer.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 include/linux/net.h  |  2 ++
 include/net/sock.h   |  6 ++++++
 net/core/sock.c      | 17 +++++++++++++++--
 net/dccp/ipv4.c      |  1 +
 net/ipv4/af_inet.c   |  3 +++
 net/l2tp/l2tp_ip.c   |  1 +
 net/mptcp/protocol.c |  1 +
 net/sctp/protocol.c  |  1 +
 net/socket.c         | 13 +++++++++++++
 9 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index b73ad8e3c212..efcc47a57069 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -182,6 +182,8 @@ struct proto_ops {
 	int	 	(*compat_ioctl) (struct socket *sock, unsigned int cmd,
 				      unsigned long arg);
 #endif
+	int		(*uring_cmd)(struct socket *sock, struct io_uring_cmd *cmd,
+				     unsigned int issue_flags);
 	int		(*gettstamp) (struct socket *sock, void __user *userstamp,
 				      bool timeval, bool time32);
 	int		(*listen)    (struct socket *sock, int len);
diff --git a/include/net/sock.h b/include/net/sock.h
index 573f2bf7e0de..57437a1e041c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -111,6 +111,7 @@ typedef struct {
 struct sock;
 struct proto;
 struct net;
+struct io_uring_cmd;
 
 typedef __u32 __bitwise __portpair;
 typedef __u64 __bitwise __addrpair;
@@ -1247,6 +1248,9 @@ struct proto {
 
 	int			(*ioctl)(struct sock *sk, int cmd,
 					 unsigned long arg);
+	int			(*uring_cmd)(struct sock *sk,
+					     struct io_uring_cmd *cmd,
+					     unsigned int issue_flags);
 	int			(*init)(struct sock *sk);
 	void			(*destroy)(struct sock *sk);
 	void			(*shutdown)(struct sock *sk, int how);
@@ -1921,6 +1925,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 			int flags);
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
 			   sockptr_t optval, unsigned int optlen);
+int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
+			  unsigned int issue_flags);
 
 void sk_common_release(struct sock *sk);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index c25888795390..1bf5e4d4ba29 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3669,6 +3669,18 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(sock_common_setsockopt);
 
+int sock_common_uring_cmd(struct socket *sock, struct io_uring_cmd *cmd,
+			  unsigned int issue_flags)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk->sk_prot || !sk->sk_prot->uring_cmd)
+		return -EOPNOTSUPP;
+
+	return sk->sk_prot->uring_cmd(sk, cmd, issue_flags);
+}
+EXPORT_SYMBOL(sock_common_uring_cmd);
+
 void sk_common_release(struct sock *sk)
 {
 	if (sk->sk_prot->destroy)
@@ -4009,7 +4021,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
 
 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
-			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
+			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
@@ -4023,6 +4035,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 		   proto_method_implemented(proto->disconnect),
 		   proto_method_implemented(proto->accept),
 		   proto_method_implemented(proto->ioctl),
+		   proto_method_implemented(proto->uring_cmd),
 		   proto_method_implemented(proto->init),
 		   proto_method_implemented(proto->destroy),
 		   proto_method_implemented(proto->shutdown),
@@ -4051,7 +4064,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
 			   "maxhdr",
 			   "slab",
 			   "module",
-			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
+			   "cl co di ac io ur in de sh ss gs se re sp bi br ha uh gp em\n");
 	else
 		proto_seq_printf(seq, list_entry(v, struct proto, node));
 	return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b780827f5e0a..47047ad05e65 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -999,6 +999,7 @@ static const struct proto_ops inet_dccp_ops = {
 	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
 	.poll		   = dccp_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	/* FIXME: work on inet_listen to rename it to sock_common_listen */
 	.listen		   = inet_dccp_listen,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8db6747f892f..1c54c3b59f2e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1036,6 +1036,7 @@ const struct proto_ops inet_stream_ops = {
 	.getname	   = inet_getname,
 	.poll		   = tcp_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = inet_listen,
 	.shutdown	   = inet_shutdown,
@@ -1071,6 +1072,7 @@ const struct proto_ops inet_dgram_ops = {
 	.getname	   = inet_getname,
 	.poll		   = udp_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
@@ -1103,6 +1105,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.getname	   = inet_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4db5a554bdbd..cdfaaada0695 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -617,6 +617,7 @@ static const struct proto_ops l2tp_ip_ops = {
 	.getname	   = l2tp_ip_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ad9c46202fc..b8182eab5ebf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3808,6 +3808,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.getname	   = inet_getname,
 	.poll		   = mptcp_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = mptcp_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c365df24ad33..b1aaf644076f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1127,6 +1127,7 @@ static const struct proto_ops inet_seqpacket_ops = {
 	.getname	   = inet_getname,	/* Semantics are different.  */
 	.poll		   = sctp_poll,
 	.ioctl		   = inet_ioctl,
+	.uring_cmd	   = sock_common_uring_cmd,
 	.gettstamp	   = sock_gettstamp,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,	/* Looks harmless.  */
diff --git a/net/socket.c b/net/socket.c
index 9c92c0e6c4da..c683110c1523 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -87,6 +87,7 @@
 #include <linux/xattr.h>
 #include <linux/nospec.h>
 #include <linux/indirect_call_wrapper.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -116,6 +117,7 @@ unsigned int sysctl_net_busy_poll __read_mostly;
 static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
 static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
+static int sock_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
 
 static int sock_close(struct inode *inode, struct file *file);
 static __poll_t sock_poll(struct file *file,
@@ -159,6 +161,7 @@ static const struct file_operations socket_file_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_sock_ioctl,
 #endif
+	.uring_cmd =	sock_uring_cmd,
 	.mmap =		sock_mmap,
 	.release =	sock_close,
 	.fasync =	sock_fasync,
@@ -1319,6 +1322,16 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return err;
 }
 
+static int sock_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	struct socket *sock = cmd->file->private_data;
+
+	if (!sock->ops || !sock->ops->uring_cmd)
+		return -EOPNOTSUPP;
+
+	return sock->ops->uring_cmd(sock, cmd, issue_flags);
+}
+
 /**
  *	sock_create_lite - creates a socket
  *	@family: protocol family (AF_INET, ...)
-- 
2.34.1

^ permalink raw reply related	[flat|nested] 108+ messages in thread
* [PATCH 0/5] add initial io_uring_cmd support for sockets
@ 2023-04-06 14:43 ` Breno Leitao
  0 siblings, 0 replies; 108+ messages in thread
From: Breno Leitao @ 2023-04-06 14:43 UTC (permalink / raw)
  To: dccp

From: Breno Leitao <leit@fb.com>

This patchset creates the initial plumbing for a io_uring command for
sockets.

For now, create two uring commands for sockets, SOCKET_URING_OP_SIOCOUTQ
and SOCKET_URING_OP_SIOCINQ. They are similar to ioctl operations
SIOCOUTQ and SIOCINQ. In fact, the code on the protocol side itself is
heavily based on the ioctl operations.

In order to test this code, I created a liburing test, which is
currently located at [1], and I will create a pull request once we are
good with this patch.

I've also run test/io_uring_passthrough to make sure the first patch
didn't regressed the NVME passthrough path.

This patchset is a RFC for two different reasons:
  * It changes slighlty on how IO uring command operates. I.e, we are
    now passing the whole SQE to the io_uring_cmd callback (instead of
    an opaque buffer). This seems to be more palatable instead of
    creating some custom structure just to fit small parameters, as in
    SOCKET_URING_OP_SIOC{IN,OUT}Q. Is this OK?

  * Pavel has some ideas about the SQE->cmd_op field, so, we can start
    discussing it here.

This work is heavily inspired by Jens Axboe's initial implementation.

[1] https://github.com/leitao/liburing/blob/master/test/socket-io-cmd.c

Breno Leitao (4):
  net: wire up support for file_operations->uring_cmd()
  net: add uring_cmd callback to UDP
  net: add uring_cmd callback to TCP
  net: add uring_cmd callback to raw "protocol"

 include/linux/net.h      |  2 ++
 include/net/raw.h        |  3 +++
 include/net/sock.h       |  6 ++++++
 include/net/tcp.h        |  2 ++
 include/net/udp.h        |  2 ++
 include/uapi/linux/net.h |  5 +++++
 net/core/sock.c          | 17 +++++++++++++++--
 net/dccp/ipv4.c          |  1 +
 net/ipv4/af_inet.c       |  3 +++
 net/ipv4/raw.c           | 26 ++++++++++++++++++++++++++
 net/ipv4/tcp.c           | 34 ++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c      |  1 +
 net/ipv4/udp.c           | 18 ++++++++++++++++++
 net/l2tp/l2tp_ip.c       |  1 +
 net/mptcp/protocol.c     |  1 +
 net/sctp/protocol.c      |  1 +
 net/socket.c             | 13 +++++++++++++
 17 files changed, 134 insertions(+), 2 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 108+ messages in thread

end of thread, other threads:[~2023-05-03 13:27 UTC | newest]

Thread overview: 108+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-06 16:57 [PATCH RFC] io_uring: Pass whole sqe to commands Breno Leitao
2023-04-06 16:57 ` Breno Leitao
2023-04-06 17:50 ` io_uring: Pass whole sqe to commands: Tests Results MPTCP CI
2023-04-07 18:51 ` [PATCH RFC] io_uring: Pass whole sqe to commands Keith Busch
2023-04-07 18:51   ` Keith Busch
2023-04-07 19:43 ` io_uring: Pass whole sqe to commands: Tests Results MPTCP CI
2023-04-11 12:22 ` [PATCH RFC] io_uring: Pass whole sqe to commands Breno Leitao
2023-04-11 12:22   ` Breno Leitao
2023-04-11 12:39 ` Pavel Begunkov
2023-04-11 12:39   ` Pavel Begunkov
2023-04-13  2:56 ` Ming Lei
2023-04-13  2:56   ` Ming Lei
2023-04-13 16:47 ` Breno Leitao
2023-04-13 16:47   ` Breno Leitao
2023-04-14  2:12 ` Ming Lei
2023-04-14  2:12   ` Ming Lei
2023-04-14 13:12 ` Pavel Begunkov
2023-04-14 13:12   ` Pavel Begunkov
2023-04-14 13:59 ` Ming Lei
2023-04-14 13:59   ` Ming Lei
2023-04-14 14:56 ` Pavel Begunkov
2023-04-14 14:56   ` Pavel Begunkov
2023-04-16  9:51 ` Ming Lei
2023-04-16  9:51   ` Ming Lei
  -- strict thread matches above, loose matches on Subject: below --
2023-04-06 14:43 [RFC PATCH 4/4] net: add uring_cmd callback to raw "protocol" Breno Leitao
2023-04-06 14:43 ` Breno Leitao
2023-04-06 14:43 [RFC PATCH 3/4] net: add uring_cmd callback to TCP Breno Leitao
2023-04-06 14:43 ` Breno Leitao
2023-04-06 20:35 ` kernel test robot
2023-04-06 14:43 [RFC PATCH 2/4] net: add uring_cmd callback to UDP Breno Leitao
2023-04-06 14:43 ` Breno Leitao
2023-04-06 19:03 ` kernel test robot
2023-04-11 12:54 ` Pavel Begunkov
2023-04-11 12:54   ` Pavel Begunkov
2023-04-06 14:43 [RFC PATCH 1/4] net: wire up support for file_operations->uring_cmd() Breno Leitao
2023-04-06 14:43 ` Breno Leitao
2023-04-06 14:43 [PATCH 0/5] add initial io_uring_cmd support for sockets Breno Leitao
2023-04-06 14:43 ` Breno Leitao
2023-04-06 15:34 ` Willem de Bruijn
2023-04-06 15:34   ` Willem de Bruijn
2023-04-06 15:59 ` Breno Leitao
2023-04-06 15:59   ` Breno Leitao
2023-04-06 16:41 ` Keith Busch
2023-04-06 16:41   ` Keith Busch
2023-04-06 16:49 ` Jens Axboe
2023-04-06 16:49   ` Jens Axboe
2023-04-06 16:58 ` Breno Leitao
2023-04-06 16:58   ` Breno Leitao
2023-04-06 18:16 ` Willem de Bruijn
2023-04-06 18:16   ` Willem de Bruijn
2023-04-07  2:46 ` David Ahern
2023-04-07  2:46   ` David Ahern
2023-04-11 11:59 ` Breno Leitao
2023-04-11 12:00   ` Breno Leitao
2023-04-11 14:36 ` David Ahern
2023-04-11 14:36   ` David Ahern
2023-04-11 14:41 ` Jens Axboe
2023-04-11 14:41   ` Jens Axboe
2023-04-11 14:51 ` Willem de Bruijn
2023-04-11 14:51   ` Willem de Bruijn
2023-04-11 14:54 ` Jens Axboe
2023-04-11 14:54   ` Jens Axboe
2023-04-11 15:00 ` Willem de Bruijn
2023-04-11 15:00   ` Willem de Bruijn
2023-04-11 15:06 ` Jens Axboe
2023-04-11 15:06   ` Jens Axboe
2023-04-11 15:10 ` David Ahern
2023-04-11 15:10   ` David Ahern
2023-04-11 15:17 ` Jens Axboe
2023-04-11 15:17   ` Jens Axboe
2023-04-11 15:24 ` Willem de Bruijn
2023-04-11 15:24   ` Willem de Bruijn
2023-04-11 15:27 ` David Ahern
2023-04-11 15:27   ` David Ahern
2023-04-11 15:28 ` Jens Axboe
2023-04-11 15:28   ` Jens Axboe
2023-04-11 15:29 ` Jens Axboe
2023-04-11 15:29   ` Jens Axboe
2023-04-12  7:39 ` David Laight
2023-04-12  7:39   ` David Laight
2023-04-12 13:53 ` Breno Leitao
2023-04-12 13:53   ` Breno Leitao
2023-04-12 14:28 ` Willem de Bruijn
2023-04-12 14:28   ` Willem de Bruijn
2023-04-13  0:02 ` Breno Leitao
2023-04-13  0:02   ` Breno Leitao
2023-04-13 14:24 ` Willem de Bruijn
2023-04-13 14:24   ` Willem de Bruijn
2023-04-13 14:45 ` Jakub Kicinski
2023-04-13 14:45   ` Jakub Kicinski
2023-04-13 14:57 ` David Laight
2023-04-13 14:57   ` David Laight
2023-04-18 13:23 ` Breno Leitao
2023-04-18 13:23   ` Breno Leitao
2023-04-18 19:41 ` Willem de Bruijn
2023-04-18 19:41   ` Willem de Bruijn
2023-04-20 14:43 ` Breno Leitao
2023-04-20 14:43   ` Breno Leitao
2023-04-20 16:48 ` Willem de Bruijn
2023-04-20 16:48   ` Willem de Bruijn
2023-05-02  9:21 ` Adrien Delorme
2023-05-02  9:21   ` Adrien Delorme
2023-05-02 13:03 ` Pavel Begunkov
2023-05-02 13:03   ` Pavel Begunkov
2023-05-03 13:11 ` Adrien Delorme
2023-05-03 13:11   ` Adrien Delorme
2023-05-03 13:27 ` David Laight
2023-05-03 13:27   ` David Laight

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.