All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ferry Meng <mengferry@linux.alibaba.com>
To: lizetao <lizetao1@huawei.com>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"io-uring@vger.kernel.org" <io-uring@vger.kernel.org>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Christoph Hellwig <hch@infradead.org>,
	Joseph Qi <joseph.qi@linux.alibaba.com>,
	Jeffle Xu <jefflexu@linux.alibaba.com>
Subject: Re: [PATCH v1 2/3] virtio-blk: add uring_cmd support for I/O passthru on chardev.
Date: Wed, 26 Feb 2025 20:04:11 +0800	[thread overview]
Message-ID: <86311349-5ce2-4a67-87d2-c0080946bb7a@linux.alibaba.com> (raw)
In-Reply-To: <e7e8751a45334b9c8ac75ac8ed325d7c@huawei.com>


On 1/7/25 9:14 PM, lizetao wrote:
> Hi,
>
>> -----Original Message-----
>> From: Ferry Meng <mengferry@linux.alibaba.com>
>> Sent: Wednesday, December 18, 2024 5:25 PM
>> To: Michael S . Tsirkin <mst@redhat.com>; Jason Wang
>> <jasowang@redhat.com>; linux-block@vger.kernel.org; Jens Axboe
>> <axboe@kernel.dk>; virtualization@lists.linux.dev
>> Cc: linux-kernel@vger.kernel.org; io-uring@vger.kernel.org; Stefan Hajnoczi
>> <stefanha@redhat.com>; Christoph Hellwig <hch@infradead.org>; Joseph Qi
>> <joseph.qi@linux.alibaba.com>; Jeffle Xu <jefflexu@linux.alibaba.com>; Ferry
>> Meng <mengferry@linux.alibaba.com>
>> Subject: [PATCH v1 2/3] virtio-blk: add uring_cmd support for I/O passthru on
>> chardev.
>>
>> Add ->uring_cmd() support for virtio-blk chardev (/dev/vdXc0).
>> According to virtio spec, in addition to passing 'hdr' info into kernel, we also
>> need to pass vaddr & data length of the 'iov' requeired for the writev/readv op.
>>
>> Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
>> ---
>>   drivers/block/virtio_blk.c      | 223 +++++++++++++++++++++++++++++++-
>>   include/uapi/linux/virtio_blk.h |  16 +++
>>   2 files changed, 235 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index
>> 3487aaa67514..cd88cf939144 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -18,6 +18,9 @@
>>   #include <linux/vmalloc.h>
>>   #include <uapi/linux/virtio_ring.h>
>>   #include <linux/cdev.h>
>> +#include <linux/io_uring/cmd.h>
>> +#include <linux/types.h>
>> +#include <linux/uio.h>
>>
>>   #define PART_BITS 4
>>   #define VQ_NAME_LEN 16
>> @@ -54,6 +57,20 @@ static struct class *vd_chr_class;
>>
>>   static struct workqueue_struct *virtblk_wq;
>>
>> +struct virtblk_uring_cmd_pdu {
>> +	struct request *req;
>> +	struct bio *bio;
>> +	int status;
>> +};
>> +
>> +struct virtblk_command {
>> +	struct virtio_blk_outhdr out_hdr;
>> +
>> +	__u64	data;
>> +	__u32	data_len;
>> +	__u32	flag;
>> +};
>> +
>>   struct virtio_blk_vq {
>>   	struct virtqueue *vq;
>>   	spinlock_t lock;
>> @@ -122,6 +139,11 @@ struct virtblk_req {
>>   	struct scatterlist sg[];
>>   };
>>
>> +static void __user *virtblk_to_user_ptr(uintptr_t ptrval) {
>> +	return (void __user *)ptrval;
>> +}
>> +
>>   static inline blk_status_t virtblk_result(u8 status)  {
>>   	switch (status) {
>> @@ -259,9 +281,6 @@ static blk_status_t virtblk_setup_cmd(struct
>> virtio_device *vdev,
>>   	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>> op_is_zone_mgmt(req_op(req)))
>>   		return BLK_STS_NOTSUPP;
>>
>> -	/* Set fields for all request types */
>> -	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
>> -
>>   	switch (req_op(req)) {
>>   	case REQ_OP_READ:
>>   		type = VIRTIO_BLK_T_IN;
>> @@ -309,9 +328,11 @@ static blk_status_t virtblk_setup_cmd(struct
>> virtio_device *vdev,
>>   		type = VIRTIO_BLK_T_ZONE_RESET_ALL;
>>   		break;
>>   	case REQ_OP_DRV_IN:
>> +	case REQ_OP_DRV_OUT:
>>   		/*
>>   		 * Out header has already been prepared by the caller
>> (virtblk_get_id()
>> -		 * or virtblk_submit_zone_report()), nothing to do here.
>> +		 * virtblk_submit_zone_report() or io_uring passthrough cmd),
>> nothing
>> +		 * to do here.
>>   		 */
>>   		return 0;
>>   	default:
>> @@ -323,6 +344,7 @@ static blk_status_t virtblk_setup_cmd(struct
>> virtio_device *vdev,
>>   	vbr->in_hdr_len = in_hdr_len;
>>   	vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
>>   	vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector);
>> +	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
>>
>>   	if (type == VIRTIO_BLK_T_DISCARD || type ==
>> VIRTIO_BLK_T_WRITE_ZEROES ||
>>   	    type == VIRTIO_BLK_T_SECURE_ERASE) { @@ -832,6 +854,7 @@
>> static int virtblk_get_id(struct gendisk *disk, char *id_str)
>>   	vbr = blk_mq_rq_to_pdu(req);
>>   	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
>>   	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev,
>> VIRTIO_BLK_T_GET_ID);
>> +	vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev,
>> +req_get_ioprio(req));
>>   	vbr->out_hdr.sector = 0;
>>
>>   	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES,
>> GFP_KERNEL); @@ -1250,6 +1273,197 @@ static const struct blk_mq_ops
>> virtio_mq_ops = {
>>   	.poll		= virtblk_poll,
>>   };
>>
>> +static inline struct virtblk_uring_cmd_pdu *virtblk_get_uring_cmd_pdu(
>> +		struct io_uring_cmd *ioucmd)
>> +{
>> +	return (struct virtblk_uring_cmd_pdu *)&ioucmd->pdu; }
>> +
>> +static void virtblk_uring_task_cb(struct io_uring_cmd *ioucmd,
>> +		unsigned int issue_flags)
>> +{
>> +	struct virtblk_uring_cmd_pdu *pdu =
>> virtblk_get_uring_cmd_pdu(ioucmd);
>> +	struct virtblk_req *vbr = blk_mq_rq_to_pdu(pdu->req);
>> +	u64 result = 0;
>> +
>> +	if (pdu->bio)
>> +		blk_rq_unmap_user(pdu->bio);
>> +
>> +	/* currently result has no use, it should be zero as cqe->res */
>> +	io_uring_cmd_done(ioucmd, vbr->in_hdr.status, result, issue_flags); }
>> +
>> +static enum rq_end_io_ret virtblk_uring_cmd_end_io(struct request *req,
>> +						   blk_status_t err)
>> +{
>> +	struct io_uring_cmd *ioucmd = req->end_io_data;
>> +	struct virtblk_uring_cmd_pdu *pdu =
>> virtblk_get_uring_cmd_pdu(ioucmd);
>> +
>> +	/*
>> +	 * For iopoll, complete it directly. Note that using the uring_cmd
>> +	 * helper for this is safe only because we check blk_rq_is_poll().
>> +	 * As that returns false if we're NOT on a polled queue, then it's
>> +	 * safe to use the polled completion helper.
>> +	 *
>> +	 * Otherwise, move the completion to task work.
>> +	 */
>> +	if (blk_rq_is_poll(req)) {
>> +		if (pdu->bio)
>> +			blk_rq_unmap_user(pdu->bio);
>> +		io_uring_cmd_iopoll_done(ioucmd, 0, pdu->status);
>> +	} else {
>> +		io_uring_cmd_do_in_task_lazy(ioucmd, virtblk_uring_task_cb);
>> +	}
>> +
>> +	return RQ_END_IO_FREE;
>> +}
>> +
>> +static struct virtblk_req *virtblk_req(struct request *req) {
>> +	return blk_mq_rq_to_pdu(req);
>> +}
>> +
>> +static inline enum req_op virtblk_req_op(const struct virtblk_uring_cmd
>> +*cmd) {
>> +	return (cmd->type & VIRTIO_BLK_T_OUT) ? REQ_OP_DRV_OUT :
>> +REQ_OP_DRV_IN; }
>> +
>> +static struct request *virtblk_alloc_user_request(
>> +		struct request_queue *q, struct virtblk_command *cmd,
>> +		blk_opf_t rq_flags, blk_mq_req_flags_t blk_flags) {
>> +	struct request *req;
>> +
>> +	req = blk_mq_alloc_request(q, rq_flags, blk_flags);
>> +	if (IS_ERR(req))
>> +		return req;
>> +
>> +	req->rq_flags |= RQF_DONTPREP;
>> +	memcpy(&virtblk_req(req)->out_hdr, &cmd->out_hdr, sizeof(struct
>> virtio_blk_outhdr));
>> +	return req;
>> +}
>> +
>> +static int virtblk_map_user_request(struct request *req, u64 ubuffer,
>> +		unsigned int bufflen, struct io_uring_cmd *ioucmd,
>> +		bool vec)
>> +{
>> +	struct request_queue *q = req->q;
>> +	struct virtio_blk *vblk = q->queuedata;
>> +	struct block_device *bdev = vblk ? vblk->disk->part0 : NULL;
>> +	struct bio *bio = NULL;
>> +	int ret;
>> +
>> +	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
>> +		struct iov_iter iter;
>> +
>> +		/* fixedbufs is only for non-vectored io */
>> +		if (WARN_ON_ONCE(vec))
>> +			return -EINVAL;
> Shoule be goto out here? Or req will not be free. And I suggest to
> free request in virtblk_uring_cmd_io().
Thx, u are right. This will be fixed in v2 series.
I will send out v2 in next week.
>> +		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
>> +				rq_data_dir(req), &iter, ioucmd);
>> +		if (ret < 0)
>> +			goto out;
>> +		ret = blk_rq_map_user_iov(q, req, NULL,
>> +			&iter, GFP_KERNEL);
>> +	} else {
>> +		ret = blk_rq_map_user_io(req, NULL,
>> +				virtblk_to_user_ptr(ubuffer),
>> +				bufflen, GFP_KERNEL, vec, 0,
>> +				0, rq_data_dir(req));
>> +	}
>> +	if (ret)
>> +		goto out;
>> +
>> +	bio = req->bio;
>> +	if (bdev)
>> +		bio_set_dev(bio, bdev);
>> +	return 0;
>> +
>> +out:
>> +	blk_mq_free_request(req);
>> +	return ret;
>> +}
>> +
>> +static int virtblk_uring_cmd_io(struct virtio_blk *vblk,
>> +		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool
>> vec) {
>> +	struct virtblk_uring_cmd_pdu *pdu =
>> virtblk_get_uring_cmd_pdu(ioucmd);
>> +	const struct virtblk_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd-
>>> sqe);
>> +	struct request_queue *q = vblk->disk->queue;
>> +	struct virtblk_req *vbr;
>> +	struct virtblk_command d;
>> +	struct request *req;
>> +	blk_opf_t rq_flags = REQ_ALLOC_CACHE | virtblk_req_op(cmd);
>> +	blk_mq_req_flags_t blk_flags = 0;
>> +	int ret;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EACCES;
>> +
>> +	d.out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, READ_ONCE(cmd-
>>> ioprio));
>> +	d.out_hdr.type = cpu_to_virtio32(vblk->vdev, READ_ONCE(cmd->type));
>> +	d.out_hdr.sector = cpu_to_virtio64(vblk->vdev, READ_ONCE(cmd-
>>> sector));
>> +	d.data = READ_ONCE(cmd->data);
>> +	d.data_len = READ_ONCE(cmd->data_len);
>> +
>> +	if (issue_flags & IO_URING_F_NONBLOCK) {
>> +		rq_flags |= REQ_NOWAIT;
>> +		blk_flags = BLK_MQ_REQ_NOWAIT;
>> +	}
>> +	if (issue_flags & IO_URING_F_IOPOLL)
>> +		rq_flags |= REQ_POLLED;
>> +
>> +	req = virtblk_alloc_user_request(q, &d, rq_flags, blk_flags);
>> +	if (IS_ERR(req))
>> +		return PTR_ERR(req);
>> +
>> +	vbr = virtblk_req(req);
>> +	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
>> +	if (d.data && d.data_len) {
>> +		ret = virtblk_map_user_request(req, d.data, d.data_len,
>> ioucmd, vec);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	/* to free bio on completion, as req->bio will be null at that time */
>> +	pdu->bio = req->bio;
>> +	pdu->req = req;
>> +	req->end_io_data = ioucmd;
>> +	req->end_io = virtblk_uring_cmd_end_io;
>> +	blk_execute_rq_nowait(req, false);
>> +	return -EIOCBQUEUED;
>> +}
>> +
>> +
>> +static int virtblk_uring_cmd(struct virtio_blk *vblk, struct io_uring_cmd
>> *ioucmd,
>> +			     unsigned int issue_flags)
>> +{
>> +	int ret;
>> +
>> +	BUILD_BUG_ON(sizeof(struct virtblk_uring_cmd_pdu) >
>> +sizeof(ioucmd->pdu));
>> +
>> +	switch (ioucmd->cmd_op) {
>> +	case VIRTBLK_URING_CMD_IO:
>> +		ret = virtblk_uring_cmd_io(vblk, ioucmd, issue_flags, false);
>> +		break;
>> +	case VIRTBLK_URING_CMD_IO_VEC:
>> +		ret = virtblk_uring_cmd_io(vblk, ioucmd, issue_flags, true);
>> +		break;
>> +	default:
>> +		ret = -ENOTTY;
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static int virtblk_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned
>> +int issue_flags) {
>> +	struct virtio_blk *vblk = container_of(file_inode(ioucmd->file)->i_cdev,
>> +			struct virtio_blk, cdev);
>> +
>> +	return virtblk_uring_cmd(vblk, ioucmd, issue_flags); }
>> +
>>   static void virtblk_cdev_rel(struct device *dev)  {
>>   	ida_free(&vd_chr_minor_ida, MINOR(dev->devt)); @@ -1297,6
>> +1511,7 @@ static int virtblk_cdev_add(struct virtio_blk *vblk,
>>
>>   static const struct file_operations virtblk_chr_fops = {
>>   	.owner		= THIS_MODULE,
>> +	.uring_cmd	= virtblk_chr_uring_cmd,
>>   };
>>
>>   static unsigned int virtblk_queue_depth; diff --git
>> a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index
>> 3744e4da1b2a..93b6e1b5b9a4 100644
>> --- a/include/uapi/linux/virtio_blk.h
>> +++ b/include/uapi/linux/virtio_blk.h
>> @@ -313,6 +313,22 @@ struct virtio_scsi_inhdr {  };  #endif
>> /* !VIRTIO_BLK_NO_LEGACY */
>>
>> +struct virtblk_uring_cmd {
>> +	/* VIRTIO_BLK_T* */
>> +	__u32 type;
>> +	/* io priority. */
>> +	__u32 ioprio;
>> +	/* Sector (ie. 512 byte offset) */
>> +	__u64 sector;
>> +
>> +	__u64 data;
>> +	__u32 data_len;
>> +	__u32 flag;
>> +};
>> +
>> +#define VIRTBLK_URING_CMD_IO		1
>> +#define VIRTBLK_URING_CMD_IO_VEC	2
>> +
>>   /* And this is the final byte of the write scatter-gather list. */
>>   #define VIRTIO_BLK_S_OK		0
>>   #define VIRTIO_BLK_S_IOERR	1
>> --
>> 2.43.5
>>
>>

  reply	other threads:[~2025-02-26 12:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-18  9:24 [PATCH v1 0/3] virtio-blk: add io_uring passthrough support Ferry Meng
2024-12-18  9:24 ` [PATCH v1 1/3] virtio-blk: add virtio-blk chardev support Ferry Meng
2024-12-30  7:47   ` Joseph Qi
2025-01-07  4:53   ` Jingbo Xu
2024-12-18  9:24 ` [PATCH v1 2/3] virtio-blk: add uring_cmd support for I/O passthru on chardev Ferry Meng
2024-12-30  8:00   ` Joseph Qi
2025-01-07 13:14   ` lizetao
2025-02-26 12:04     ` Ferry Meng [this message]
2024-12-18  9:24 ` [PATCH v1 3/3] virtio-blk: add uring_cmd iopoll support Ferry Meng
2025-01-09 17:27 ` [PATCH v1 0/3] virtio-blk: add io_uring passthrough support Stefan Hajnoczi
2025-02-19  2:01 ` Stefan Hajnoczi
2025-02-26 11:10   ` Ferry Meng
2025-02-27  6:57     ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=86311349-5ce2-4a67-87d2-c0080946bb7a@linux.alibaba.com \
    --to=mengferry@linux.alibaba.com \
    --cc=hch@infradead.org \
    --cc=io-uring@vger.kernel.org \
    --cc=jefflexu@linux.alibaba.com \
    --cc=joseph.qi@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizetao1@huawei.com \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.