From: Josef Bacik <josef@toxicpanda.com>
To: Bernd Schubert <bschubert@ddn.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>,
Amir Goldstein <amir73il@gmail.com>,
linux-fsdevel@vger.kernel.org, bernd.schubert@fastmail.fm
Subject: Re: [PATCH RFC v2 10/19] fuse: {uring} Handle SQEs - register commands
Date: Thu, 30 May 2024 15:55:59 -0400 [thread overview]
Message-ID: <20240530195559.GB2210558@perftesting> (raw)
In-Reply-To: <20240529-fuse-uring-for-6-9-rfc2-out-v1-10-d149476b1d65@ddn.com>
On Wed, May 29, 2024 at 08:00:45PM +0200, Bernd Schubert wrote:
> This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
> For now only FUSE_URING_REQ_FETCH is handled to register queue entries.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
> fs/fuse/dev.c | 1 +
> fs/fuse/dev_uring.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/fuse/dev_uring_i.h | 12 +++
> include/uapi/linux/fuse.h | 33 ++++++
> 4 files changed, 313 insertions(+)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index cd5dc6ae9272..05a87731b5c3 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2510,6 +2510,7 @@ const struct file_operations fuse_dev_operations = {
> .compat_ioctl = compat_ptr_ioctl,
> #if IS_ENABLED(CONFIG_FUSE_IO_URING)
> .mmap = fuse_uring_mmap,
> + .uring_cmd = fuse_uring_cmd,
> #endif
> };
> EXPORT_SYMBOL_GPL(fuse_dev_operations);
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 2c0ccb378908..48b1118b64f4 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -31,6 +31,27 @@
> #include <linux/topology.h>
> #include <linux/io_uring/cmd.h>
>
> +static void fuse_ring_ring_ent_unset_userspace(struct fuse_ring_ent *ent)
> +{
> + clear_bit(FRRS_USERSPACE, &ent->state);
> + list_del_init(&ent->list);
> +}
> +
> +/* Update conn limits according to ring values */
> +static void fuse_uring_conn_cfg_limits(struct fuse_ring *ring)
> +{
> + struct fuse_conn *fc = ring->fc;
> +
> + WRITE_ONCE(fc->max_pages, min_t(unsigned int, fc->max_pages,
> + ring->req_arg_len / PAGE_SIZE));
> +
> + /* This not ideal, as multiplication with nr_queue assumes the limit
> + * gets reached when all queues are used, but a single threaded
> + * application might already do that.
> + */
> + WRITE_ONCE(fc->max_background, ring->nr_queues * ring->max_nr_async);
> +}
> +
> /*
> * Basic ring setup for this connection based on the provided configuration
> */
> @@ -329,3 +350,249 @@ int fuse_uring_queue_cfg(struct fuse_ring *ring,
> return 0;
> }
>
> +/*
> + * Put a ring request onto hold, it is no longer used for now.
> + */
> +static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
> + struct fuse_ring_queue *queue)
> + __must_hold(&queue->lock)
Sorry I'm just now bringing this up, but I'd love to see a
lockdep_assert_held(<whatever lock>);
in every place where you use __must_hold, so I get a nice big warning when I'm
running stuff. I don't always run sparse, but I always test with lockdep on,
and that'll help me notice problems.
> +{
> + struct fuse_ring *ring = queue->ring;
> +
> + /* unsets all previous flags - basically resets */
> + pr_devel("%s ring=%p qid=%d tag=%d state=%lu async=%d\n", __func__,
> + ring, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> + ring_ent->async);
> +
> + if (WARN_ON(test_bit(FRRS_USERSPACE, &ring_ent->state))) {
> + pr_warn("%s qid=%d tag=%d state=%lu async=%d\n", __func__,
> + ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> + ring_ent->async);
> + return;
> + }
> +
> + WARN_ON_ONCE(!list_empty(&ring_ent->list));
> +
> + if (ring_ent->async)
> + list_add(&ring_ent->list, &queue->async_ent_avail_queue);
> + else
> + list_add(&ring_ent->list, &queue->sync_ent_avail_queue);
> +
> + set_bit(FRRS_WAIT, &ring_ent->state);
> +}
> +
> +/*
> + * fuse_uring_req_fetch command handling
> + */
> +static int fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
> + struct io_uring_cmd *cmd, unsigned int issue_flags)
> +__must_hold(ring_ent->queue->lock)
> +{
> + struct fuse_ring_queue *queue = ring_ent->queue;
> + struct fuse_ring *ring = queue->ring;
> + int ret = 0;
> + int nr_ring_sqe;
> +
> + /* register requests for foreground requests first, then backgrounds */
> + if (queue->nr_req_sync >= ring->max_nr_sync) {
> + queue->nr_req_async++;
> + ring_ent->async = 1;
> + } else
> + queue->nr_req_sync++;
IIRC the style guidelines say if you use { in any part of the if, you've got to
use them for all of it. But that may just be what we do in btrfs. Normally I
wouldn't nit about it but I have comments elsewhere for this patch.
> +
> + fuse_uring_ent_avail(ring_ent, queue);
> +
> + if (queue->nr_req_sync + queue->nr_req_async > ring->queue_depth) {
> + /* should be caught by ring state before and queue depth
> + * check before
> + */
> + WARN_ON(1);
> + pr_info("qid=%d tag=%d req cnt (fg=%d async=%d exceeds depth=%zu",
> + queue->qid, ring_ent->tag, queue->nr_req_sync,
> + queue->nr_req_async, ring->queue_depth);
> + ret = -ERANGE;
> + }
> +
> + if (ret)
> + goto out; /* erange */
This can just be
if (whatever) {
WARN_ON_ONCE(1);
return -ERANGE;
}
instead of the goto out thing.
> +
> + WRITE_ONCE(ring_ent->cmd, cmd);
> +
> + nr_ring_sqe = ring->queue_depth * ring->nr_queues;
> + if (atomic_inc_return(&ring->nr_sqe_init) == nr_ring_sqe) {
> + fuse_uring_conn_cfg_limits(ring);
> + ring->ready = 1;
> + }
> +
> +out:
> + return ret;
And this can just be return 0 here with the above change.
> +}
> +
> +static struct fuse_ring_queue *
> +fuse_uring_get_verify_queue(struct fuse_ring *ring,
> + const struct fuse_uring_cmd_req *cmd_req,
> + unsigned int issue_flags)
> +{
> + struct fuse_conn *fc = ring->fc;
> + struct fuse_ring_queue *queue;
> + int ret;
> +
> + if (!(issue_flags & IO_URING_F_SQE128)) {
> + pr_info("qid=%d tag=%d SQE128 not set\n", cmd_req->qid,
> + cmd_req->tag);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + if (unlikely(!fc->connected)) {
> + ret = -ENOTCONN;
> + goto err;
> + }
> +
> + if (unlikely(!ring->configured)) {
> + pr_info("command for a connection that is not ring configured\n");
> + ret = -ENODEV;
> + goto err;
> + }
> +
> + if (unlikely(cmd_req->qid >= ring->nr_queues)) {
> + pr_devel("qid=%u >= nr-queues=%zu\n", cmd_req->qid,
> + ring->nr_queues);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + queue = fuse_uring_get_queue(ring, cmd_req->qid);
> + if (unlikely(queue == NULL)) {
> + pr_info("Got NULL queue for qid=%d\n", cmd_req->qid);
> + ret = -EIO;
> + goto err;
> + }
> +
> + if (unlikely(!queue->configured || queue->stopped)) {
> + pr_info("Ring or queue (qid=%u) not ready.\n", cmd_req->qid);
> + ret = -ENOTCONN;
> + goto err;
> + }
> +
> + if (cmd_req->tag > ring->queue_depth) {
> + pr_info("tag=%u > queue-depth=%zu\n", cmd_req->tag,
> + ring->queue_depth);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + return queue;
> +
> +err:
> + return ERR_PTR(ret);
There's no cleanup here, so just make all the above
return ERR_PTR(-whatever)
instead of the goto err thing.
> +}
> +
> +/**
> + * Entry function from io_uring to handle the given passthrough command
> + * (op cocde IORING_OP_URING_CMD)
> + */
Docstyle thing.
> +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
> +{
> + const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
> + struct fuse_dev *fud = fuse_get_dev(cmd->file);
> + struct fuse_conn *fc = fud->fc;
> + struct fuse_ring *ring = fc->ring;
> + struct fuse_ring_queue *queue;
> + struct fuse_ring_ent *ring_ent = NULL;
> + u32 cmd_op = cmd->cmd_op;
> + int ret = 0;
> +
> + if (!ring) {
> + ret = -ENODEV;
> + goto out;
> + }
> +
> + queue = fuse_uring_get_verify_queue(ring, cmd_req, issue_flags);
> + if (IS_ERR(queue)) {
> + ret = PTR_ERR(queue);
> + goto out;
> + }
> +
> + ring_ent = &queue->ring_ent[cmd_req->tag];
> +
> + pr_devel("%s:%d received: cmd op %d qid %d (%p) tag %d (%p)\n",
> + __func__, __LINE__, cmd_op, cmd_req->qid, queue, cmd_req->tag,
> + ring_ent);
> +
> + spin_lock(&queue->lock);
> + if (unlikely(queue->stopped)) {
> + /* XXX how to ensure queue still exists? Add
> + * an rw ring->stop lock? And take that at the beginning
> + * of this function? Better would be to advise uring
> + * not to call this function at all? Or free the queue memory
> + * only, on daemon PF_EXITING?
> + */
> + ret = -ENOTCONN;
> + goto err_unlock;
> + }
> +
> + if (current == queue->server_task)
> + queue->uring_cmd_issue_flags = issue_flags;
> +
> + switch (cmd_op) {
> + case FUSE_URING_REQ_FETCH:
This is all organized kind of oddly, I think I'd prefer if you put all the code
from above where we grab the queue lock and the bit below into a helper.
So instead of
spin_lock(&queue->lock);
blah
switch (cmd_op) {
case FUSE_URING_REQ_FETCH:
blah
default:
ret = -EINVAL;
}
you have
static int fuse_uring_req_fetch(queue, cmd, issue_flags)
{
ring_ent = blah;
spin_lock(&queue->lock);
<blah>
spin_unlock(&que->lock);
return ret;
}
then
switch (cmd_op) {
case FUSE_URING_REQ_FETCH:
ret = fuse_uring_req_fetch(queue, cmd, issue_flags);
break;
default:
ret = -EINVAL;
break;
}
Alternatively just pushe all the queue stuff down into the case
FUSE_URING_REQ_FETCH part, but I think the helper is cleaner.
> + if (queue->server_task == NULL) {
> + queue->server_task = current;
> + queue->uring_cmd_issue_flags = issue_flags;
> + }
> +
> + /* No other bit must be set here */
> + if (ring_ent->state != BIT(FRRS_INIT)) {
> + pr_info_ratelimited(
> + "qid=%d tag=%d register req state %lu expected %lu",
> + cmd_req->qid, cmd_req->tag, ring_ent->state,
> + BIT(FRRS_INIT));
> + ret = -EINVAL;
> + goto err_unlock;
> + }
> +
> + fuse_ring_ring_ent_unset_userspace(ring_ent);
> +
> + ret = fuse_uring_fetch(ring_ent, cmd, issue_flags);
> + if (ret)
> + goto err_unlock;
> +
> + /*
> + * The ring entry is registered now and needs to be handled
> + * for shutdown.
> + */
> + atomic_inc(&ring->queue_refs);
> +
> + spin_unlock(&queue->lock);
> + break;
> + default:
> + ret = -EINVAL;
> + pr_devel("Unknown uring command %d", cmd_op);
> + goto err_unlock;
> + }
> +out:
> + pr_devel("uring cmd op=%d, qid=%d tag=%d ret=%d\n", cmd_op,
> + cmd_req->qid, cmd_req->tag, ret);
> +
> + if (ret < 0) {
> + if (ring_ent != NULL) {
You don't pull anything from ring_ent in the pr_info, so maybe drop the extra
if statement? Thanks,
Josef
next prev parent reply other threads:[~2024-05-30 19:56 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 18:00 [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 01/19] fuse: rename to fuse_dev_end_requests and make non-static Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 02/19] fuse: Move fuse_get_dev to header file Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 03/19] fuse: Move request bits Bernd Schubert
2024-05-29 21:10 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 04/19] fuse: Add fuse-io-uring design documentation Bernd Schubert
2024-05-29 21:17 ` Josef Bacik
2024-05-30 12:50 ` Bernd Schubert
2024-05-30 14:59 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 05/19] fuse: Add a uring config ioctl Bernd Schubert
2024-05-29 21:24 ` Josef Bacik
2024-05-30 12:51 ` Bernd Schubert
2024-06-03 13:03 ` Miklos Szeredi
2024-06-03 13:48 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 06/19] Add a vmalloc_node_user function Bernd Schubert
2024-05-30 15:10 ` Josef Bacik
2024-05-30 16:13 ` Bernd Schubert
2024-05-31 13:56 ` Christoph Hellwig
2024-06-03 15:59 ` Kent Overstreet
2024-06-03 19:24 ` Bernd Schubert
2024-06-04 4:20 ` Christoph Hellwig
2024-06-07 2:30 ` Dave Chinner
2024-06-07 4:49 ` Christoph Hellwig
2024-06-04 4:08 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 07/19] fuse uring: Add an mmap method Bernd Schubert
2024-05-30 15:37 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 08/19] fuse: Add the queue configuration ioctl Bernd Schubert
2024-05-30 15:54 ` Josef Bacik
2024-05-30 17:49 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 09/19] fuse: {uring} Add a dev_release exception for fuse-over-io-uring Bernd Schubert
2024-05-30 19:00 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 10/19] fuse: {uring} Handle SQEs - register commands Bernd Schubert
2024-05-30 19:55 ` Josef Bacik [this message]
2024-05-29 18:00 ` [PATCH RFC v2 11/19] fuse: Add support to copy from/to the ring buffer Bernd Schubert
2024-05-30 19:59 ` Josef Bacik
2024-09-01 11:56 ` Bernd Schubert
2024-09-01 11:56 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 12/19] fuse: {uring} Add uring sqe commit and fetch support Bernd Schubert
2024-05-30 20:08 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 13/19] fuse: {uring} Handle uring shutdown Bernd Schubert
2024-05-30 20:21 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 14/19] fuse: {uring} Allow to queue to the ring Bernd Schubert
2024-05-30 20:32 ` Josef Bacik
2024-05-30 21:26 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 15/19] export __wake_on_current_cpu Bernd Schubert
2024-05-30 20:37 ` Josef Bacik
2024-06-04 9:26 ` Peter Zijlstra
2024-06-04 9:36 ` Bernd Schubert
2024-06-04 19:27 ` Peter Zijlstra
2024-09-01 12:07 ` Bernd Schubert
2024-05-31 13:51 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 16/19] fuse: {uring} Wake requests on the the current cpu Bernd Schubert
2024-05-30 16:44 ` Shachar Sharon
2024-05-30 16:59 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 17/19] fuse: {uring} Send async requests to qid of core + 1 Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 18/19] fuse: {uring} Set a min cpu offset io-size for reads/writes Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 19/19] fuse: {uring} Optimize async sends Bernd Schubert
2024-05-31 16:24 ` Jens Axboe
2024-05-31 17:36 ` Bernd Schubert
2024-05-31 19:10 ` Jens Axboe
2024-06-01 16:37 ` Bernd Schubert
2024-05-30 7:07 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Amir Goldstein
2024-05-30 12:09 ` Bernd Schubert
2024-05-30 15:36 ` Kent Overstreet
2024-05-30 16:02 ` Bernd Schubert
2024-05-30 16:10 ` Kent Overstreet
2024-05-30 16:17 ` Bernd Schubert
2024-05-30 17:30 ` Kent Overstreet
2024-05-30 19:09 ` Josef Bacik
2024-05-30 20:05 ` Kent Overstreet
2024-05-31 3:53 ` [PATCH] fs: sys_ringbuffer() (WIP) Kent Overstreet
2024-05-31 13:11 ` kernel test robot
2024-05-31 15:49 ` kernel test robot
2024-05-30 16:21 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Jens Axboe
2024-05-30 16:32 ` Bernd Schubert
2024-05-30 17:26 ` Jens Axboe
2024-05-30 17:16 ` Kent Overstreet
2024-05-30 17:28 ` Jens Axboe
2024-05-30 17:58 ` Kent Overstreet
2024-05-30 18:48 ` Jens Axboe
2024-05-30 19:35 ` Kent Overstreet
2024-05-31 0:11 ` Jens Axboe
2024-06-04 23:45 ` Ming Lei
2024-05-30 20:47 ` Josef Bacik
2024-06-11 8:20 ` Miklos Szeredi
2024-06-11 10:26 ` Bernd Schubert
2024-06-11 15:35 ` Miklos Szeredi
2024-06-11 17:37 ` Bernd Schubert
2024-06-11 23:35 ` Kent Overstreet
2024-06-12 13:53 ` Bernd Schubert
2024-06-12 14:19 ` Kent Overstreet
2024-06-12 15:40 ` Bernd Schubert
2024-06-12 15:55 ` Kent Overstreet
2024-06-12 16:15 ` Bernd Schubert
2024-06-12 16:24 ` Kent Overstreet
2024-06-12 16:44 ` Bernd Schubert
2024-06-12 7:39 ` Miklos Szeredi
2024-06-12 13:32 ` Bernd Schubert
2024-06-12 13:46 ` Bernd Schubert
2024-06-12 14:07 ` Miklos Szeredi
2024-06-12 14:56 ` Bernd Schubert
2024-08-02 23:03 ` Bernd Schubert
2024-08-29 22:32 ` Bernd Schubert
2024-08-30 13:12 ` Jens Axboe
2024-08-30 13:28 ` Bernd Schubert
2024-08-30 13:33 ` Jens Axboe
2024-08-30 14:55 ` Pavel Begunkov
2024-08-30 15:10 ` Bernd Schubert
2024-08-30 20:08 ` Jens Axboe
2024-08-31 0:02 ` Bernd Schubert
2024-08-31 0:49 ` Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240530195559.GB2210558@perftesting \
--to=josef@toxicpanda.com \
--cc=amir73il@gmail.com \
--cc=bernd.schubert@fastmail.fm \
--cc=bschubert@ddn.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).