From: Josef Bacik <josef@toxicpanda.com>
To: Bernd Schubert <bschubert@ddn.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>,
Amir Goldstein <amir73il@gmail.com>,
linux-fsdevel@vger.kernel.org, bernd.schubert@fastmail.fm
Subject: Re: [PATCH RFC v2 10/19] fuse: {uring} Handle SQEs - register commands
Date: Thu, 30 May 2024 15:55:59 -0400 [thread overview]
Message-ID: <20240530195559.GB2210558@perftesting> (raw)
In-Reply-To: <20240529-fuse-uring-for-6-9-rfc2-out-v1-10-d149476b1d65@ddn.com>
On Wed, May 29, 2024 at 08:00:45PM +0200, Bernd Schubert wrote:
> This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
> For now only FUSE_URING_REQ_FETCH is handled to register queue entries.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
> fs/fuse/dev.c | 1 +
> fs/fuse/dev_uring.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/fuse/dev_uring_i.h | 12 +++
> include/uapi/linux/fuse.h | 33 ++++++
> 4 files changed, 313 insertions(+)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index cd5dc6ae9272..05a87731b5c3 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2510,6 +2510,7 @@ const struct file_operations fuse_dev_operations = {
> .compat_ioctl = compat_ptr_ioctl,
> #if IS_ENABLED(CONFIG_FUSE_IO_URING)
> .mmap = fuse_uring_mmap,
> + .uring_cmd = fuse_uring_cmd,
> #endif
> };
> EXPORT_SYMBOL_GPL(fuse_dev_operations);
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 2c0ccb378908..48b1118b64f4 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -31,6 +31,27 @@
> #include <linux/topology.h>
> #include <linux/io_uring/cmd.h>
>
> +static void fuse_ring_ring_ent_unset_userspace(struct fuse_ring_ent *ent)
> +{
> + clear_bit(FRRS_USERSPACE, &ent->state);
> + list_del_init(&ent->list);
> +}
> +
> +/* Update conn limits according to ring values */
> +static void fuse_uring_conn_cfg_limits(struct fuse_ring *ring)
> +{
> + struct fuse_conn *fc = ring->fc;
> +
> + WRITE_ONCE(fc->max_pages, min_t(unsigned int, fc->max_pages,
> + ring->req_arg_len / PAGE_SIZE));
> +
> + /* This not ideal, as multiplication with nr_queue assumes the limit
> + * gets reached when all queues are used, but a single threaded
> + * application might already do that.
> + */
> + WRITE_ONCE(fc->max_background, ring->nr_queues * ring->max_nr_async);
> +}
> +
> /*
> * Basic ring setup for this connection based on the provided configuration
> */
> @@ -329,3 +350,249 @@ int fuse_uring_queue_cfg(struct fuse_ring *ring,
> return 0;
> }
>
> +/*
> + * Put a ring request onto hold, it is no longer used for now.
> + */
> +static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
> + struct fuse_ring_queue *queue)
> + __must_hold(&queue->lock)
Sorry I'm just now bringing this up, but I'd love to see a
lockdep_assert_held(<whatever lock>);
in every place where you use __must_hold, so I get a nice big warning when I'm
running stuff. I don't always run sparse, but I always test with lockdep on,
and that'll help me notice problems.
> +{
> + struct fuse_ring *ring = queue->ring;
> +
> + /* unsets all previous flags - basically resets */
> + pr_devel("%s ring=%p qid=%d tag=%d state=%lu async=%d\n", __func__,
> + ring, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> + ring_ent->async);
> +
> + if (WARN_ON(test_bit(FRRS_USERSPACE, &ring_ent->state))) {
> + pr_warn("%s qid=%d tag=%d state=%lu async=%d\n", __func__,
> + ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> + ring_ent->async);
> + return;
> + }
> +
> + WARN_ON_ONCE(!list_empty(&ring_ent->list));
> +
> + if (ring_ent->async)
> + list_add(&ring_ent->list, &queue->async_ent_avail_queue);
> + else
> + list_add(&ring_ent->list, &queue->sync_ent_avail_queue);
> +
> + set_bit(FRRS_WAIT, &ring_ent->state);
> +}
> +
> +/*
> + * fuse_uring_req_fetch command handling
> + */
> +static int fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
> + struct io_uring_cmd *cmd, unsigned int issue_flags)
> +__must_hold(ring_ent->queue->lock)
> +{
> + struct fuse_ring_queue *queue = ring_ent->queue;
> + struct fuse_ring *ring = queue->ring;
> + int ret = 0;
> + int nr_ring_sqe;
> +
> + /* register requests for foreground requests first, then backgrounds */
> + if (queue->nr_req_sync >= ring->max_nr_sync) {
> + queue->nr_req_async++;
> + ring_ent->async = 1;
> + } else
> + queue->nr_req_sync++;
IIRC the style guidelines say if you use { in any part of the if, you've got to
use them for all of it. But that may just be what we do in btrfs. Normally I
wouldn't nit about it but I have comments elsewhere for this patch.
> +
> + fuse_uring_ent_avail(ring_ent, queue);
> +
> + if (queue->nr_req_sync + queue->nr_req_async > ring->queue_depth) {
> + /* should be caught by ring state before and queue depth
> + * check before
> + */
> + WARN_ON(1);
> + pr_info("qid=%d tag=%d req cnt (fg=%d async=%d exceeds depth=%zu",
> + queue->qid, ring_ent->tag, queue->nr_req_sync,
> + queue->nr_req_async, ring->queue_depth);
> + ret = -ERANGE;
> + }
> +
> + if (ret)
> + goto out; /* erange */
This can just be
if (whatever) {
WARN_ON_ONCE(1);
return -ERANGE;
}
instead of the goto out thing.
> +
> + WRITE_ONCE(ring_ent->cmd, cmd);
> +
> + nr_ring_sqe = ring->queue_depth * ring->nr_queues;
> + if (atomic_inc_return(&ring->nr_sqe_init) == nr_ring_sqe) {
> + fuse_uring_conn_cfg_limits(ring);
> + ring->ready = 1;
> + }
> +
> +out:
> + return ret;
And this can just be return 0 here with the above change.
> +}
> +
> +static struct fuse_ring_queue *
> +fuse_uring_get_verify_queue(struct fuse_ring *ring,
> + const struct fuse_uring_cmd_req *cmd_req,
> + unsigned int issue_flags)
> +{
> + struct fuse_conn *fc = ring->fc;
> + struct fuse_ring_queue *queue;
> + int ret;
> +
> + if (!(issue_flags & IO_URING_F_SQE128)) {
> + pr_info("qid=%d tag=%d SQE128 not set\n", cmd_req->qid,
> + cmd_req->tag);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + if (unlikely(!fc->connected)) {
> + ret = -ENOTCONN;
> + goto err;
> + }
> +
> + if (unlikely(!ring->configured)) {
> + pr_info("command for a connection that is not ring configured\n");
> + ret = -ENODEV;
> + goto err;
> + }
> +
> + if (unlikely(cmd_req->qid >= ring->nr_queues)) {
> + pr_devel("qid=%u >= nr-queues=%zu\n", cmd_req->qid,
> + ring->nr_queues);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + queue = fuse_uring_get_queue(ring, cmd_req->qid);
> + if (unlikely(queue == NULL)) {
> + pr_info("Got NULL queue for qid=%d\n", cmd_req->qid);
> + ret = -EIO;
> + goto err;
> + }
> +
> + if (unlikely(!queue->configured || queue->stopped)) {
> + pr_info("Ring or queue (qid=%u) not ready.\n", cmd_req->qid);
> + ret = -ENOTCONN;
> + goto err;
> + }
> +
> + if (cmd_req->tag > ring->queue_depth) {
> + pr_info("tag=%u > queue-depth=%zu\n", cmd_req->tag,
> + ring->queue_depth);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + return queue;
> +
> +err:
> + return ERR_PTR(ret);
There's no cleanup here, so just make all the above
return ERR_PTR(-whatever)
instead of the goto err thing.
> +}
> +
> +/**
> + * Entry function from io_uring to handle the given passthrough command
> + * (op cocde IORING_OP_URING_CMD)
> + */
Docstyle thing.
> +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
> +{
> + const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
> + struct fuse_dev *fud = fuse_get_dev(cmd->file);
> + struct fuse_conn *fc = fud->fc;
> + struct fuse_ring *ring = fc->ring;
> + struct fuse_ring_queue *queue;
> + struct fuse_ring_ent *ring_ent = NULL;
> + u32 cmd_op = cmd->cmd_op;
> + int ret = 0;
> +
> + if (!ring) {
> + ret = -ENODEV;
> + goto out;
> + }
> +
> + queue = fuse_uring_get_verify_queue(ring, cmd_req, issue_flags);
> + if (IS_ERR(queue)) {
> + ret = PTR_ERR(queue);
> + goto out;
> + }
> +
> + ring_ent = &queue->ring_ent[cmd_req->tag];
> +
> + pr_devel("%s:%d received: cmd op %d qid %d (%p) tag %d (%p)\n",
> + __func__, __LINE__, cmd_op, cmd_req->qid, queue, cmd_req->tag,
> + ring_ent);
> +
> + spin_lock(&queue->lock);
> + if (unlikely(queue->stopped)) {
> + /* XXX how to ensure queue still exists? Add
> + * an rw ring->stop lock? And take that at the beginning
> + * of this function? Better would be to advise uring
> + * not to call this function at all? Or free the queue memory
> + * only, on daemon PF_EXITING?
> + */
> + ret = -ENOTCONN;
> + goto err_unlock;
> + }
> +
> + if (current == queue->server_task)
> + queue->uring_cmd_issue_flags = issue_flags;
> +
> + switch (cmd_op) {
> + case FUSE_URING_REQ_FETCH:
This is all organized kind of oddly, I think I'd prefer if you put all the code
from above where we grab the queue lock and the bit below into a helper.
So instead of
spin_lock(&queue->lock);
blah
switch (cmd_op) {
case FUSE_URING_REQ_FETCH:
blah
default:
ret = -EINVAL;
}
you have
static int fuse_uring_req_fetch(queue, cmd, issue_flags)
{
ring_ent = blah;
spin_lock(&queue->lock);
<blah>
spin_unlock(&que->lock);
return ret;
}
then
switch (cmd_op) {
case FUSE_URING_REQ_FETCH:
ret = fuse_uring_req_fetch(queue, cmd, issue_flags);
break;
default:
ret = -EINVAL;
break;
}
Alternatively just pushe all the queue stuff down into the case
FUSE_URING_REQ_FETCH part, but I think the helper is cleaner.
> + if (queue->server_task == NULL) {
> + queue->server_task = current;
> + queue->uring_cmd_issue_flags = issue_flags;
> + }
> +
> + /* No other bit must be set here */
> + if (ring_ent->state != BIT(FRRS_INIT)) {
> + pr_info_ratelimited(
> + "qid=%d tag=%d register req state %lu expected %lu",
> + cmd_req->qid, cmd_req->tag, ring_ent->state,
> + BIT(FRRS_INIT));
> + ret = -EINVAL;
> + goto err_unlock;
> + }
> +
> + fuse_ring_ring_ent_unset_userspace(ring_ent);
> +
> + ret = fuse_uring_fetch(ring_ent, cmd, issue_flags);
> + if (ret)
> + goto err_unlock;
> +
> + /*
> + * The ring entry is registered now and needs to be handled
> + * for shutdown.
> + */
> + atomic_inc(&ring->queue_refs);
> +
> + spin_unlock(&queue->lock);
> + break;
> + default:
> + ret = -EINVAL;
> + pr_devel("Unknown uring command %d", cmd_op);
> + goto err_unlock;
> + }
> +out:
> + pr_devel("uring cmd op=%d, qid=%d tag=%d ret=%d\n", cmd_op,
> + cmd_req->qid, cmd_req->tag, ret);
> +
> + if (ret < 0) {
> + if (ring_ent != NULL) {
You don't pull anything from ring_ent in the pr_info, so maybe drop the extra
if statement? Thanks,
Josef
next prev parent reply other threads:[~2024-05-30 19:56 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 18:00 [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 01/19] fuse: rename to fuse_dev_end_requests and make non-static Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 02/19] fuse: Move fuse_get_dev to header file Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 03/19] fuse: Move request bits Bernd Schubert
2024-05-29 21:10 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 04/19] fuse: Add fuse-io-uring design documentation Bernd Schubert
2024-05-29 21:17 ` Josef Bacik
2024-05-30 12:50 ` Bernd Schubert
2024-05-30 14:59 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 05/19] fuse: Add a uring config ioctl Bernd Schubert
2024-05-29 21:24 ` Josef Bacik
2024-05-30 12:51 ` Bernd Schubert
2024-06-03 13:03 ` Miklos Szeredi
2024-06-03 13:48 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 06/19] Add a vmalloc_node_user function Bernd Schubert
2024-05-30 15:10 ` Josef Bacik
2024-05-30 16:13 ` Bernd Schubert
2024-05-31 13:56 ` Christoph Hellwig
2024-06-03 15:59 ` Kent Overstreet
2024-06-03 19:24 ` Bernd Schubert
2024-06-04 4:20 ` Christoph Hellwig
2024-06-07 2:30 ` Dave Chinner
2024-06-07 4:49 ` Christoph Hellwig
2024-06-04 4:08 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 07/19] fuse uring: Add an mmap method Bernd Schubert
2024-05-30 15:37 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 08/19] fuse: Add the queue configuration ioctl Bernd Schubert
2024-05-30 15:54 ` Josef Bacik
2024-05-30 17:49 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 09/19] fuse: {uring} Add a dev_release exception for fuse-over-io-uring Bernd Schubert
2024-05-30 19:00 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 10/19] fuse: {uring} Handle SQEs - register commands Bernd Schubert
2024-05-30 19:55 ` Josef Bacik [this message]
2024-05-29 18:00 ` [PATCH RFC v2 11/19] fuse: Add support to copy from/to the ring buffer Bernd Schubert
2024-05-30 19:59 ` Josef Bacik
2024-09-01 11:56 ` Bernd Schubert
2024-09-01 11:56 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 12/19] fuse: {uring} Add uring sqe commit and fetch support Bernd Schubert
2024-05-30 20:08 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 13/19] fuse: {uring} Handle uring shutdown Bernd Schubert
2024-05-30 20:21 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 14/19] fuse: {uring} Allow to queue to the ring Bernd Schubert
2024-05-30 20:32 ` Josef Bacik
2024-05-30 21:26 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 15/19] export __wake_on_current_cpu Bernd Schubert
2024-05-30 20:37 ` Josef Bacik
2024-06-04 9:26 ` Peter Zijlstra
2024-06-04 9:36 ` Bernd Schubert
2024-06-04 19:27 ` Peter Zijlstra
2024-09-01 12:07 ` Bernd Schubert
2024-05-31 13:51 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 16/19] fuse: {uring} Wake requests on the the current cpu Bernd Schubert
2024-05-30 16:44 ` Shachar Sharon
2024-05-30 16:59 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 17/19] fuse: {uring} Send async requests to qid of core + 1 Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 18/19] fuse: {uring} Set a min cpu offset io-size for reads/writes Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 19/19] fuse: {uring} Optimize async sends Bernd Schubert
2024-05-31 16:24 ` Jens Axboe
2024-05-31 17:36 ` Bernd Schubert
2024-05-31 19:10 ` Jens Axboe
2024-06-01 16:37 ` Bernd Schubert
2024-05-30 7:07 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Amir Goldstein
2024-05-30 12:09 ` Bernd Schubert
2024-05-30 15:36 ` Kent Overstreet
2024-05-30 16:02 ` Bernd Schubert
2024-05-30 16:10 ` Kent Overstreet
2024-05-30 16:17 ` Bernd Schubert
2024-05-30 17:30 ` Kent Overstreet
2024-05-30 19:09 ` Josef Bacik
2024-05-30 20:05 ` Kent Overstreet
2024-05-31 3:53 ` [PATCH] fs: sys_ringbuffer() (WIP) Kent Overstreet
2024-05-31 13:11 ` kernel test robot
2024-05-31 15:49 ` kernel test robot
2024-05-30 16:21 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Jens Axboe
2024-05-30 16:32 ` Bernd Schubert
2024-05-30 17:26 ` Jens Axboe
2024-05-30 17:16 ` Kent Overstreet
2024-05-30 17:28 ` Jens Axboe
2024-05-30 17:58 ` Kent Overstreet
2024-05-30 18:48 ` Jens Axboe
2024-05-30 19:35 ` Kent Overstreet
2024-05-31 0:11 ` Jens Axboe
2024-06-04 23:45 ` Ming Lei
2024-05-30 20:47 ` Josef Bacik
2024-06-11 8:20 ` Miklos Szeredi
2024-06-11 10:26 ` Bernd Schubert
2024-06-11 15:35 ` Miklos Szeredi
2024-06-11 17:37 ` Bernd Schubert
2024-06-11 23:35 ` Kent Overstreet
2024-06-12 13:53 ` Bernd Schubert
2024-06-12 14:19 ` Kent Overstreet
2024-06-12 15:40 ` Bernd Schubert
2024-06-12 15:55 ` Kent Overstreet
2024-06-12 16:15 ` Bernd Schubert
2024-06-12 16:24 ` Kent Overstreet
2024-06-12 16:44 ` Bernd Schubert
2024-06-12 7:39 ` Miklos Szeredi
2024-06-12 13:32 ` Bernd Schubert
2024-06-12 13:46 ` Bernd Schubert
2024-06-12 14:07 ` Miklos Szeredi
2024-06-12 14:56 ` Bernd Schubert
2024-08-02 23:03 ` Bernd Schubert
2024-08-29 22:32 ` Bernd Schubert
2024-08-30 13:12 ` Jens Axboe
2024-08-30 13:28 ` Bernd Schubert
2024-08-30 13:33 ` Jens Axboe
2024-08-30 14:55 ` Pavel Begunkov
2024-08-30 15:10 ` Bernd Schubert
2024-08-30 20:08 ` Jens Axboe
2024-08-31 0:02 ` Bernd Schubert
2024-08-31 0:49 ` Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240530195559.GB2210558@perftesting \
--to=josef@toxicpanda.com \
--cc=amir73il@gmail.com \
--cc=bernd.schubert@fastmail.fm \
--cc=bschubert@ddn.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.