From: Josef Bacik <josef@toxicpanda.com>
To: Bernd Schubert <bschubert@ddn.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>,
Amir Goldstein <amir73il@gmail.com>,
linux-fsdevel@vger.kernel.org, bernd.schubert@fastmail.fm
Subject: Re: [PATCH RFC v2 12/19] fuse: {uring} Add uring sqe commit and fetch support
Date: Thu, 30 May 2024 16:08:23 -0400 [thread overview]
Message-ID: <20240530200823.GD2210558@perftesting> (raw)
In-Reply-To: <20240529-fuse-uring-for-6-9-rfc2-out-v1-12-d149476b1d65@ddn.com>
On Wed, May 29, 2024 at 08:00:47PM +0200, Bernd Schubert wrote:
> This adds support for fuse request completion through ring SQEs
> (FUSE_URING_REQ_COMMIT_AND_FETCH handling). After committing
> the ring entry it becomes available for new fuse requests.
> Handling of requests through the ring (SQE/CQE handling)
> is complete now.
>
> Fuse request data are copied through the mmaped ring buffer,
> there is no support for any zero copy yet.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
> fs/fuse/dev_uring.c | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 311 insertions(+)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 48b1118b64f4..5269b3f8891e 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -31,12 +31,23 @@
> #include <linux/topology.h>
> #include <linux/io_uring/cmd.h>
>
> +static void fuse_uring_req_end_and_get_next(struct fuse_ring_ent *ring_ent,
> + bool set_err, int error,
> + unsigned int issue_flags);
> +
Just order this above all the users instead of putting a declaration here.
> static void fuse_ring_ring_ent_unset_userspace(struct fuse_ring_ent *ent)
> {
> clear_bit(FRRS_USERSPACE, &ent->state);
> list_del_init(&ent->list);
> }
>
> +static void
> +fuse_uring_async_send_to_ring(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + io_uring_cmd_done(cmd, 0, 0, issue_flags);
> +}
> +
> /* Update conn limits according to ring values */
> static void fuse_uring_conn_cfg_limits(struct fuse_ring *ring)
> {
> @@ -350,6 +361,188 @@ int fuse_uring_queue_cfg(struct fuse_ring *ring,
> return 0;
> }
>
> +/*
> + * Checks for errors and stores it into the request
> + */
> +static int fuse_uring_ring_ent_has_err(struct fuse_ring *ring,
> + struct fuse_ring_ent *ring_ent)
> +{
> + struct fuse_conn *fc = ring->fc;
> + struct fuse_req *req = ring_ent->fuse_req;
> + struct fuse_out_header *oh = &req->out.h;
> + int err;
> +
> + if (oh->unique == 0) {
> + /* Not supportd through request based uring, this needs another
> + * ring from user space to kernel
> + */
> + pr_warn("Unsupported fuse-notify\n");
> + err = -EINVAL;
> + goto seterr;
> + }
> +
> + if (oh->error <= -512 || oh->error > 0) {
What is -512? No magic numbers please.
> + err = -EINVAL;
> + goto seterr;
> + }
> +
> + if (oh->error) {
> + err = oh->error;
> + pr_devel("%s:%d err=%d op=%d req-ret=%d", __func__, __LINE__,
> + err, req->args->opcode, req->out.h.error);
> + goto err; /* error already set */
> + }
> +
> + if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
> + pr_warn("Unpexted seqno mismatch, expected: %llu got %llu\n",
> + req->in.h.unique, oh->unique & ~FUSE_INT_REQ_BIT);
> + err = -ENOENT;
> + goto seterr;
> + }
> +
> + /* Is it an interrupt reply ID? */
> + if (oh->unique & FUSE_INT_REQ_BIT) {
> + err = 0;
> + if (oh->error == -ENOSYS)
> + fc->no_interrupt = 1;
> + else if (oh->error == -EAGAIN) {
> + /* XXX Interrupts not handled yet */
> + /* err = queue_interrupt(req); */
> + pr_warn("Intrerupt EAGAIN not supported yet");
> + err = -EINVAL;
> + }
> +
> + goto seterr;
> + }
> +
> + return 0;
> +
> +seterr:
> + pr_devel("%s:%d err=%d op=%d req-ret=%d", __func__, __LINE__, err,
> + req->args->opcode, req->out.h.error);
> + oh->error = err;
> +err:
> + pr_devel("%s:%d err=%d op=%d req-ret=%d", __func__, __LINE__, err,
> + req->args->opcode, req->out.h.error);
> + return err;
> +}
> +
> +/*
> + * Copy data from the ring buffer to the fuse request
> + */
> +static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
> + struct fuse_req *req,
> + struct fuse_ring_req *rreq)
> +{
> + struct fuse_copy_state cs;
> + struct fuse_args *args = req->args;
> +
> + fuse_copy_init(&cs, 0, NULL);
> + cs.is_uring = 1;
> + cs.ring.buf = rreq->in_out_arg;
> +
> + if (rreq->in_out_arg_len > ring->req_arg_len) {
> + pr_devel("Max ring buffer len exceeded (%u vs %zu\n",
> + rreq->in_out_arg_len, ring->req_arg_len);
> + return -EINVAL;
> + }
> + cs.ring.buf_sz = rreq->in_out_arg_len;
> + cs.req = req;
> +
> + pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
> + cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
> +
> + return fuse_copy_out_args(&cs, args, rreq->in_out_arg_len);
> +}
> +
> +/*
> + * Copy data from the req to the ring buffer
> + */
> +static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
> + struct fuse_ring_req *rreq)
> +{
> + struct fuse_copy_state cs;
> + struct fuse_args *args = req->args;
> + int err;
> +
> + fuse_copy_init(&cs, 1, NULL);
> + cs.is_uring = 1;
> + cs.ring.buf = rreq->in_out_arg;
> + cs.ring.buf_sz = ring->req_arg_len;
> + cs.req = req;
> +
> + pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
> + cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
> +
> + err = fuse_copy_args(&cs, args->in_numargs, args->in_pages,
> + (struct fuse_arg *)args->in_args, 0);
> + rreq->in_out_arg_len = cs.ring.offset;
Is this ok if there's an error? I genuinely don't know, maybe add a comment for
idiots like me?
> +
> + pr_devel("%s:%d buf=%p len=%d args=%d err=%d\n", __func__, __LINE__,
> + cs.ring.buf, cs.ring.buf_sz, args->out_numargs, err);
> +
> + return err;
> +}
> +
> +/*
> + * Write data to the ring buffer and send the request to userspace,
> + * userspace will read it
> + * This is comparable with classical read(/dev/fuse)
> + */
> +static void fuse_uring_send_to_ring(struct fuse_ring_ent *ring_ent,
> + unsigned int issue_flags, bool send_in_task)
> +{
> + struct fuse_ring *ring = ring_ent->queue->ring;
> + struct fuse_ring_req *rreq = ring_ent->rreq;
> + struct fuse_req *req = ring_ent->fuse_req;
> + struct fuse_ring_queue *queue = ring_ent->queue;
> + int err = 0;
> +
> + spin_lock(&queue->lock);
> +
> + if (WARN_ON(test_bit(FRRS_USERSPACE, &ring_ent->state) ||
> + (test_bit(FRRS_FREED, &ring_ent->state)))) {
WARN_ON(x || b)
Makes me sad when it trips because IDK which one it was, please make them have
their own warn condition.
Also I don't love using WARN_ON() in an if statement if it can be avoided, so
maybe
if (test_bit() || test_bit()) {
WARN_ON_ONCE(test_bit(USERSPACE));
WARN_ON_ONCE(test_bit(FREED));
err = -EIO;
}
Also again I'm sorry for not bringing this up early, I'd prefer WARN_ON_ONCE().
History has shown me many a hung box because I thought this would never happen
and now it's spewing stack traces to my slow ass serial console and I can't get
the box to respond at all.
> + pr_err("qid=%d tag=%d ring-req=%p buf_req=%p invalid state %lu on send\n",
> + queue->qid, ring_ent->tag, ring_ent, rreq,
> + ring_ent->state);
> + err = -EIO;
> + } else {
> + set_bit(FRRS_USERSPACE, &ring_ent->state);
> + list_add(&ring_ent->list, &queue->ent_in_userspace);
> + }
> +
> + spin_unlock(&queue->lock);
> + if (err)
> + goto err;
> +
> + err = fuse_uring_copy_to_ring(ring, req, rreq);
> + if (unlikely(err)) {
> + spin_lock(&queue->lock);
> + fuse_ring_ring_ent_unset_userspace(ring_ent);
> + spin_unlock(&queue->lock);
> + goto err;
> + }
> +
> + /* ring req go directly into the shared memory buffer */
> + rreq->in = req->in.h;
> + set_bit(FR_SENT, &req->flags);
> +
> + pr_devel("%s qid=%d tag=%d state=%lu cmd-done op=%d unique=%llu issue_flags=%u\n",
> + __func__, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> + rreq->in.opcode, rreq->in.unique, issue_flags);
> +
> + if (send_in_task)
> + io_uring_cmd_complete_in_task(ring_ent->cmd,
> + fuse_uring_async_send_to_ring);
> + else
> + io_uring_cmd_done(ring_ent->cmd, 0, 0, issue_flags);
> +
> + return;
> +
> +err:
> + fuse_uring_req_end_and_get_next(ring_ent, true, err, issue_flags);
> +}
> +
> /*
> * Put a ring request onto hold, it is no longer used for now.
> */
> @@ -381,6 +574,104 @@ static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
> set_bit(FRRS_WAIT, &ring_ent->state);
> }
>
> +/*
> + * Assign a fuse queue entry to the given entry
> + */
> +static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ring_ent,
> + struct fuse_req *req)
> +{
> + clear_bit(FRRS_WAIT, &ring_ent->state);
> + list_del_init(&req->list);
> + clear_bit(FR_PENDING, &req->flags);
> + ring_ent->fuse_req = req;
> + set_bit(FRRS_FUSE_REQ, &ring_ent->state);
> +}
> +
> +/*
> + * Release a uring entry and fetch the next fuse request if available
> + *
> + * @return true if a new request has been fetched
> + */
> +static bool fuse_uring_ent_release_and_fetch(struct fuse_ring_ent *ring_ent)
> +{
> + struct fuse_req *req = NULL;
> + struct fuse_ring_queue *queue = ring_ent->queue;
> + struct list_head *req_queue = ring_ent->async ?
> + &queue->async_fuse_req_queue : &queue->sync_fuse_req_queue;
> +
> + spin_lock(&ring_ent->queue->lock);
> + fuse_uring_ent_avail(ring_ent, queue);
> + if (!list_empty(req_queue)) {
> + req = list_first_entry(req_queue, struct fuse_req, list);
> + fuse_uring_add_req_to_ring_ent(ring_ent, req);
> + list_del_init(&ring_ent->list);
> + }
> + spin_unlock(&ring_ent->queue->lock);
> +
> + return req ? true : false;
> +}
> +
> +/*
> + * Finalize a fuse request, then fetch and send the next entry, if available
> + *
> + * has lock/unlock/lock to avoid holding the lock on calling fuse_request_end
> + */
> +static void fuse_uring_req_end_and_get_next(struct fuse_ring_ent *ring_ent,
> + bool set_err, int error,
> + unsigned int issue_flags)
> +{
> + struct fuse_req *req = ring_ent->fuse_req;
> + int has_next;
> +
> + if (set_err)
> + req->out.h.error = error;
The set_err thing seems redundant since we always have it set to true if error
is set, so just drop this bit and set error if there's an error.
> +
> + clear_bit(FR_SENT, &req->flags);
> + fuse_request_end(ring_ent->fuse_req);
> + ring_ent->fuse_req = NULL;
> + clear_bit(FRRS_FUSE_REQ, &ring_ent->state);
> +
> + has_next = fuse_uring_ent_release_and_fetch(ring_ent);
> + if (has_next) {
> + /* called within uring context - use provided flags */
> + fuse_uring_send_to_ring(ring_ent, issue_flags, false);
> + }
> +}
> +
> +/*
> + * Read data from the ring buffer, which user space has written to
> + * This is comparible with handling of classical write(/dev/fuse).
> + * Also make the ring request available again for new fuse requests.
> + */
> +static void fuse_uring_commit_and_release(struct fuse_dev *fud,
> + struct fuse_ring_ent *ring_ent,
> + unsigned int issue_flags)
> +{
> + struct fuse_ring_req *rreq = ring_ent->rreq;
> + struct fuse_req *req = ring_ent->fuse_req;
> + ssize_t err = 0;
> + bool set_err = false;
> +
> + req->out.h = rreq->out;
> +
> + err = fuse_uring_ring_ent_has_err(fud->fc->ring, ring_ent);
> + if (err) {
> + /* req->out.h.error already set */
> + pr_devel("%s:%d err=%zd oh->err=%d\n", __func__, __LINE__, err,
> + req->out.h.error);
> + goto out;
> + }
> +
> + err = fuse_uring_copy_from_ring(fud->fc->ring, req, rreq);
> + if (err)
> + set_err = true;
> +
> +out:
> + pr_devel("%s:%d ret=%zd op=%d req-ret=%d\n", __func__, __LINE__, err,
> + req->args->opcode, req->out.h.error);
> + fuse_uring_req_end_and_get_next(ring_ent, set_err, err, issue_flags);
> +}
> +
> /*
> * fuse_uring_req_fetch command handling
> */
> @@ -566,6 +857,26 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
>
> spin_unlock(&queue->lock);
> break;
> + case FUSE_URING_REQ_COMMIT_AND_FETCH:
> + if (unlikely(!ring->ready)) {
> + pr_info("commit and fetch, but fuse-uringis not ready.");
> + goto err_unlock;
> + }
> +
> + if (!test_bit(FRRS_USERSPACE, &ring_ent->state)) {
> + pr_info("qid=%d tag=%d state %lu SQE already handled\n",
> + queue->qid, ring_ent->tag, ring_ent->state);
> + goto err_unlock;
> + }
> +
> + fuse_ring_ring_ent_unset_userspace(ring_ent);
> + spin_unlock(&queue->lock);
> +
> + WRITE_ONCE(ring_ent->cmd, cmd);
> + fuse_uring_commit_and_release(fud, ring_ent, issue_flags);
> +
> + ret = 0;
> + break;
Hmm ok this changes my comments on the previous patch slightly, tho I think
still it would be better to push this code into a helper as well and do the
locking in there, let me go look at the resulting code...yeah ok I think it's
still better to just have these two cases in their own helper. Thanks,
Josef
next prev parent reply other threads:[~2024-05-30 20:08 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 18:00 [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 01/19] fuse: rename to fuse_dev_end_requests and make non-static Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 02/19] fuse: Move fuse_get_dev to header file Bernd Schubert
2024-05-29 21:09 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 03/19] fuse: Move request bits Bernd Schubert
2024-05-29 21:10 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 04/19] fuse: Add fuse-io-uring design documentation Bernd Schubert
2024-05-29 21:17 ` Josef Bacik
2024-05-30 12:50 ` Bernd Schubert
2024-05-30 14:59 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 05/19] fuse: Add a uring config ioctl Bernd Schubert
2024-05-29 21:24 ` Josef Bacik
2024-05-30 12:51 ` Bernd Schubert
2024-06-03 13:03 ` Miklos Szeredi
2024-06-03 13:48 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 06/19] Add a vmalloc_node_user function Bernd Schubert
2024-05-30 15:10 ` Josef Bacik
2024-05-30 16:13 ` Bernd Schubert
2024-05-31 13:56 ` Christoph Hellwig
2024-06-03 15:59 ` Kent Overstreet
2024-06-03 19:24 ` Bernd Schubert
2024-06-04 4:20 ` Christoph Hellwig
2024-06-07 2:30 ` Dave Chinner
2024-06-07 4:49 ` Christoph Hellwig
2024-06-04 4:08 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 07/19] fuse uring: Add an mmap method Bernd Schubert
2024-05-30 15:37 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 08/19] fuse: Add the queue configuration ioctl Bernd Schubert
2024-05-30 15:54 ` Josef Bacik
2024-05-30 17:49 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 09/19] fuse: {uring} Add a dev_release exception for fuse-over-io-uring Bernd Schubert
2024-05-30 19:00 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 10/19] fuse: {uring} Handle SQEs - register commands Bernd Schubert
2024-05-30 19:55 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 11/19] fuse: Add support to copy from/to the ring buffer Bernd Schubert
2024-05-30 19:59 ` Josef Bacik
2024-09-01 11:56 ` Bernd Schubert
2024-09-01 11:56 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 12/19] fuse: {uring} Add uring sqe commit and fetch support Bernd Schubert
2024-05-30 20:08 ` Josef Bacik [this message]
2024-05-29 18:00 ` [PATCH RFC v2 13/19] fuse: {uring} Handle uring shutdown Bernd Schubert
2024-05-30 20:21 ` Josef Bacik
2024-05-29 18:00 ` [PATCH RFC v2 14/19] fuse: {uring} Allow to queue to the ring Bernd Schubert
2024-05-30 20:32 ` Josef Bacik
2024-05-30 21:26 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 15/19] export __wake_on_current_cpu Bernd Schubert
2024-05-30 20:37 ` Josef Bacik
2024-06-04 9:26 ` Peter Zijlstra
2024-06-04 9:36 ` Bernd Schubert
2024-06-04 19:27 ` Peter Zijlstra
2024-09-01 12:07 ` Bernd Schubert
2024-05-31 13:51 ` Christoph Hellwig
2024-05-29 18:00 ` [PATCH RFC v2 16/19] fuse: {uring} Wake requests on the the current cpu Bernd Schubert
2024-05-30 16:44 ` Shachar Sharon
2024-05-30 16:59 ` Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 17/19] fuse: {uring} Send async requests to qid of core + 1 Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 18/19] fuse: {uring} Set a min cpu offset io-size for reads/writes Bernd Schubert
2024-05-29 18:00 ` [PATCH RFC v2 19/19] fuse: {uring} Optimize async sends Bernd Schubert
2024-05-31 16:24 ` Jens Axboe
2024-05-31 17:36 ` Bernd Schubert
2024-05-31 19:10 ` Jens Axboe
2024-06-01 16:37 ` Bernd Schubert
2024-05-30 7:07 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Amir Goldstein
2024-05-30 12:09 ` Bernd Schubert
2024-05-30 15:36 ` Kent Overstreet
2024-05-30 16:02 ` Bernd Schubert
2024-05-30 16:10 ` Kent Overstreet
2024-05-30 16:17 ` Bernd Schubert
2024-05-30 17:30 ` Kent Overstreet
2024-05-30 19:09 ` Josef Bacik
2024-05-30 20:05 ` Kent Overstreet
2024-05-31 3:53 ` [PATCH] fs: sys_ringbuffer() (WIP) Kent Overstreet
2024-05-31 13:11 ` kernel test robot
2024-05-31 15:49 ` kernel test robot
2024-05-30 16:21 ` [PATCH RFC v2 00/19] fuse: fuse-over-io-uring Jens Axboe
2024-05-30 16:32 ` Bernd Schubert
2024-05-30 17:26 ` Jens Axboe
2024-05-30 17:16 ` Kent Overstreet
2024-05-30 17:28 ` Jens Axboe
2024-05-30 17:58 ` Kent Overstreet
2024-05-30 18:48 ` Jens Axboe
2024-05-30 19:35 ` Kent Overstreet
2024-05-31 0:11 ` Jens Axboe
2024-06-04 23:45 ` Ming Lei
2024-05-30 20:47 ` Josef Bacik
2024-06-11 8:20 ` Miklos Szeredi
2024-06-11 10:26 ` Bernd Schubert
2024-06-11 15:35 ` Miklos Szeredi
2024-06-11 17:37 ` Bernd Schubert
2024-06-11 23:35 ` Kent Overstreet
2024-06-12 13:53 ` Bernd Schubert
2024-06-12 14:19 ` Kent Overstreet
2024-06-12 15:40 ` Bernd Schubert
2024-06-12 15:55 ` Kent Overstreet
2024-06-12 16:15 ` Bernd Schubert
2024-06-12 16:24 ` Kent Overstreet
2024-06-12 16:44 ` Bernd Schubert
2024-06-12 7:39 ` Miklos Szeredi
2024-06-12 13:32 ` Bernd Schubert
2024-06-12 13:46 ` Bernd Schubert
2024-06-12 14:07 ` Miklos Szeredi
2024-06-12 14:56 ` Bernd Schubert
2024-08-02 23:03 ` Bernd Schubert
2024-08-29 22:32 ` Bernd Schubert
2024-08-30 13:12 ` Jens Axboe
2024-08-30 13:28 ` Bernd Schubert
2024-08-30 13:33 ` Jens Axboe
2024-08-30 14:55 ` Pavel Begunkov
2024-08-30 15:10 ` Bernd Schubert
2024-08-30 20:08 ` Jens Axboe
2024-08-31 0:02 ` Bernd Schubert
2024-08-31 0:49 ` Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240530200823.GD2210558@perftesting \
--to=josef@toxicpanda.com \
--cc=amir73il@gmail.com \
--cc=bernd.schubert@fastmail.fm \
--cc=bschubert@ddn.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.