From: Jens Axboe <axboe@kernel.dk>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Pavel Begunkov <asml.silence@gmail.com>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: Re: [git pull] iov_iter fixes
Date: Fri, 10 Sep 2021 10:06:25 -0600 [thread overview]
Message-ID: <1a61c333-680d-71a0-3849-5bfef555a49f@kernel.dk> (raw)
In-Reply-To: <4b26d8cd-c3fa-8536-a295-850ecf052ecd@kernel.dk>
On 9/10/21 9:04 AM, Jens Axboe wrote:
> We could pretty this up and have the state part be explicit in iov_iter,
> and have the store/restore parts end up in uio.h. That'd tie them closer
> together, though I don't expect iov_iter changes to be an issue. It
> would make it more maintainable, though. I'll try and hack up this
> generic solution, see if that looks any better.
Looks something like this. Not super pretty in terms of needing a define
for this, and maybe I'm missing something, but ideally we'd want it as
an anonymous struct that's defined inside iov_iter. Anyway, gets the
point across. Alternatively, since we're down to just a few members now,
we could just duplicate them in each struct...
This would be split into two patches, one for the iov_iter_state addition and
the save/restore helpers, and then one switching io_uring to use them.
Figured we'd need some agreement on this first...
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 855ea544807f..539c94299d64 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2608,8 +2608,6 @@ static bool io_resubmit_prep(struct io_kiocb *req)
if (!rw)
return !io_req_prep_async(req);
- /* may have left rw->iter inconsistent on -EIOCBQUEUED */
- iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
return true;
}
@@ -3431,14 +3429,23 @@ static bool need_read_all(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
+static void io_iter_restore(struct iov_iter *iter, struct iov_iter_state *state,
+ ssize_t did_bytes)
+{
+ iov_iter_restore_state(iter, state);
+ if (did_bytes > 0)
+ iov_iter_advance(iter, did_bytes);
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
- ssize_t io_size, ret, ret2;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct iov_iter_state state;
+ ssize_t ret, ret2;
if (rw) {
iter = &rw->iter;
@@ -3448,8 +3455,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
}
- io_size = iov_iter_count(iter);
- req->result = io_size;
+ req->result = iov_iter_count(iter);
+ iov_iter_save_state(iter, &state);
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
@@ -3463,7 +3470,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
return ret ?: -EAGAIN;
}
- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
+ ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), state.count);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -3479,18 +3486,17 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
/* no retry on NONBLOCK nor RWF_NOWAIT */
if (req->flags & REQ_F_NOWAIT)
goto done;
- /* some cases will consume bytes even on error returns */
- iov_iter_reexpand(iter, iter->count + iter->truncated);
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
- } else if (ret <= 0 || ret == io_size || !force_nonblock ||
+ } else if (ret <= 0 || ret == state.count || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
+ io_iter_restore(iter, &state, ret);
+
ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
if (ret2)
return ret2;
@@ -3501,7 +3507,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
iter = &rw->iter;
do {
- io_size -= ret;
+ state.count -= ret;
rw->bytes_done += ret;
/* if we can retry, do so with the callbacks armed */
if (!io_rw_should_retry(req)) {
@@ -3520,7 +3526,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
return 0;
/* we got some bytes, but not all. retry. */
kiocb->ki_flags &= ~IOCB_WAITQ;
- } while (ret > 0 && ret < io_size);
+ } while (ret > 0 && ret < state.count);
done:
kiocb_done(kiocb, ret, issue_flags);
out_free:
@@ -3543,8 +3549,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
- ssize_t ret, ret2, io_size;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct iov_iter_state state;
+ ssize_t ret, ret2;
if (rw) {
iter = &rw->iter;
@@ -3554,8 +3561,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
}
- io_size = iov_iter_count(iter);
- req->result = io_size;
+ req->result = iov_iter_count(iter);
+ iov_iter_save_state(iter, &state);
+ ret2 = 0;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
@@ -3572,7 +3580,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
(req->flags & REQ_F_ISREG))
goto copy_iov;
- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
+ ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), state.count);
if (unlikely(ret))
goto out_free;
@@ -3619,9 +3627,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb_done(kiocb, ret2, issue_flags);
} else {
copy_iov:
- /* some cases will consume bytes even on error returns */
- iov_iter_reexpand(iter, iter->count + iter->truncated);
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ io_iter_restore(iter, &state, ret2);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
return ret ?: -EAGAIN;
}
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5265024e8b90..4f9d483096cd 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -27,11 +27,25 @@ enum iter_type {
ITER_DISCARD,
};
+#define IOV_ITER_STATE \
+ size_t iov_offset; \
+ size_t count; \
+ union { \
+ unsigned long nr_segs; \
+ struct { \
+ unsigned int head; \
+ unsigned int start_head; \
+ }; \
+ loff_t xarray_start; \
+ }; \
+
+struct iov_iter_state {
+ IOV_ITER_STATE;
+};
+
struct iov_iter {
u8 iter_type;
bool data_source;
- size_t iov_offset;
- size_t count;
union {
const struct iovec *iov;
const struct kvec *kvec;
@@ -40,12 +54,10 @@ struct iov_iter {
struct pipe_inode_info *pipe;
};
union {
- unsigned long nr_segs;
+ struct iov_iter_state state;
struct {
- unsigned int head;
- unsigned int start_head;
+ IOV_ITER_STATE;
};
- loff_t xarray_start;
};
size_t truncated;
};
@@ -55,6 +67,33 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
return i->iter_type;
}
+static inline void iov_iter_save_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ *state = iter->state;
+}
+
+static inline void iov_iter_restore_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ iter->iov_offset = state->iov_offset;
+ iter->count = state->count;
+
+ switch (iov_iter_type(iter)) {
+ case ITER_IOVEC:
+ case ITER_KVEC:
+ case ITER_BVEC:
+ iter->iov -= state->nr_segs - iter->nr_segs;
+ fallthrough;
+ case ITER_PIPE:
+ case ITER_XARRAY:
+ iter->nr_segs = state->nr_segs;
+ break;
+ case ITER_DISCARD:
+ break;
+ }
+}
+
static inline bool iter_is_iovec(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_IOVEC;
--
Jens Axboe
next prev parent reply other threads:[~2021-09-10 16:10 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-09 4:22 [git pull] iov_iter fixes Al Viro
2021-09-09 19:37 ` Linus Torvalds
2021-09-09 21:19 ` Jens Axboe
2021-09-09 21:39 ` Jens Axboe
2021-09-09 21:56 ` Linus Torvalds
2021-09-09 22:21 ` Jens Axboe
2021-09-09 22:56 ` Linus Torvalds
2021-09-10 1:35 ` Jens Axboe
2021-09-10 2:43 ` Jens Axboe
2021-09-10 2:48 ` Al Viro
2021-09-10 3:06 ` Jens Axboe
2021-09-10 3:15 ` Al Viro
2021-09-10 3:23 ` Jens Axboe
2021-09-10 3:24 ` Al Viro
2021-09-10 3:28 ` Jens Axboe
2021-09-13 15:29 ` David Laight
2021-09-09 21:42 ` Dave Chinner
2021-09-10 2:57 ` Al Viro
2021-09-10 3:05 ` Jens Axboe
2021-09-10 3:11 ` Al Viro
2021-09-10 3:22 ` Jens Axboe
2021-09-10 3:27 ` Al Viro
2021-09-10 3:30 ` Jens Axboe
2021-09-10 3:36 ` Al Viro
2021-09-10 13:57 ` Jens Axboe
2021-09-10 14:42 ` Al Viro
2021-09-10 15:08 ` Jens Axboe
2021-09-10 15:32 ` Al Viro
2021-09-10 15:36 ` Jens Axboe
2021-09-10 15:04 ` Jens Axboe
2021-09-10 16:06 ` Jens Axboe [this message]
2021-09-10 16:44 ` Linus Torvalds
2021-09-10 16:56 ` Al Viro
2021-09-10 16:58 ` Linus Torvalds
2021-09-10 17:26 ` Jens Axboe
2021-09-10 17:31 ` Linus Torvalds
2021-09-10 17:32 ` Jens Axboe
2021-09-10 18:48 ` Al Viro
2021-09-10 19:04 ` Linus Torvalds
2021-09-10 19:10 ` Linus Torvalds
2021-09-10 19:10 ` Jens Axboe
2021-09-10 17:04 ` Jens Axboe
2021-09-09 22:54 ` Pavel Begunkov
2021-09-09 22:57 ` Pavel Begunkov
2021-09-09 23:14 ` Pavel Begunkov
2021-09-09 20:03 ` pr-tracker-bot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1a61c333-680d-71a0-3849-5bfef555a49f@kernel.dk \
--to=axboe@kernel.dk \
--cc=asml.silence@gmail.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).