From: Jens Axboe <axboe@kernel.dk>
To: Brian Foster <bfoster@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>,
"Kirill A. Shutemov" <kirill@shutemov.name>,
linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
hannes@cmpxchg.org, clm@meta.com, linux-kernel@vger.kernel.org,
willy@infradead.org
Subject: Re: [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED
Date: Tue, 12 Nov 2024 10:19:02 -0700 [thread overview]
Message-ID: <3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk> (raw)
In-Reply-To: <7a4ef71f-905e-4f2a-b3d2-8fd939c5a865@kernel.dk>
On 11/12/24 10:06 AM, Jens Axboe wrote:
> On 11/12/24 9:39 AM, Brian Foster wrote:
>> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote:
>>> On 11/11/24 10:13 PM, Christoph Hellwig wrote:
>>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote:
>>>>> Here's the slightly cleaned up version, this is the one I ran testing
>>>>> with.
>>>>
>>>> Looks reasonable to me, but you probably get better reviews on the
>>>> fstests lists.
>>>
>>> I'll send it out once this patchset is a bit closer to integration,
>>> there's the usual chicken and egg situation with it. For now, it's quite
>>> handy for my testing, found a few issues with this version. So thanks
>>> for the suggestion, sure beats writing more of your own test cases :-)
>>>
>>
>> fsx support is probably a good idea as well. It's similar in idea to
>> fsstress, but bashes the same file with mixed operations and includes
>> data integrity validation checks as well. It's pretty useful for
>> uncovering subtle corner case issues or bad interactions..
>
> Indeed, I did that too. Re-running xfstests right now with that too.
Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support
for both the sync read/write and io_uring paths.
diff --git a/ltp/fsx.c b/ltp/fsx.c
index 41933354..104910ff 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -43,6 +43,10 @@
# define MAP_FILE 0
#endif
+#ifndef RWF_UNCACHED
+#define RWF_UNCACHED 0x80
+#endif
+
#define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */
/* Operation flags (bitmask) */
@@ -101,7 +105,9 @@ int logcount = 0; /* total ops */
enum {
/* common operations */
OP_READ = 0,
+ OP_READ_UNCACHED,
OP_WRITE,
+ OP_WRITE_UNCACHED,
OP_MAPREAD,
OP_MAPWRITE,
OP_MAX_LITE,
@@ -190,15 +196,16 @@ int o_direct; /* -Z */
int aio = 0;
int uring = 0;
int mark_nr = 0;
+int rwf_uncached = 1;
int page_size;
int page_mask;
int mmap_mask;
-int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
#define READ 0
#define WRITE 1
-#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d)
+#define fsxread(a,b,c,d,f) fsx_rw(READ, a,b,c,d,f)
+#define fsxwrite(a,b,c,d,f) fsx_rw(WRITE, a,b,c,d,f)
struct timespec deadline;
@@ -266,7 +273,9 @@ prterr(const char *prefix)
static const char *op_names[] = {
[OP_READ] = "read",
+ [OP_READ_UNCACHED] = "read_uncached",
[OP_WRITE] = "write",
+ [OP_WRITE_UNCACHED] = "write_uncached",
[OP_MAPREAD] = "mapread",
[OP_MAPWRITE] = "mapwrite",
[OP_TRUNCATE] = "truncate",
@@ -393,12 +402,14 @@ logdump(void)
prt("\t******WWWW");
break;
case OP_READ:
+ case OP_READ_UNCACHED:
prt("READ 0x%x thru 0x%x\t(0x%x bytes)",
lp->args[0], lp->args[0] + lp->args[1] - 1,
lp->args[1]);
if (overlap)
prt("\t***RRRR***");
break;
+ case OP_WRITE_UNCACHED:
case OP_WRITE:
prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)",
lp->args[0], lp->args[0] + lp->args[1] - 1,
@@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
}
void
-doread(unsigned offset, unsigned size)
+__doread(unsigned offset, unsigned size, int flags) /* flags: RWF_* bits handed to fsxread() */
{
- off_t ret;
unsigned iret;
offset -= offset % readbdy;
@@ -818,23 +828,39 @@ doread(unsigned offset, unsigned size)
(monitorend == -1 || offset <= monitorend))))))
prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("doread: lseek");
- report_failure(140);
- }
- iret = fsxread(fd, temp_buf, size, offset);
+ iret = fsxread(fd, temp_buf, size, offset, flags);
if (iret != size) {
- if (iret == -1)
- prterr("doread: read");
- else
+ if (iret == -1) {
+ if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+ rwf_uncached = 0; /* clear the flag so doread_uncached() falls back to a plain read, as __dowrite() does */
+ return;
+ }
+ prterr("doread: read");
+ } else {
prt("short read: 0x%x bytes instead of 0x%x\n",
iret, size);
+ }
report_failure(141);
}
check_buffers(temp_buf, offset, size);
}
+void
+doread(unsigned offset, unsigned size)
+{
+ __doread(offset, size, 0); /* plain read: no RWF_* flags passed down */
+}
+void
+doread_uncached(unsigned offset, unsigned size)
+{
+ if (rwf_uncached) { /* only attempt RWF_UNCACHED while the global flag is set */
+ __doread(offset, size, RWF_UNCACHED);
+ if (rwf_uncached) /* flag still set after the attempt: nothing more to do */
+ return;
+ }
+ __doread(offset, size, 0); /* flag is clear: issue the read without RWF_UNCACHED */
+}
+
void
check_eofpage(char *s, unsigned offset, char *p, int size)
{
@@ -870,7 +896,6 @@ check_contents(void)
unsigned map_offset;
unsigned map_size;
char *p;
- off_t ret;
unsigned iret;
if (!check_buf) {
@@ -885,13 +910,7 @@ check_contents(void)
if (size == 0)
return;
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("doread: lseek");
- report_failure(140);
- }
-
- iret = fsxread(fd, check_buf, size, offset);
+ iret = fsxread(fd, check_buf, size, offset, 0);
if (iret != size) {
if (iret == -1)
prterr("check_contents: read");
@@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size)
}
void
-dowrite(unsigned offset, unsigned size)
+__dowrite(unsigned offset, unsigned size, int flags) /* flags: RWF_* bits handed to fsxwrite() */
{
- off_t ret;
unsigned iret;
offset -= offset % writebdy;
@@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size)
(monitorend == -1 || offset <= monitorend))))))
prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("dowrite: lseek");
- report_failure(150);
- }
- iret = fsxwrite(fd, good_buf + offset, size, offset);
+ iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
if (iret != size) {
- if (iret == -1)
+ if (iret == -1) {
+ if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) { /* uncached write rejected as unsupported */
+ rwf_uncached = 0; /* clear the flag so dowrite_uncached() retries without RWF_UNCACHED */
+ return;
+ }
prterr("dowrite: write");
- else
+ } else {
prt("short write: 0x%x bytes instead of 0x%x\n",
iret, size);
+ }
report_failure(151);
}
if (do_fsync) {
@@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size)
}
}
+void
+dowrite(unsigned offset, unsigned size)
+{
+ __dowrite(offset, size, 0); /* plain write: no RWF_* flags passed down */
+}
+
+void
+dowrite_uncached(unsigned offset, unsigned size)
+{
+ if (rwf_uncached) { /* only attempt RWF_UNCACHED while the global flag is set */
+ __dowrite(offset, size, RWF_UNCACHED);
+ if (rwf_uncached) /* flag still set after the attempt: nothing more to do */
+ return;
+ }
+ __dowrite(offset, size, 0); /* flag was cleared (or already clear): write without RWF_UNCACHED */
+}
void
domapwrite(unsigned offset, unsigned size)
@@ -2340,11 +2374,21 @@ have_op:
doread(offset, size);
break;
+ case OP_READ_UNCACHED:
+ TRIM_OFF_LEN(offset, size, file_size);
+ doread_uncached(offset, size);
+ break;
+
case OP_WRITE:
TRIM_OFF_LEN(offset, size, maxfilelen);
dowrite(offset, size);
break;
+ case OP_WRITE_UNCACHED:
+ TRIM_OFF_LEN(offset, size, maxfilelen);
+ dowrite_uncached(offset, size);
+ break;
+
case OP_MAPREAD:
TRIM_OFF_LEN(offset, size, file_size);
domapread(offset, size);
@@ -2702,7 +2746,7 @@ uring_setup()
}
int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
{
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
@@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
} else {
io_uring_prep_writev(sqe, fd, &iovec, 1, o);
}
+ sqe->rw_flags = flags;
ret = io_uring_submit_and_wait(&ring, 1);
if (ret != 1) {
@@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
}
#else
int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags) /* stub: flags is accepted but unused, the body only reports and exits */
{
fprintf(stderr, "io_rw: need IO_URING support!\n");
exit(111);
#endif
int
-fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags) /* flags reaches uring_rw and preadv2/pwritev2; the aio_rw call below does not take it */
{
int ret;
if (aio) {
ret = aio_rw(rw, fd, buf, len, offset);
} else if (uring) {
- ret = uring_rw(rw, fd, buf, len, offset);
+ ret = uring_rw(rw, fd, buf, len, offset, flags);
} else {
+ struct iovec iov = { .iov_base = buf, .iov_len = len }; /* single-segment vector covering buf[0..len) */
+
if (rw == READ)
- ret = read(fd, buf, len);
+ ret = preadv2(fd, &iov, 1, offset, flags); /* offset is passed explicitly, replacing the removed lseek+read */
else
- ret = write(fd, buf, len);
+ ret = pwritev2(fd, &iov, 1, offset, flags); /* offset is passed explicitly, replacing the removed lseek+write */
}
return ret;
}
--
Jens Axboe
next prev parent reply other threads:[~2024-11-12 17:19 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-10 15:27 [PATCHSET v2 0/15] Uncached buffered IO Jens Axboe
2024-11-10 15:27 ` [PATCH 01/15] mm/filemap: change filemap_create_folio() to take a struct kiocb Jens Axboe
2024-11-10 15:27 ` [PATCH 02/15] mm/readahead: add folio allocation helper Jens Axboe
2024-11-10 15:27 ` [PATCH 03/15] mm: add PG_uncached page flag Jens Axboe
2024-11-10 15:27 ` [PATCH 04/15] mm/readahead: add readahead_control->uncached member Jens Axboe
2024-11-10 15:27 ` [PATCH 05/15] mm/filemap: use page_cache_sync_ra() to kick off read-ahead Jens Axboe
2024-11-10 15:27 ` [PATCH 06/15] mm/truncate: make invalidate_complete_folio2() public Jens Axboe
2024-11-10 15:27 ` [PATCH 07/15] fs: add RWF_UNCACHED iocb and FOP_UNCACHED file_operations flag Jens Axboe
2024-11-10 15:28 ` [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED Jens Axboe
2024-11-11 9:15 ` Kirill A. Shutemov
2024-11-11 14:12 ` Jens Axboe
2024-11-11 15:16 ` Christoph Hellwig
2024-11-11 15:17 ` Jens Axboe
2024-11-11 17:09 ` Jens Axboe
2024-11-11 23:42 ` Jens Axboe
2024-11-12 5:13 ` Christoph Hellwig
2024-11-12 15:14 ` Jens Axboe
2024-11-12 16:39 ` Brian Foster
2024-11-12 17:06 ` Jens Axboe
2024-11-12 17:19 ` Jens Axboe [this message]
2024-11-12 18:44 ` Brian Foster
2024-11-12 19:08 ` Jens Axboe
2024-11-12 19:39 ` Brian Foster
2024-11-12 19:45 ` Jens Axboe
2024-11-12 20:21 ` Brian Foster
2024-11-12 20:25 ` Jens Axboe
2024-11-13 14:07 ` Jens Axboe
2024-11-11 15:25 ` Kirill A. Shutemov
2024-11-11 15:31 ` Jens Axboe
2024-11-11 15:51 ` Kirill A. Shutemov
2024-11-11 15:57 ` Jens Axboe
2024-11-11 16:29 ` Kirill A. Shutemov
2024-11-10 15:28 ` [PATCH 09/15] mm/filemap: drop uncached pages when writeback completes Jens Axboe
2024-11-11 9:17 ` Kirill A. Shutemov
2024-11-10 15:28 ` [PATCH 10/15] mm/filemap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-11-10 15:28 ` [PATCH 11/15] mm: add FGP_UNCACHED folio creation flag Jens Axboe
2024-11-10 15:28 ` [PATCH 12/15] ext4: add RWF_UNCACHED write support Jens Axboe
2024-11-10 15:28 ` [PATCH 13/15] iomap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-11-10 15:28 ` [PATCH 14/15] xfs: punt uncached write completions to the completion wq Jens Axboe
2024-11-10 15:28 ` [PATCH 15/15] xfs: flag as supporting FOP_UNCACHED Jens Axboe
2024-11-11 15:27 ` Christoph Hellwig
2024-11-11 15:33 ` Jens Axboe
2024-11-11 17:25 ` [PATCHSET v2 0/15] Uncached buffered IO Matthew Wilcox
2024-11-11 17:39 ` Jens Axboe
2024-11-11 21:24 ` Yu Zhao
2024-11-11 21:48 ` Matthew Wilcox
2024-11-11 22:07 ` Yu Zhao
2024-11-20 23:11 ` Yuanchu Xie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk \
--to=axboe@kernel.dk \
--cc=bfoster@redhat.com \
--cc=clm@meta.com \
--cc=hannes@cmpxchg.org \
--cc=hch@infradead.org \
--cc=kirill@shutemov.name \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox