public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Brian Foster <bfoster@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	hannes@cmpxchg.org, clm@meta.com, linux-kernel@vger.kernel.org,
	willy@infradead.org
Subject: Re: [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED
Date: Tue, 12 Nov 2024 10:19:02 -0700	[thread overview]
Message-ID: <3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk> (raw)
In-Reply-To: <7a4ef71f-905e-4f2a-b3d2-8fd939c5a865@kernel.dk>

On 11/12/24 10:06 AM, Jens Axboe wrote:
> On 11/12/24 9:39 AM, Brian Foster wrote:
>> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote:
>>> On 11/11/24 10:13 PM, Christoph Hellwig wrote:
>>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote:
>>>>> Here's the slightly cleaned up version, this is the one I ran testing
>>>>> with.
>>>>
>>>> Looks reasonable to me, but you probably get better reviews on the
>>>> fstests lists.
>>>
>>> I'll send it out once this patchset is a bit closer to integration,
>>> there's the usual chicken and egg situation with it. For now, it's quite
>>> handy for my testing, found a few issues with this version. So thanks
>>> for the suggestion, sure beats writing more of your own test cases :-)
>>>
>>
>> fsx support is probably a good idea as well. It's similar in idea to
>> fsstress, but bashes the same file with mixed operations and includes
>> data integrity validation checks as well. It's pretty useful for
>> uncovering subtle corner case issues or bad interactions..
> 
> Indeed, I did that too. Re-running xfstests right now with that too.

Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support
for both the sync read/write and io_uring paths.


diff --git a/ltp/fsx.c b/ltp/fsx.c
index 41933354..104910ff 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -43,6 +43,10 @@
 # define MAP_FILE 0
 #endif
 
+#ifndef RWF_UNCACHED
+#define RWF_UNCACHED	0x80
+#endif
+
 #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
 
 /* Operation flags (bitmask) */
@@ -101,7 +105,9 @@ int			logcount = 0;	/* total ops */
 enum {
 	/* common operations */
 	OP_READ = 0,
+	OP_READ_UNCACHED,
 	OP_WRITE,
+	OP_WRITE_UNCACHED,
 	OP_MAPREAD,
 	OP_MAPWRITE,
 	OP_MAX_LITE,
@@ -190,15 +196,16 @@ int	o_direct;			/* -Z */
 int	aio = 0;
 int	uring = 0;
 int	mark_nr = 0;
+int	rwf_uncached = 1;
 
 int page_size;
 int page_mask;
 int mmap_mask;
-int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
 #define READ 0
 #define WRITE 1
-#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
+#define fsxread(a,b,c,d,f)	fsx_rw(READ, a,b,c,d,f)
+#define fsxwrite(a,b,c,d,f)	fsx_rw(WRITE, a,b,c,d,f)
 
 struct timespec deadline;
 
@@ -266,7 +273,9 @@ prterr(const char *prefix)
 
 static const char *op_names[] = {
 	[OP_READ] = "read",
+	[OP_READ_UNCACHED] = "read_uncached",
 	[OP_WRITE] = "write",
+	[OP_WRITE_UNCACHED] = "write_uncached",
 	[OP_MAPREAD] = "mapread",
 	[OP_MAPWRITE] = "mapwrite",
 	[OP_TRUNCATE] = "truncate",
@@ -393,12 +402,14 @@ logdump(void)
 				prt("\t******WWWW");
 			break;
 		case OP_READ:
+		case OP_READ_UNCACHED:
 			prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
 			    lp->args[0], lp->args[0] + lp->args[1] - 1,
 			    lp->args[1]);
 			if (overlap)
 				prt("\t***RRRR***");
 			break;
+		case OP_WRITE_UNCACHED:
 		case OP_WRITE:
 			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
 			    lp->args[0], lp->args[0] + lp->args[1] - 1,
@@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
 }
 
 void
-doread(unsigned offset, unsigned size)
+__doread(unsigned offset, unsigned size, int flags)
 {
-	off_t ret;
 	unsigned iret;
 
 	offset -= offset % readbdy;
@@ -818,23 +828,39 @@ doread(unsigned offset, unsigned size)
 			(monitorend == -1 || offset <= monitorend))))))
 		prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
 		    offset, offset + size - 1, size);
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("doread: lseek");
-		report_failure(140);
-	}
-	iret = fsxread(fd, temp_buf, size, offset);
+	iret = fsxread(fd, temp_buf, size, offset, flags);
 	if (iret != size) {
-		if (iret == -1)
-			prterr("doread: read");
-		else
+		if (iret == -1) {
+			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+				rwf_uncached = 1;
+				return;
+			}
+			prterr("dowrite: read");
+		} else {
 			prt("short read: 0x%x bytes instead of 0x%x\n",
 			    iret, size);
+		}
 		report_failure(141);
 	}
 	check_buffers(temp_buf, offset, size);
 }
+void
+doread(unsigned offset, unsigned size)
+{
+	__doread(offset, size, 0);
+}
 
+void
+doread_uncached(unsigned offset, unsigned size)
+{
+	if (rwf_uncached) {
+		__doread(offset, size, RWF_UNCACHED);
+		if (rwf_uncached)
+			return;
+	}
+	__doread(offset, size, 0);
+}
+	
 void
 check_eofpage(char *s, unsigned offset, char *p, int size)
 {
@@ -870,7 +896,6 @@ check_contents(void)
 	unsigned map_offset;
 	unsigned map_size;
 	char *p;
-	off_t ret;
 	unsigned iret;
 
 	if (!check_buf) {
@@ -885,13 +910,7 @@ check_contents(void)
 	if (size == 0)
 		return;
 
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("doread: lseek");
-		report_failure(140);
-	}
-
-	iret = fsxread(fd, check_buf, size, offset);
+	iret = fsxread(fd, check_buf, size, offset, 0);
 	if (iret != size) {
 		if (iret == -1)
 			prterr("check_contents: read");
@@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size)
 }
 
 void
-dowrite(unsigned offset, unsigned size)
+__dowrite(unsigned offset, unsigned size, int flags)
 {
-	off_t ret;
 	unsigned iret;
 
 	offset -= offset % writebdy;
@@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size)
 			(monitorend == -1 || offset <= monitorend))))))
 		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
 		    offset, offset + size - 1, size);
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("dowrite: lseek");
-		report_failure(150);
-	}
-	iret = fsxwrite(fd, good_buf + offset, size, offset);
+	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
 	if (iret != size) {
-		if (iret == -1)
+		if (iret == -1) {
+			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+				rwf_uncached = 0;
+				return;
+			}
 			prterr("dowrite: write");
-		else
+		} else {
 			prt("short write: 0x%x bytes instead of 0x%x\n",
 			    iret, size);
+		}
 		report_failure(151);
 	}
 	if (do_fsync) {
@@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size)
 	}
 }
 
+void
+dowrite(unsigned offset, unsigned size)
+{
+	__dowrite(offset, size, 0);
+}
+
+void
+dowrite_uncached(unsigned offset, unsigned size)
+{
+	if (rwf_uncached) {
+		__dowrite(offset, size, RWF_UNCACHED);
+		if (rwf_uncached)
+			return;
+	}
+	__dowrite(offset, size, 0);
+}
 
 void
 domapwrite(unsigned offset, unsigned size)
@@ -2340,11 +2374,21 @@ have_op:
 		doread(offset, size);
 		break;
 
+	case OP_READ_UNCACHED:
+		TRIM_OFF_LEN(offset, size, file_size);
+		doread_uncached(offset, size);
+		break;
+
 	case OP_WRITE:
 		TRIM_OFF_LEN(offset, size, maxfilelen);
 		dowrite(offset, size);
 		break;
 
+	case OP_WRITE_UNCACHED:
+		TRIM_OFF_LEN(offset, size, maxfilelen);
+		dowrite_uncached(offset, size);
+		break;
+
 	case OP_MAPREAD:
 		TRIM_OFF_LEN(offset, size, file_size);
 		domapread(offset, size);
@@ -2702,7 +2746,7 @@ uring_setup()
 }
 
 int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	struct io_uring_sqe     *sqe;
 	struct io_uring_cqe     *cqe;
@@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 		} else {
 			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
 		}
+		sqe->rw_flags = flags;
 
 		ret = io_uring_submit_and_wait(&ring, 1);
 		if (ret != 1) {
@@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 }
 #else
 int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	fprintf(stderr, "io_rw: need IO_URING support!\n");
 	exit(111);
@@ -2789,19 +2834,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 #endif
 
 int
-fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	int ret;
 
 	if (aio) {
 		ret = aio_rw(rw, fd, buf, len, offset);
 	} else if (uring) {
-		ret = uring_rw(rw, fd, buf, len, offset);
+		ret = uring_rw(rw, fd, buf, len, offset, flags);
 	} else {
+		struct iovec iov = { .iov_base = buf, .iov_len = len };
+
 		if (rw == READ)
-			ret = read(fd, buf, len);
+			ret = preadv2(fd, &iov, 1, offset, flags);
 		else
-			ret = write(fd, buf, len);
+			ret = pwritev2(fd, &iov, 1, offset, flags);
 	}
 	return ret;
 }


-- 
Jens Axboe


  reply	other threads:[~2024-11-12 17:19 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-10 15:27 [PATCHSET v2 0/15] Uncached buffered IO Jens Axboe
2024-11-10 15:27 ` [PATCH 01/15] mm/filemap: change filemap_create_folio() to take a struct kiocb Jens Axboe
2024-11-10 15:27 ` [PATCH 02/15] mm/readahead: add folio allocation helper Jens Axboe
2024-11-10 15:27 ` [PATCH 03/15] mm: add PG_uncached page flag Jens Axboe
2024-11-10 15:27 ` [PATCH 04/15] mm/readahead: add readahead_control->uncached member Jens Axboe
2024-11-10 15:27 ` [PATCH 05/15] mm/filemap: use page_cache_sync_ra() to kick off read-ahead Jens Axboe
2024-11-10 15:27 ` [PATCH 06/15] mm/truncate: make invalidate_complete_folio2() public Jens Axboe
2024-11-10 15:27 ` [PATCH 07/15] fs: add RWF_UNCACHED iocb and FOP_UNCACHED file_operations flag Jens Axboe
2024-11-10 15:28 ` [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED Jens Axboe
2024-11-11  9:15   ` Kirill A. Shutemov
2024-11-11 14:12     ` Jens Axboe
2024-11-11 15:16       ` Christoph Hellwig
2024-11-11 15:17         ` Jens Axboe
2024-11-11 17:09           ` Jens Axboe
2024-11-11 23:42             ` Jens Axboe
2024-11-12  5:13               ` Christoph Hellwig
2024-11-12 15:14                 ` Jens Axboe
2024-11-12 16:39                   ` Brian Foster
2024-11-12 17:06                     ` Jens Axboe
2024-11-12 17:19                       ` Jens Axboe [this message]
2024-11-12 18:44                         ` Brian Foster
2024-11-12 19:08                           ` Jens Axboe
2024-11-12 19:39                             ` Brian Foster
2024-11-12 19:45                               ` Jens Axboe
2024-11-12 20:21                                 ` Brian Foster
2024-11-12 20:25                                   ` Jens Axboe
2024-11-13 14:07                                     ` Jens Axboe
2024-11-11 15:25       ` Kirill A. Shutemov
2024-11-11 15:31         ` Jens Axboe
2024-11-11 15:51           ` Kirill A. Shutemov
2024-11-11 15:57             ` Jens Axboe
2024-11-11 16:29               ` Kirill A. Shutemov
2024-11-10 15:28 ` [PATCH 09/15] mm/filemap: drop uncached pages when writeback completes Jens Axboe
2024-11-11  9:17   ` Kirill A. Shutemov
2024-11-10 15:28 ` [PATCH 10/15] mm/filemap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-11-10 15:28 ` [PATCH 11/15] mm: add FGP_UNCACHED folio creation flag Jens Axboe
2024-11-10 15:28 ` [PATCH 12/15] ext4: add RWF_UNCACHED write support Jens Axboe
2024-11-10 15:28 ` [PATCH 13/15] iomap: make buffered writes work with RWF_UNCACHED Jens Axboe
2024-11-10 15:28 ` [PATCH 14/15] xfs: punt uncached write completions to the completion wq Jens Axboe
2024-11-10 15:28 ` [PATCH 15/15] xfs: flag as supporting FOP_UNCACHED Jens Axboe
2024-11-11 15:27   ` Christoph Hellwig
2024-11-11 15:33     ` Jens Axboe
2024-11-11 17:25 ` [PATCHSET v2 0/15] Uncached buffered IO Matthew Wilcox
2024-11-11 17:39   ` Jens Axboe
2024-11-11 21:24   ` Yu Zhao
2024-11-11 21:48     ` Matthew Wilcox
2024-11-11 22:07       ` Yu Zhao
2024-11-20 23:11         ` Yuanchu Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=bfoster@redhat.com \
    --cc=clm@meta.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=kirill@shutemov.name \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox