FS/XFS testing framework
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: zlang@kernel.org, fstests@vger.kernel.org
Subject: Re: [PATCH 2/2] fsx: add support for RWF_DONTCACHE
Date: Mon, 6 Jan 2025 18:09:38 -0800	[thread overview]
Message-ID: <20250107020938.GN6160@frogsfrogsfrogs> (raw)
In-Reply-To: <20250106174919.103199-3-axboe@kernel.dk>

On Mon, Jan 06, 2025 at 10:48:47AM -0700, Jens Axboe wrote:
> Using RWF_DONTCACHE tells the kernel that any page cache instantiated
> by this operation should get pruned once the operation completes. If
> data is in cache prior to the operation it will remain there.
> 
> Add ops for testing both the read and write side of this. At startup,
> kernel support for this feature is probed. If support isn't available,
> uncached/dontcache IO is performed as regular buffered IO. If -Z is
> used to turn on O_DIRECT, then uncached/dontcache IO isn't performed.
> Defaults to on if available, and adds a -T parameter to turn it off.
> 
> See the kernel posting adding support:
> 
> https://lore.kernel.org/linux-fsdevel/20241220154831.1086649-1-axboe@kernel.dk/
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  ltp/fsx.c | 115 ++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 77 insertions(+), 38 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 41933354328a..7c996026157d 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -43,6 +43,10 @@
>  # define MAP_FILE 0
>  #endif
>  
> +#ifndef RWF_DONTCACHE
> +#define RWF_DONTCACHE	0x80
> +#endif
> +
>  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
>  
>  /* Operation flags (bitmask) */
> @@ -101,7 +105,9 @@ int			logcount = 0;	/* total ops */
>  enum {
>  	/* common operations */
>  	OP_READ = 0,
> +	OP_READ_DONTCACHE,
>  	OP_WRITE,
> +	OP_WRITE_DONTCACHE,
>  	OP_MAPREAD,
>  	OP_MAPWRITE,
>  	OP_MAX_LITE,
> @@ -190,15 +196,16 @@ int	o_direct;			/* -Z */
>  int	aio = 0;
>  int	uring = 0;
>  int	mark_nr = 0;
> +int	dontcache_io = 1;
>  
>  int page_size;
>  int page_mask;
>  int mmap_mask;
> -int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
>  #define READ 0
>  #define WRITE 1
> -#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> -#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
> +#define fsxread(a,b,c,d,f)	fsx_rw(READ, a,b,c,d,f)
> +#define fsxwrite(a,b,c,d,f)	fsx_rw(WRITE, a,b,c,d,f)
>  
>  struct timespec deadline;
>  
> @@ -266,7 +273,9 @@ prterr(const char *prefix)
>  
>  static const char *op_names[] = {
>  	[OP_READ] = "read",
> +	[OP_READ_DONTCACHE] = "read_dontcache",
>  	[OP_WRITE] = "write",
> +	[OP_WRITE_DONTCACHE] = "write_dontcache",
>  	[OP_MAPREAD] = "mapread",
>  	[OP_MAPWRITE] = "mapwrite",
>  	[OP_TRUNCATE] = "truncate",
> @@ -393,12 +402,14 @@ logdump(void)
>  				prt("\t******WWWW");
>  			break;
>  		case OP_READ:
> +		case OP_READ_DONTCACHE:
>  			prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
>  			    lp->args[1]);
>  			if (overlap)
>  				prt("\t***RRRR***");
>  			break;
> +		case OP_WRITE_DONTCACHE:
>  		case OP_WRITE:
>  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> @@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
>  }
>  
>  void
> -doread(unsigned offset, unsigned size)
> +doread(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % readbdy;
> @@ -818,12 +828,7 @@ doread(unsigned offset, unsigned size)
>  			(monitorend == -1 || offset <= monitorend))))))
>  		prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
>  		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -	iret = fsxread(fd, temp_buf, size, offset);
> +	iret = fsxread(fd, temp_buf, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("doread: read");
> @@ -870,7 +875,6 @@ check_contents(void)
>  	unsigned map_offset;
>  	unsigned map_size;
>  	char *p;
> -	off_t ret;
>  	unsigned iret;
>  
>  	if (!check_buf) {
> @@ -885,13 +889,7 @@ check_contents(void)
>  	if (size == 0)
>  		return;
>  
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("doread: lseek");
> -		report_failure(140);
> -	}
> -
> -	iret = fsxread(fd, check_buf, size, offset);
> +	iret = fsxread(fd, check_buf, size, offset, 0);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("check_contents: read");
> @@ -1064,9 +1062,8 @@ update_file_size(unsigned offset, unsigned size)
>  }
>  
>  void
> -dowrite(unsigned offset, unsigned size)
> +dowrite(unsigned offset, unsigned size, int flags)
>  {
> -	off_t ret;
>  	unsigned iret;
>  
>  	offset -= offset % writebdy;
> @@ -1099,14 +1096,9 @@ dowrite(unsigned offset, unsigned size)
>  		       (monitorstart == -1 ||
>  			(offset + size > monitorstart &&
>  			(monitorend == -1 || offset <= monitorend))))))
> -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
> -		    offset, offset + size - 1, size);
> -	ret = lseek(fd, (off_t)offset, SEEK_SET);
> -	if (ret == (off_t)-1) {
> -		prterr("dowrite: lseek");
> -		report_failure(150);
> -	}
> -	iret = fsxwrite(fd, good_buf + offset, size, offset);
> +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> +	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
>  			prterr("dowrite: write");
> @@ -1954,6 +1946,26 @@ do_preallocate(unsigned offset, unsigned length, int keep_size, int unshare)
>  }
>  #endif
>  
> +int
> +test_dontcache_io(void)
> +{
> +	char buf[4096];
> +	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
> +	int ret, e;
> +
> +	ret = preadv2(fd, &iov, 1, 0, RWF_DONTCACHE);
> +	e = ret < 0 ? errno : 0;
> +	if (e == EOPNOTSUPP) {
> +		if (!quiet)
> +			fprintf(stderr,
> +				"main: filesystem does not support "
> +				"dontcache IO, disabling!\n");
> +		return 0;
> +	}
> +
> +	return 1;
> +}
> +
>  void
>  writefileimage()
>  {
> @@ -2337,12 +2349,28 @@ have_op:
>  	switch (op) {
>  	case OP_READ:
>  		TRIM_OFF_LEN(offset, size, file_size);
> -		doread(offset, size);
> +		doread(offset, size, 0);
> +		break;
> +
> +	case OP_READ_DONTCACHE:
> +		TRIM_OFF_LEN(offset, size, file_size);
> +		if (dontcache_io)
> +			doread(offset, size, RWF_DONTCACHE);
> +		else
> +			doread(offset, size, 0);
>  		break;
>  
>  	case OP_WRITE:
>  		TRIM_OFF_LEN(offset, size, maxfilelen);
> -		dowrite(offset, size);
> +		dowrite(offset, size, 0);
> +		break;
> +
> +	case OP_WRITE_DONTCACHE:
> +		TRIM_OFF_LEN(offset, size, maxfilelen);
> +		if (dontcache_io)
> +			dowrite(offset, size, RWF_DONTCACHE);
> +		else
> +			dowrite(offset, size, 0);
>  		break;
>  
>  	case OP_MAPREAD:
> @@ -2538,6 +2566,7 @@ usage(void)
>  "	-0: Do not use exchange range calls\n"
>  #endif
>  "	-K: Do not use keep size\n\
> +	-T: Do not use dontcache IO\n\
>  	-L: fsxLite - no file creations & no file size changes\n\
>  	-N numops: total # operations to do (default infinity)\n\
>  	-O: use oplen (see -o flag) for every op (default random)\n\
> @@ -2546,7 +2575,7 @@ usage(void)
>  	-S seed: for random # generator (default 1) 0 gets timestamp\n\
>  	-W: mapped write operations DISabled\n\
>  	-X: Read file and compare to good buffer after every operation\n\
> -	-Z: O_DIRECT (use -R, -W, -r and -w too)\n\
> +	-Z: O_DIRECT (use -R, -W, -r and -w too, excludes dontcache IO)\n\
>  	--replay-ops=opsfile: replay ops from recorded .fsxops file\n\
>  	--record-ops[=opsfile]: dump ops file also on success. optionally specify ops file name\n\
>  	--duration=seconds: ignore any -N setting and run for this many seconds\n\
> @@ -2702,7 +2731,7 @@ uring_setup()
>  }
>  
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	struct io_uring_sqe     *sqe;
>  	struct io_uring_cqe     *cqe;
> @@ -2733,6 +2762,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  		} else {
>  			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
>  		}
> +		sqe->rw_flags = flags;
>  
>  		ret = io_uring_submit_and_wait(&ring, 1);
>  		if (ret != 1) {
> @@ -2781,7 +2811,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  }
>  #else
>  int
> -uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	fprintf(stderr, "io_rw: need IO_URING support!\n");
>  	exit(111);
> @@ -2789,19 +2819,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  #endif
>  
>  int
> -fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
>  {
>  	int ret;
>  
>  	if (aio) {
>  		ret = aio_rw(rw, fd, buf, len, offset);
>  	} else if (uring) {
> -		ret = uring_rw(rw, fd, buf, len, offset);
> +		ret = uring_rw(rw, fd, buf, len, offset, flags);
>  	} else {
> +		struct iovec iov = { .iov_base = buf, .iov_len = len };
> +
>  		if (rw == READ)
> -			ret = read(fd, buf, len);
> +			ret = preadv2(fd, &iov, 1, offset, flags);
>  		else
> -			ret = write(fd, buf, len);
> +			ret = pwritev2(fd, &iov, 1, offset, flags);
>  	}
>  	return ret;
>  }
> @@ -3065,6 +3097,9 @@ main(int argc, char **argv)
>  			if (seed < 0)
>  				usage();
>  			break;
> +		case 'T':
> +			dontcache_io = 0;
> +			break;
>  		case 'W':
>  		        mapped_writes = 0;
>  			if (!quiet)
> @@ -3076,6 +3111,7 @@ main(int argc, char **argv)
>  		case 'Z':
>  			o_direct = O_DIRECT;
>  			o_flags |= O_DIRECT;
> +			dontcache_io = 0;
>  			break;
>  		case 254:  /* --duration */
>  			if (!optarg) {
> @@ -3293,6 +3329,9 @@ main(int argc, char **argv)
>  		copy_range_calls = test_copy_range();
>  	if (exchange_range_calls)
>  		exchange_range_calls = test_exchange_range();
> +	if (dontcache_io)
> +		dontcache_io = test_dontcache_io();
> +	printf("Dontcache_io=%d\n", dontcache_io);

Is this a debug printf that got left in by mistake?

(Everything else in here looks fine to me...)

--D

>  
>  	while (keep_running())
>  		if (!test())
> -- 
> 2.47.1
> 
> 

  reply	other threads:[~2025-01-07  2:09 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-06 17:48 [PATCHSET 0/2] Add RWF_DONTCACHE support Jens Axboe
2025-01-06 17:48 ` [PATCH 1/2] fsstress: add support for RWF_DONTCACHE Jens Axboe
2025-01-07  2:11   ` Darrick J. Wong
2025-01-07  2:16     ` Jens Axboe
2025-01-07 17:30       ` Darrick J. Wong
2025-01-06 17:48 ` [PATCH 2/2] fsx: " Jens Axboe
2025-01-07  2:09   ` Darrick J. Wong [this message]
2025-01-07  2:12     ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2025-01-07 16:05 [PATCHSET v2 0/2] Add RWF_DONTCACHE support Jens Axboe
2025-01-07 16:05 ` [PATCH 2/2] fsx: add support for RWF_DONTCACHE Jens Axboe
2025-01-07 18:19   ` Darrick J. Wong
2025-01-07 18:24     ` Jens Axboe
2025-01-07 23:22       ` Darrick J. Wong
2025-01-08  0:00         ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250107020938.GN6160@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=fstests@vger.kernel.org \
    --cc=zlang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.