* [PATCH v3 1/3] xfs_io: Add support for preadv2
2025-03-15 8:20 [PATCH v3 0/3] xfsprogs: Add support for preadv2() and RWF_DONTCACHE Ritesh Harjani (IBM)
@ 2025-03-15 8:20 ` Ritesh Harjani (IBM)
2025-03-15 8:20 ` [PATCH v3 2/3] xfs_io: Add RWF_DONTCACHE support to pwritev2 Ritesh Harjani (IBM)
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Ritesh Harjani (IBM) @ 2025-03-15 8:20 UTC (permalink / raw)
To: linux-xfs
Cc: Darrick J . Wong, Jens Axboe, linux-fsdevel, Ritesh Harjani (IBM)
This patch adds support for preadv2() to xfs_io.
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
io/Makefile | 2 +-
io/pread.c | 45 ++++++++++++++++++++++++++++++---------------
2 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/io/Makefile b/io/Makefile
index 8f835ec7..14a3fe20 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -66,7 +66,7 @@ LLDLIBS += $(LIBEDITLINE) $(LIBTERMCAP)
endif
ifeq ($(HAVE_PWRITEV2),yes)
-LCFLAGS += -DHAVE_PWRITEV2
+LCFLAGS += -DHAVE_PWRITEV2 -DHAVE_PREADV2
endif
ifeq ($(HAVE_MAP_SYNC),yes)
diff --git a/io/pread.c b/io/pread.c
index 62c771fb..b314fbc7 100644
--- a/io/pread.c
+++ b/io/pread.c
@@ -162,7 +162,8 @@ static ssize_t
do_preadv(
int fd,
off_t offset,
- long long count)
+ long long count,
+ int preadv2_flags)
{
int vecs = 0;
ssize_t oldlen = 0;
@@ -181,8 +182,14 @@ do_preadv(
} else {
vecs = vectors;
}
+#ifdef HAVE_PREADV2
+ if (preadv2_flags)
+ bytes = preadv2(fd, iov, vectors, offset, preadv2_flags);
+ else
+ bytes = preadv(fd, iov, vectors, offset);
+#else
bytes = preadv(fd, iov, vectors, offset);
-
+#endif
/* restore trimmed iov */
if (oldlen)
iov[vecs - 1].iov_len = oldlen;
@@ -195,12 +202,13 @@ do_pread(
int fd,
off_t offset,
long long count,
- size_t buffer_size)
+ size_t buffer_size,
+ int preadv2_flags)
{
if (!vectors)
return pread(fd, io_buffer, min(count, buffer_size), offset);
- return do_preadv(fd, offset, count);
+ return do_preadv(fd, offset, count, preadv2_flags);
}
static int
@@ -210,7 +218,8 @@ read_random(
long long count,
long long *total,
unsigned int seed,
- int eof)
+ int eof,
+ int preadv2_flags)
{
off_t end, off, range;
ssize_t bytes;
@@ -234,7 +243,7 @@ read_random(
io_buffersize;
else
off = offset;
- bytes = do_pread(fd, off, io_buffersize, io_buffersize);
+ bytes = do_pread(fd, off, io_buffersize, io_buffersize, preadv2_flags);
if (bytes == 0)
break;
if (bytes < 0) {
@@ -256,7 +265,8 @@ read_backward(
off_t *offset,
long long *count,
long long *total,
- int eof)
+ int eof,
+ int preadv2_flags)
{
off_t end, off = *offset;
ssize_t bytes = 0, bytes_requested;
@@ -276,7 +286,7 @@ read_backward(
/* Do initial unaligned read if needed */
if ((bytes_requested = (off % io_buffersize))) {
off -= bytes_requested;
- bytes = do_pread(fd, off, bytes_requested, io_buffersize);
+ bytes = do_pread(fd, off, bytes_requested, io_buffersize, preadv2_flags);
if (bytes == 0)
return ops;
if (bytes < 0) {
@@ -294,7 +304,7 @@ read_backward(
while (cnt > end) {
bytes_requested = min(cnt, io_buffersize);
off -= bytes_requested;
- bytes = do_pread(fd, off, cnt, io_buffersize);
+ bytes = do_pread(fd, off, cnt, io_buffersize, preadv2_flags);
if (bytes == 0)
break;
if (bytes < 0) {
@@ -318,14 +328,15 @@ read_forward(
long long *total,
int verbose,
int onlyone,
- int eof)
+ int eof,
+ int preadv2_flags)
{
ssize_t bytes;
int ops = 0;
*total = 0;
while (count > 0 || eof) {
- bytes = do_pread(fd, offset, count, io_buffersize);
+ bytes = do_pread(fd, offset, count, io_buffersize, preadv2_flags);
if (bytes == 0)
break;
if (bytes < 0) {
@@ -353,7 +364,7 @@ read_buffer(
int verbose,
int onlyone)
{
- return read_forward(fd, offset, count, total, verbose, onlyone, 0);
+ return read_forward(fd, offset, count, total, verbose, onlyone, 0, 0);
}
static int
@@ -371,6 +382,7 @@ pread_f(
int Cflag, qflag, uflag, vflag;
int eof = 0, direction = IO_FORWARD;
int c;
+ int preadv2_flags = 0;
Cflag = qflag = uflag = vflag = 0;
init_cvtnum(&fsblocksize, &fssectsize);
@@ -463,15 +475,18 @@ pread_f(
case IO_RANDOM:
if (!zeed) /* srandom seed */
zeed = time(NULL);
- c = read_random(file->fd, offset, count, &total, zeed, eof);
+ c = read_random(file->fd, offset, count, &total, zeed, eof,
+ preadv2_flags);
break;
case IO_FORWARD:
- c = read_forward(file->fd, offset, count, &total, vflag, 0, eof);
+ c = read_forward(file->fd, offset, count, &total, vflag, 0, eof,
+ preadv2_flags);
if (eof)
count = total;
break;
case IO_BACKWARD:
- c = read_backward(file->fd, &offset, &count, &total, eof);
+ c = read_backward(file->fd, &offset, &count, &total, eof,
+ preadv2_flags);
break;
default:
ASSERT(0);
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 2/3] xfs_io: Add RWF_DONTCACHE support to pwritev2
2025-03-15 8:20 [PATCH v3 0/3] xfsprogs: Add support for preadv2() and RWF_DONTCACHE Ritesh Harjani (IBM)
2025-03-15 8:20 ` [PATCH v3 1/3] xfs_io: Add support for preadv2 Ritesh Harjani (IBM)
@ 2025-03-15 8:20 ` Ritesh Harjani (IBM)
2025-03-15 8:20 ` [PATCH v3 3/3] xfs_io: Add RWF_DONTCACHE support to preadv2 Ritesh Harjani (IBM)
2025-03-15 12:49 ` [PATCH v3 0/3] xfsprogs: Add support for preadv2() and RWF_DONTCACHE Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Ritesh Harjani (IBM) @ 2025-03-15 8:20 UTC (permalink / raw)
To: linux-xfs
Cc: Darrick J . Wong, Jens Axboe, linux-fsdevel, Ritesh Harjani (IBM)
Add support for the per-I/O RWF_DONTCACHE flag to pwritev2().
This enables xfs_io to perform uncached buffered I/O writes.
e.g. xfs_io -fc "pwrite -U -V 1 0 16K" /mnt/f1
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
include/linux.h | 5 +++++
io/pwrite.c | 14 ++++++++++++--
man/man8/xfs_io.8 | 8 +++++++-
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/include/linux.h b/include/linux.h
index b3516d54..6e83e073 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -237,6 +237,11 @@ struct fsxattr {
#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
#endif
+/* buffered IO that drops the cache after reading or writing data */
+#ifndef RWF_DONTCACHE
+#define RWF_DONTCACHE ((__kernel_rwf_t)0x00000080)
+#endif
+
/*
* Reminder: anything added to this file will be compiled into downstream
* userspace projects!
diff --git a/io/pwrite.c b/io/pwrite.c
index fab59be4..7df71e23 100644
--- a/io/pwrite.c
+++ b/io/pwrite.c
@@ -45,6 +45,7 @@ pwrite_help(void)
" -N -- Perform the pwritev2() with RWF_NOWAIT\n"
" -D -- Perform the pwritev2() with RWF_DSYNC\n"
" -A -- Perform the pwritev2() with RWF_ATOMIC\n"
+" -U -- Perform the pwritev2() with RWF_DONTCACHE\n"
#endif
"\n"));
}
@@ -285,7 +286,7 @@ pwrite_f(
init_cvtnum(&fsblocksize, &fssectsize);
bsize = fsblocksize;
- while ((c = getopt(argc, argv, "Ab:BCdDf:Fi:NqRs:OS:uV:wWZ:")) != EOF) {
+ while ((c = getopt(argc, argv, "Ab:BCdDf:Fi:NqRs:OS:uUV:wWZ:")) != EOF) {
switch (c) {
case 'b':
tmp = cvtnum(fsblocksize, fssectsize, optarg);
@@ -328,6 +329,9 @@ pwrite_f(
case 'A':
pwritev2_flags |= RWF_ATOMIC;
break;
+ case 'U':
+ pwritev2_flags |= RWF_DONTCACHE;
+ break;
#endif
case 's':
skip = cvtnum(fsblocksize, fssectsize, optarg);
@@ -392,6 +396,12 @@ pwrite_f(
exitcode = 1;
return command_usage(&pwrite_cmd);
}
+ if (pwritev2_flags != 0 && vectors == 0) {
+ printf(_("pwritev2 flags require vectored I/O (-V)\n"));
+ exitcode = 1;
+ return command_usage(&pwrite_cmd);
+ }
+
offset = cvtnum(fsblocksize, fssectsize, argv[optind]);
if (offset < 0) {
printf(_("non-numeric offset argument -- %s\n"), argv[optind]);
@@ -480,7 +490,7 @@ pwrite_init(void)
pwrite_cmd.argmax = -1;
pwrite_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK;
pwrite_cmd.args =
-_("[-i infile [-qAdDwNOW] [-s skip]] [-b bs] [-S seed] [-FBR [-Z N]] [-V N] off len");
+_("[-i infile [-qAdDwNOUW] [-s skip]] [-b bs] [-S seed] [-FBR [-Z N]] [-V N] off len");
pwrite_cmd.oneline =
_("writes a number of bytes at a specified offset");
pwrite_cmd.help = pwrite_help;
diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8
index 59d5ddc5..47af5232 100644
--- a/man/man8/xfs_io.8
+++ b/man/man8/xfs_io.8
@@ -244,7 +244,7 @@ See the
.B pread
command.
.TP
-.BI "pwrite [ \-i " file " ] [ \-qAdDwNOW ] [ \-s " skip " ] [ \-b " size " ] [ \-S " seed " ] [ \-FBR [ \-Z " zeed " ] ] [ \-V " vectors " ] " "offset length"
+.BI "pwrite [ \-i " file " ] [ \-qAdDwNOUW ] [ \-s " skip " ] [ \-b " size " ] [ \-S " seed " ] [ \-FBR [ \-Z " zeed " ] ] [ \-V " vectors " ] " "offset length"
Writes a range of bytes in a specified blocksize from the given
.IR offset .
The bytes written can be either a set pattern or read in from another
@@ -287,6 +287,12 @@ Perform the
call with
.IR RWF_ATOMIC .
.TP
+.B \-U
+Perform the
+.BR pwritev2 (2)
+call with
+.IR RWF_DONTCACHE .
+.TP
.B \-O
perform pwrite once and return the (maybe partial) bytes written.
.TP
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 3/3] xfs_io: Add RWF_DONTCACHE support to preadv2
2025-03-15 8:20 [PATCH v3 0/3] xfsprogs: Add support for preadv2() and RWF_DONTCACHE Ritesh Harjani (IBM)
2025-03-15 8:20 ` [PATCH v3 1/3] xfs_io: Add support for preadv2 Ritesh Harjani (IBM)
2025-03-15 8:20 ` [PATCH v3 2/3] xfs_io: Add RWF_DONTCACHE support to pwritev2 Ritesh Harjani (IBM)
@ 2025-03-15 8:20 ` Ritesh Harjani (IBM)
2025-03-15 12:49 ` [PATCH v3 0/3] xfsprogs: Add support for preadv2() and RWF_DONTCACHE Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Ritesh Harjani (IBM) @ 2025-03-15 8:20 UTC (permalink / raw)
To: linux-xfs
Cc: Darrick J . Wong, Jens Axboe, linux-fsdevel, Ritesh Harjani (IBM)
Add support for the per-I/O RWF_DONTCACHE flag to preadv2().
This enables xfs_io to perform uncached buffered I/O reads.
e.g. xfs_io -c "pread -U -V 1 0 16K" /mnt/f1
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
---
io/pread.c | 17 +++++++++++++++--
man/man8/xfs_io.8 | 8 +++++++-
2 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/io/pread.c b/io/pread.c
index b314fbc7..606bfe36 100644
--- a/io/pread.c
+++ b/io/pread.c
@@ -38,6 +38,9 @@ pread_help(void)
" -Z N -- zeed the random number generator (used when reading randomly)\n"
" (heh, zorry, the -s/-S arguments were already in use in pwrite)\n"
" -V N -- use vectored IO with N iovecs of blocksize each (preadv)\n"
+#ifdef HAVE_PREADV2
+" -U -- Perform the preadv2() with RWF_DONTCACHE\n"
+#endif
"\n"
" When in \"random\" mode, the number of read operations will equal the\n"
" number required to do a complete forward/backward scan of the range.\n"
@@ -388,7 +391,7 @@ pread_f(
init_cvtnum(&fsblocksize, &fssectsize);
bsize = fsblocksize;
- while ((c = getopt(argc, argv, "b:BCFRquvV:Z:")) != EOF) {
+ while ((c = getopt(argc, argv, "b:BCFRquUvV:Z:")) != EOF) {
switch (c) {
case 'b':
tmp = cvtnum(fsblocksize, fssectsize, optarg);
@@ -417,6 +420,11 @@ pread_f(
case 'u':
uflag = 1;
break;
+#ifdef HAVE_PREADV2
+ case 'U':
+ preadv2_flags |= RWF_DONTCACHE;
+ break;
+#endif
case 'v':
vflag = 1;
break;
@@ -446,6 +454,11 @@ pread_f(
exitcode = 1;
return command_usage(&pread_cmd);
}
+ if (preadv2_flags != 0 && vectors == 0) {
+ printf(_("preadv2 flags require vectored I/O (-V)\n"));
+ exitcode = 1;
+ return command_usage(&pread_cmd);
+ }
offset = cvtnum(fsblocksize, fssectsize, argv[optind]);
if (offset < 0 && (direction & (IO_RANDOM|IO_BACKWARD))) {
@@ -514,7 +527,7 @@ pread_init(void)
pread_cmd.argmin = 2;
pread_cmd.argmax = -1;
pread_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK;
- pread_cmd.args = _("[-b bs] [-qv] [-i N] [-FBR [-Z N]] off len");
+ pread_cmd.args = _("[-b bs] [-qUv] [-i N] [-FBR [-Z N]] off len");
pread_cmd.oneline = _("reads a number of bytes at a specified offset");
pread_cmd.help = pread_help;
diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8
index 47af5232..df508054 100644
--- a/man/man8/xfs_io.8
+++ b/man/man8/xfs_io.8
@@ -200,7 +200,7 @@ option will set the file permissions to read-write (0644). This allows xfs_io to
set up mismatches between the file permissions and the open file descriptor
read/write mode to exercise permission checks inside various syscalls.
.TP
-.BI "pread [ \-b " bsize " ] [ \-qv ] [ \-FBR [ \-Z " seed " ] ] [ \-V " vectors " ] " "offset length"
+.BI "pread [ \-b " bsize " ] [ \-qUv ] [ \-FBR [ \-Z " seed " ] ] [ \-V " vectors " ] " "offset length"
Reads a range of bytes in a specified blocksize from the given
.IR offset .
.RS 1.0i
@@ -214,6 +214,12 @@ requests will be split. The default blocksize is 4096 bytes.
.B \-q
quiet mode, do not write anything to standard output.
.TP
+.B \-U
+Perform the
+.BR preadv2 (2)
+call with
+.IR RWF_DONTCACHE .
+.TP
.B \-v
dump the contents of the buffer after reading,
by default only the count of bytes actually read is dumped.
--
2.48.1
^ permalink raw reply related [flat|nested] 5+ messages in thread