[PATCH 0/3] fstests/fsx: test coverage for eof zeroing

public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/3] fstests/fsx: test coverage for eof zeroing
@ 2024-08-22 14:44 Brian Foster
  2024-08-22 14:44 ` [PATCH 1/3] fsx: factor out a file size update helper Brian Foster
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Brian Foster @ 2024-08-22 14:44 UTC (permalink / raw)
  To: fstests; +Cc: linux-xfs, djwong, josef, david

Hi all,

One of the questions that came up on the last round of review on dealing
with the iomap zeroing problem [1] was having some proper test coverage.
When looking back at some of my custom tests, I realized some relied on
delay injection hacks, and otherwise felt like a random collection of
command sequences and whatnot that were probably somewhat duplicative.

Rather than adding fstests that continuously add random sequences of
xfs_io commands to try and test for zeroing problems, I started playing
around with fsx and came up with this series. In a nutshell, this does
intentional post-eof writes (to the eof page, via mapped writes) for
operations that are otherwise expected to perform partial page zeroing.
This allows the existing post-eof and unexpected data checks in fsx to
detect problems as normal. In my initial tests, this reproduces both the
truncate (already fixed) and write extension problems related to iomap
zero range fairly quickly on XFS.

It also produces zeroing issues on other non-iomap filesystems (ext4,
btrfs, bcachefs), which I suspect is something that probably needs to be
fixed on a per-fs basis. To test fsx itself, this so far seems to run
reliably longer term on XFS with the corresponding iomap zero range fix
in place.

Patch 1 is refactoring prep, patch 2 enables the eof data write
mechanism, and patch 3 adds a new fstests test to use it. The test
currently only supports XFS due to the aforementioned issues, but the
idea is to remove the restriction as other fs' are fixed. Thoughts,
reviews, flames appreciated.

Brian

[1] https://lore.kernel.org/linux-fsdevel/20240718130212.23905-1-bfoster@redhat.com/

Brian Foster (3):
  fsx: factor out a file size update helper
  fsx: support eof page pollution for eof zeroing test coverage
  generic: test to run fsx eof pollution

 ltp/fsx.c             | 129 +++++++++++++++++++++++++++++++-----------
 tests/generic/362     |  27 +++++++++
 tests/generic/362.out |   2 +
 3 files changed, 126 insertions(+), 32 deletions(-)
 create mode 100755 tests/generic/362
 create mode 100644 tests/generic/362.out

-- 
2.45.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] fsx: factor out a file size update helper
  2024-08-22 14:44 [PATCH 0/3] fstests/fsx: test coverage for eof zeroing Brian Foster
@ 2024-08-22 14:44 ` Brian Foster
  2024-08-22 20:50   ` Darrick J. Wong
  2024-08-22 14:44 ` [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage Brian Foster
  2024-08-22 14:44 ` [PATCH 3/3] generic: test to run fsx eof pollution Brian Foster
  2 siblings, 1 reply; 11+ messages in thread
From: Brian Foster @ 2024-08-22 14:44 UTC (permalink / raw)
  To: fstests; +Cc: linux-xfs, djwong, josef, david

In preparation for support for eof page pollution, factor out a file
size update helper. This updates the internally tracked file size
based on the upcoming operation and zeroes the appropriate range in
the good buffer for extending operations.

Note that a handful of callers currently make these updates after
performing the associated operation. Order is not important to
current behavior, but it will be for a follow on patch, so make
those calls a bit earlier as well.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 ltp/fsx.c | 57 +++++++++++++++++++++++++------------------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/ltp/fsx.c b/ltp/fsx.c
index 2dc59b06..1389c51d 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -983,6 +983,17 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
 	}
 }
 
+/*
+ * Helper to update the tracked file size. If the offset begins beyond current
+ * EOF, zero the range from EOF to offset in the good buffer.
+ */
+void
+update_file_size(unsigned offset, unsigned size)
+{
+	if (offset > file_size)
+		memset(good_buf + file_size, '\0', offset - file_size);
+	file_size = offset + size;
+}
 
 void
 dowrite(unsigned offset, unsigned size)
@@ -1003,10 +1014,8 @@ dowrite(unsigned offset, unsigned size)
 	log4(OP_WRITE, offset, size, FL_NONE);
 
 	gendata(original_buf, good_buf, offset, size);
-	if (file_size < offset + size) {
-		if (file_size < offset)
-			memset(good_buf + file_size, '\0', offset - file_size);
-		file_size = offset + size;
+	if (offset + size > file_size) {
+		update_file_size(offset, size);
 		if (lite) {
 			warn("Lite file size bug in fsx!");
 			report_failure(149);
@@ -1070,10 +1079,8 @@ domapwrite(unsigned offset, unsigned size)
 	log4(OP_MAPWRITE, offset, size, FL_NONE);
 
 	gendata(original_buf, good_buf, offset, size);
-	if (file_size < offset + size) {
-		if (file_size < offset)
-			memset(good_buf + file_size, '\0', offset - file_size);
-		file_size = offset + size;
+	if (offset + size > file_size) {
+		update_file_size(offset, size);
 		if (lite) {
 			warn("Lite file size bug in fsx!");
 			report_failure(200);
@@ -1136,9 +1143,7 @@ dotruncate(unsigned size)
 
 	log4(OP_TRUNCATE, 0, size, FL_NONE);
 
-	if (size > file_size)
-		memset(good_buf + file_size, '\0', size - file_size);
-	file_size = size;
+	update_file_size(size, 0);
 
 	if (testcalls <= simulatedopcount)
 		return;
@@ -1247,6 +1252,9 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
 	log4(OP_ZERO_RANGE, offset, length,
 	     keep_size ? FL_KEEP_SIZE : FL_NONE);
 
+	if (end_offset > file_size)
+		update_file_size(offset, length);
+
 	if (testcalls <= simulatedopcount)
 		return;
 
@@ -1263,17 +1271,6 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
 	}
 
 	memset(good_buf + offset, '\0', length);
-
-	if (!keep_size && end_offset > file_size) {
-		/*
-		 * If there's a gap between the old file size and the offset of
-		 * the zero range operation, fill the gap with zeroes.
-		 */
-		if (offset > file_size)
-			memset(good_buf + file_size, '\0', offset - file_size);
-
-		file_size = end_offset;
-	}
 }
 
 #else
@@ -1538,6 +1535,9 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
 
 	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
 
+	if (dest + length > file_size)
+		update_file_size(dest, length);
+
 	if (testcalls <= simulatedopcount)
 		return;
 
@@ -1556,10 +1556,6 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
 	}
 
 	memcpy(good_buf + dest, good_buf + offset, length);
-	if (dest > file_size)
-		memset(good_buf + file_size, '\0', dest - file_size);
-	if (dest + length > file_size)
-		file_size = dest + length;
 }
 
 #else
@@ -1756,6 +1752,9 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
 
 	log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
 
+	if (dest + length > file_size)
+		update_file_size(dest, length);
+
 	if (testcalls <= simulatedopcount)
 		return;
 
@@ -1792,10 +1791,6 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
 	}
 
 	memcpy(good_buf + dest, good_buf + offset, length);
-	if (dest > file_size)
-		memset(good_buf + file_size, '\0', dest - file_size);
-	if (dest + length > file_size)
-		file_size = dest + length;
 }
 
 #else
@@ -1846,7 +1841,7 @@ do_preallocate(unsigned offset, unsigned length, int keep_size)
 
 	if (end_offset > file_size) {
 		memset(good_buf + file_size, '\0', end_offset - file_size);
-		file_size = end_offset;
+		update_file_size(offset, length);
 	}
 
 	if (testcalls <= simulatedopcount)
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage
  2024-08-22 14:44 [PATCH 0/3] fstests/fsx: test coverage for eof zeroing Brian Foster
  2024-08-22 14:44 ` [PATCH 1/3] fsx: factor out a file size update helper Brian Foster
@ 2024-08-22 14:44 ` Brian Foster
  2024-08-22 20:52   ` Darrick J. Wong
  2024-08-22 14:44 ` [PATCH 3/3] generic: test to run fsx eof pollution Brian Foster
  2 siblings, 1 reply; 11+ messages in thread
From: Brian Foster @ 2024-08-22 14:44 UTC (permalink / raw)
  To: fstests; +Cc: linux-xfs, djwong, josef, david

File ranges that are newly exposed via size changing operations are
expected to return zeroes until written to. This behavior tends to
be difficult to regression test as failures can be racy and
transient. fsx is probably the best tool for this type of test
coverage, but uncovering issues can require running for a
significantly longer period of time than is typically invoked
through fstests tests. As a result, these types of regressions tend
to go unnoticed for an unfortunate amount of time.

To facilitate uncovering these problems more quickly, implement an
eof pollution mode in fsx that opportunistically injects post-eof
data prior to operations that change file size. Since data injection
occurs immediately before the size changing operation, it can be
used to detect problems in partial eof page/block zeroing associated
with each relevant operation.

The implementation takes advantage of the fact that mapped writes
can place data beyond eof so long as the page starts within eof. The
main reason for the isolated per-operation approach (vs. something
like allowing mapped writes to write beyond eof, for example) is to
accommodate the fact that writeback zeroes post-eof data on the eof
page. The current approach is therefore not necessarily guaranteed
to detect all problems, but provides more generic and broad test
coverage than the alternative of testing explicit command sequences
and doesn't require significant changes to how fsx works. If this
proves useful long term, further enhancements can be considered that
might facilitate the presence of post-eof data across operations.

Enable the feature with the -e command line option. It is disabled
by default because zeroing behavior is inconsistent across
filesystems. This can also be revisited in the future if zeroing
behavior is refined for the major filesystems that rely on fstests
for regression testing.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 ltp/fsx.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/ltp/fsx.c b/ltp/fsx.c
index 1389c51d..20b8cd9f 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -178,6 +178,7 @@ int	dedupe_range_calls = 1;		/* -B flag disables */
 int	copy_range_calls = 1;		/* -E flag disables */
 int	exchange_range_calls = 1;	/* -0 flag disables */
 int	integrity = 0;			/* -i flag */
+int	pollute_eof = 0;		/* -e flag */
 int	fsxgoodfd = 0;
 int	o_direct;			/* -Z */
 int	aio = 0;
@@ -983,6 +984,58 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
 	}
 }
 
+/*
+ * Pollute the EOF page with data beyond EOF prior to size change operations.
+ * This provides additional test coverage for partial EOF block/page zeroing.
+ * If the upcoming operation does not correctly zero, incorrect file data will
+ * be detected.
+ */
+void
+pollute_eofpage(unsigned int maxoff)
+{
+	unsigned offset = file_size;
+	unsigned pg_offset;
+	unsigned write_size;
+	char    *p;
+
+	if (!pollute_eof)
+		return;
+
+	/* write up to specified max or the end of the eof page */
+	pg_offset = offset & mmap_mask;
+	write_size = MIN(PAGE_SIZE - pg_offset, maxoff - offset);
+
+	if (!pg_offset)
+		return;
+
+	if (!quiet &&
+	    ((progressinterval && testcalls % progressinterval == 0) ||
+	    (debug &&
+	     (monitorstart == -1 ||
+	     (offset + write_size > monitorstart &&
+	      (monitorend == -1 || offset <= monitorend)))))) {
+		prt("%lld pollute_eof\t0x%x thru\t0x%x\t(0x%x bytes)\n",
+			testcalls, offset, offset + write_size - 1, write_size);
+	}
+
+	if ((p = (char *)mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+			      MAP_FILE | MAP_SHARED, fd,
+			      (off_t)(offset - pg_offset))) == (char *)-1) {
+		prterr("pollute_eofpage: mmap");
+		return;
+	}
+
+	/*
+	 * Write to a range just past EOF of the test file. Do not update the
+	 * good buffer because the upcoming operation is expected to zero this
+	 * range of the file.
+	 */
+	gendata(original_buf, p, pg_offset, write_size);
+
+	if (munmap(p, PAGE_SIZE) != 0)
+		prterr("pollute_eofpage: munmap");
+}
+
 /*
  * Helper to update the tracked file size. If the offset begins beyond current
  * EOF, zero the range from EOF to offset in the good buffer.
@@ -990,8 +1043,10 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
 void
 update_file_size(unsigned offset, unsigned size)
 {
-	if (offset > file_size)
+	if (offset > file_size) {
+		pollute_eofpage(offset + size);
 		memset(good_buf + file_size, '\0', offset - file_size);
+	}
 	file_size = offset + size;
 }
 
@@ -1143,6 +1198,9 @@ dotruncate(unsigned size)
 
 	log4(OP_TRUNCATE, 0, size, FL_NONE);
 
+	/* pollute the current EOF before a truncate down */
+	if (size < file_size)
+		pollute_eofpage(maxfilelen);
 	update_file_size(size, 0);
 
 	if (testcalls <= simulatedopcount)
@@ -1305,6 +1363,9 @@ do_collapse_range(unsigned offset, unsigned length)
 
 	log4(OP_COLLAPSE_RANGE, offset, length, FL_NONE);
 
+	/* pollute current eof before collapse truncates down */
+	pollute_eofpage(maxfilelen);
+
 	if (testcalls <= simulatedopcount)
 		return;
 
@@ -1356,6 +1417,9 @@ do_insert_range(unsigned offset, unsigned length)
 
 	log4(OP_INSERT_RANGE, offset, length, FL_NONE);
 
+	/* pollute current eof before insert truncates up */
+	pollute_eofpage(maxfilelen);
+
 	if (testcalls <= simulatedopcount)
 		return;
 
@@ -2385,6 +2449,7 @@ usage(void)
 	-b opnum: beginning operation number (default 1)\n\
 	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
 	-d: debug output for all operations\n\
+	-e: pollute post-eof on size changes (default 0)\n\
 	-f: flush and invalidate cache after I/O\n\
 	-g X: write character X instead of random generated data\n\
 	-i logdev: do integrity testing, logdev is the dm log writes device\n\
@@ -2783,7 +2848,7 @@ main(int argc, char **argv)
 	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
 
 	while ((ch = getopt_long(argc, argv,
-				 "0b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
+				 "0b:c:de:fg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
 				 longopts, NULL)) != EOF)
 		switch (ch) {
 		case 'b':
@@ -2805,6 +2870,11 @@ main(int argc, char **argv)
 		case 'd':
 			debug = 1;
 			break;
+		case 'e':
+			pollute_eof = getnum(optarg, &endp);
+			if (pollute_eof < 0 || pollute_eof > 1)
+				usage();
+			break;
 		case 'f':
 			flush = 1;
 			break;
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] generic: test to run fsx eof pollution
  2024-08-22 14:44 [PATCH 0/3] fstests/fsx: test coverage for eof zeroing Brian Foster
  2024-08-22 14:44 ` [PATCH 1/3] fsx: factor out a file size update helper Brian Foster
  2024-08-22 14:44 ` [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage Brian Foster
@ 2024-08-22 14:44 ` Brian Foster
  2024-08-22 20:54   ` Darrick J. Wong
  2 siblings, 1 reply; 11+ messages in thread
From: Brian Foster @ 2024-08-22 14:44 UTC (permalink / raw)
  To: fstests; +Cc: linux-xfs, djwong, josef, david

Filesystem regressions related to partial page zeroing can go
unnoticed for a decent amount of time. A recent example is the issue
of iomap zero range not handling dirty pagecache over unwritten
extents, which leads to wrong behavior on certain file extending
operations (i.e. truncate, write extension, etc.).

fsx does occasionally uncover these sorts of problems, but failures
can be rare and/or require longer running tests outside what is
typically run via full fstests regression runs. fsx now supports a
mode that injects post-eof data in order to explicitly test partial
eof zeroing behavior. This uncovers certain problems more quickly
and applies coverage more broadly across size changing operations.

Add a new test that runs an fsx instance (modeled after generic/127)
with eof pollution mode enabled. While the test is generic, it is
currently limited to XFS as that is currently the only known major
fs that does enough zeroing to satisfy the strict semantics expected
by fsx. The long term goal is to uncover and fix issues so more
filesystems can enable this test.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 tests/generic/362     | 27 +++++++++++++++++++++++++++
 tests/generic/362.out |  2 ++
 2 files changed, 29 insertions(+)
 create mode 100755 tests/generic/362
 create mode 100644 tests/generic/362.out

diff --git a/tests/generic/362 b/tests/generic/362
new file mode 100755
index 00000000..30870cd0
--- /dev/null
+++ b/tests/generic/362
@@ -0,0 +1,27 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Red Hat, Inc.  All Rights Reserved.
+#
+# FSQA Test No. 362
+#
+# Run fsx with EOF pollution enabled. This provides test coverage for partial
+# EOF page/block zeroing for operations that change file size.
+#
+
+. ./common/preamble
+_begin_fstest rw auto
+
+FSX_FILE_SIZE=262144
+# on failure, replace -q with -d to see post-eof writes in the dump output
+FSX_ARGS="-q -l $FSX_FILE_SIZE -e 1 -N 100000"
+
+_require_test
+
+# currently only xfs performs enough zeroing to satisfy fsx
+_supported_fs xfs
+
+ltp/fsx $FSX_ARGS $FSX_AVOID $TEST_DIR/fsx.$seq > $tmp.output 2>&1
+cat $tmp.output
+
+status=0
+exit
diff --git a/tests/generic/362.out b/tests/generic/362.out
new file mode 100644
index 00000000..7af6b96a
--- /dev/null
+++ b/tests/generic/362.out
@@ -0,0 +1,2 @@
+QA output created by 362
+All 100000 operations completed A-OK!
-- 
2.45.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] fsx: factor out a file size update helper
  2024-08-22 14:44 ` [PATCH 1/3] fsx: factor out a file size update helper Brian Foster
@ 2024-08-22 20:50   ` Darrick J. Wong
  2024-08-26 14:03     ` Brian Foster
  0 siblings, 1 reply; 11+ messages in thread
From: Darrick J. Wong @ 2024-08-22 20:50 UTC (permalink / raw)
  To: Brian Foster; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 10:44:20AM -0400, Brian Foster wrote:
> In preparation for support for eof page pollution, factor out a file
> size update helper. This updates the internally tracked file size
> based on the upcoming operation and zeroes the appropriate range in
> the good buffer for extending operations.
> 
> Note that a handful of callers currently make these updates after
> performing the associated operation. Order is not important to
> current behavior, but it will be for a follow on patch, so make
> those calls a bit earlier as well.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
>  ltp/fsx.c | 57 +++++++++++++++++++++++++------------------------------
>  1 file changed, 26 insertions(+), 31 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 2dc59b06..1389c51d 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -983,6 +983,17 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
>  	}
>  }
>  
> +/*
> + * Helper to update the tracked file size. If the offset begins beyond current
> + * EOF, zero the range from EOF to offset in the good buffer.
> + */
> +void
> +update_file_size(unsigned offset, unsigned size)
> +{
> +	if (offset > file_size)
> +		memset(good_buf + file_size, '\0', offset - file_size);
> +	file_size = offset + size;
> +}
>  
>  void
>  dowrite(unsigned offset, unsigned size)
> @@ -1003,10 +1014,8 @@ dowrite(unsigned offset, unsigned size)
>  	log4(OP_WRITE, offset, size, FL_NONE);
>  
>  	gendata(original_buf, good_buf, offset, size);
> -	if (file_size < offset + size) {
> -		if (file_size < offset)
> -			memset(good_buf + file_size, '\0', offset - file_size);
> -		file_size = offset + size;
> +	if (offset + size > file_size) {
> +		update_file_size(offset, size);
>  		if (lite) {
>  			warn("Lite file size bug in fsx!");
>  			report_failure(149);
> @@ -1070,10 +1079,8 @@ domapwrite(unsigned offset, unsigned size)
>  	log4(OP_MAPWRITE, offset, size, FL_NONE);
>  
>  	gendata(original_buf, good_buf, offset, size);
> -	if (file_size < offset + size) {
> -		if (file_size < offset)
> -			memset(good_buf + file_size, '\0', offset - file_size);
> -		file_size = offset + size;
> +	if (offset + size > file_size) {
> +		update_file_size(offset, size);
>  		if (lite) {
>  			warn("Lite file size bug in fsx!");
>  			report_failure(200);
> @@ -1136,9 +1143,7 @@ dotruncate(unsigned size)
>  
>  	log4(OP_TRUNCATE, 0, size, FL_NONE);
>  
> -	if (size > file_size)
> -		memset(good_buf + file_size, '\0', size - file_size);
> -	file_size = size;
> +	update_file_size(size, 0);
>  
>  	if (testcalls <= simulatedopcount)
>  		return;
> @@ -1247,6 +1252,9 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
>  	log4(OP_ZERO_RANGE, offset, length,
>  	     keep_size ? FL_KEEP_SIZE : FL_NONE);
>  
> +	if (end_offset > file_size)
> +		update_file_size(offset, length);
> +
>  	if (testcalls <= simulatedopcount)
>  		return;

Don't we only want to do the goodbuf zeroing if we don't bail out due to
the (testcalls <= simulatedopcount) logic?  Same question for
do_clone_range and do_copy_range.

/me reads the second patch but doesn't quite get it. :/

Are you doing this to mirror what the kernel does?  A comment here to
explain why we're doing this differently would help me.

--D

>  
> @@ -1263,17 +1271,6 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
>  	}
>  
>  	memset(good_buf + offset, '\0', length);
> -
> -	if (!keep_size && end_offset > file_size) {
> -		/*
> -		 * If there's a gap between the old file size and the offset of
> -		 * the zero range operation, fill the gap with zeroes.
> -		 */
> -		if (offset > file_size)
> -			memset(good_buf + file_size, '\0', offset - file_size);
> -
> -		file_size = end_offset;
> -	}
>  }
>  
>  #else
> @@ -1538,6 +1535,9 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
>  
>  	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
>  
> +	if (dest + length > file_size)
> +		update_file_size(dest, length);
> +
>  	if (testcalls <= simulatedopcount)
>  		return;
>  
> @@ -1556,10 +1556,6 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
>  	}
>  
>  	memcpy(good_buf + dest, good_buf + offset, length);
> -	if (dest > file_size)
> -		memset(good_buf + file_size, '\0', dest - file_size);
> -	if (dest + length > file_size)
> -		file_size = dest + length;
>  }
>  
>  #else
> @@ -1756,6 +1752,9 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
>  
>  	log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
>  
> +	if (dest + length > file_size)
> +		update_file_size(dest, length);
> +
>  	if (testcalls <= simulatedopcount)
>  		return;
>  
> @@ -1792,10 +1791,6 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
>  	}
>  
>  	memcpy(good_buf + dest, good_buf + offset, length);
> -	if (dest > file_size)
> -		memset(good_buf + file_size, '\0', dest - file_size);
> -	if (dest + length > file_size)
> -		file_size = dest + length;
>  }
>  
>  #else
> @@ -1846,7 +1841,7 @@ do_preallocate(unsigned offset, unsigned length, int keep_size)
>  
>  	if (end_offset > file_size) {
>  		memset(good_buf + file_size, '\0', end_offset - file_size);
> -		file_size = end_offset;
> +		update_file_size(offset, length);
>  	}
>  
>  	if (testcalls <= simulatedopcount)
> -- 
> 2.45.0
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage
  2024-08-22 14:44 ` [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage Brian Foster
@ 2024-08-22 20:52   ` Darrick J. Wong
  2024-08-26 14:04     ` Brian Foster
  0 siblings, 1 reply; 11+ messages in thread
From: Darrick J. Wong @ 2024-08-22 20:52 UTC (permalink / raw)
  To: Brian Foster; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 10:44:21AM -0400, Brian Foster wrote:
> File ranges that are newly exposed via size changing operations are
> expected to return zeroes until written to. This behavior tends to
> be difficult to regression test as failures can be racy and
> transient. fsx is probably the best tool for this type of test
> coverage, but uncovering issues can require running for a
> significantly longer period of time than is typically invoked
> through fstests tests. As a result, these types of regressions tend
> to go unnoticed for an unfortunate amount of time.
> 
> To facilitate uncovering these problems more quickly, implement an
> eof pollution mode in fsx that opportunistically injects post-eof
> data prior to operations that change file size. Since data injection
> occurs immediately before the size changing operation, it can be
> used to detect problems in partial eof page/block zeroing associated
> with each relevant operation.
> 
> The implementation takes advantage of the fact that mapped writes
> can place data beyond eof so long as the page starts within eof. The
> main reason for the isolated per-operation approach (vs. something
> like allowing mapped writes to write beyond eof, for example) is to
> accommodate the fact that writeback zeroes post-eof data on the eof
> page. The current approach is therefore not necessarily guaranteed
> to detect all problems, but provides more generic and broad test
> coverage than the alternative of testing explicit command sequences
> and doesn't require significant changes to how fsx works. If this
> proves useful long term, further enhancements can be considered that
> might facilitate the presence of post-eof data across operations.
> 
> Enable the feature with the -e command line option. It is disabled
> by default because zeroing behavior is inconsistent across
> filesystems. This can also be revisited in the future if zeroing
> behavior is refined for the major filesystems that rely on fstests
> for regression testing.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
>  ltp/fsx.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 1389c51d..20b8cd9f 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -178,6 +178,7 @@ int	dedupe_range_calls = 1;		/* -B flag disables */
>  int	copy_range_calls = 1;		/* -E flag disables */
>  int	exchange_range_calls = 1;	/* -0 flag disables */
>  int	integrity = 0;			/* -i flag */
> +int	pollute_eof = 0;		/* -e flag */
>  int	fsxgoodfd = 0;
>  int	o_direct;			/* -Z */
>  int	aio = 0;
> @@ -983,6 +984,58 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
>  	}
>  }
>  
> +/*
> + * Pollute the EOF page with data beyond EOF prior to size change operations.
> + * This provides additional test coverage for partial EOF block/page zeroing.
> + * If the upcoming operation does not correctly zero, incorrect file data will
> + * be detected.
> + */
> +void
> +pollute_eofpage(unsigned int maxoff)
> +{
> +	unsigned offset = file_size;
> +	unsigned pg_offset;
> +	unsigned write_size;
> +	char    *p;
> +
> +	if (!pollute_eof)
> +		return;
> +
> +	/* write up to specified max or the end of the eof page */
> +	pg_offset = offset & mmap_mask;
> +	write_size = MIN(PAGE_SIZE - pg_offset, maxoff - offset);
> +
> +	if (!pg_offset)
> +		return;
> +
> +	if (!quiet &&
> +	    ((progressinterval && testcalls % progressinterval == 0) ||
> +	    (debug &&
> +	     (monitorstart == -1 ||
> +	     (offset + write_size > monitorstart &&
> +	      (monitorend == -1 || offset <= monitorend)))))) {
> +		prt("%lld pollute_eof\t0x%x thru\t0x%x\t(0x%x bytes)\n",
> +			testcalls, offset, offset + write_size - 1, write_size);
> +	}
> +
> +	if ((p = (char *)mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
> +			      MAP_FILE | MAP_SHARED, fd,
> +			      (off_t)(offset - pg_offset))) == (char *)-1) {

Nit:

if (mmap(...) == MAP_FAILED)?

Otherwise I like the concept here. :)

--D

> +		prterr("pollute_eofpage: mmap");
> +		return;
> +	}
> +
> +	/*
> +	 * Write to a range just past EOF of the test file. Do not update the
> +	 * good buffer because the upcoming operation is expected to zero this
> +	 * range of the file.
> +	 */
> +	gendata(original_buf, p, pg_offset, write_size);
> +
> +	if (munmap(p, PAGE_SIZE) != 0)
> +		prterr("pollute_eofpage: munmap");
> +}
> +
>  /*
>   * Helper to update the tracked file size. If the offset begins beyond current
>   * EOF, zero the range from EOF to offset in the good buffer.
> @@ -990,8 +1043,10 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
>  void
>  update_file_size(unsigned offset, unsigned size)
>  {
> -	if (offset > file_size)
> +	if (offset > file_size) {
> +		pollute_eofpage(offset + size);
>  		memset(good_buf + file_size, '\0', offset - file_size);
> +	}
>  	file_size = offset + size;
>  }
>  
> @@ -1143,6 +1198,9 @@ dotruncate(unsigned size)
>  
>  	log4(OP_TRUNCATE, 0, size, FL_NONE);
>  
> +	/* pollute the current EOF before a truncate down */
> +	if (size < file_size)
> +		pollute_eofpage(maxfilelen);
>  	update_file_size(size, 0);
>  
>  	if (testcalls <= simulatedopcount)
> @@ -1305,6 +1363,9 @@ do_collapse_range(unsigned offset, unsigned length)
>  
>  	log4(OP_COLLAPSE_RANGE, offset, length, FL_NONE);
>  
> +	/* pollute current eof before collapse truncates down */
> +	pollute_eofpage(maxfilelen);
> +
>  	if (testcalls <= simulatedopcount)
>  		return;
>  
> @@ -1356,6 +1417,9 @@ do_insert_range(unsigned offset, unsigned length)
>  
>  	log4(OP_INSERT_RANGE, offset, length, FL_NONE);
>  
> +	/* pollute current eof before insert truncates up */
> +	pollute_eofpage(maxfilelen);
> +
>  	if (testcalls <= simulatedopcount)
>  		return;
>  
> @@ -2385,6 +2449,7 @@ usage(void)
>  	-b opnum: beginning operation number (default 1)\n\
>  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
>  	-d: debug output for all operations\n\
> +	-e: pollute post-eof on size changes (default 0)\n\
>  	-f: flush and invalidate cache after I/O\n\
>  	-g X: write character X instead of random generated data\n\
>  	-i logdev: do integrity testing, logdev is the dm log writes device\n\
> @@ -2783,7 +2848,7 @@ main(int argc, char **argv)
>  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
>  
>  	while ((ch = getopt_long(argc, argv,
> -				 "0b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> +				 "0b:c:de:fg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
>  				 longopts, NULL)) != EOF)
>  		switch (ch) {
>  		case 'b':
> @@ -2805,6 +2870,11 @@ main(int argc, char **argv)
>  		case 'd':
>  			debug = 1;
>  			break;
> +		case 'e':
> +			pollute_eof = getnum(optarg, &endp);
> +			if (pollute_eof < 0 || pollute_eof > 1)
> +				usage();
> +			break;
>  		case 'f':
>  			flush = 1;
>  			break;
> -- 
> 2.45.0
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] generic: test to run fsx eof pollution
  2024-08-22 14:44 ` [PATCH 3/3] generic: test to run fsx eof pollution Brian Foster
@ 2024-08-22 20:54   ` Darrick J. Wong
  2024-08-26 14:07     ` Brian Foster
  0 siblings, 1 reply; 11+ messages in thread
From: Darrick J. Wong @ 2024-08-22 20:54 UTC (permalink / raw)
  To: Brian Foster; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 10:44:22AM -0400, Brian Foster wrote:
> Filesystem regressions related to partial page zeroing can go
> unnoticed for a decent amount of time. A recent example is the issue
> of iomap zero range not handling dirty pagecache over unwritten
> extents, which leads to wrong behavior on certain file extending
> operations (i.e. truncate, write extension, etc.).
> 
> fsx does occasionally uncover these sorts of problems, but failures
> can be rare and/or require longer running tests outside what is
> typically run via full fstests regression runs. fsx now supports a
> mode that injects post-eof data in order to explicitly test partial
> eof zeroing behavior. This uncovers certain problems more quickly
> and applies coverage more broadly across size changing operations.
> 
> Add a new test that runs an fsx instance (modeled after generic/127)
> with eof pollution mode enabled. While the test is generic, it is
> currently limited to XFS as that is currently the only known major
> fs that does enough zeroing to satisfy the strict semantics expected
> by fsx. The long term goal is to uncover and fix issues so more
> filesystems can enable this test.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
>  tests/generic/362     | 27 +++++++++++++++++++++++++++
>  tests/generic/362.out |  2 ++
>  2 files changed, 29 insertions(+)
>  create mode 100755 tests/generic/362
>  create mode 100644 tests/generic/362.out
> 
> diff --git a/tests/generic/362 b/tests/generic/362
> new file mode 100755
> index 00000000..30870cd0
> --- /dev/null
> +++ b/tests/generic/362
> @@ -0,0 +1,27 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2024 Red Hat, Inc.  All Rights Reserved.
> +#
> +# FSQA Test No. 362
> +#
> +# Run fsx with EOF pollution enabled. This provides test coverage for partial
> +# EOF page/block zeroing for operations that change file size.
> +#
> +
> +. ./common/preamble
> +_begin_fstest rw auto
> +
> +FSX_FILE_SIZE=262144
> +# on failure, replace -q with -d to see post-eof writes in the dump output
> +FSX_ARGS="-q -l $FSX_FILE_SIZE -e 1 -N 100000"
> +
> +_require_test
> +
> +# currently only xfs performs enough zeroing to satisfy fsx
> +_supported_fs xfs

Should get rid of this. ;)

> +ltp/fsx $FSX_ARGS $FSX_AVOID $TEST_DIR/fsx.$seq > $tmp.output 2>&1

I wonder, is there a reason not to use run_fsx from common/rc?

Otherwise this looks ok to me.

--D

> +cat $tmp.output
> +
> +status=0
> +exit
> diff --git a/tests/generic/362.out b/tests/generic/362.out
> new file mode 100644
> index 00000000..7af6b96a
> --- /dev/null
> +++ b/tests/generic/362.out
> @@ -0,0 +1,2 @@
> +QA output created by 362
> +All 100000 operations completed A-OK!
> -- 
> 2.45.0
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] fsx: factor out a file size update helper
  2024-08-22 20:50   ` Darrick J. Wong
@ 2024-08-26 14:03     ` Brian Foster
  2024-08-26 17:10       ` Brian Foster
  0 siblings, 1 reply; 11+ messages in thread
From: Brian Foster @ 2024-08-26 14:03 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 01:50:19PM -0700, Darrick J. Wong wrote:
> On Thu, Aug 22, 2024 at 10:44:20AM -0400, Brian Foster wrote:
> > In preparation for support for eof page pollution, factor out a file
> > size update helper. This updates the internally tracked file size
> > based on the upcoming operation and zeroes the appropriate range in
> > the good buffer for extending operations.
> > 
> > Note that a handful of callers currently make these updates after
> > performing the associated operation. Order is not important to
> > current behavior, but it will be for a follow on patch, so make
> > those calls a bit earlier as well.
> > 
> > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > ---
> >  ltp/fsx.c | 57 +++++++++++++++++++++++++------------------------------
> >  1 file changed, 26 insertions(+), 31 deletions(-)
> > 
> > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > index 2dc59b06..1389c51d 100644
> > --- a/ltp/fsx.c
> > +++ b/ltp/fsx.c
> > @@ -983,6 +983,17 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
> >  	}
> >  }
> >  
> > +/*
> > + * Helper to update the tracked file size. If the offset begins beyond current
> > + * EOF, zero the range from EOF to offset in the good buffer.
> > + */
> > +void
> > +update_file_size(unsigned offset, unsigned size)
> > +{
> > +	if (offset > file_size)
> > +		memset(good_buf + file_size, '\0', offset - file_size);
> > +	file_size = offset + size;
> > +}
> >  
> >  void
> >  dowrite(unsigned offset, unsigned size)
> > @@ -1003,10 +1014,8 @@ dowrite(unsigned offset, unsigned size)
> >  	log4(OP_WRITE, offset, size, FL_NONE);
> >  
> >  	gendata(original_buf, good_buf, offset, size);
> > -	if (file_size < offset + size) {
> > -		if (file_size < offset)
> > -			memset(good_buf + file_size, '\0', offset - file_size);
> > -		file_size = offset + size;
> > +	if (offset + size > file_size) {
> > +		update_file_size(offset, size);
> >  		if (lite) {
> >  			warn("Lite file size bug in fsx!");
> >  			report_failure(149);
> > @@ -1070,10 +1079,8 @@ domapwrite(unsigned offset, unsigned size)
> >  	log4(OP_MAPWRITE, offset, size, FL_NONE);
> >  
> >  	gendata(original_buf, good_buf, offset, size);
> > -	if (file_size < offset + size) {
> > -		if (file_size < offset)
> > -			memset(good_buf + file_size, '\0', offset - file_size);
> > -		file_size = offset + size;
> > +	if (offset + size > file_size) {
> > +		update_file_size(offset, size);
> >  		if (lite) {
> >  			warn("Lite file size bug in fsx!");
> >  			report_failure(200);
> > @@ -1136,9 +1143,7 @@ dotruncate(unsigned size)
> >  
> >  	log4(OP_TRUNCATE, 0, size, FL_NONE);
> >  
> > -	if (size > file_size)
> > -		memset(good_buf + file_size, '\0', size - file_size);
> > -	file_size = size;
> > +	update_file_size(size, 0);
> >  
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> > @@ -1247,6 +1252,9 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
> >  	log4(OP_ZERO_RANGE, offset, length,
> >  	     keep_size ? FL_KEEP_SIZE : FL_NONE);
> >  
> > +	if (end_offset > file_size)
> > +		update_file_size(offset, length);
> > +
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> 
> Don't we only want to do the goodbuf zeroing if we don't bail out due to
> the (testcalls <= simulatedopcount) logic?  Same question for
> do_clone_range and do_copy_range.
> 

Hm, good question. It's quite possible I busted something there. For
some reason I was thinking this would all be a no-op for that mode but I
hadn't quite thought it through (or tested it).

It looks like this is for the -b "beginning operation number" support,
so you can start a test from somewhere beyond operation 0 of a given
seed/sequence. As such it appears to make changes to incore state and
then write the goodbuf out to the file and truncate it as a final step
before proper operation begins.

Looking at how some of the current operations are handled, I think the
size-related goodbuf zeroing is Ok because we'd expect that part of the
file to remain zeroed, after a truncate up, etc., for example. In fact,
I'm wondering if the current zero range behavior is technically wrong
because if a zero range were to extend the file during simulation, we
don't actually update the in-core state as expected. This might actually
be worth factoring out into a bug fix patch with proper explanation.

WRT the eof pollution behavior during simulation, I'm kind of on the
fence. It's arguably wrong because we're not running the kernel
operations and so there's nothing to verify, but OTOH perhaps we should
be doing the zeroing associated with size changes in-core that should
wipe out the eof pollution. Then again, nothing really verifies this and
if something fails, we end up writing that data out to the test file.
None of that is really the purpose of this mechanism, so I'm leaning
towards calling it wrong and making the following tweaks:

1. Split out another bug fix patch to do size related updates (including
zeroing) consistently during simulation.

2. Update pollute_eofpage() in the next patch to skip out when testcalls
<= simulatedopcount (with some commenting).

Let me know if you think any of that doesn't make sense.

> /me reads the second patch but doesn't quite get it. :/
> 

The zeroing tests I was using were basically manual test sequences to do
things like this:

	xfs_io -fc "truncate 2k" -c "falloc -k 0 4k" ... -c "mwrite 0 4k" \
		-c "truncate 8k" <file>

... which itentionally writes beyond EOF before a truncate up operation
to test whether the zero range in the truncate path DTRT with a dirty
folio over an unwritten block. In a nutshell, I'm just trying to
genericize that sort of test a bit more by doing similar post-eof
mwrites opportunistically in fsx in operations that should be expected
to do similar zeroing in-kernel.

I.e., replace the "truncate 8k" above with any size extending operation
that starts beyond EOF such that we should expect the range from
2k-<endofpage> to be zeroed. Does that explain things better?

> Are you doing this to mirror what the kernel does?  A comment here to
> explain why we're doing this differently would help me.
> 

Yeah, sort of... it's more like writing a canary value to a small range
of the file but rather than expecting it to remain unmodified, we expect
it to be zeroed by the upcoming operation. Therefore we don't write the
garbage data to goodbuf, because goodbuf should contain zeroes and we
want to detect a data mismatch on subsequent reads if the kernel didn't
do the expected zeroing.

Brian

> --D
> 
> >  
> > @@ -1263,17 +1271,6 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
> >  	}
> >  
> >  	memset(good_buf + offset, '\0', length);
> > -
> > -	if (!keep_size && end_offset > file_size) {
> > -		/*
> > -		 * If there's a gap between the old file size and the offset of
> > -		 * the zero range operation, fill the gap with zeroes.
> > -		 */
> > -		if (offset > file_size)
> > -			memset(good_buf + file_size, '\0', offset - file_size);
> > -
> > -		file_size = end_offset;
> > -	}
> >  }
> >  
> >  #else
> > @@ -1538,6 +1535,9 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
> >  
> >  	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
> >  
> > +	if (dest + length > file_size)
> > +		update_file_size(dest, length);
> > +
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> >  
> > @@ -1556,10 +1556,6 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
> >  	}
> >  
> >  	memcpy(good_buf + dest, good_buf + offset, length);
> > -	if (dest > file_size)
> > -		memset(good_buf + file_size, '\0', dest - file_size);
> > -	if (dest + length > file_size)
> > -		file_size = dest + length;
> >  }
> >  
> >  #else
> > @@ -1756,6 +1752,9 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
> >  
> >  	log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
> >  
> > +	if (dest + length > file_size)
> > +		update_file_size(dest, length);
> > +
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> >  
> > @@ -1792,10 +1791,6 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
> >  	}
> >  
> >  	memcpy(good_buf + dest, good_buf + offset, length);
> > -	if (dest > file_size)
> > -		memset(good_buf + file_size, '\0', dest - file_size);
> > -	if (dest + length > file_size)
> > -		file_size = dest + length;
> >  }
> >  
> >  #else
> > @@ -1846,7 +1841,7 @@ do_preallocate(unsigned offset, unsigned length, int keep_size)
> >  
> >  	if (end_offset > file_size) {
> >  		memset(good_buf + file_size, '\0', end_offset - file_size);
> > -		file_size = end_offset;
> > +		update_file_size(offset, length);
> >  	}
> >  
> >  	if (testcalls <= simulatedopcount)
> > -- 
> > 2.45.0
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage
  2024-08-22 20:52   ` Darrick J. Wong
@ 2024-08-26 14:04     ` Brian Foster
  0 siblings, 0 replies; 11+ messages in thread
From: Brian Foster @ 2024-08-26 14:04 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 01:52:57PM -0700, Darrick J. Wong wrote:
> On Thu, Aug 22, 2024 at 10:44:21AM -0400, Brian Foster wrote:
> > File ranges that are newly exposed via size changing operations are
> > expected to return zeroes until written to. This behavior tends to
> > be difficult to regression test as failures can be racy and
> > transient. fsx is probably the best tool for this type of test
> > coverage, but uncovering issues can require running for a
> > significantly longer period of time than is typically invoked
> > through fstests tests. As a result, these types of regressions tend
> > to go unnoticed for an unfortunate amount of time.
> > 
> > To facilitate uncovering these problems more quickly, implement an
> > eof pollution mode in fsx that opportunistically injects post-eof
> > data prior to operations that change file size. Since data injection
> > occurs immediately before the size changing operation, it can be
> > used to detect problems in partial eof page/block zeroing associated
> > with each relevant operation.
> > 
> > The implementation takes advantage of the fact that mapped writes
> > can place data beyond eof so long as the page starts within eof. The
> > main reason for the isolated per-operation approach (vs. something
> > like allowing mapped writes to write beyond eof, for example) is to
> > accommodate the fact that writeback zeroes post-eof data on the eof
> > page. The current approach is therefore not necessarily guaranteed
> > to detect all problems, but provides more generic and broad test
> > coverage than the alternative of testing explicit command sequences
> > and doesn't require significant changes to how fsx works. If this
> > proves useful long term, further enhancements can be considered that
> > might facilitate the presence of post-eof data across operations.
> > 
> > Enable the feature with the -e command line option. It is disabled
> > by default because zeroing behavior is inconsistent across
> > filesystems. This can also be revisited in the future if zeroing
> > behavior is refined for the major filesystems that rely on fstests
> > for regression testing.
> > 
> > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > ---
> >  ltp/fsx.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 72 insertions(+), 2 deletions(-)
> > 
> > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > index 1389c51d..20b8cd9f 100644
> > --- a/ltp/fsx.c
> > +++ b/ltp/fsx.c
> > @@ -178,6 +178,7 @@ int	dedupe_range_calls = 1;		/* -B flag disables */
> >  int	copy_range_calls = 1;		/* -E flag disables */
> >  int	exchange_range_calls = 1;	/* -0 flag disables */
> >  int	integrity = 0;			/* -i flag */
> > +int	pollute_eof = 0;		/* -e flag */
> >  int	fsxgoodfd = 0;
> >  int	o_direct;			/* -Z */
> >  int	aio = 0;
> > @@ -983,6 +984,58 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
> >  	}
> >  }
> >  
> > +/*
> > + * Pollute the EOF page with data beyond EOF prior to size change operations.
> > + * This provides additional test coverage for partial EOF block/page zeroing.
> > + * If the upcoming operation does not correctly zero, incorrect file data will
> > + * be detected.
> > + */
> > +void
> > +pollute_eofpage(unsigned int maxoff)
> > +{
> > +	unsigned offset = file_size;
> > +	unsigned pg_offset;
> > +	unsigned write_size;
> > +	char    *p;
> > +
> > +	if (!pollute_eof)
> > +		return;
> > +
> > +	/* write up to specified max or the end of the eof page */
> > +	pg_offset = offset & mmap_mask;
> > +	write_size = MIN(PAGE_SIZE - pg_offset, maxoff - offset);
> > +
> > +	if (!pg_offset)
> > +		return;
> > +
> > +	if (!quiet &&
> > +	    ((progressinterval && testcalls % progressinterval == 0) ||
> > +	    (debug &&
> > +	     (monitorstart == -1 ||
> > +	     (offset + write_size > monitorstart &&
> > +	      (monitorend == -1 || offset <= monitorend)))))) {
> > +		prt("%lld pollute_eof\t0x%x thru\t0x%x\t(0x%x bytes)\n",
> > +			testcalls, offset, offset + write_size - 1, write_size);
> > +	}
> > +
> > +	if ((p = (char *)mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
> > +			      MAP_FILE | MAP_SHARED, fd,
> > +			      (off_t)(offset - pg_offset))) == (char *)-1) {
> 
> Nit:
> 
> if (mmap(...) == MAP_FAILED)?
> 
> Otherwise I like the concept here. :)
> 

Yep, copy paste fail. Will fix.

Brian

> --D
> 
> > +		prterr("pollute_eofpage: mmap");
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Write to a range just past EOF of the test file. Do not update the
> > +	 * good buffer because the upcoming operation is expected to zero this
> > +	 * range of the file.
> > +	 */
> > +	gendata(original_buf, p, pg_offset, write_size);
> > +
> > +	if (munmap(p, PAGE_SIZE) != 0)
> > +		prterr("pollute_eofpage: munmap");
> > +}
> > +
> >  /*
> >   * Helper to update the tracked file size. If the offset begins beyond current
> >   * EOF, zero the range from EOF to offset in the good buffer.
> > @@ -990,8 +1043,10 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
> >  void
> >  update_file_size(unsigned offset, unsigned size)
> >  {
> > -	if (offset > file_size)
> > +	if (offset > file_size) {
> > +		pollute_eofpage(offset + size);
> >  		memset(good_buf + file_size, '\0', offset - file_size);
> > +	}
> >  	file_size = offset + size;
> >  }
> >  
> > @@ -1143,6 +1198,9 @@ dotruncate(unsigned size)
> >  
> >  	log4(OP_TRUNCATE, 0, size, FL_NONE);
> >  
> > +	/* pollute the current EOF before a truncate down */
> > +	if (size < file_size)
> > +		pollute_eofpage(maxfilelen);
> >  	update_file_size(size, 0);
> >  
> >  	if (testcalls <= simulatedopcount)
> > @@ -1305,6 +1363,9 @@ do_collapse_range(unsigned offset, unsigned length)
> >  
> >  	log4(OP_COLLAPSE_RANGE, offset, length, FL_NONE);
> >  
> > +	/* pollute current eof before collapse truncates down */
> > +	pollute_eofpage(maxfilelen);
> > +
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> >  
> > @@ -1356,6 +1417,9 @@ do_insert_range(unsigned offset, unsigned length)
> >  
> >  	log4(OP_INSERT_RANGE, offset, length, FL_NONE);
> >  
> > +	/* pollute current eof before insert truncates up */
> > +	pollute_eofpage(maxfilelen);
> > +
> >  	if (testcalls <= simulatedopcount)
> >  		return;
> >  
> > @@ -2385,6 +2449,7 @@ usage(void)
> >  	-b opnum: beginning operation number (default 1)\n\
> >  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
> >  	-d: debug output for all operations\n\
> > +	-e: pollute post-eof on size changes (default 0)\n\
> >  	-f: flush and invalidate cache after I/O\n\
> >  	-g X: write character X instead of random generated data\n\
> >  	-i logdev: do integrity testing, logdev is the dm log writes device\n\
> > @@ -2783,7 +2848,7 @@ main(int argc, char **argv)
> >  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
> >  
> >  	while ((ch = getopt_long(argc, argv,
> > -				 "0b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> > +				 "0b:c:de:fg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> >  				 longopts, NULL)) != EOF)
> >  		switch (ch) {
> >  		case 'b':
> > @@ -2805,6 +2870,11 @@ main(int argc, char **argv)
> >  		case 'd':
> >  			debug = 1;
> >  			break;
> > +		case 'e':
> > +			pollute_eof = getnum(optarg, &endp);
> > +			if (pollute_eof < 0 || pollute_eof > 1)
> > +				usage();
> > +			break;
> >  		case 'f':
> >  			flush = 1;
> >  			break;
> > -- 
> > 2.45.0
> > 
> > 
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] generic: test to run fsx eof pollution
  2024-08-22 20:54   ` Darrick J. Wong
@ 2024-08-26 14:07     ` Brian Foster
  0 siblings, 0 replies; 11+ messages in thread
From: Brian Foster @ 2024-08-26 14:07 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: fstests, linux-xfs, josef, david

On Thu, Aug 22, 2024 at 01:54:30PM -0700, Darrick J. Wong wrote:
> On Thu, Aug 22, 2024 at 10:44:22AM -0400, Brian Foster wrote:
> > Filesystem regressions related to partial page zeroing can go
> > unnoticed for a decent amount of time. A recent example is the issue
> > of iomap zero range not handling dirty pagecache over unwritten
> > extents, which leads to wrong behavior on certain file extending
> > operations (i.e. truncate, write extension, etc.).
> > 
> > fsx does occasionally uncover these sorts of problems, but failures
> > can be rare and/or require longer running tests outside what is
> > typically run via full fstests regression runs. fsx now supports a
> > mode that injects post-eof data in order to explicitly test partial
> > eof zeroing behavior. This uncovers certain problems more quickly
> > and applies coverage more broadly across size changing operations.
> > 
> > Add a new test that runs an fsx instance (modeled after generic/127)
> > with eof pollution mode enabled. While the test is generic, it is
> > currently limited to XFS as that is currently the only known major
> > fs that does enough zeroing to satisfy the strict semantics expected
> > by fsx. The long term goal is to uncover and fix issues so more
> > filesystems can enable this test.
> > 
> > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > ---
> >  tests/generic/362     | 27 +++++++++++++++++++++++++++
> >  tests/generic/362.out |  2 ++
> >  2 files changed, 29 insertions(+)
> >  create mode 100755 tests/generic/362
> >  create mode 100644 tests/generic/362.out
> > 
> > diff --git a/tests/generic/362 b/tests/generic/362
> > new file mode 100755
> > index 00000000..30870cd0
> > --- /dev/null
> > +++ b/tests/generic/362
> > @@ -0,0 +1,27 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2024 Red Hat, Inc.  All Rights Reserved.
> > +#
> > +# FSQA Test No. 362
> > +#
> > +# Run fsx with EOF pollution enabled. This provides test coverage for partial
> > +# EOF page/block zeroing for operations that change file size.
> > +#
> > +
> > +. ./common/preamble
> > +_begin_fstest rw auto
> > +
> > +FSX_FILE_SIZE=262144
> > +# on failure, replace -q with -d to see post-eof writes in the dump output
> > +FSX_ARGS="-q -l $FSX_FILE_SIZE -e 1 -N 100000"
> > +
> > +_require_test
> > +
> > +# currently only xfs performs enough zeroing to satisfy fsx
> > +_supported_fs xfs
> 
> Should get rid of this. ;)
> 

Agreed. But just to be clear, I was planning to leave this in for now so
we don't have to go and fix the various other filesystem issues
(assuming various maintainers agree) as a gate to fixing iomap zeroing
and having decent test coverage. I'm planning to look into the other
failures once I get through the iomap revalidation thing to avoid the
flush on XFS.

> > +ltp/fsx $FSX_ARGS $FSX_AVOID $TEST_DIR/fsx.$seq > $tmp.output 2>&1
> 
> I wonder, is there a reason not to use run_fsx from common/rc?
> 

At first glance the only thing that stands out to me is run_fsx
hardcodes the file to $TEST_DIR/junk instead of using and leaving around
a test-specific file. I find that ever so slightly annoying, but not
enough to hardcode the fsx command in the test, so I'll make that
change.

My initial hope was to be able to just turn on the eof pollution thing
by default and rely on existing fstests to (which I've confirmed do)
detect the iomap zeroing problems, but the other non-iomap fs failures
mean we can't do that without being disruptive and hence the need for
the custom fstest. If we can get to the point of completely removing the
_supported_fs check above, then perhaps we could also revisit changing
the fsx default and removing this test.

> Otherwise this looks ok to me.
> 

Thanks for the comments.

Brian

> --D
> 
> > +cat $tmp.output
> > +
> > +status=0
> > +exit
> > diff --git a/tests/generic/362.out b/tests/generic/362.out
> > new file mode 100644
> > index 00000000..7af6b96a
> > --- /dev/null
> > +++ b/tests/generic/362.out
> > @@ -0,0 +1,2 @@
> > +QA output created by 362
> > +All 100000 operations completed A-OK!
> > -- 
> > 2.45.0
> > 
> > 
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] fsx: factor out a file size update helper
  2024-08-26 14:03     ` Brian Foster
@ 2024-08-26 17:10       ` Brian Foster
  0 siblings, 0 replies; 11+ messages in thread
From: Brian Foster @ 2024-08-26 17:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: fstests, linux-xfs, josef, david

On Mon, Aug 26, 2024 at 10:03:54AM -0400, Brian Foster wrote:
> On Thu, Aug 22, 2024 at 01:50:19PM -0700, Darrick J. Wong wrote:
> > On Thu, Aug 22, 2024 at 10:44:20AM -0400, Brian Foster wrote:
> > > In preparation for support for eof page pollution, factor out a file
> > > size update helper. This updates the internally tracked file size
> > > based on the upcoming operation and zeroes the appropriate range in
> > > the good buffer for extending operations.
> > > 
> > > Note that a handful of callers currently make these updates after
> > > performing the associated operation. Order is not important to
> > > current behavior, but it will be for a follow on patch, so make
> > > those calls a bit earlier as well.
> > > 
> > > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > > ---
> > >  ltp/fsx.c | 57 +++++++++++++++++++++++++------------------------------
> > >  1 file changed, 26 insertions(+), 31 deletions(-)
> > > 
> > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > index 2dc59b06..1389c51d 100644
> > > --- a/ltp/fsx.c
> > > +++ b/ltp/fsx.c
> > > @@ -983,6 +983,17 @@ gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
> > >  	}
> > >  }
> > >  
> > > +/*
> > > + * Helper to update the tracked file size. If the offset begins beyond current
> > > + * EOF, zero the range from EOF to offset in the good buffer.
> > > + */
> > > +void
> > > +update_file_size(unsigned offset, unsigned size)
> > > +{
> > > +	if (offset > file_size)
> > > +		memset(good_buf + file_size, '\0', offset - file_size);
> > > +	file_size = offset + size;
> > > +}
> > >  
> > >  void
> > >  dowrite(unsigned offset, unsigned size)
> > > @@ -1003,10 +1014,8 @@ dowrite(unsigned offset, unsigned size)
> > >  	log4(OP_WRITE, offset, size, FL_NONE);
> > >  
> > >  	gendata(original_buf, good_buf, offset, size);
> > > -	if (file_size < offset + size) {
> > > -		if (file_size < offset)
> > > -			memset(good_buf + file_size, '\0', offset - file_size);
> > > -		file_size = offset + size;
> > > +	if (offset + size > file_size) {
> > > +		update_file_size(offset, size);
> > >  		if (lite) {
> > >  			warn("Lite file size bug in fsx!");
> > >  			report_failure(149);
> > > @@ -1070,10 +1079,8 @@ domapwrite(unsigned offset, unsigned size)
> > >  	log4(OP_MAPWRITE, offset, size, FL_NONE);
> > >  
> > >  	gendata(original_buf, good_buf, offset, size);
> > > -	if (file_size < offset + size) {
> > > -		if (file_size < offset)
> > > -			memset(good_buf + file_size, '\0', offset - file_size);
> > > -		file_size = offset + size;
> > > +	if (offset + size > file_size) {
> > > +		update_file_size(offset, size);
> > >  		if (lite) {
> > >  			warn("Lite file size bug in fsx!");
> > >  			report_failure(200);
> > > @@ -1136,9 +1143,7 @@ dotruncate(unsigned size)
> > >  
> > >  	log4(OP_TRUNCATE, 0, size, FL_NONE);
> > >  
> > > -	if (size > file_size)
> > > -		memset(good_buf + file_size, '\0', size - file_size);
> > > -	file_size = size;
> > > +	update_file_size(size, 0);
> > >  
> > >  	if (testcalls <= simulatedopcount)
> > >  		return;
> > > @@ -1247,6 +1252,9 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
> > >  	log4(OP_ZERO_RANGE, offset, length,
> > >  	     keep_size ? FL_KEEP_SIZE : FL_NONE);
> > >  
> > > +	if (end_offset > file_size)
> > > +		update_file_size(offset, length);
> > > +
> > >  	if (testcalls <= simulatedopcount)
> > >  		return;
> > 
> > Don't we only want to do the goodbuf zeroing if we don't bail out due to
> > the (testcalls <= simulatedopcount) logic?  Same question for
> > do_clone_range and do_copy_range.
> > 
> 
> Hm, good question. It's quite possible I busted something there. For
> some reason I was thinking this would all be a no-op for that mode but I
> hadn't quite thought it through (or tested it).
> 
> It looks like this is for the -b "beginning operation number" support,
> so you can start a test from somewhere beyond operation 0 of a given
> seed/sequence. As such it appears to make changes to incore state and
> then write the goodbuf out to the file and truncate it as a final step
> before proper operation begins.
> 
> Looking at how some of the current operations are handled, I think the
> size-related goodbuf zeroing is Ok because we'd expect that part of the
> file to remain zeroed, after a truncate up, etc., for example. In fact,
> I'm wondering if the current zero range behavior is technically wrong
> because if a zero range were to extend the file during simulation, we
> don't actually update the in-core state as expected. This might actually
> be worth factoring out into a bug fix patch with proper explanation.
> 
> WRT the eof pollution behavior during simulation, I'm kind of on the
> fence. It's arguably wrong because we're not running the kernel
> operations and so there's nothing to verify, but OTOH perhaps we should
> be doing the zeroing associated with size changes in-core that should
> wipe out the eof pollution. Then again, nothing really verifies this and
> if something fails, we end up writing that data out to the test file.
> None of that is really the purpose of this mechanism, so I'm leaning
> towards calling it wrong and making the following tweaks:
> 

Err.. I think you can disregard most of this reasoning. I got my wires
crossed between zeroing behavior and pollute_eofpage().
pollute_eofpage() updates the file only, so just blows up in simulation
mode and is clearly wrong. :) That makes things more simple, though I
think the plan below to disable it is the same...

Brian

> 1. Split out another bug fix patch to do size related updates (including
> zeroing) consistently during simulation.
> 
> 2. Update pollute_eofpage() in the next patch to skip out when testcalls
> <= simulatedopcount (with some commenting).
> 
> Let me know if you think any of that doesn't make sense.
> 
> > /me reads the second patch but doesn't quite get it. :/
> > 
> 
> The zeroing tests I was using were basically manual test sequences to do
> things like this:
> 
> 	xfs_io -fc "truncate 2k" -c "falloc -k 0 4k" ... -c "mwrite 0 4k" \
> 		-c "truncate 8k" <file>
> 
> ... which itentionally writes beyond EOF before a truncate up operation
> to test whether the zero range in the truncate path DTRT with a dirty
> folio over an unwritten block. In a nutshell, I'm just trying to
> genericize that sort of test a bit more by doing similar post-eof
> mwrites opportunistically in fsx in operations that should be expected
> to do similar zeroing in-kernel.
> 
> I.e., replace the "truncate 8k" above with any size extending operation
> that starts beyond EOF such that we should expect the range from
> 2k-<endofpage> to be zeroed. Does that explain things better?
> 
> > Are you doing this to mirror what the kernel does?  A comment here to
> > explain why we're doing this differently would help me.
> > 
> 
> Yeah, sort of... it's more like writing a canary value to a small range
> of the file but rather than expecting it to remain unmodified, we expect
> it to be zeroed by the upcoming operation. Therefore we don't write the
> garbage data to goodbuf, because goodbuf should contain zeroes and we
> want to detect a data mismatch on subsequent reads if the kernel didn't
> do the expected zeroing.
> 
> Brian
> 
> > --D
> > 
> > >  
> > > @@ -1263,17 +1271,6 @@ do_zero_range(unsigned offset, unsigned length, int keep_size)
> > >  	}
> > >  
> > >  	memset(good_buf + offset, '\0', length);
> > > -
> > > -	if (!keep_size && end_offset > file_size) {
> > > -		/*
> > > -		 * If there's a gap between the old file size and the offset of
> > > -		 * the zero range operation, fill the gap with zeroes.
> > > -		 */
> > > -		if (offset > file_size)
> > > -			memset(good_buf + file_size, '\0', offset - file_size);
> > > -
> > > -		file_size = end_offset;
> > > -	}
> > >  }
> > >  
> > >  #else
> > > @@ -1538,6 +1535,9 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
> > >  
> > >  	log5(OP_CLONE_RANGE, offset, length, dest, FL_NONE);
> > >  
> > > +	if (dest + length > file_size)
> > > +		update_file_size(dest, length);
> > > +
> > >  	if (testcalls <= simulatedopcount)
> > >  		return;
> > >  
> > > @@ -1556,10 +1556,6 @@ do_clone_range(unsigned offset, unsigned length, unsigned dest)
> > >  	}
> > >  
> > >  	memcpy(good_buf + dest, good_buf + offset, length);
> > > -	if (dest > file_size)
> > > -		memset(good_buf + file_size, '\0', dest - file_size);
> > > -	if (dest + length > file_size)
> > > -		file_size = dest + length;
> > >  }
> > >  
> > >  #else
> > > @@ -1756,6 +1752,9 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
> > >  
> > >  	log5(OP_COPY_RANGE, offset, length, dest, FL_NONE);
> > >  
> > > +	if (dest + length > file_size)
> > > +		update_file_size(dest, length);
> > > +
> > >  	if (testcalls <= simulatedopcount)
> > >  		return;
> > >  
> > > @@ -1792,10 +1791,6 @@ do_copy_range(unsigned offset, unsigned length, unsigned dest)
> > >  	}
> > >  
> > >  	memcpy(good_buf + dest, good_buf + offset, length);
> > > -	if (dest > file_size)
> > > -		memset(good_buf + file_size, '\0', dest - file_size);
> > > -	if (dest + length > file_size)
> > > -		file_size = dest + length;
> > >  }
> > >  
> > >  #else
> > > @@ -1846,7 +1841,7 @@ do_preallocate(unsigned offset, unsigned length, int keep_size)
> > >  
> > >  	if (end_offset > file_size) {
> > >  		memset(good_buf + file_size, '\0', end_offset - file_size);
> > > -		file_size = end_offset;
> > > +		update_file_size(offset, length);
> > >  	}
> > >  
> > >  	if (testcalls <= simulatedopcount)
> > > -- 
> > > 2.45.0
> > > 
> > > 
> > 
> 
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-08-26 17:09 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-22 14:44 [PATCH 0/3] fstests/fsx: test coverage for eof zeroing Brian Foster
2024-08-22 14:44 ` [PATCH 1/3] fsx: factor out a file size update helper Brian Foster
2024-08-22 20:50   ` Darrick J. Wong
2024-08-26 14:03     ` Brian Foster
2024-08-26 17:10       ` Brian Foster
2024-08-22 14:44 ` [PATCH 2/3] fsx: support eof page pollution for eof zeroing test coverage Brian Foster
2024-08-22 20:52   ` Darrick J. Wong
2024-08-26 14:04     ` Brian Foster
2024-08-22 14:44 ` [PATCH 3/3] generic: test to run fsx eof pollution Brian Foster
2024-08-22 20:54   ` Darrick J. Wong
2024-08-26 14:07     ` Brian Foster

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox