public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] test extending sub-block AIO writes for races
@ 2015-09-30  1:03 Eric Sandeen
  2015-09-30 11:47 ` Brian Foster
  2015-09-30 15:59 ` [PATCH V2] " Eric Sandeen
  0 siblings, 2 replies; 6+ messages in thread
From: Eric Sandeen @ 2015-09-30  1:03 UTC (permalink / raw)
  To: fstests; +Cc: xfs

This tests bfoster's
[PATCH 1/2] xfs: always drain dio before extending aio write submission
patch; it launches four adjacent 1k IOs past EOF, then reads back
to see if we have 4k worth of the data we wrote, or something else - 
possibly zeros from sub-block zeroing and eof racing.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---


diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
new file mode 100644
index 0000000..c1ce695
--- /dev/null
+++ b/src/aio-dio-regress/aio-dio-eof-race.c
@@ -0,0 +1,170 @@
+/*
+ * Launch 4 file-extending extending sub-block AIOs and ensure that we
+ * don't see data corruption when they're all complete.
+ *
+ * Copyright (C) 2015 Red Hat, Inc. All Rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include <libaio.h>
+
+#define BUF_SIZE	4096
+#define IO_PATTERN	0xab
+
+void
+dump_buffer(
+	void	*buf,
+	off64_t	offset,
+	ssize_t	len)
+{
+	int	i, j;
+	char	*p;
+	int	new;
+	
+	for (i = 0, p = (char *)buf; i < len; i += 16) {
+		char    *s = p;
+
+		if (i && !memcmp(p, p - 16, 16)) {
+			new = 0;
+		} else {
+			if (i)
+				printf("*\n");
+			new = 1;
+		}
+
+		if (!new) {
+			p += 16;
+			continue;
+		}
+
+		printf("%08llx  ", (unsigned long long)offset + i);
+		for (j = 0; j < 16 && i + j < len; j++, p++)
+			printf("%02x ", *p);
+		printf(" ");
+		for (j = 0; j < 16 && i + j < len; j++, s++) {
+			if (isalnum((int)*s))
+				printf("%c", *s);
+			else
+				printf(".");
+		}
+		printf("\n");
+
+	}
+	printf("%08llx\n", (unsigned long long)offset + i);
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_context *ctx = NULL;
+	struct io_event evs[4];
+	struct iocb iocb1, iocb2, iocb3, iocb4;
+	struct iocb *iocbs[] = { &iocb1, &iocb2, &iocb3, &iocb4 };
+	void *buf;
+	struct stat statbuf;
+	char cmp_buf[BUF_SIZE];
+	int fd, err = 0;
+	off_t eof;
+
+	fd = open(argv[1], O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
+	if (fd == -1) {
+		perror("open");
+		return 1;
+	}
+
+	err = posix_memalign(&buf, BUF_SIZE, BUF_SIZE);
+	if (err) {
+		fprintf(stderr, "error %s during %s\n",
+			strerror(err),
+			"posix_memalign");
+		return 1;
+	}
+	memset(cmp_buf, IO_PATTERN, BUF_SIZE);
+
+	err = io_setup(4, &ctx);
+	if (err) {
+		fprintf(stderr, "error %s during %s\n",
+			strerror(err),
+			"io_setup");
+		return 1;
+	}
+
+	eof = 0;
+
+	/* Keep extending until 8MB */
+	while (eof < 8 * 1024 * 1024) {
+		memset(buf, IO_PATTERN, BUF_SIZE);
+
+		fstat(fd, &statbuf);
+		eof = statbuf.st_size;
+
+		/*
+		 * 4 ios, racing to extend EOF, combined they write full BUF_SIZE
+		 */
+		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0 * BUF_SIZE/4);
+		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1 * BUF_SIZE/4);
+		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2 * BUF_SIZE/4);
+		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3 * BUF_SIZE/4);
+	
+		err = io_submit(ctx, 4, iocbs);
+		if (err != 4) {
+			fprintf(stderr, "error %s during %s\n",
+				strerror(err),
+				"io_submit");
+			return 1;
+		}
+
+		err = io_getevents(ctx, 4, 4, evs, NULL);
+		if (err != 4) {
+			fprintf(stderr, "error %s during %s\n",
+				strerror(err),
+				"io_getevents");
+			return 1;
+		}
+
+		/*
+		 * And then read it back.
+		 *
+		 * Using pread to keep it simple, but AIO has the same effect.
+		 *
+		 * eof is the old eof, we just wrote BUF_SIZE more
+		 */
+		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
+			perror("pread");
+			return 1;
+		}
+
+		/*
+		 * We launched 4 AIOs which, stiched together, should write
+		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block
+		 */
+		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
+			printf("corruption while extending from %ld\n", eof);
+			dump_buffer(buf, 0, BUF_SIZE);
+			return 1;
+		}
+	}
+
+	printf("Success, all done.\n");
+	return 0;
+}
diff --git a/tests/generic/326 b/tests/generic/326
new file mode 100755
index 0000000..7db04ac
--- /dev/null
+++ b/tests/generic/326
@@ -0,0 +1,64 @@
+#! /bin/bash
+# FS QA Test No. 326
+#
+# Test races while extending past EOF via sub-block AIO writes
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+    cd /
+    rm -f $TEST_DIR/tst-aio-dio-eof-race
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+_supported_fs generic
+_supported_os Linux
+
+_require_test
+_require_sparse_files
+_require_aiodio aio-dio-eof-race
+
+# Test does 512 byte DIO, so make sure that'll work
+logical_block_size=`_min_dio_alignment $TEST_DEV`
+
+if [ "$logical_block_size" -gt "512" ]; then
+	_notrun "device block size: $logical_block_size greater than 512"
+fi
+
+# If 512 isn't a sub-block IO, the test should still pass, so
+# let that go.
+
+# This test does several extending loops internally
+$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
+
+status=$?
+exit
diff --git a/tests/generic/326.out b/tests/generic/326.out
new file mode 100644
index 0000000..22a3e78
--- /dev/null
+++ b/tests/generic/326.out
@@ -0,0 +1,2 @@
+QA output created by 326
+Success, all done.
diff --git a/tests/generic/group b/tests/generic/group
index 4ae256f..a5f3008 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -207,3 +207,4 @@
 323 auto aio stress
 324 auto fsr quick
 325 auto quick data log
+326 auto quick aio


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] test extending sub-block AIO writes for races
  2015-09-30  1:03 [PATCH] test extending sub-block AIO writes for races Eric Sandeen
@ 2015-09-30 11:47 ` Brian Foster
  2015-09-30 12:53   ` Eric Sandeen
  2015-09-30 15:59 ` [PATCH V2] " Eric Sandeen
  1 sibling, 1 reply; 6+ messages in thread
From: Brian Foster @ 2015-09-30 11:47 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: fstests, xfs

On Tue, Sep 29, 2015 at 08:03:31PM -0500, Eric Sandeen wrote:
> This tests bfoster's
> [PATCH 1/2] xfs: always drain dio before extending aio write submission
> patch; it launches four adjacent 1k IOs past EOF, then reads back
> to see if we have 4k worth of the data we wrote, or something else - 
> possibly zeros from sub-block zeroing and eof racing.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 

Thanks for the test! A few high level comments...

> 
> diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
> new file mode 100644
> index 0000000..c1ce695
> --- /dev/null
> +++ b/src/aio-dio-regress/aio-dio-eof-race.c
> @@ -0,0 +1,170 @@
> +/*
> + * Launch 4 file-extending extending sub-block AIOs and ensure that we
> + * don't see data corruption when they're all complete.
> + *

It might be good to point out that this stresses EOF zeroing, which also
means a key point is not just file-extending I/O, but file-extending I/O
beyond current EOF (e.g., I/O that creates holes between the current EOF
and start of the new I/O).

> + * Copyright (C) 2015 Red Hat, Inc. All Rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> + */
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <ctype.h>
> +
> +#include <libaio.h>
> +
> +#define BUF_SIZE	4096

Have you considered making the buffer size variable? As is currently
written, I don't think this will reproduce the problem on 512b or 1024b
fsb filesystems because at least some of the file-extending I/Os must
not be block aligned for it to trigger.

> +#define IO_PATTERN	0xab
> +
> +void
> +dump_buffer(
> +	void	*buf,
> +	off64_t	offset,
> +	ssize_t	len)
> +{
> +	int	i, j;
> +	char	*p;
> +	int	new;
> +	
> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
> +		char    *s = p;
> +
> +		if (i && !memcmp(p, p - 16, 16)) {
> +			new = 0;
> +		} else {
> +			if (i)
> +				printf("*\n");
> +			new = 1;
> +		}
> +
> +		if (!new) {
> +			p += 16;
> +			continue;
> +		}
> +
> +		printf("%08llx  ", (unsigned long long)offset + i);
> +		for (j = 0; j < 16 && i + j < len; j++, p++)
> +			printf("%02x ", *p);
> +		printf(" ");
> +		for (j = 0; j < 16 && i + j < len; j++, s++) {
> +			if (isalnum((int)*s))
> +				printf("%c", *s);
> +			else
> +				printf(".");
> +		}
> +		printf("\n");
> +
> +	}
> +	printf("%08llx\n", (unsigned long long)offset + i);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct io_context *ctx = NULL;
> +	struct io_event evs[4];
> +	struct iocb iocb1, iocb2, iocb3, iocb4;
> +	struct iocb *iocbs[] = { &iocb1, &iocb2, &iocb3, &iocb4 };
> +	void *buf;
> +	struct stat statbuf;
> +	char cmp_buf[BUF_SIZE];
> +	int fd, err = 0;
> +	off_t eof;
> +
> +	fd = open(argv[1], O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
> +	if (fd == -1) {
> +		perror("open");
> +		return 1;
> +	}
> +
> +	err = posix_memalign(&buf, BUF_SIZE, BUF_SIZE);

The alignment should technically be PAGE_SIZE (even though BUF_SIZE ==
4096), right?

> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +			strerror(err),
> +			"posix_memalign");
> +		return 1;
> +	}
> +	memset(cmp_buf, IO_PATTERN, BUF_SIZE);
> +
> +	err = io_setup(4, &ctx);
> +	if (err) {
> +		fprintf(stderr, "error %s during %s\n",
> +			strerror(err),
> +			"io_setup");
> +		return 1;
> +	}
> +
> +	eof = 0;
> +
> +	/* Keep extending until 8MB */
> +	while (eof < 8 * 1024 * 1024) {
> +		memset(buf, IO_PATTERN, BUF_SIZE);
> +
> +		fstat(fd, &statbuf);
> +		eof = statbuf.st_size;
> +
> +		/*
> +		 * 4 ios, racing to extend EOF, combined they write full BUF_SIZE
> +		 */

I would expand on this comment with something like the following:

"Split the buffer into multiple I/Os to send a mix of block
aligned/unaligned writes as well as writes that start beyond the current
EOF. This stresses things like inode size management and stale block
zeroing for races and can lead to data corruption when not handled
properly."

... but feel free to reword that as necessary. I just wanted to point
out that 1.) unaligned I/O is required and 2.) write offsets beyond EOF
are required.

> +		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0 * BUF_SIZE/4);
> +		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1 * BUF_SIZE/4);
> +		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2 * BUF_SIZE/4);
> +		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3 * BUF_SIZE/4);
> +	
> +		err = io_submit(ctx, 4, iocbs);
> +		if (err != 4) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_submit");
> +			return 1;
> +		}
> +
> +		err = io_getevents(ctx, 4, 4, evs, NULL);
> +		if (err != 4) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_getevents");
> +			return 1;
> +		}
> +
> +		/*
> +		 * And then read it back.
> +		 *
> +		 * Using pread to keep it simple, but AIO has the same effect.
> +		 *
> +		 * eof is the old eof, we just wrote BUF_SIZE more
> +		 */
> +		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
> +			perror("pread");
> +			return 1;
> +		}
> +
> +		/*
> +		 * We launched 4 AIOs which, stiched together, should write

Nit:					     stitched

> +		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block
> +		 */
> +		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
> +			printf("corruption while extending from %ld\n", eof);
> +			dump_buffer(buf, 0, BUF_SIZE);
> +			return 1;
> +		}
> +	}
> +
> +	printf("Success, all done.\n");
> +	return 0;
> +}
> diff --git a/tests/generic/326 b/tests/generic/326
> new file mode 100755
> index 0000000..7db04ac
> --- /dev/null
> +++ b/tests/generic/326
> @@ -0,0 +1,64 @@
> +#! /bin/bash
> +# FS QA Test No. 326
> +#
> +# Test races while extending past EOF via sub-block AIO writes
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +    cd /
> +    rm -f $TEST_DIR/tst-aio-dio-eof-race
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +_supported_fs generic
> +_supported_os Linux
> +
> +_require_test
> +_require_sparse_files
> +_require_aiodio aio-dio-eof-race
> +
> +# Test does 512 byte DIO, so make sure that'll work
> +logical_block_size=`_min_dio_alignment $TEST_DEV`
> +
> +if [ "$logical_block_size" -gt "512" ]; then
> +	_notrun "device block size: $logical_block_size greater than 512"
> +fi
> +

Technically the test splits a 4k buffer into four parts and so sends 1k
dio. Not that it really matters here, but I guess I'd update the
comments. :)

> +# If 512 isn't a sub-block IO, the test should still pass, so
> +# let that go.
> +

Ok, so this at least points out the limitation to the test. I think it
would be slightly better if the test were configurable as mentioned in
the first comment above. For example, the test program could take a
block size parameter and "do the right thing" based on that.
Alternatively, we could specify buffer size and iocb count as parameters
and let the xfstests script send the right params based on the test fs.
I guess the latter would complicate things slightly because the program
probably wants to ensure full buffers are written in each io_submit()
instance for verification purposes.

Just some thoughts... we could leave it as is too. In that case I would
suggest to expand the comment above to be specific about which block
sizes (512b, 1k) do not result in unaligned I/O.

Brian

> +# This test does several extending loops internally
> +$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
> +
> +status=$?
> +exit
> diff --git a/tests/generic/326.out b/tests/generic/326.out
> new file mode 100644
> index 0000000..22a3e78
> --- /dev/null
> +++ b/tests/generic/326.out
> @@ -0,0 +1,2 @@
> +QA output created by 326
> +Success, all done.
> diff --git a/tests/generic/group b/tests/generic/group
> index 4ae256f..a5f3008 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -207,3 +207,4 @@
>  323 auto aio stress
>  324 auto fsr quick
>  325 auto quick data log
> +326 auto quick aio
> 
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test extending sub-block AIO writes for races
  2015-09-30 11:47 ` Brian Foster
@ 2015-09-30 12:53   ` Eric Sandeen
  2015-09-30 15:24     ` Brian Foster
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Sandeen @ 2015-09-30 12:53 UTC (permalink / raw)
  To: Brian Foster; +Cc: fstests, xfs



On 9/30/15 6:47 AM, Brian Foster wrote:
> On Tue, Sep 29, 2015 at 08:03:31PM -0500, Eric Sandeen wrote:
>> This tests bfoster's
>> [PATCH 1/2] xfs: always drain dio before extending aio write submission
>> patch; it launches four adjacent 1k IOs past EOF, then reads back
>> to see if we have 4k worth of the data we wrote, or something else - 
>> possibly zeros from sub-block zeroing and eof racing.
>>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
> 
> Thanks for the test! A few high level comments...
> 
>>
>> diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
>> new file mode 100644
>> index 0000000..c1ce695
>> --- /dev/null
>> +++ b/src/aio-dio-regress/aio-dio-eof-race.c
>> @@ -0,0 +1,170 @@
>> +/*
>> + * Launch 4 file-extending extending sub-block AIOs and ensure that we
>> + * don't see data corruption when they're all complete.
>> + *
> 
> It might be good to point out that this stresses EOF zeroing, which also
> means a key point is not just file-extending I/O, but file-extending I/O
> beyond current EOF (e.g., I/O that creates holes between the current EOF
> and start of the new I/O).

yeah, ok, good to be specific.

>> + * Copyright (C) 2015 Red Hat, Inc. All Rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
>> + */
>> +#include <sys/stat.h>
>> +#include <sys/types.h>
>> +#include <errno.h>
>> +#include <fcntl.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <ctype.h>
>> +
>> +#include <libaio.h>
>> +
>> +#define BUF_SIZE	4096
> 
> Have you considered making the buffer size variable? As is currently
> written, I don't think this will reproduce the problem on 512b or 1024b
> fsb filesystems because at least some of the file-extending I/Os must
> not be block aligned for it to trigger.

yeah, I meant to make it smaller, then got distracted, tbh ;

But I think launching 4x512 IOs should always do the trick, right?

And it's impossible to repro on a 512b fs block I think, because
we can't have any sub-block/unaligned IOs?

So I think that perhaps switching it to 2048 & doing 4x512 IOs
would suffice, right?

>> +#define IO_PATTERN	0xab
>> +
>> +void
>> +dump_buffer(
>> +	void	*buf,
>> +	off64_t	offset,
>> +	ssize_t	len)
>> +{
>> +	int	i, j;
>> +	char	*p;
>> +	int	new;
>> +	
>> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
>> +		char    *s = p;
>> +
>> +		if (i && !memcmp(p, p - 16, 16)) {
>> +			new = 0;
>> +		} else {
>> +			if (i)
>> +				printf("*\n");
>> +			new = 1;
>> +		}
>> +
>> +		if (!new) {
>> +			p += 16;
>> +			continue;
>> +		}
>> +
>> +		printf("%08llx  ", (unsigned long long)offset + i);
>> +		for (j = 0; j < 16 && i + j < len; j++, p++)
>> +			printf("%02x ", *p);
>> +		printf(" ");
>> +		for (j = 0; j < 16 && i + j < len; j++, s++) {
>> +			if (isalnum((int)*s))
>> +				printf("%c", *s);
>> +			else
>> +				printf(".");
>> +		}
>> +		printf("\n");
>> +
>> +	}
>> +	printf("%08llx\n", (unsigned long long)offset + i);
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +	struct io_context *ctx = NULL;
>> +	struct io_event evs[4];
>> +	struct iocb iocb1, iocb2, iocb3, iocb4;
>> +	struct iocb *iocbs[] = { &iocb1, &iocb2, &iocb3, &iocb4 };
>> +	void *buf;
>> +	struct stat statbuf;
>> +	char cmp_buf[BUF_SIZE];
>> +	int fd, err = 0;
>> +	off_t eof;
>> +
>> +	fd = open(argv[1], O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
>> +	if (fd == -1) {
>> +		perror("open");
>> +		return 1;
>> +	}
>> +
>> +	err = posix_memalign(&buf, BUF_SIZE, BUF_SIZE);
> 
> The alignment should technically be PAGE_SIZE (even though BUF_SIZE ==
> 4096), right?

hm, tbh this just came along from the other AIO tests I copied from ;)
Sure, I can do it page-sized, that'd be better.

>> +	if (err) {
>> +		fprintf(stderr, "error %s during %s\n",
>> +			strerror(err),
>> +			"posix_memalign");
>> +		return 1;
>> +	}
>> +	memset(cmp_buf, IO_PATTERN, BUF_SIZE);
>> +
>> +	err = io_setup(4, &ctx);
>> +	if (err) {
>> +		fprintf(stderr, "error %s during %s\n",
>> +			strerror(err),
>> +			"io_setup");
>> +		return 1;
>> +	}
>> +
>> +	eof = 0;
>> +
>> +	/* Keep extending until 8MB */
>> +	while (eof < 8 * 1024 * 1024) {
>> +		memset(buf, IO_PATTERN, BUF_SIZE);
>> +
>> +		fstat(fd, &statbuf);
>> +		eof = statbuf.st_size;
>> +
>> +		/*
>> +		 * 4 ios, racing to extend EOF, combined they write full BUF_SIZE
>> +		 */
> 
> I would expand on this comment with something like the following:
> 
> "Split the buffer into multiple I/Os to send a mix of block
> aligned/unaligned writes as well as writes that start beyond the current
> EOF. This stresses things like inode size management and stale block
> zeroing for races and can lead to data corruption when not handled
> properly."
> 
> ... but feel free to reword that as necessary. I just wanted to point
> out that 1.) unaligned I/O is required and 2.) write offsets beyond EOF
> are required.

ok

 
>> +		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0 * BUF_SIZE/4);
>> +		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1 * BUF_SIZE/4);
>> +		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2 * BUF_SIZE/4);
>> +		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3 * BUF_SIZE/4);
>> +	
>> +		err = io_submit(ctx, 4, iocbs);
>> +		if (err != 4) {
>> +			fprintf(stderr, "error %s during %s\n",
>> +				strerror(err),
>> +				"io_submit");
>> +			return 1;
>> +		}
>> +
>> +		err = io_getevents(ctx, 4, 4, evs, NULL);
>> +		if (err != 4) {
>> +			fprintf(stderr, "error %s during %s\n",
>> +				strerror(err),
>> +				"io_getevents");
>> +			return 1;
>> +		}
>> +
>> +		/*
>> +		 * And then read it back.
>> +		 *
>> +		 * Using pread to keep it simple, but AIO has the same effect.
>> +		 *
>> +		 * eof is the old eof, we just wrote BUF_SIZE more
>> +		 */
>> +		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
>> +			perror("pread");
>> +			return 1;
>> +		}
>> +
>> +		/*
>> +		 * We launched 4 AIOs which, stiched together, should write
> 
> Nit:					     stitched

doh ;)

>> +		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block
>> +		 */
>> +		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
>> +			printf("corruption while extending from %ld\n", eof);
>> +			dump_buffer(buf, 0, BUF_SIZE);
>> +			return 1;
>> +		}
>> +	}
>> +
>> +	printf("Success, all done.\n");
>> +	return 0;
>> +}
>> diff --git a/tests/generic/326 b/tests/generic/326
>> new file mode 100755
>> index 0000000..7db04ac
>> --- /dev/null
>> +++ b/tests/generic/326
>> @@ -0,0 +1,64 @@
>> +#! /bin/bash
>> +# FS QA Test No. 326
>> +#
>> +# Test races while extending past EOF via sub-block AIO writes
>> +#
>> +#-----------------------------------------------------------------------
>> +# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
>> +#
>> +# This program is free software; you can redistribute it and/or
>> +# modify it under the terms of the GNU General Public License as
>> +# published by the Free Software Foundation.
>> +#
>> +# This program is distributed in the hope that it would be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program; if not, write the Free Software Foundation,
>> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
>> +#-----------------------------------------------------------------------
>> +#
>> +
>> +seq=`basename $0`
>> +seqres=$RESULT_DIR/$seq
>> +echo "QA output created by $seq"
>> +
>> +here=`pwd`
>> +tmp=/tmp/$$
>> +status=1	# failure is the default!
>> +trap "_cleanup; exit \$status" 0 1 2 3 15
>> +
>> +_cleanup()
>> +{
>> +    cd /
>> +    rm -f $TEST_DIR/tst-aio-dio-eof-race
>> +}
>> +
>> +# get standard environment, filters and checks
>> +. ./common/rc
>> +. ./common/filter
>> +
>> +_supported_fs generic
>> +_supported_os Linux
>> +
>> +_require_test
>> +_require_sparse_files
>> +_require_aiodio aio-dio-eof-race
>> +
>> +# Test does 512 byte DIO, so make sure that'll work
>> +logical_block_size=`_min_dio_alignment $TEST_DEV`
>> +
>> +if [ "$logical_block_size" -gt "512" ]; then
>> +	_notrun "device block size: $logical_block_size greater than 512"
>> +fi
>> +
> 
> Technically the test splits a 4k buffer into four parts and so sends 1k
> dio. Not that it really matters here, but I guess I'd update the
> comments. :)
> 
>> +# If 512 isn't a sub-block IO, the test should still pass, so
>> +# let that go.
>> +
> 
> Ok, so this at least points out the limitation to the test. I think it
> would be slightly better if the test were configurable as mentioned in
> the first comment above. For example, the test program could take a
> block size parameter and "do the right thing" based on that.
> Alternatively, we could specify buffer size and iocb count as parameters
> and let the xfstests script send the right params based on the test fs.
> I guess the latter would complicate things slightly because the program
> probably wants to ensure full buffers are written in each io_submit()
> instance for verification purposes.
> 
> Just some thoughts... we could leave it as is too. In that case I would
> suggest to expand the comment above to be specific about which block
> sizes (512b, 1k) do not result in unaligned I/O.

ok, I'll make it a bit more universal one way or the other.

Thanks for the review,
-Eric

> Brian
> 
>> +# This test does several extending loops internally
>> +$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
>> +
>> +status=$?
>> +exit
>> diff --git a/tests/generic/326.out b/tests/generic/326.out
>> new file mode 100644
>> index 0000000..22a3e78
>> --- /dev/null
>> +++ b/tests/generic/326.out
>> @@ -0,0 +1,2 @@
>> +QA output created by 326
>> +Success, all done.
>> diff --git a/tests/generic/group b/tests/generic/group
>> index 4ae256f..a5f3008 100644
>> --- a/tests/generic/group
>> +++ b/tests/generic/group
>> @@ -207,3 +207,4 @@
>>  323 auto aio stress
>>  324 auto fsr quick
>>  325 auto quick data log
>> +326 auto quick aio
>>
>>
>> _______________________________________________
>> xfs mailing list
>> xfs@oss.sgi.com
>> http://oss.sgi.com/mailman/listinfo/xfs
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test extending sub-block AIO writes for races
  2015-09-30 12:53   ` Eric Sandeen
@ 2015-09-30 15:24     ` Brian Foster
  0 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2015-09-30 15:24 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: fstests, xfs

On Wed, Sep 30, 2015 at 07:53:39AM -0500, Eric Sandeen wrote:
> 
> 
> On 9/30/15 6:47 AM, Brian Foster wrote:
> > On Tue, Sep 29, 2015 at 08:03:31PM -0500, Eric Sandeen wrote:
> >> This tests bfoster's
> >> [PATCH 1/2] xfs: always drain dio before extending aio write submission
> >> patch; it launches four adjacent 1k IOs past EOF, then reads back
> >> to see if we have 4k worth of the data we wrote, or something else - 
> >> possibly zeros from sub-block zeroing and eof racing.
> >>
> >> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> >> ---
> >>
> > 
> > Thanks for the test! A few high level comments...
> > 
> >>
> >> diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
> >> new file mode 100644
> >> index 0000000..c1ce695
> >> --- /dev/null
> >> +++ b/src/aio-dio-regress/aio-dio-eof-race.c
> >> @@ -0,0 +1,170 @@
> >> +/*
> >> + * Launch 4 file-extending extending sub-block AIOs and ensure that we
> >> + * don't see data corruption when they're all complete.
> >> + *
> > 
> > It might be good to point out that this stresses EOF zeroing, which also
> > means a key point is not just file-extending I/O, but file-extending I/O
> > beyond current EOF (e.g., I/O that creates holes between the current EOF
> > and start of the new I/O).
> 
> yeah, ok, good to be specific.
> 
> >> + * Copyright (C) 2015 Red Hat, Inc. All Rights reserved.
> >> + *
> >> + * This program is free software; you can redistribute it and/or modify
> >> + * it under the terms of the GNU General Public License as published by
> >> + * the Free Software Foundation; either version 2 of the License, or
> >> + * (at your option) any later version.
> >> + *
> >> + * This program is distributed in the hope that it will be useful,
> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >> + * GNU General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU General Public License
> >> + * along with this program; if not, write to the Free Software
> >> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
> >> + */
> >> +#include <sys/stat.h>
> >> +#include <sys/types.h>
> >> +#include <errno.h>
> >> +#include <fcntl.h>
> >> +#include <stdio.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <ctype.h>
> >> +
> >> +#include <libaio.h>
> >> +
> >> +#define BUF_SIZE	4096
> > 
> > Have you considered making the buffer size variable? As is currently
> > written, I don't think this will reproduce the problem on 512b or 1024b
> > fsb filesystems because at least some of the file-extending I/Os must
> > not be block aligned for it to trigger.
> 
> yeah, I meant to make it smaller, then got distracted, tbh ;
> 
> But I think launching 4x512 IOs should always do the trick, right?
> 

I would think so, though I don't know how many unaligned I/Os were
necessary per io_submit() to make the test effective (e.g., 3 of 4 on
4k/2k fsb vs. 2 of 4 on 1k fsb). Might be worth a try to be sure.

> And it's impossible to repro on a 512b fs block I think, because
> we can't have any sub-block/unaligned IOs?
> 
> So I think that perhaps switching it to 2048 & doing 4x512 IOs
> would suffice, right?
> 

Yeah, good point. I don't think this reproducer could trigger the
problem with 512b FSBs. I think the corruption is still possible on such
fs', but it would probably require extending I/O to a file with dirty
pages and post-eof speculative preallocation. In short, I think that's
probably a separate reproducer.

Given that and if the above is effective, perhaps it's good enough to
adjust the default buffer size and whatnot and forget the tunables and
things.

Brian

> >> +#define IO_PATTERN	0xab
> >> +
> >> +void
> >> +dump_buffer(
> >> +	void	*buf,
> >> +	off64_t	offset,
> >> +	ssize_t	len)
> >> +{
> >> +	int	i, j;
> >> +	char	*p;
> >> +	int	new;
> >> +	
> >> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
> >> +		char    *s = p;
> >> +
> >> +		if (i && !memcmp(p, p - 16, 16)) {
> >> +			new = 0;
> >> +		} else {
> >> +			if (i)
> >> +				printf("*\n");
> >> +			new = 1;
> >> +		}
> >> +
> >> +		if (!new) {
> >> +			p += 16;
> >> +			continue;
> >> +		}
> >> +
> >> +		printf("%08llx  ", (unsigned long long)offset + i);
> >> +		for (j = 0; j < 16 && i + j < len; j++, p++)
> >> +			printf("%02x ", *p);
> >> +		printf(" ");
> >> +		for (j = 0; j < 16 && i + j < len; j++, s++) {
> >> +			if (isalnum((int)*s))
> >> +				printf("%c", *s);
> >> +			else
> >> +				printf(".");
> >> +		}
> >> +		printf("\n");
> >> +
> >> +	}
> >> +	printf("%08llx\n", (unsigned long long)offset + i);
> >> +}
> >> +
> >> +int main(int argc, char *argv[])
> >> +{
> >> +	struct io_context *ctx = NULL;
> >> +	struct io_event evs[4];
> >> +	struct iocb iocb1, iocb2, iocb3, iocb4;
> >> +	struct iocb *iocbs[] = { &iocb1, &iocb2, &iocb3, &iocb4 };
> >> +	void *buf;
> >> +	struct stat statbuf;
> >> +	char cmp_buf[BUF_SIZE];
> >> +	int fd, err = 0;
> >> +	off_t eof;
> >> +
> >> +	fd = open(argv[1], O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
> >> +	if (fd == -1) {
> >> +		perror("open");
> >> +		return 1;
> >> +	}
> >> +
> >> +	err = posix_memalign(&buf, BUF_SIZE, BUF_SIZE);
> > 
> > The alignment should technically be PAGE_SIZE (even though BUF_SIZE ==
> > 4096), right?
> 
> hm, tbh this just came along from the other AIO tests I copied from ;)
> Sure, I can do it page-sized, that'd be better.
> 
> >> +	if (err) {
> >> +		fprintf(stderr, "error %s during %s\n",
> >> +			strerror(err),
> >> +			"posix_memalign");
> >> +		return 1;
> >> +	}
> >> +	memset(cmp_buf, IO_PATTERN, BUF_SIZE);
> >> +
> >> +	err = io_setup(4, &ctx);
> >> +	if (err) {
> >> +		fprintf(stderr, "error %s during %s\n",
> >> +			strerror(err),
> >> +			"io_setup");
> >> +		return 1;
> >> +	}
> >> +
> >> +	eof = 0;
> >> +
> >> +	/* Keep extending until 8MB */
> >> +	while (eof < 8 * 1024 * 1024) {
> >> +		memset(buf, IO_PATTERN, BUF_SIZE);
> >> +
> >> +		fstat(fd, &statbuf);
> >> +		eof = statbuf.st_size;
> >> +
> >> +		/*
> >> +		 * 4 ios, racing to extend EOF, combined they write full BUF_SIZE
> >> +		 */
> > 
> > I would expand on this comment with something like the following:
> > 
> > "Split the buffer into multiple I/Os to send a mix of block
> > aligned/unaligned writes as well as writes that start beyond the current
> > EOF. This stresses things like inode size management and stale block
> > zeroing for races and can lead to data corruption when not handled
> > properly."
> > 
> > ... but feel free to reword that as necessary. I just wanted to point
> > out that 1.) unaligned I/O is required and 2.) write offsets beyond EOF
> > are required.
> 
> ok
> 
>  
> >> +		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0 * BUF_SIZE/4);
> >> +		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1 * BUF_SIZE/4);
> >> +		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2 * BUF_SIZE/4);
> >> +		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3 * BUF_SIZE/4);
> >> +	
> >> +		err = io_submit(ctx, 4, iocbs);
> >> +		if (err != 4) {
> >> +			fprintf(stderr, "error %s during %s\n",
> >> +				strerror(err),
> >> +				"io_submit");
> >> +			return 1;
> >> +		}
> >> +
> >> +		err = io_getevents(ctx, 4, 4, evs, NULL);
> >> +		if (err != 4) {
> >> +			fprintf(stderr, "error %s during %s\n",
> >> +				strerror(err),
> >> +				"io_getevents");
> >> +			return 1;
> >> +		}
> >> +
> >> +		/*
> >> +		 * And then read it back.
> >> +		 *
> >> +		 * Using pread to keep it simple, but AIO has the same effect.
> >> +		 *
> >> +		 * eof is the old eof, we just wrote BUF_SIZE more
> >> +		 */
> >> +		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
> >> +			perror("pread");
> >> +			return 1;
> >> +		}
> >> +
> >> +		/*
> >> +		 * We launched 4 AIOs which, stiched together, should write
> > 
> > Nit:					     stitched
> 
> doh ;)
> 
> >> +		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block
> >> +		 */
> >> +		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
> >> +			printf("corruption while extending from %ld\n", eof);
> >> +			dump_buffer(buf, 0, BUF_SIZE);
> >> +			return 1;
> >> +		}
> >> +	}
> >> +
> >> +	printf("Success, all done.\n");
> >> +	return 0;
> >> +}
> >> diff --git a/tests/generic/326 b/tests/generic/326
> >> new file mode 100755
> >> index 0000000..7db04ac
> >> --- /dev/null
> >> +++ b/tests/generic/326
> >> @@ -0,0 +1,64 @@
> >> +#! /bin/bash
> >> +# FS QA Test No. 326
> >> +#
> >> +# Test races while extending past EOF via sub-block AIO writes
> >> +#
> >> +#-----------------------------------------------------------------------
> >> +# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
> >> +#
> >> +# This program is free software; you can redistribute it and/or
> >> +# modify it under the terms of the GNU General Public License as
> >> +# published by the Free Software Foundation.
> >> +#
> >> +# This program is distributed in the hope that it would be useful,
> >> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >> +# GNU General Public License for more details.
> >> +#
> >> +# You should have received a copy of the GNU General Public License
> >> +# along with this program; if not, write the Free Software Foundation,
> >> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> >> +#-----------------------------------------------------------------------
> >> +#
> >> +
> >> +seq=`basename $0`
> >> +seqres=$RESULT_DIR/$seq
> >> +echo "QA output created by $seq"
> >> +
> >> +here=`pwd`
> >> +tmp=/tmp/$$
> >> +status=1	# failure is the default!
> >> +trap "_cleanup; exit \$status" 0 1 2 3 15
> >> +
> >> +_cleanup()
> >> +{
> >> +    cd /
> >> +    rm -f $TEST_DIR/tst-aio-dio-eof-race
> >> +}
> >> +
> >> +# get standard environment, filters and checks
> >> +. ./common/rc
> >> +. ./common/filter
> >> +
> >> +_supported_fs generic
> >> +_supported_os Linux
> >> +
> >> +_require_test
> >> +_require_sparse_files
> >> +_require_aiodio aio-dio-eof-race
> >> +
> >> +# Test does 512 byte DIO, so make sure that'll work
> >> +logical_block_size=`_min_dio_alignment $TEST_DEV`
> >> +
> >> +if [ "$logical_block_size" -gt "512" ]; then
> >> +	_notrun "device block size: $logical_block_size greater than 512"
> >> +fi
> >> +
> > 
> > Technically the test splits a 4k buffer into four parts and so sends 1k
> > dio. Not that it really matters here, but I guess I'd update the
> > comments. :)
> > 
> >> +# If 512 isn't a sub-block IO, the test should still pass, so
> >> +# let that go.
> >> +
> > 
> > Ok, so this at least points out the limitation to the test. I think it
> > would be slightly better if the test were configurable as mentioned in
> > the first comment above. For example, the test program could take a
> > block size parameter and "do the right thing" based on that.
> > Alternatively, we could specify buffer size and iocb count as parameters
> > and let the xfstests script send the right params based on the test fs.
> > I guess the latter would complicate things slightly because the program
> > probably wants to ensure full buffers are written in each io_submit()
> > instance for verification purposes.
> > 
> > Just some thoughts... we could leave it as is too. In that case I would
> > suggest to expand the comment above to be specific about which block
> > sizes (512b, 1k) do not result in unaligned I/O.
> 
> ok, I'll make it a bit more universal one way or the other.
> 
> Thanks for the review,
> -Eric
> 
> > Brian
> > 
> >> +# This test does several extending loops internally
> >> +$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
> >> +
> >> +status=$?
> >> +exit
> >> diff --git a/tests/generic/326.out b/tests/generic/326.out
> >> new file mode 100644
> >> index 0000000..22a3e78
> >> --- /dev/null
> >> +++ b/tests/generic/326.out
> >> @@ -0,0 +1,2 @@
> >> +QA output created by 326
> >> +Success, all done.
> >> diff --git a/tests/generic/group b/tests/generic/group
> >> index 4ae256f..a5f3008 100644
> >> --- a/tests/generic/group
> >> +++ b/tests/generic/group
> >> @@ -207,3 +207,4 @@
> >>  323 auto aio stress
> >>  324 auto fsr quick
> >>  325 auto quick data log
> >> +326 auto quick aio
> >>
> >>
> >> _______________________________________________
> >> xfs mailing list
> >> xfs@oss.sgi.com
> >> http://oss.sgi.com/mailman/listinfo/xfs
> > 
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH V2] test extending sub-block AIO writes for races
  2015-09-30  1:03 [PATCH] test extending sub-block AIO writes for races Eric Sandeen
  2015-09-30 11:47 ` Brian Foster
@ 2015-09-30 15:59 ` Eric Sandeen
  2015-09-30 16:53   ` Brian Foster
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Sandeen @ 2015-09-30 15:59 UTC (permalink / raw)
  To: fstests; +Cc: xfs

This tests bfoster's
[PATCH 1/2] xfs: always drain dio before extending aio write submission
patch; it launches four adjacent 1k IOs past EOF, then reads back
to see if we have 4k worth of the data we wrote, or something else - 
possibly zeros from sub-block zeroing and eof racing.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

V2:
	do 4x512 IOs to get as much sub-block goodness as possible
	fix up comments & typos

diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
new file mode 100644
index 0000000..ce5d715
--- /dev/null
+++ b/src/aio-dio-regress/aio-dio-eof-race.c
@@ -0,0 +1,173 @@
+/*
+ * Launch 4 sub-block AIOs past EOF and ensure that we don't see
+ * corruption from racing sub-block zeroing when they're complete.
+ *
+ * Copyright (C) 2015 Red Hat, Inc. All Rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include <libaio.h>
+
+/* Sized to allow 4 x 512 AIOs */
+#define BUF_SIZE	2048
+#define IO_PATTERN	0xab
+
+void
+dump_buffer(
+	void	*buf,
+	off64_t	offset,
+	ssize_t	len)
+{
+	int	i, j;
+	char	*p;
+	int	new;
+	
+	for (i = 0, p = (char *)buf; i < len; i += 16) {
+		char    *s = p;
+
+		if (i && !memcmp(p, p - 16, 16)) {
+			new = 0;
+		} else {
+			if (i)
+				printf("*\n");
+			new = 1;
+		}
+
+		if (!new) {
+			p += 16;
+			continue;
+		}
+
+		printf("%08llx  ", (unsigned long long)offset + i);
+		for (j = 0; j < 16 && i + j < len; j++, p++)
+			printf("%02x ", *p);
+		printf(" ");
+		for (j = 0; j < 16 && i + j < len; j++, s++) {
+			if (isalnum((int)*s))
+				printf("%c", *s);
+			else
+				printf(".");
+		}
+		printf("\n");
+
+	}
+	printf("%08llx\n", (unsigned long long)offset + i);
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_context *ctx = NULL;
+	struct io_event evs[4];
+	struct iocb iocb1, iocb2, iocb3, iocb4;
+	struct iocb *iocbs[] = { &iocb1, &iocb2, &iocb3, &iocb4 };
+	void *buf;
+	struct stat statbuf;
+	char cmp_buf[BUF_SIZE];
+	int fd, err = 0;
+	off_t eof;
+
+	fd = open(argv[1], O_DIRECT | O_CREAT | O_TRUNC | O_RDWR, 0600);
+	if (fd == -1) {
+		perror("open");
+		return 1;
+	}
+
+	err = posix_memalign(&buf, getpagesize(), BUF_SIZE);
+	if (err) {
+		fprintf(stderr, "error %s during %s\n",
+			strerror(err),
+			"posix_memalign");
+		return 1;
+	}
+	memset(cmp_buf, IO_PATTERN, BUF_SIZE);
+
+	err = io_setup(4, &ctx);
+	if (err) {
+		fprintf(stderr, "error %s during %s\n",
+			strerror(err),
+			"io_setup");
+		return 1;
+	}
+
+	eof = 0;
+
+	/* Keep extending until 8MB (fairly arbitrary) */
+	while (eof < 8 * 1024 * 1024) {
+		memset(buf, IO_PATTERN, BUF_SIZE);
+		fstat(fd, &statbuf);
+		eof = statbuf.st_size;
+
+		/*
+		 * Split the buffer into multiple I/Os to send a mix of block
+		 * aligned/unaligned writes as well as writes that start beyond
+		 * the current EOF. This stresses things like inode size
+		 * management and stale block zeroing for races and can lead to
+		 * data corruption when not handled properly.
+		 */
+		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0*BUF_SIZE/4);
+		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1*BUF_SIZE/4);
+		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2*BUF_SIZE/4);
+		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3*BUF_SIZE/4);
+	
+		err = io_submit(ctx, 4, iocbs);
+		if (err != 4) {
+			fprintf(stderr, "error %s during %s\n",
+				strerror(err),
+				"io_submit");
+			return 1;
+		}
+
+		err = io_getevents(ctx, 4, 4, evs, NULL);
+		if (err != 4) {
+			fprintf(stderr, "error %s during %s\n",
+				strerror(err),
+				"io_getevents");
+			return 1;
+		}
+
+		/*
+		 * And then read it back.
+		 *
+		 * Using pread to keep it simple, but AIO has the same effect.
+		 * eof is the prior eof; we just wrote BUF_SIZE more.
+		 */
+		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
+			perror("pread");
+			return 1;
+		}
+
+		/*
+		 * We launched 4 AIOs which, stitched together, should write
+		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block.
+		 */
+		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
+			printf("corruption while extending from %ld\n", eof);
+			dump_buffer(buf, 0, BUF_SIZE);
+			return 1;
+		}
+	}
+
+	printf("Success, all done.\n");
+	return 0;
+}
diff --git a/tests/generic/326 b/tests/generic/326
new file mode 100755
index 0000000..f20375a
--- /dev/null
+++ b/tests/generic/326
@@ -0,0 +1,65 @@
+#! /bin/bash
+# FS QA Test No. 326
+#
+# Test races while extending past EOF via sub-block AIO writes
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+    cd /
+    rm -f $TEST_DIR/tst-aio-dio-eof-race
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+_supported_fs generic
+_supported_os Linux
+
+_require_test
+_require_sparse_files
+_require_aiodio aio-dio-eof-race
+
+# Test does 512 byte DIO, so make sure that'll work
+logical_block_size=`_min_dio_alignment $TEST_DEV`
+
+if [ "$logical_block_size" -gt "512" ]; then
+	_notrun "device block size: $logical_block_size greater than 512"
+fi
+
+# We don't mind 512-byte fs blocks; the IOs won't be sub-block,
+# but the test should still pass, even if it doesn't stress the code
+# we're targeting.
+
+# Note, this test does several extending loops internally
+$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
+
+status=$?
+exit
diff --git a/tests/generic/326.out b/tests/generic/326.out
new file mode 100644
index 0000000..22a3e78
--- /dev/null
+++ b/tests/generic/326.out
@@ -0,0 +1,2 @@
+QA output created by 326
+Success, all done.
diff --git a/tests/generic/group b/tests/generic/group
index 4ae256f..a5f3008 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -207,3 +207,4 @@
 323 auto aio stress
 324 auto fsr quick
 325 auto quick data log
+326 auto quick aio

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH V2] test extending sub-block AIO writes for races
  2015-09-30 15:59 ` [PATCH V2] " Eric Sandeen
@ 2015-09-30 16:53   ` Brian Foster
  0 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2015-09-30 16:53 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: fstests, xfs

On Wed, Sep 30, 2015 at 10:59:31AM -0500, Eric Sandeen wrote:
> This tests bfoster's
> [PATCH 1/2] xfs: always drain dio before extending aio write submission
> patch; it launches four adjacent 1k IOs past EOF, then reads back
> to see if we have 4k worth of the data we wrote, or something else - 
> possibly zeros from sub-block zeroing and eof racing.
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> V2:
> 	do 4x512 IOs to get as much sub-block goodness as possible
> 	fix up comments & typos
> 
> diff --git a/src/aio-dio-regress/aio-dio-eof-race.c b/src/aio-dio-regress/aio-dio-eof-race.c
> new file mode 100644
> index 0000000..ce5d715
> --- /dev/null
> +++ b/src/aio-dio-regress/aio-dio-eof-race.c
> @@ -0,0 +1,173 @@
...
> +void
> +dump_buffer(
> +	void	*buf,
> +	off64_t	offset,
> +	ssize_t	len)
> +{
> +	int	i, j;
> +	char	*p;
> +	int	new;
> +	

Whitespace damage on the line above...

> +	for (i = 0, p = (char *)buf; i < len; i += 16) {
> +		char    *s = p;
> +
...
> +
> +	}
> +	printf("%08llx\n", (unsigned long long)offset + i);
> +}
> +
> +int main(int argc, char *argv[])
> +{
...
> +		/*
> +		 * Split the buffer into multiple I/Os to send a mix of block
> +		 * aligned/unaligned writes as well as writes that start beyond
> +		 * the current EOF. This stresses things like inode size
> +		 * management and stale block zeroing for races and can lead to
> +		 * data corruption when not handled properly.
> +		 */
> +		io_prep_pwrite(&iocb1, fd, buf, BUF_SIZE/4, eof + 0*BUF_SIZE/4);
> +		io_prep_pwrite(&iocb2, fd, buf, BUF_SIZE/4, eof + 1*BUF_SIZE/4);
> +		io_prep_pwrite(&iocb3, fd, buf, BUF_SIZE/4, eof + 2*BUF_SIZE/4);
> +		io_prep_pwrite(&iocb4, fd, buf, BUF_SIZE/4, eof + 3*BUF_SIZE/4);
> +	

... and above here as well. Otherwise looks good to me. Thanks again!

Reviewed-by: Brian Foster <bfoster@redhat.com>

> +		err = io_submit(ctx, 4, iocbs);
> +		if (err != 4) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_submit");
> +			return 1;
> +		}
> +
> +		err = io_getevents(ctx, 4, 4, evs, NULL);
> +		if (err != 4) {
> +			fprintf(stderr, "error %s during %s\n",
> +				strerror(err),
> +				"io_getevents");
> +			return 1;
> +		}
> +
> +		/*
> +		 * And then read it back.
> +		 *
> +		 * Using pread to keep it simple, but AIO has the same effect.
> +		 * eof is the prior eof; we just wrote BUF_SIZE more.
> +		 */
> +		if (pread(fd, buf, BUF_SIZE, eof) != BUF_SIZE) {
> +			perror("pread");
> +			return 1;
> +		}
> +
> +		/*
> +		 * We launched 4 AIOs which, stitched together, should write
> +		 * a seamless BUF_SIZE worth of IO_PATTERN to the last block.
> +		 */
> +		if (memcmp(buf, cmp_buf, BUF_SIZE)) {
> +			printf("corruption while extending from %ld\n", eof);
> +			dump_buffer(buf, 0, BUF_SIZE);
> +			return 1;
> +		}
> +	}
> +
> +	printf("Success, all done.\n");
> +	return 0;
> +}
> diff --git a/tests/generic/326 b/tests/generic/326
> new file mode 100755
> index 0000000..f20375a
> --- /dev/null
> +++ b/tests/generic/326
> @@ -0,0 +1,65 @@
> +#! /bin/bash
> +# FS QA Test No. 326
> +#
> +# Test races while extending past EOF via sub-block AIO writes
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2015 Red Hat, Inc.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +    cd /
> +    rm -f $TEST_DIR/tst-aio-dio-eof-race
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +_supported_fs generic
> +_supported_os Linux
> +
> +_require_test
> +_require_sparse_files
> +_require_aiodio aio-dio-eof-race
> +
> +# Test does 512 byte DIO, so make sure that'll work
> +logical_block_size=`_min_dio_alignment $TEST_DEV`
> +
> +if [ "$logical_block_size" -gt "512" ]; then
> +	_notrun "device block size: $logical_block_size greater than 512"
> +fi
> +
> +# We don't mind 512-byte fs blocks; the IOs won't be sub-block,
> +# but the test should still pass, even if it doesn't stress the code
> +# we're targeting.
> +
> +# Note, this test does several extending loops internally
> +$AIO_TEST $TEST_DIR/tst-aio-dio-eof-race
> +
> +status=$?
> +exit
> diff --git a/tests/generic/326.out b/tests/generic/326.out
> new file mode 100644
> index 0000000..22a3e78
> --- /dev/null
> +++ b/tests/generic/326.out
> @@ -0,0 +1,2 @@
> +QA output created by 326
> +Success, all done.
> diff --git a/tests/generic/group b/tests/generic/group
> index 4ae256f..a5f3008 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -207,3 +207,4 @@
>  323 auto aio stress
>  324 auto fsr quick
>  325 auto quick data log
> +326 auto quick aio
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-09-30 16:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-09-30  1:03 [PATCH] test extending sub-block AIO writes for races Eric Sandeen
2015-09-30 11:47 ` Brian Foster
2015-09-30 12:53   ` Eric Sandeen
2015-09-30 15:24     ` Brian Foster
2015-09-30 15:59 ` [PATCH V2] " Eric Sandeen
2015-09-30 16:53   ` Brian Foster

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox