public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCHSET 0/1] fstests: updates for Linux 6.7
@ 2023-11-08 21:29 Darrick J. Wong
  2023-11-08 21:29 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-08 21:29 UTC (permalink / raw)
  To: djwong, zlang
  Cc: Christoph Hellwig, Catherine Hoang, guan, fstests, linux-xfs

Hi all,

This is pending fixes for things that are going to get merged in 6.7.

If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.

This has been running on the djcloud for months with no problems.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=xfs-merge-6.7

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=xfs-merge-6.7
---
 configure.ac              |    1 
 include/builddefs.in      |    1 
 m4/package_libcdev.m4     |   12 ++
 src/Makefile              |    4 +
 src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1953        |   74 ++++++++++
 tests/generic/1953.out    |    6 +
 7 files changed, 437 insertions(+)
 create mode 100644 src/t_reflink_read_race.c
 create mode 100755 tests/generic/1953
 create mode 100644 tests/generic/1953.out


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-08 21:29 [PATCHSET 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
@ 2023-11-08 21:29 ` Darrick J. Wong
  2023-11-10 14:40   ` Zorro Lang
  2023-11-11 22:22   ` [PATCH v2 " Darrick J. Wong
  0 siblings, 2 replies; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-08 21:29 UTC (permalink / raw)
  To: djwong, zlang
  Cc: Christoph Hellwig, Catherine Hoang, guan, fstests, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

XFS has a rather slow reflink operation.  While a reflink operation is
running, other programs cannot read the contents of the source file,
which is causing latency spikes.  Catherine Hoang wrote a patch to
permit reads, since the source file contents do not change.  This is a
functionality test for that patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
---
 configure.ac              |    1 
 include/builddefs.in      |    1 
 m4/package_libcdev.m4     |   12 ++
 src/Makefile              |    4 +
 src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1953        |   74 ++++++++++
 tests/generic/1953.out    |    6 +
 7 files changed, 437 insertions(+)
 create mode 100644 src/t_reflink_read_race.c
 create mode 100755 tests/generic/1953
 create mode 100644 tests/generic/1953.out


diff --git a/configure.ac b/configure.ac
index 4687d8a3c0..7333045330 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
 AC_HAVE_NFTW
 AC_HAVE_RLIMIT_NOFILE
 AC_HAVE_FIEXCHANGE
+AC_HAVE_FICLONE
 
 AC_CHECK_FUNCS([renameat2])
 AC_CHECK_FUNCS([reallocarray])
diff --git a/include/builddefs.in b/include/builddefs.in
index 969acf0da2..446350d5fc 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
 HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
 HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
 HAVE_FIEXCHANGE = @have_fiexchange@
+HAVE_FICLONE = @have_ficlone@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 7f73104405..91eb64db21 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -173,4 +173,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
     ]])],[have_fiexchange=yes
        AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
     AC_SUBST(have_fiexchange)
+
+# Check if we have FICLONE
+AC_DEFUN([AC_HAVE_FICLONE],
+  [ AC_MSG_CHECKING([for FICLONE])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+    ]], [[
+	 ioctl(-1, FICLONE, -1);
+    ]])],[have_ficlone=yes
+       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+    AC_SUBST(have_ficlone)
   ])
diff --git a/src/Makefile b/src/Makefile
index 2815f919b2..49dd2f6c1e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
  endif
 endif
 
+ifeq ($(HAVE_FICLONE),yes)
+     TARGETS += t_reflink_read_race
+endif
+
 CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS) fssum
 
diff --git a/src/t_reflink_read_race.c b/src/t_reflink_read_race.c
new file mode 100644
index 0000000000..00c19d7c05
--- /dev/null
+++ b/src/t_reflink_read_race.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Race reads with reflink to see if reads continue while we're cloning.
+ * Copyright 2023 Oracle.  All rights reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t mypid;
+
+static FILE *outcome_fp;
+
+/* Significant timestamps.  Initialized to zero */
+static double program_start, clone_start, clone_end;
+
+/* Coordinates access to timestamps */
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct readinfo {
+	int fd;
+	unsigned int blocksize;
+	char *descr;
+};
+
+struct cloneinfo {
+	int src_fd, dst_fd;
+};
+
+#define MSEC_PER_SEC	1000
+#define NSEC_PER_MSEC	1000000
+
+/*
+ * Assume that it shouldn't take longer than 100ms for the FICLONE ioctl to
+ * enter the kernel and take locks on an uncontended file.  This is also the
+ * required CLOCK_MONOTONIC granularity.
+ */
+#define EARLIEST_READ_MS	(MSEC_PER_SEC / 10)
+
+/*
+ * We want to be reasonably sure that the FICLONE takes long enough that any
+ * read initiated after clone operation locked the source file could have
+ * completed a disk IO before the clone finishes.  Therefore, we require that
+ * the clone operation must take at least 4x the maximum setup time.
+ */
+#define MINIMUM_CLONE_MS	(EARLIEST_READ_MS * 5)
+
+static double timespec_to_msec(const struct timespec *ts)
+{
+	return (ts->tv_sec * (double)MSEC_PER_SEC) +
+	       (ts->tv_nsec / (double)NSEC_PER_MSEC);
+}
+
+static void sleep_ms(unsigned int len)
+{
+	struct timespec time = {
+		.tv_nsec = len * NSEC_PER_MSEC,
+	};
+
+	nanosleep(&time, NULL);
+}
+
+static void outcome(const char *str)
+{
+	fprintf(outcome_fp, "%s\n", str);
+	fflush(outcome_fp);
+}
+
+static void *reader_fn(void *arg)
+{
+	struct timespec now;
+	struct readinfo *ri = arg;
+	char *buf = malloc(ri->blocksize);
+	loff_t pos = 0;
+	ssize_t copied;
+	int ret;
+
+	if (!buf) {
+		perror("alloc buffer");
+		goto kill_error;
+	}
+
+	/* Wait for the FICLONE to start */
+	pthread_mutex_lock(&lock);
+	while (clone_start < program_start) {
+		pthread_mutex_unlock(&lock);
+#ifdef DEBUG
+		printf("%s read waiting for clone to start; cs=%.2f ps=%.2f\n",
+				ri->descr, clone_start, program_start);
+		fflush(stdout);
+#endif
+		sleep_ms(1);
+		pthread_mutex_lock(&lock);
+	}
+	pthread_mutex_unlock(&lock);
+	sleep_ms(EARLIEST_READ_MS);
+
+	/* Read from the file... */
+	while ((copied = read(ri->fd, buf, ri->blocksize)) > 0) {
+		double read_completion;
+
+		ret = clock_gettime(CLOCK_MONOTONIC, &now);
+		if (ret) {
+			perror("clock_gettime");
+			goto kill_error;
+		}
+		read_completion = timespec_to_msec(&now);
+
+		/*
+		 * If clone_end is still zero, the FICLONE is still running.
+		 * If the read completion occurred a safe duration after the
+		 * start of the ioctl, then report that as an early read
+		 * completion.
+		 */
+		pthread_mutex_lock(&lock);
+		if (clone_end < program_start &&
+		    read_completion > clone_start + EARLIEST_READ_MS) {
+			pthread_mutex_unlock(&lock);
+
+			/* clone still running... */
+			printf("finished %s read early at %.2fms\n",
+					ri->descr,
+					read_completion - program_start);
+			fflush(stdout);
+			outcome("finished read early");
+			goto kill_done;
+		}
+		pthread_mutex_unlock(&lock);
+
+		sleep_ms(1);
+		pos += copied;
+	}
+	if (copied < 0) {
+		perror("read");
+		goto kill_error;
+	}
+
+	return NULL;
+kill_error:
+	outcome("reader error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+static void *clone_fn(void *arg)
+{
+	struct timespec now;
+	struct cloneinfo *ci = arg;
+	int ret;
+
+	/* Record the approximate start time of this thread */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone start");
+		goto kill_error;
+	}
+	pthread_mutex_lock(&lock);
+	clone_start = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("started clone at %.2fms\n", clone_start - program_start);
+	fflush(stdout);
+
+	/* Kernel call, only killable with a fatal signal */
+	ret = ioctl(ci->dst_fd, FICLONE, ci->src_fd);
+	if (ret) {
+		perror("FICLONE");
+		goto kill_error;
+	}
+
+	/* If the ioctl completes, note the completion time */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone end");
+		goto kill_error;
+	}
+
+	pthread_mutex_lock(&lock);
+	clone_end = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("finished clone at %.2fms\n",
+			clone_end - program_start);
+	fflush(stdout);
+
+	/* Complain if we didn't take long enough to clone. */
+	if (clone_end < clone_start + MINIMUM_CLONE_MS) {
+		printf("clone did not take enough time (%.2fms)\n",
+				clone_end - clone_start);
+		fflush(stdout);
+		outcome("clone too fast");
+		goto kill_done;
+	}
+
+	outcome("clone finished before any reads");
+	goto kill_done;
+
+kill_error:
+	outcome("clone error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct cloneinfo ci;
+	struct readinfo bufio = { .descr = "buffered" };
+	struct readinfo directio = { .descr = "directio" };
+	struct timespec now;
+	pthread_t clone_thread, bufio_thread, directio_thread;
+	double clockres;
+	int ret;
+
+	if (argc != 4) {
+		printf("Usage: %s src_file dst_file outcome_file\n", argv[0]);
+		return 1;
+	}
+
+	mypid = getpid();
+
+	/* Check for sufficient clock precision. */
+	ret = clock_getres(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_getres MONOTONIC");
+		return 2;
+	}
+	printf("CLOCK_MONOTONIC resolution: %llus %luns\n",
+			(unsigned long long)now.tv_sec,
+			(unsigned long)now.tv_nsec);
+	fflush(stdout);
+
+	clockres = timespec_to_msec(&now);
+	if (clockres > EARLIEST_READ_MS) {
+		fprintf(stderr, "insufficient CLOCK_MONOTONIC resolution\n");
+		return 2;
+	}
+
+	/*
+	 * Measure program start time since the MONOTONIC time base is not
+	 * all that well defined.
+	 */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime MONOTONIC");
+		return 2;
+	}
+	if (now.tv_sec == 0 && now.tv_nsec == 0) {
+		fprintf(stderr, "Uhoh, start time is zero?!\n");
+		return 2;
+	}
+	program_start = timespec_to_msec(&now);
+
+	outcome_fp = fopen(argv[3], "w");
+	if (!outcome_fp) {
+		perror(argv[3]);
+		return 2;
+	}
+
+	/* Open separate fds for each thread */
+	ci.src_fd = open(argv[1], O_RDONLY);
+	if (ci.src_fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+
+	ci.dst_fd = open(argv[2], O_RDWR | O_CREAT, 0600);
+	if (ci.dst_fd < 0) {
+		perror(argv[2]);
+		return 2;
+	}
+
+	bufio.fd = open(argv[1], O_RDONLY);
+	if (bufio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	bufio.blocksize = 37;
+
+	directio.fd = open(argv[1], O_RDONLY | O_DIRECT);
+	if (directio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	directio.blocksize = 512;
+
+	/* Start threads */
+	ret = pthread_create(&clone_thread, NULL, clone_fn, &ci);
+	if (ret) {
+		fprintf(stderr, "create clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&bufio_thread, NULL, reader_fn, &bufio);
+	if (ret) {
+		fprintf(stderr, "create bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&directio_thread, NULL, reader_fn, &directio);
+	if (ret) {
+		fprintf(stderr, "create directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	/* Wait for threads */
+	ret = pthread_join(clone_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(bufio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(directio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	printf("Program should have killed itself?\n");
+	outcome("program failed to end early");
+	return 0;
+}
diff --git a/tests/generic/1953 b/tests/generic/1953
new file mode 100755
index 0000000000..058538e6fe
--- /dev/null
+++ b/tests/generic/1953
@@ -0,0 +1,74 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
+#
+# FS QA Test No. 1953
+#
+# Race file reads with a very slow reflink operation to see if the reads
+# actually complete while the reflink is ongoing.  This is a functionality
+# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
+# concurrently".
+#
+. ./common/preamble
+_begin_fstest auto clone punch
+
+# Import common functions.
+. ./common/filter
+. ./common/attr
+. ./common/reflink
+
+# real QA test starts here
+_require_scratch_reflink
+_require_cp_reflink
+_require_xfs_io_command "fpunch"
+_require_test_program "punch-alternating"
+_require_test_program "t_reflink_read_race"
+
+rm -f "$seqres.full"
+
+echo "Format and mount"
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount >> "$seqres.full" 2>&1
+
+testdir="$SCRATCH_MNT/test-$seq"
+mkdir "$testdir"
+
+calc_space() {
+	blocks_needed=$(( 2 ** (fnr + 1) ))
+	space_needed=$((blocks_needed * blksz * 5 / 4))
+}
+
+# Figure out the number of blocks that we need to get the reflink runtime above
+# 1 seconds
+echo "Create a many-block file"
+for ((fnr = 1; fnr < 40; fnr++)); do
+	free_blocks=$(stat -f -c '%a' "$testdir")
+	blksz=$(_get_file_block_size "$testdir")
+	space_avail=$((free_blocks * blksz))
+	calc_space
+	test $space_needed -gt $space_avail && \
+		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
+
+	off=$(( (2 ** fnr) * blksz))
+	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
+	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
+
+	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
+done
+echo "fnr=$fnr" >> $seqres.full
+
+echo "Reflink the big file"
+$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
+	"$testdir/outcome" &>> $seqres.full
+
+if [ ! -e "$testdir/outcome" ]; then
+	echo "Could not set up program"
+elif grep -q "finished read early" "$testdir/outcome"; then
+	echo "test completed successfully"
+else
+	cat "$testdir/outcome"
+fi
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/1953.out b/tests/generic/1953.out
new file mode 100644
index 0000000000..8eaacb7ff0
--- /dev/null
+++ b/tests/generic/1953.out
@@ -0,0 +1,6 @@
+QA output created by 1953
+Format and mount
+Create a many-block file
+Reflink the big file
+Terminated
+test completed successfully


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-08 21:29 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
@ 2023-11-10 14:40   ` Zorro Lang
  2023-11-11 22:20     ` Darrick J. Wong
  2023-11-11 22:22   ` [PATCH v2 " Darrick J. Wong
  1 sibling, 1 reply; 10+ messages in thread
From: Zorro Lang @ 2023-11-10 14:40 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, Catherine Hoang, fstests, linux-xfs

On Wed, Nov 08, 2023 at 01:29:28PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> XFS has a rather slow reflink operation.  While a reflink operation is
> running, other programs cannot read the contents of the source file,
> which is causing latency spikes.  Catherine Hoang wrote a patch to
> permit reads, since the source file contents do not change.  This is a
> functionality test for that patch.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
> ---
>  configure.ac              |    1 
>  include/builddefs.in      |    1 
>  m4/package_libcdev.m4     |   12 ++
>  src/Makefile              |    4 +
>  src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/1953        |   74 ++++++++++
>  tests/generic/1953.out    |    6 +
>  7 files changed, 437 insertions(+)
>  create mode 100644 src/t_reflink_read_race.c
>  create mode 100755 tests/generic/1953
>  create mode 100644 tests/generic/1953.out
> 
> 
> diff --git a/configure.ac b/configure.ac
> index 4687d8a3c0..7333045330 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
>  AC_HAVE_NFTW
>  AC_HAVE_RLIMIT_NOFILE
>  AC_HAVE_FIEXCHANGE
> +AC_HAVE_FICLONE
>  
>  AC_CHECK_FUNCS([renameat2])
>  AC_CHECK_FUNCS([reallocarray])
> diff --git a/include/builddefs.in b/include/builddefs.in
> index 969acf0da2..446350d5fc 100644
> --- a/include/builddefs.in
> +++ b/include/builddefs.in
> @@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
>  HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
>  HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
>  HAVE_FIEXCHANGE = @have_fiexchange@
> +HAVE_FICLONE = @have_ficlone@
>  
>  GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
>  
> diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
> index 7f73104405..91eb64db21 100644
> --- a/m4/package_libcdev.m4
> +++ b/m4/package_libcdev.m4
> @@ -173,4 +173,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
>      ]])],[have_fiexchange=yes
>         AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
>      AC_SUBST(have_fiexchange)
> +
> +# Check if we have FICLONE
> +AC_DEFUN([AC_HAVE_FICLONE],
> +  [ AC_MSG_CHECKING([for FICLONE])
> +    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +    ]], [[
> +	 ioctl(-1, FICLONE, -1);
> +    ]])],[have_ficlone=yes
> +       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
> +    AC_SUBST(have_ficlone)

I got below build error:
  libtoolize: Consider adding '-I m4' to ACLOCAL_AMFLAGS in Makefile.am.
  cp include/install-sh .
  aclocal -I m4
  /usr/bin/m4:m4/package_libcdev.m4:162: ERROR: end of file in string
  autom4te: error: /usr/bin/m4 failed with exit status: 1
  aclocal: error: autom4te failed with exit status: 1
  make: *** [Makefile:66: configure] Error 1

I think you missed a "])" at here, below "])" belong to last AC_DEFUN.

>    ])
> diff --git a/src/Makefile b/src/Makefile
> index 2815f919b2..49dd2f6c1e 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
>   endif
>  endif
>  
> +ifeq ($(HAVE_FICLONE),yes)
> +     TARGETS += t_reflink_read_race
> +endif
> +
>  CFILES = $(TARGETS:=.c)
>  LDIRT = $(TARGETS) fssum
>  

[snip]

> diff --git a/tests/generic/1953 b/tests/generic/1953
> new file mode 100755
> index 0000000000..058538e6fe
> --- /dev/null
> +++ b/tests/generic/1953
> @@ -0,0 +1,74 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
> +#
> +# FS QA Test No. 1953
> +#
> +# Race file reads with a very slow reflink operation to see if the reads
> +# actually complete while the reflink is ongoing.  This is a functionality
> +# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
> +# concurrently".
> +#
> +. ./common/preamble
> +_begin_fstest auto clone punch
> +
> +# Import common functions.
> +. ./common/filter
> +. ./common/attr
> +. ./common/reflink
> +
> +# real QA test starts here
> +_require_scratch_reflink
> +_require_cp_reflink
> +_require_xfs_io_command "fpunch"
> +_require_test_program "punch-alternating"
> +_require_test_program "t_reflink_read_race"
> +
> +rm -f "$seqres.full"
> +
> +echo "Format and mount"
> +_scratch_mkfs > "$seqres.full" 2>&1
> +_scratch_mount >> "$seqres.full" 2>&1
> +
> +testdir="$SCRATCH_MNT/test-$seq"
> +mkdir "$testdir"
> +
> +calc_space() {
> +	blocks_needed=$(( 2 ** (fnr + 1) ))
> +	space_needed=$((blocks_needed * blksz * 5 / 4))
> +}
> +
> +# Figure out the number of blocks that we need to get the reflink runtime above
> +# 1 seconds
> +echo "Create a many-block file"
> +for ((fnr = 1; fnr < 40; fnr++)); do
> +	free_blocks=$(stat -f -c '%a' "$testdir")
> +	blksz=$(_get_file_block_size "$testdir")
> +	space_avail=$((free_blocks * blksz))
> +	calc_space
> +	test $space_needed -gt $space_avail && \
> +		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
> +
> +	off=$(( (2 ** fnr) * blksz))
> +	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
> +	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
> +
> +	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break

To make sure we have this program:
  _require_command "$TIMEOUT_PROG" timeout
then use $TIMEOUT_PROG at here.

Others looks good to me, if it expects to get below failure on linux 6.6.0-rc6+.

# ./check generic/1953
FSTYP         -- xfs (debug)
PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.6.0-rc6-mainline+ #7 SMP PREEMPT_DYNAMIC Thu Oct 19 22:34:28 CST 2023
MKFS_OPTIONS  -- -f /dev/loop0
MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/loop0 /mnt/scratch

generic/1953       - output mismatch (see /root/git/xfstests/results//generic/1953.out.bad)
    --- tests/generic/1953.out  2023-11-10 22:22:12.379185281 +0800
    +++ /root/git/xfstests/results//generic/1953.out.bad        2023-11-10 22:28:27.041801352 +0800
    @@ -3,4 +3,4 @@
     Create a many-block file
     Reflink the big file
     Terminated
    -test completed successfully
    +clone finished before any reads
    ...
    (Run 'diff -u /root/git/xfstests/tests/generic/1953.out /root/git/xfstests/results//generic/1953.out.bad'  to see the entire diff)
Ran: generic/1953
Failures: generic/1953
Failed 1 of 1 tests

Thanks,
Zorro

> +done
> +echo "fnr=$fnr" >> $seqres.full
> +
> +echo "Reflink the big file"
> +$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
> +	"$testdir/outcome" &>> $seqres.full
> +
> +if [ ! -e "$testdir/outcome" ]; then
> +	echo "Could not set up program"
> +elif grep -q "finished read early" "$testdir/outcome"; then
> +	echo "test completed successfully"
> +else
> +	cat "$testdir/outcome"
> +fi
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/1953.out b/tests/generic/1953.out
> new file mode 100644
> index 0000000000..8eaacb7ff0
> --- /dev/null
> +++ b/tests/generic/1953.out
> @@ -0,0 +1,6 @@
> +QA output created by 1953
> +Format and mount
> +Create a many-block file
> +Reflink the big file
> +Terminated
> +test completed successfully
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-10 14:40   ` Zorro Lang
@ 2023-11-11 22:20     ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-11 22:20 UTC (permalink / raw)
  To: Zorro Lang; +Cc: Christoph Hellwig, Catherine Hoang, fstests, linux-xfs

On Fri, Nov 10, 2023 at 10:40:19PM +0800, Zorro Lang wrote:
> On Wed, Nov 08, 2023 at 01:29:28PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > XFS has a rather slow reflink operation.  While a reflink operation is
> > running, other programs cannot read the contents of the source file,
> > which is causing latency spikes.  Catherine Hoang wrote a patch to
> > permit reads, since the source file contents do not change.  This is a
> > functionality test for that patch.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
> > ---
> >  configure.ac              |    1 
> >  include/builddefs.in      |    1 
> >  m4/package_libcdev.m4     |   12 ++
> >  src/Makefile              |    4 +
> >  src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
> >  tests/generic/1953        |   74 ++++++++++
> >  tests/generic/1953.out    |    6 +
> >  7 files changed, 437 insertions(+)
> >  create mode 100644 src/t_reflink_read_race.c
> >  create mode 100755 tests/generic/1953
> >  create mode 100644 tests/generic/1953.out
> > 
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 4687d8a3c0..7333045330 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
> >  AC_HAVE_NFTW
> >  AC_HAVE_RLIMIT_NOFILE
> >  AC_HAVE_FIEXCHANGE
> > +AC_HAVE_FICLONE
> >  
> >  AC_CHECK_FUNCS([renameat2])
> >  AC_CHECK_FUNCS([reallocarray])
> > diff --git a/include/builddefs.in b/include/builddefs.in
> > index 969acf0da2..446350d5fc 100644
> > --- a/include/builddefs.in
> > +++ b/include/builddefs.in
> > @@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
> >  HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
> >  HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
> >  HAVE_FIEXCHANGE = @have_fiexchange@
> > +HAVE_FICLONE = @have_ficlone@
> >  
> >  GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
> >  
> > diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
> > index 7f73104405..91eb64db21 100644
> > --- a/m4/package_libcdev.m4
> > +++ b/m4/package_libcdev.m4
> > @@ -173,4 +173,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
> >      ]])],[have_fiexchange=yes
> >         AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
> >      AC_SUBST(have_fiexchange)
> > +
> > +# Check if we have FICLONE
> > +AC_DEFUN([AC_HAVE_FICLONE],
> > +  [ AC_MSG_CHECKING([for FICLONE])
> > +    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
> > +#include <sys/ioctl.h>
> > +#include <linux/fs.h>
> > +    ]], [[
> > +	 ioctl(-1, FICLONE, -1);
> > +    ]])],[have_ficlone=yes
> > +       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
> > +    AC_SUBST(have_ficlone)
> 
> I got below build error:
>   libtoolize: Consider adding '-I m4' to ACLOCAL_AMFLAGS in Makefile.am.
>   cp include/install-sh .
>   aclocal -I m4
>   /usr/bin/m4:m4/package_libcdev.m4:162: ERROR: end of file in string
>   autom4te: error: /usr/bin/m4 failed with exit status: 1
>   aclocal: error: autom4te failed with exit status: 1
>   make: *** [Makefile:66: configure] Error 1
> 
> I think you missed a "])" at here, below "])" belong to last AC_DEFUN.

Yep, that was a porting bug when I reshuffled the patches.

> >    ])
> > diff --git a/src/Makefile b/src/Makefile
> > index 2815f919b2..49dd2f6c1e 100644
> > --- a/src/Makefile
> > +++ b/src/Makefile
> > @@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
> >   endif
> >  endif
> >  
> > +ifeq ($(HAVE_FICLONE),yes)
> > +     TARGETS += t_reflink_read_race
> > +endif
> > +
> >  CFILES = $(TARGETS:=.c)
> >  LDIRT = $(TARGETS) fssum
> >  
> 
> [snip]
> 
> > diff --git a/tests/generic/1953 b/tests/generic/1953
> > new file mode 100755
> > index 0000000000..058538e6fe
> > --- /dev/null
> > +++ b/tests/generic/1953
> > @@ -0,0 +1,74 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 1953
> > +#
> > +# Race file reads with a very slow reflink operation to see if the reads
> > +# actually complete while the reflink is ongoing.  This is a functionality
> > +# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
> > +# concurrently".
> > +#
> > +. ./common/preamble
> > +_begin_fstest auto clone punch
> > +
> > +# Import common functions.
> > +. ./common/filter
> > +. ./common/attr
> > +. ./common/reflink
> > +
> > +# real QA test starts here
> > +_require_scratch_reflink
> > +_require_cp_reflink
> > +_require_xfs_io_command "fpunch"
> > +_require_test_program "punch-alternating"
> > +_require_test_program "t_reflink_read_race"
> > +
> > +rm -f "$seqres.full"
> > +
> > +echo "Format and mount"
> > +_scratch_mkfs > "$seqres.full" 2>&1
> > +_scratch_mount >> "$seqres.full" 2>&1
> > +
> > +testdir="$SCRATCH_MNT/test-$seq"
> > +mkdir "$testdir"
> > +
> > +calc_space() {
> > +	blocks_needed=$(( 2 ** (fnr + 1) ))
> > +	space_needed=$((blocks_needed * blksz * 5 / 4))
> > +}
> > +
> > +# Figure out the number of blocks that we need to get the reflink runtime above
> > +# 1 seconds
> > +echo "Create a many-block file"
> > +for ((fnr = 1; fnr < 40; fnr++)); do
> > +	free_blocks=$(stat -f -c '%a' "$testdir")
> > +	blksz=$(_get_file_block_size "$testdir")
> > +	space_avail=$((free_blocks * blksz))
> > +	calc_space
> > +	test $space_needed -gt $space_avail && \
> > +		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
> > +
> > +	off=$(( (2 ** fnr) * blksz))
> > +	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
> > +	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
> > +
> > +	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
> 
> To make sure we have this program:
>   _require_command "$TIMEOUT_PROG" timeout
> then use $TIMEOUT_PROG at here.

<shrug> timeout is in coreutils, is that not a requirement?

Will add anyway.

> Others looks good to me, if it expects to get below failure on linux 6.6.0-rc6+.
> 
> # ./check generic/1953
> FSTYP         -- xfs (debug)
> PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.6.0-rc6-mainline+ #7 SMP PREEMPT_DYNAMIC Thu Oct 19 22:34:28 CST 2023
> MKFS_OPTIONS  -- -f /dev/loop0
> MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/loop0 /mnt/scratch
> 
> generic/1953       - output mismatch (see /root/git/xfstests/results//generic/1953.out.bad)
>     --- tests/generic/1953.out  2023-11-10 22:22:12.379185281 +0800
>     +++ /root/git/xfstests/results//generic/1953.out.bad        2023-11-10 22:28:27.041801352 +0800
>     @@ -3,4 +3,4 @@
>      Create a many-block file
>      Reflink the big file
>      Terminated
>     -test completed successfully
>     +clone finished before any reads
>     ...
>     (Run 'diff -u /root/git/xfstests/tests/generic/1953.out /root/git/xfstests/results//generic/1953.out.bad'  to see the entire diff)
> Ran: generic/1953
> Failures: generic/1953
> Failed 1 of 1 tests

This output is correct for 6.6; the functionality examined by this test
was merged for 6.7-rc1.

--D

> 
> Thanks,
> Zorro
> 
> > +done
> > +echo "fnr=$fnr" >> $seqres.full
> > +
> > +echo "Reflink the big file"
> > +$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
> > +	"$testdir/outcome" &>> $seqres.full
> > +
> > +if [ ! -e "$testdir/outcome" ]; then
> > +	echo "Could not set up program"
> > +elif grep -q "finished read early" "$testdir/outcome"; then
> > +	echo "test completed successfully"
> > +else
> > +	cat "$testdir/outcome"
> > +fi
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/generic/1953.out b/tests/generic/1953.out
> > new file mode 100644
> > index 0000000000..8eaacb7ff0
> > --- /dev/null
> > +++ b/tests/generic/1953.out
> > @@ -0,0 +1,6 @@
> > +QA output created by 1953
> > +Format and mount
> > +Create a many-block file
> > +Reflink the big file
> > +Terminated
> > +test completed successfully
> > 
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/1] generic: test reads racing with slow reflink operations
  2023-11-08 21:29 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
  2023-11-10 14:40   ` Zorro Lang
@ 2023-11-11 22:22   ` Darrick J. Wong
  2023-11-12  2:42     ` Zorro Lang
  1 sibling, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-11 22:22 UTC (permalink / raw)
  To: zlang; +Cc: Christoph Hellwig, Catherine Hoang, guan, fstests, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

XFS has a rather slow reflink operation.  While a reflink operation is
running, other programs cannot read the contents of the source file,
which is causing latency spikes.  Catherine Hoang wrote a patch to
permit reads, since the source file contents do not change.  This is a
functionality test for that patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
---
v2: fix a couple of things pointed out by zorro
---
 configure.ac              |    1 
 include/builddefs.in      |    1 
 m4/package_libcdev.m4     |   13 ++
 src/Makefile              |    4 +
 src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1953        |   75 ++++++++++
 tests/generic/1953.out    |    6 +
 tests/xfs/1872            |   10 +
 tests/xfs/1873            |   10 +
 9 files changed, 447 insertions(+), 12 deletions(-)
 create mode 100644 src/t_reflink_read_race.c
 create mode 100755 tests/generic/1953
 create mode 100644 tests/generic/1953.out

diff --git a/configure.ac b/configure.ac
index 4687d8a3c0..7333045330 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
 AC_HAVE_NFTW
 AC_HAVE_RLIMIT_NOFILE
 AC_HAVE_FIEXCHANGE
+AC_HAVE_FICLONE
 
 AC_CHECK_FUNCS([renameat2])
 AC_CHECK_FUNCS([reallocarray])
diff --git a/include/builddefs.in b/include/builddefs.in
index 969acf0da2..446350d5fc 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
 HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
 HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
 HAVE_FIEXCHANGE = @have_fiexchange@
+HAVE_FICLONE = @have_ficlone@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 7f73104405..0f4b8063f3 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -174,3 +174,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
        AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
     AC_SUBST(have_fiexchange)
   ])
+
+# Check if we have FICLONE
+AC_DEFUN([AC_HAVE_FICLONE],
+  [ AC_MSG_CHECKING([for FICLONE])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+    ]], [[
+	 ioctl(-1, FICLONE, -1);
+    ]])],[have_ficlone=yes
+       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+    AC_SUBST(have_ficlone)
+  ])
diff --git a/src/Makefile b/src/Makefile
index 2815f919b2..49dd2f6c1e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
  endif
 endif
 
+ifeq ($(HAVE_FICLONE),yes)
+     TARGETS += t_reflink_read_race
+endif
+
 CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS) fssum
 
diff --git a/src/t_reflink_read_race.c b/src/t_reflink_read_race.c
new file mode 100644
index 0000000000..00c19d7c05
--- /dev/null
+++ b/src/t_reflink_read_race.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Race reads with reflink to see if reads continue while we're cloning.
+ * Copyright 2023 Oracle.  All rights reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t mypid;
+
+static FILE *outcome_fp;
+
+/* Significant timestamps.  Initialized to zero */
+static double program_start, clone_start, clone_end;
+
+/* Coordinates access to timestamps */
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct readinfo {
+	int fd;
+	unsigned int blocksize;
+	char *descr;
+};
+
+struct cloneinfo {
+	int src_fd, dst_fd;
+};
+
+#define MSEC_PER_SEC	1000
+#define NSEC_PER_MSEC	1000000
+
+/*
+ * Assume that it shouldn't take longer than 100ms for the FICLONE ioctl to
+ * enter the kernel and take locks on an uncontended file.  This is also the
+ * required CLOCK_MONOTONIC granularity.
+ */
+#define EARLIEST_READ_MS	(MSEC_PER_SEC / 10)
+
+/*
+ * We want to be reasonably sure that the FICLONE takes long enough that any
+ * read initiated after clone operation locked the source file could have
+ * completed a disk IO before the clone finishes.  Therefore, we require that
+ * the clone operation must take at least 4x the maximum setup time.
+ */
+#define MINIMUM_CLONE_MS	(EARLIEST_READ_MS * 5)
+
+static double timespec_to_msec(const struct timespec *ts)
+{
+	return (ts->tv_sec * (double)MSEC_PER_SEC) +
+	       (ts->tv_nsec / (double)NSEC_PER_MSEC);
+}
+
+static void sleep_ms(unsigned int len)
+{
+	struct timespec time = {
+		.tv_nsec = len * NSEC_PER_MSEC,
+	};
+
+	nanosleep(&time, NULL);
+}
+
+static void outcome(const char *str)
+{
+	fprintf(outcome_fp, "%s\n", str);
+	fflush(outcome_fp);
+}
+
+static void *reader_fn(void *arg)
+{
+	struct timespec now;
+	struct readinfo *ri = arg;
+	char *buf = malloc(ri->blocksize);
+	loff_t pos = 0;
+	ssize_t copied;
+	int ret;
+
+	if (!buf) {
+		perror("alloc buffer");
+		goto kill_error;
+	}
+
+	/* Wait for the FICLONE to start */
+	pthread_mutex_lock(&lock);
+	while (clone_start < program_start) {
+		pthread_mutex_unlock(&lock);
+#ifdef DEBUG
+		printf("%s read waiting for clone to start; cs=%.2f ps=%.2f\n",
+				ri->descr, clone_start, program_start);
+		fflush(stdout);
+#endif
+		sleep_ms(1);
+		pthread_mutex_lock(&lock);
+	}
+	pthread_mutex_unlock(&lock);
+	sleep_ms(EARLIEST_READ_MS);
+
+	/* Read from the file... */
+	while ((copied = read(ri->fd, buf, ri->blocksize)) > 0) {
+		double read_completion;
+
+		ret = clock_gettime(CLOCK_MONOTONIC, &now);
+		if (ret) {
+			perror("clock_gettime");
+			goto kill_error;
+		}
+		read_completion = timespec_to_msec(&now);
+
+		/*
+		 * If clone_end is still zero, the FICLONE is still running.
+		 * If the read completion occurred a safe duration after the
+		 * start of the ioctl, then report that as an early read
+		 * completion.
+		 */
+		pthread_mutex_lock(&lock);
+		if (clone_end < program_start &&
+		    read_completion > clone_start + EARLIEST_READ_MS) {
+			pthread_mutex_unlock(&lock);
+
+			/* clone still running... */
+			printf("finished %s read early at %.2fms\n",
+					ri->descr,
+					read_completion - program_start);
+			fflush(stdout);
+			outcome("finished read early");
+			goto kill_done;
+		}
+		pthread_mutex_unlock(&lock);
+
+		sleep_ms(1);
+		pos += copied;
+	}
+	if (copied < 0) {
+		perror("read");
+		goto kill_error;
+	}
+
+	return NULL;
+kill_error:
+	outcome("reader error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+static void *clone_fn(void *arg)
+{
+	struct timespec now;
+	struct cloneinfo *ci = arg;
+	int ret;
+
+	/* Record the approximate start time of this thread */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone start");
+		goto kill_error;
+	}
+	pthread_mutex_lock(&lock);
+	clone_start = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("started clone at %.2fms\n", clone_start - program_start);
+	fflush(stdout);
+
+	/* Kernel call, only killable with a fatal signal */
+	ret = ioctl(ci->dst_fd, FICLONE, ci->src_fd);
+	if (ret) {
+		perror("FICLONE");
+		goto kill_error;
+	}
+
+	/* If the ioctl completes, note the completion time */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone end");
+		goto kill_error;
+	}
+
+	pthread_mutex_lock(&lock);
+	clone_end = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("finished clone at %.2fms\n",
+			clone_end - program_start);
+	fflush(stdout);
+
+	/* Complain if we didn't take long enough to clone. */
+	if (clone_end < clone_start + MINIMUM_CLONE_MS) {
+		printf("clone did not take enough time (%.2fms)\n",
+				clone_end - clone_start);
+		fflush(stdout);
+		outcome("clone too fast");
+		goto kill_done;
+	}
+
+	outcome("clone finished before any reads");
+	goto kill_done;
+
+kill_error:
+	outcome("clone error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct cloneinfo ci;
+	struct readinfo bufio = { .descr = "buffered" };
+	struct readinfo directio = { .descr = "directio" };
+	struct timespec now;
+	pthread_t clone_thread, bufio_thread, directio_thread;
+	double clockres;
+	int ret;
+
+	if (argc != 4) {
+		printf("Usage: %s src_file dst_file outcome_file\n", argv[0]);
+		return 1;
+	}
+
+	mypid = getpid();
+
+	/* Check for sufficient clock precision. */
+	ret = clock_getres(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_getres MONOTONIC");
+		return 2;
+	}
+	printf("CLOCK_MONOTONIC resolution: %llus %luns\n",
+			(unsigned long long)now.tv_sec,
+			(unsigned long)now.tv_nsec);
+	fflush(stdout);
+
+	clockres = timespec_to_msec(&now);
+	if (clockres > EARLIEST_READ_MS) {
+		fprintf(stderr, "insufficient CLOCK_MONOTONIC resolution\n");
+		return 2;
+	}
+
+	/*
+	 * Measure program start time since the MONOTONIC time base is not
+	 * all that well defined.
+	 */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime MONOTONIC");
+		return 2;
+	}
+	if (now.tv_sec == 0 && now.tv_nsec == 0) {
+		fprintf(stderr, "Uhoh, start time is zero?!\n");
+		return 2;
+	}
+	program_start = timespec_to_msec(&now);
+
+	outcome_fp = fopen(argv[3], "w");
+	if (!outcome_fp) {
+		perror(argv[3]);
+		return 2;
+	}
+
+	/* Open separate fds for each thread */
+	ci.src_fd = open(argv[1], O_RDONLY);
+	if (ci.src_fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+
+	ci.dst_fd = open(argv[2], O_RDWR | O_CREAT, 0600);
+	if (ci.dst_fd < 0) {
+		perror(argv[2]);
+		return 2;
+	}
+
+	bufio.fd = open(argv[1], O_RDONLY);
+	if (bufio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	bufio.blocksize = 37;
+
+	directio.fd = open(argv[1], O_RDONLY | O_DIRECT);
+	if (directio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	directio.blocksize = 512;
+
+	/* Start threads */
+	ret = pthread_create(&clone_thread, NULL, clone_fn, &ci);
+	if (ret) {
+		fprintf(stderr, "create clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&bufio_thread, NULL, reader_fn, &bufio);
+	if (ret) {
+		fprintf(stderr, "create bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&directio_thread, NULL, reader_fn, &directio);
+	if (ret) {
+		fprintf(stderr, "create directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	/* Wait for threads */
+	ret = pthread_join(clone_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(bufio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(directio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	printf("Program should have killed itself?\n");
+	outcome("program failed to end early");
+	return 0;
+}
diff --git a/tests/generic/1953 b/tests/generic/1953
new file mode 100755
index 0000000000..70aefc736b
--- /dev/null
+++ b/tests/generic/1953
@@ -0,0 +1,75 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
+#
+# FS QA Test No. 1953
+#
+# Race file reads with a very slow reflink operation to see if the reads
+# actually complete while the reflink is ongoing.  This is a functionality
+# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
+# concurrently".
+#
+. ./common/preamble
+_begin_fstest auto clone punch
+
+# Import common functions.
+. ./common/filter
+. ./common/attr
+. ./common/reflink
+
+# real QA test starts here
+_require_scratch_reflink
+_require_cp_reflink
+_require_xfs_io_command "fpunch"
+_require_test_program "punch-alternating"
+_require_test_program "t_reflink_read_race"
+_require_command "$TIMEOUT_PROG" timeout
+
+rm -f "$seqres.full"
+
+echo "Format and mount"
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount >> "$seqres.full" 2>&1
+
+testdir="$SCRATCH_MNT/test-$seq"
+mkdir "$testdir"
+
+calc_space() {
+	blocks_needed=$(( 2 ** (fnr + 1) ))
+	space_needed=$((blocks_needed * blksz * 5 / 4))
+}
+
+# Figure out the number of blocks that we need to get the reflink runtime above
+# 1 seconds
+echo "Create a many-block file"
+for ((fnr = 1; fnr < 40; fnr++)); do
+	free_blocks=$(stat -f -c '%a' "$testdir")
+	blksz=$(_get_file_block_size "$testdir")
+	space_avail=$((free_blocks * blksz))
+	calc_space
+	test $space_needed -gt $space_avail && \
+		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
+
+	off=$(( (2 ** fnr) * blksz))
+	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
+	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
+
+	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
+done
+echo "fnr=$fnr" >> $seqres.full
+
+echo "Reflink the big file"
+$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
+	"$testdir/outcome" &>> $seqres.full
+
+if [ ! -e "$testdir/outcome" ]; then
+	echo "Could not set up program"
+elif grep -q "finished read early" "$testdir/outcome"; then
+	echo "test completed successfully"
+else
+	cat "$testdir/outcome"
+fi
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/1953.out b/tests/generic/1953.out
new file mode 100644
index 0000000000..8eaacb7ff0
--- /dev/null
+++ b/tests/generic/1953.out
@@ -0,0 +1,6 @@
+QA output created by 1953
+Format and mount
+Create a many-block file
+Reflink the big file
+Terminated
+test completed successfully
diff --git a/tests/xfs/1872 b/tests/xfs/1872
index 004e99176e..289fc99612 100755
--- a/tests/xfs/1872
+++ b/tests/xfs/1872
@@ -10,15 +10,13 @@
 . ./common/preamble
 _begin_fstest auto quick unlink
 
-# Import common functions.
-source ./common/filter
-source ./common/fuzzy
-source ./common/quota
+. ./common/filter
+. ./common/fuzzy
+. ./common/quota
 
 # real QA test starts here
 
-# Modify as appropriate.
-_supported_fs generic
+_supported_fs xfs
 _require_xfs_db_command iunlink
 _require_scratch_nocheck	# we'll run repair ourselves
 
diff --git a/tests/xfs/1873 b/tests/xfs/1873
index 46af16f5d1..5d9fc620dc 100755
--- a/tests/xfs/1873
+++ b/tests/xfs/1873
@@ -10,15 +10,13 @@
 . ./common/preamble
 _begin_fstest auto online_repair
 
-# Import common functions.
-source ./common/filter
-source ./common/fuzzy
-source ./common/quota
+. ./common/filter
+. ./common/fuzzy
+. ./common/quota
 
 # real QA test starts here
 
-# Modify as appropriate.
-_supported_fs generic
+_supported_fs xfs
 _require_xfs_db_command iunlink
 # The iunlink bucket repair code wasn't added to the AGI repair code
 # until after the directory repair code was merged

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/1] generic: test reads racing with slow reflink operations
  2023-11-11 22:22   ` [PATCH v2 " Darrick J. Wong
@ 2023-11-12  2:42     ` Zorro Lang
  2023-11-13 16:46       ` Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Zorro Lang @ 2023-11-12  2:42 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: fstests, linux-xfs

On Sat, Nov 11, 2023 at 02:22:57PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> XFS has a rather slow reflink operation.  While a reflink operation is
> running, other programs cannot read the contents of the source file,
> which is causing latency spikes.  Catherine Hoang wrote a patch to
> permit reads, since the source file contents do not change.  This is a
> functionality test for that patch.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
> ---
> v2: fix a couple of things pointed out by zorro
> ---
>  configure.ac              |    1 
>  include/builddefs.in      |    1 
>  m4/package_libcdev.m4     |   13 ++
>  src/Makefile              |    4 +
>  src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/1953        |   75 ++++++++++
>  tests/generic/1953.out    |    6 +
>  tests/xfs/1872            |   10 +
>  tests/xfs/1873            |   10 +

This xfs/1953 looks good to me, but about the changes on xfs/1872 and 1873, I think
those might be another patch :)

>  9 files changed, 447 insertions(+), 12 deletions(-)
>  create mode 100644 src/t_reflink_read_race.c
>  create mode 100755 tests/generic/1953
>  create mode 100644 tests/generic/1953.out
> 

[snip]

> diff --git a/tests/generic/1953 b/tests/generic/1953
> new file mode 100755
> index 0000000000..70aefc736b
> --- /dev/null
> +++ b/tests/generic/1953
> @@ -0,0 +1,75 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
> +#
> +# FS QA Test No. 1953
> +#
> +# Race file reads with a very slow reflink operation to see if the reads
> +# actually complete while the reflink is ongoing.  This is a functionality
> +# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
> +# concurrently".
> +#
> +. ./common/preamble
> +_begin_fstest auto clone punch
> +
> +# Import common functions.
> +. ./common/filter
> +. ./common/attr
> +. ./common/reflink
> +
> +# real QA test starts here
> +_require_scratch_reflink
> +_require_cp_reflink
> +_require_xfs_io_command "fpunch"
> +_require_test_program "punch-alternating"
> +_require_test_program "t_reflink_read_race"
> +_require_command "$TIMEOUT_PROG" timeout
> +
> +rm -f "$seqres.full"
> +
> +echo "Format and mount"
> +_scratch_mkfs > "$seqres.full" 2>&1
> +_scratch_mount >> "$seqres.full" 2>&1
> +
> +testdir="$SCRATCH_MNT/test-$seq"
> +mkdir "$testdir"
> +
> +calc_space() {
> +	blocks_needed=$(( 2 ** (fnr + 1) ))
> +	space_needed=$((blocks_needed * blksz * 5 / 4))
> +}
> +
> +# Figure out the number of blocks that we need to get the reflink runtime above
> +# 1 seconds
> +echo "Create a many-block file"
> +for ((fnr = 1; fnr < 40; fnr++)); do
> +	free_blocks=$(stat -f -c '%a' "$testdir")
> +	blksz=$(_get_file_block_size "$testdir")
> +	space_avail=$((free_blocks * blksz))
> +	calc_space
> +	test $space_needed -gt $space_avail && \
> +		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
> +
> +	off=$(( (2 ** fnr) * blksz))
> +	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
> +	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
> +
> +	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break

As there's `_require_command "$TIMEOUT_PROG" timeout`, so better to use $TIMEOUT_PROG
at here. I can help to change this, but the 1872 and 1873 need to be removed, they
block the patch merging.

Thanks,
Zorro

> +done
> +echo "fnr=$fnr" >> $seqres.full
> +
> +echo "Reflink the big file"
> +$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
> +	"$testdir/outcome" &>> $seqres.full
> +
> +if [ ! -e "$testdir/outcome" ]; then
> +	echo "Could not set up program"
> +elif grep -q "finished read early" "$testdir/outcome"; then
> +	echo "test completed successfully"
> +else
> +	cat "$testdir/outcome"
> +fi
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/1953.out b/tests/generic/1953.out
> new file mode 100644
> index 0000000000..8eaacb7ff0
> --- /dev/null
> +++ b/tests/generic/1953.out
> @@ -0,0 +1,6 @@
> +QA output created by 1953
> +Format and mount
> +Create a many-block file
> +Reflink the big file
> +Terminated
> +test completed successfully
> diff --git a/tests/xfs/1872 b/tests/xfs/1872
> index 004e99176e..289fc99612 100755
> --- a/tests/xfs/1872
> +++ b/tests/xfs/1872
> @@ -10,15 +10,13 @@
>  . ./common/preamble
>  _begin_fstest auto quick unlink
>  
> -# Import common functions.
> -source ./common/filter
> -source ./common/fuzzy
> -source ./common/quota
> +. ./common/filter
> +. ./common/fuzzy
> +. ./common/quota
>  
>  # real QA test starts here
>  
> -# Modify as appropriate.
> -_supported_fs generic
> +_supported_fs xfs
>  _require_xfs_db_command iunlink
>  _require_scratch_nocheck	# we'll run repair ourselves
>  
> diff --git a/tests/xfs/1873 b/tests/xfs/1873
> index 46af16f5d1..5d9fc620dc 100755
> --- a/tests/xfs/1873
> +++ b/tests/xfs/1873
> @@ -10,15 +10,13 @@
>  . ./common/preamble
>  _begin_fstest auto online_repair
>  
> -# Import common functions.
> -source ./common/filter
> -source ./common/fuzzy
> -source ./common/quota
> +. ./common/filter
> +. ./common/fuzzy
> +. ./common/quota
>  
>  # real QA test starts here
>  
> -# Modify as appropriate.
> -_supported_fs generic
> +_supported_fs xfs
>  _require_xfs_db_command iunlink
>  # The iunlink bucket repair code wasn't added to the AGI repair code
>  # until after the directory repair code was merged
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/1] generic: test reads racing with slow reflink operations
  2023-11-12  2:42     ` Zorro Lang
@ 2023-11-13 16:46       ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-13 16:46 UTC (permalink / raw)
  To: Zorro Lang; +Cc: fstests, linux-xfs

On Sun, Nov 12, 2023 at 10:42:09AM +0800, Zorro Lang wrote:
> On Sat, Nov 11, 2023 at 02:22:57PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > XFS has a rather slow reflink operation.  While a reflink operation is
> > running, other programs cannot read the contents of the source file,
> > which is causing latency spikes.  Catherine Hoang wrote a patch to
> > permit reads, since the source file contents do not change.  This is a
> > functionality test for that patch.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
> > ---
> > v2: fix a couple of things pointed out by zorro
> > ---
> >  configure.ac              |    1 
> >  include/builddefs.in      |    1 
> >  m4/package_libcdev.m4     |   13 ++
> >  src/Makefile              |    4 +
> >  src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
> >  tests/generic/1953        |   75 ++++++++++
> >  tests/generic/1953.out    |    6 +
> >  tests/xfs/1872            |   10 +
> >  tests/xfs/1873            |   10 +
> 
> This xfs/1953 looks good to me, but about the changes on xfs/1872 and 1873, I think
> those might be another patch :)

Aw, shucks, I must've applied those changes to the wrong patch.

> >  9 files changed, 447 insertions(+), 12 deletions(-)
> >  create mode 100644 src/t_reflink_read_race.c
> >  create mode 100755 tests/generic/1953
> >  create mode 100644 tests/generic/1953.out
> > 
> 
> [snip]
> 
> > diff --git a/tests/generic/1953 b/tests/generic/1953
> > new file mode 100755
> > index 0000000000..70aefc736b
> > --- /dev/null
> > +++ b/tests/generic/1953
> > @@ -0,0 +1,75 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 1953
> > +#
> > +# Race file reads with a very slow reflink operation to see if the reads
> > +# actually complete while the reflink is ongoing.  This is a functionality
> > +# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
> > +# concurrently".
> > +#
> > +. ./common/preamble
> > +_begin_fstest auto clone punch
> > +
> > +# Import common functions.
> > +. ./common/filter
> > +. ./common/attr
> > +. ./common/reflink
> > +
> > +# real QA test starts here
> > +_require_scratch_reflink
> > +_require_cp_reflink
> > +_require_xfs_io_command "fpunch"
> > +_require_test_program "punch-alternating"
> > +_require_test_program "t_reflink_read_race"
> > +_require_command "$TIMEOUT_PROG" timeout
> > +
> > +rm -f "$seqres.full"
> > +
> > +echo "Format and mount"
> > +_scratch_mkfs > "$seqres.full" 2>&1
> > +_scratch_mount >> "$seqres.full" 2>&1
> > +
> > +testdir="$SCRATCH_MNT/test-$seq"
> > +mkdir "$testdir"
> > +
> > +calc_space() {
> > +	blocks_needed=$(( 2 ** (fnr + 1) ))
> > +	space_needed=$((blocks_needed * blksz * 5 / 4))
> > +}
> > +
> > +# Figure out the number of blocks that we need to get the reflink runtime above
> > +# 1 seconds
> > +echo "Create a many-block file"
> > +for ((fnr = 1; fnr < 40; fnr++)); do
> > +	free_blocks=$(stat -f -c '%a' "$testdir")
> > +	blksz=$(_get_file_block_size "$testdir")
> > +	space_avail=$((free_blocks * blksz))
> > +	calc_space
> > +	test $space_needed -gt $space_avail && \
> > +		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
> > +
> > +	off=$(( (2 ** fnr) * blksz))
> > +	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
> > +	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
> > +
> > +	timeout 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
> 
> As there's `_require_command "$TIMEOUT_PROG" timeout`, so better to use $TIMEOUT_PROG
> at here. I can help to change this, but the 1872 and 1873 need to be removed, they
> block the patch merging.

Eh, now I have to respin and resubmit two complete series, so you might
as well wait for me to retest the whole stupid mess.

--D

> Thanks,
> Zorro
> 
> > +done
> > +echo "fnr=$fnr" >> $seqres.full
> > +
> > +echo "Reflink the big file"
> > +$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
> > +	"$testdir/outcome" &>> $seqres.full
> > +
> > +if [ ! -e "$testdir/outcome" ]; then
> > +	echo "Could not set up program"
> > +elif grep -q "finished read early" "$testdir/outcome"; then
> > +	echo "test completed successfully"
> > +else
> > +	cat "$testdir/outcome"
> > +fi
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/generic/1953.out b/tests/generic/1953.out
> > new file mode 100644
> > index 0000000000..8eaacb7ff0
> > --- /dev/null
> > +++ b/tests/generic/1953.out
> > @@ -0,0 +1,6 @@
> > +QA output created by 1953
> > +Format and mount
> > +Create a many-block file
> > +Reflink the big file
> > +Terminated
> > +test completed successfully
> > diff --git a/tests/xfs/1872 b/tests/xfs/1872
> > index 004e99176e..289fc99612 100755
> > --- a/tests/xfs/1872
> > +++ b/tests/xfs/1872
> > @@ -10,15 +10,13 @@
> >  . ./common/preamble
> >  _begin_fstest auto quick unlink
> >  
> > -# Import common functions.
> > -source ./common/filter
> > -source ./common/fuzzy
> > -source ./common/quota
> > +. ./common/filter
> > +. ./common/fuzzy
> > +. ./common/quota
> >  
> >  # real QA test starts here
> >  
> > -# Modify as appropriate.
> > -_supported_fs generic
> > +_supported_fs xfs
> >  _require_xfs_db_command iunlink
> >  _require_scratch_nocheck	# we'll run repair ourselves
> >  
> > diff --git a/tests/xfs/1873 b/tests/xfs/1873
> > index 46af16f5d1..5d9fc620dc 100755
> > --- a/tests/xfs/1873
> > +++ b/tests/xfs/1873
> > @@ -10,15 +10,13 @@
> >  . ./common/preamble
> >  _begin_fstest auto online_repair
> >  
> > -# Import common functions.
> > -source ./common/filter
> > -source ./common/fuzzy
> > -source ./common/quota
> > +. ./common/filter
> > +. ./common/fuzzy
> > +. ./common/quota
> >  
> >  # real QA test starts here
> >  
> > -# Modify as appropriate.
> > -_supported_fs generic
> > +_supported_fs xfs
> >  _require_xfs_db_command iunlink
> >  # The iunlink bucket repair code wasn't added to the AGI repair code
> >  # until after the directory repair code was merged
> > 
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-13 17:08 [PATCHSET v2 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
@ 2023-11-13 17:08 ` Darrick J. Wong
  2023-11-16  7:31   ` Zorro Lang
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-13 17:08 UTC (permalink / raw)
  To: zlang, djwong
  Cc: Christoph Hellwig, Catherine Hoang, linux-xfs, fstests, guan

From: Darrick J. Wong <djwong@kernel.org>

XFS has a rather slow reflink operation.  While a reflink operation is
running, other programs cannot read the contents of the source file,
which is causing latency spikes.  Catherine Hoang wrote a patch to
permit reads, since the source file contents do not change.  This is a
functionality test for that patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
---
 configure.ac              |    1 
 include/builddefs.in      |    1 
 m4/package_libcdev.m4     |   13 ++
 src/Makefile              |    4 +
 src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1953        |   75 ++++++++++
 tests/generic/1953.out    |    6 +
 7 files changed, 439 insertions(+)
 create mode 100644 src/t_reflink_read_race.c
 create mode 100755 tests/generic/1953
 create mode 100644 tests/generic/1953.out


diff --git a/configure.ac b/configure.ac
index 4687d8a3c0..7333045330 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
 AC_HAVE_NFTW
 AC_HAVE_RLIMIT_NOFILE
 AC_HAVE_FIEXCHANGE
+AC_HAVE_FICLONE
 
 AC_CHECK_FUNCS([renameat2])
 AC_CHECK_FUNCS([reallocarray])
diff --git a/include/builddefs.in b/include/builddefs.in
index 969acf0da2..446350d5fc 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
 HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
 HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
 HAVE_FIEXCHANGE = @have_fiexchange@
+HAVE_FICLONE = @have_ficlone@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 7f73104405..0f4b8063f3 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -174,3 +174,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
        AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
     AC_SUBST(have_fiexchange)
   ])
+
+# Check if we have FICLONE
+AC_DEFUN([AC_HAVE_FICLONE],
+  [ AC_MSG_CHECKING([for FICLONE])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+    ]], [[
+	 ioctl(-1, FICLONE, -1);
+    ]])],[have_ficlone=yes
+       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+    AC_SUBST(have_ficlone)
+  ])
diff --git a/src/Makefile b/src/Makefile
index 2815f919b2..49dd2f6c1e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
  endif
 endif
 
+ifeq ($(HAVE_FICLONE),yes)
+     TARGETS += t_reflink_read_race
+endif
+
 CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS) fssum
 
diff --git a/src/t_reflink_read_race.c b/src/t_reflink_read_race.c
new file mode 100644
index 0000000000..00c19d7c05
--- /dev/null
+++ b/src/t_reflink_read_race.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Race reads with reflink to see if reads continue while we're cloning.
+ * Copyright 2023 Oracle.  All rights reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t mypid;
+
+static FILE *outcome_fp;
+
+/* Significant timestamps.  Initialized to zero */
+static double program_start, clone_start, clone_end;
+
+/* Coordinates access to timestamps */
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct readinfo {
+	int fd;
+	unsigned int blocksize;
+	char *descr;
+};
+
+struct cloneinfo {
+	int src_fd, dst_fd;
+};
+
+#define MSEC_PER_SEC	1000
+#define NSEC_PER_MSEC	1000000
+
+/*
+ * Assume that it shouldn't take longer than 100ms for the FICLONE ioctl to
+ * enter the kernel and take locks on an uncontended file.  This is also the
+ * required CLOCK_MONOTONIC granularity.
+ */
+#define EARLIEST_READ_MS	(MSEC_PER_SEC / 10)
+
+/*
+ * We want to be reasonably sure that the FICLONE takes long enough that any
+ * read initiated after clone operation locked the source file could have
+ * completed a disk IO before the clone finishes.  Therefore, we require that
+ * the clone operation must take at least 4x the maximum setup time.
+ */
+#define MINIMUM_CLONE_MS	(EARLIEST_READ_MS * 5)
+
+static double timespec_to_msec(const struct timespec *ts)
+{
+	return (ts->tv_sec * (double)MSEC_PER_SEC) +
+	       (ts->tv_nsec / (double)NSEC_PER_MSEC);
+}
+
+static void sleep_ms(unsigned int len)
+{
+	struct timespec time = {
+		.tv_nsec = len * NSEC_PER_MSEC,
+	};
+
+	nanosleep(&time, NULL);
+}
+
+static void outcome(const char *str)
+{
+	fprintf(outcome_fp, "%s\n", str);
+	fflush(outcome_fp);
+}
+
+static void *reader_fn(void *arg)
+{
+	struct timespec now;
+	struct readinfo *ri = arg;
+	char *buf = malloc(ri->blocksize);
+	loff_t pos = 0;
+	ssize_t copied;
+	int ret;
+
+	if (!buf) {
+		perror("alloc buffer");
+		goto kill_error;
+	}
+
+	/* Wait for the FICLONE to start */
+	pthread_mutex_lock(&lock);
+	while (clone_start < program_start) {
+		pthread_mutex_unlock(&lock);
+#ifdef DEBUG
+		printf("%s read waiting for clone to start; cs=%.2f ps=%.2f\n",
+				ri->descr, clone_start, program_start);
+		fflush(stdout);
+#endif
+		sleep_ms(1);
+		pthread_mutex_lock(&lock);
+	}
+	pthread_mutex_unlock(&lock);
+	sleep_ms(EARLIEST_READ_MS);
+
+	/* Read from the file... */
+	while ((copied = read(ri->fd, buf, ri->blocksize)) > 0) {
+		double read_completion;
+
+		ret = clock_gettime(CLOCK_MONOTONIC, &now);
+		if (ret) {
+			perror("clock_gettime");
+			goto kill_error;
+		}
+		read_completion = timespec_to_msec(&now);
+
+		/*
+		 * If clone_end is still zero, the FICLONE is still running.
+		 * If the read completion occurred a safe duration after the
+		 * start of the ioctl, then report that as an early read
+		 * completion.
+		 */
+		pthread_mutex_lock(&lock);
+		if (clone_end < program_start &&
+		    read_completion > clone_start + EARLIEST_READ_MS) {
+			pthread_mutex_unlock(&lock);
+
+			/* clone still running... */
+			printf("finished %s read early at %.2fms\n",
+					ri->descr,
+					read_completion - program_start);
+			fflush(stdout);
+			outcome("finished read early");
+			goto kill_done;
+		}
+		pthread_mutex_unlock(&lock);
+
+		sleep_ms(1);
+		pos += copied;
+	}
+	if (copied < 0) {
+		perror("read");
+		goto kill_error;
+	}
+
+	return NULL;
+kill_error:
+	outcome("reader error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+static void *clone_fn(void *arg)
+{
+	struct timespec now;
+	struct cloneinfo *ci = arg;
+	int ret;
+
+	/* Record the approximate start time of this thread */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone start");
+		goto kill_error;
+	}
+	pthread_mutex_lock(&lock);
+	clone_start = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("started clone at %.2fms\n", clone_start - program_start);
+	fflush(stdout);
+
+	/* Kernel call, only killable with a fatal signal */
+	ret = ioctl(ci->dst_fd, FICLONE, ci->src_fd);
+	if (ret) {
+		perror("FICLONE");
+		goto kill_error;
+	}
+
+	/* If the ioctl completes, note the completion time */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone end");
+		goto kill_error;
+	}
+
+	pthread_mutex_lock(&lock);
+	clone_end = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("finished clone at %.2fms\n",
+			clone_end - program_start);
+	fflush(stdout);
+
+	/* Complain if we didn't take long enough to clone. */
+	if (clone_end < clone_start + MINIMUM_CLONE_MS) {
+		printf("clone did not take enough time (%.2fms)\n",
+				clone_end - clone_start);
+		fflush(stdout);
+		outcome("clone too fast");
+		goto kill_done;
+	}
+
+	outcome("clone finished before any reads");
+	goto kill_done;
+
+kill_error:
+	outcome("clone error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct cloneinfo ci;
+	struct readinfo bufio = { .descr = "buffered" };
+	struct readinfo directio = { .descr = "directio" };
+	struct timespec now;
+	pthread_t clone_thread, bufio_thread, directio_thread;
+	double clockres;
+	int ret;
+
+	if (argc != 4) {
+		printf("Usage: %s src_file dst_file outcome_file\n", argv[0]);
+		return 1;
+	}
+
+	mypid = getpid();
+
+	/* Check for sufficient clock precision. */
+	ret = clock_getres(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_getres MONOTONIC");
+		return 2;
+	}
+	printf("CLOCK_MONOTONIC resolution: %llus %luns\n",
+			(unsigned long long)now.tv_sec,
+			(unsigned long)now.tv_nsec);
+	fflush(stdout);
+
+	clockres = timespec_to_msec(&now);
+	if (clockres > EARLIEST_READ_MS) {
+		fprintf(stderr, "insufficient CLOCK_MONOTONIC resolution\n");
+		return 2;
+	}
+
+	/*
+	 * Measure program start time since the MONOTONIC time base is not
+	 * all that well defined.
+	 */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime MONOTONIC");
+		return 2;
+	}
+	if (now.tv_sec == 0 && now.tv_nsec == 0) {
+		fprintf(stderr, "Uhoh, start time is zero?!\n");
+		return 2;
+	}
+	program_start = timespec_to_msec(&now);
+
+	outcome_fp = fopen(argv[3], "w");
+	if (!outcome_fp) {
+		perror(argv[3]);
+		return 2;
+	}
+
+	/* Open separate fds for each thread */
+	ci.src_fd = open(argv[1], O_RDONLY);
+	if (ci.src_fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+
+	ci.dst_fd = open(argv[2], O_RDWR | O_CREAT, 0600);
+	if (ci.dst_fd < 0) {
+		perror(argv[2]);
+		return 2;
+	}
+
+	bufio.fd = open(argv[1], O_RDONLY);
+	if (bufio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	bufio.blocksize = 37;
+
+	directio.fd = open(argv[1], O_RDONLY | O_DIRECT);
+	if (directio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	directio.blocksize = 512;
+
+	/* Start threads */
+	ret = pthread_create(&clone_thread, NULL, clone_fn, &ci);
+	if (ret) {
+		fprintf(stderr, "create clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&bufio_thread, NULL, reader_fn, &bufio);
+	if (ret) {
+		fprintf(stderr, "create bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&directio_thread, NULL, reader_fn, &directio);
+	if (ret) {
+		fprintf(stderr, "create directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	/* Wait for threads */
+	ret = pthread_join(clone_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(bufio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(directio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	printf("Program should have killed itself?\n");
+	outcome("program failed to end early");
+	return 0;
+}
diff --git a/tests/generic/1953 b/tests/generic/1953
new file mode 100755
index 0000000000..c14de7060d
--- /dev/null
+++ b/tests/generic/1953
@@ -0,0 +1,75 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
+#
+# FS QA Test No. 1953
+#
+# Race file reads with a very slow reflink operation to see if the reads
+# actually complete while the reflink is ongoing.  This is a functionality
+# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
+# concurrently".
+#
+. ./common/preamble
+_begin_fstest auto clone punch
+
+# Import common functions.
+. ./common/filter
+. ./common/attr
+. ./common/reflink
+
+# real QA test starts here
+_require_scratch_reflink
+_require_cp_reflink
+_require_xfs_io_command "fpunch"
+_require_test_program "punch-alternating"
+_require_test_program "t_reflink_read_race"
+_require_command "$TIMEOUT_PROG" timeout
+
+rm -f "$seqres.full"
+
+echo "Format and mount"
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount >> "$seqres.full" 2>&1
+
+testdir="$SCRATCH_MNT/test-$seq"
+mkdir "$testdir"
+
+calc_space() {
+	blocks_needed=$(( 2 ** (fnr + 1) ))
+	space_needed=$((blocks_needed * blksz * 5 / 4))
+}
+
+# Figure out the number of blocks that we need to get the reflink runtime above
+# 1 seconds
+echo "Create a many-block file"
+for ((fnr = 1; fnr < 40; fnr++)); do
+	free_blocks=$(stat -f -c '%a' "$testdir")
+	blksz=$(_get_file_block_size "$testdir")
+	space_avail=$((free_blocks * blksz))
+	calc_space
+	test $space_needed -gt $space_avail && \
+		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
+
+	off=$(( (2 ** fnr) * blksz))
+	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
+	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
+
+	$TIMEOUT_PROG 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
+done
+echo "fnr=$fnr" >> $seqres.full
+
+echo "Reflink the big file"
+$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
+	"$testdir/outcome" &>> $seqres.full
+
+if [ ! -e "$testdir/outcome" ]; then
+	echo "Could not set up program"
+elif grep -q "finished read early" "$testdir/outcome"; then
+	echo "test completed successfully"
+else
+	cat "$testdir/outcome"
+fi
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/1953.out b/tests/generic/1953.out
new file mode 100644
index 0000000000..8eaacb7ff0
--- /dev/null
+++ b/tests/generic/1953.out
@@ -0,0 +1,6 @@
+QA output created by 1953
+Format and mount
+Create a many-block file
+Reflink the big file
+Terminated
+test completed successfully


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-13 17:08 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
@ 2023-11-16  7:31   ` Zorro Lang
  0 siblings, 0 replies; 10+ messages in thread
From: Zorro Lang @ 2023-11-16  7:31 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, fstests

On Mon, Nov 13, 2023 at 09:08:39AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> XFS has a rather slow reflink operation.  While a reflink operation is
> running, other programs cannot read the contents of the source file,
> which is causing latency spikes.  Catherine Hoang wrote a patch to
> permit reads, since the source file contents do not change.  This is a
> functionality test for that patch.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
> ---

Thanks Darrick, this version looks good to me.

Reviewed-by: Zorro Lang <zlang@redhat.com>

>  configure.ac              |    1 
>  include/builddefs.in      |    1 
>  m4/package_libcdev.m4     |   13 ++
>  src/Makefile              |    4 +
>  src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/1953        |   75 ++++++++++
>  tests/generic/1953.out    |    6 +
>  7 files changed, 439 insertions(+)
>  create mode 100644 src/t_reflink_read_race.c
>  create mode 100755 tests/generic/1953
>  create mode 100644 tests/generic/1953.out
> 
> 
> diff --git a/configure.ac b/configure.ac
> index 4687d8a3c0..7333045330 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
>  AC_HAVE_NFTW
>  AC_HAVE_RLIMIT_NOFILE
>  AC_HAVE_FIEXCHANGE
> +AC_HAVE_FICLONE
>  
>  AC_CHECK_FUNCS([renameat2])
>  AC_CHECK_FUNCS([reallocarray])
> diff --git a/include/builddefs.in b/include/builddefs.in
> index 969acf0da2..446350d5fc 100644
> --- a/include/builddefs.in
> +++ b/include/builddefs.in
> @@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
>  HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
>  HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
>  HAVE_FIEXCHANGE = @have_fiexchange@
> +HAVE_FICLONE = @have_ficlone@
>  
>  GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
>  
> diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
> index 7f73104405..0f4b8063f3 100644
> --- a/m4/package_libcdev.m4
> +++ b/m4/package_libcdev.m4
> @@ -174,3 +174,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
>         AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
>      AC_SUBST(have_fiexchange)
>    ])
> +
> +# Check if we have FICLONE
> +AC_DEFUN([AC_HAVE_FICLONE],
> +  [ AC_MSG_CHECKING([for FICLONE])
> +    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +    ]], [[
> +	 ioctl(-1, FICLONE, -1);
> +    ]])],[have_ficlone=yes
> +       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
> +    AC_SUBST(have_ficlone)
> +  ])
> diff --git a/src/Makefile b/src/Makefile
> index 2815f919b2..49dd2f6c1e 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
>   endif
>  endif
>  
> +ifeq ($(HAVE_FICLONE),yes)
> +     TARGETS += t_reflink_read_race
> +endif
> +
>  CFILES = $(TARGETS:=.c)
>  LDIRT = $(TARGETS) fssum
>  
> diff --git a/src/t_reflink_read_race.c b/src/t_reflink_read_race.c
> new file mode 100644
> index 0000000000..00c19d7c05
> --- /dev/null
> +++ b/src/t_reflink_read_race.c
> @@ -0,0 +1,339 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Race reads with reflink to see if reads continue while we're cloning.
> + * Copyright 2023 Oracle.  All rights reserved.
> + */
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +#include <time.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <signal.h>
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <errno.h>
> +#include <string.h>
> +
> +static pid_t mypid;
> +
> +static FILE *outcome_fp;
> +
> +/* Significant timestamps.  Initialized to zero */
> +static double program_start, clone_start, clone_end;
> +
> +/* Coordinates access to timestamps */
> +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +struct readinfo {
> +	int fd;
> +	unsigned int blocksize;
> +	char *descr;
> +};
> +
> +struct cloneinfo {
> +	int src_fd, dst_fd;
> +};
> +
> +#define MSEC_PER_SEC	1000
> +#define NSEC_PER_MSEC	1000000
> +
> +/*
> + * Assume that it shouldn't take longer than 100ms for the FICLONE ioctl to
> + * enter the kernel and take locks on an uncontended file.  This is also the
> + * required CLOCK_MONOTONIC granularity.
> + */
> +#define EARLIEST_READ_MS	(MSEC_PER_SEC / 10)
> +
> +/*
> + * We want to be reasonably sure that the FICLONE takes long enough that any
> + * read initiated after clone operation locked the source file could have
> + * completed a disk IO before the clone finishes.  Therefore, we require that
> + * the clone operation must take at least 4x the maximum setup time.
> + */
> +#define MINIMUM_CLONE_MS	(EARLIEST_READ_MS * 5)
> +
> +static double timespec_to_msec(const struct timespec *ts)
> +{
> +	return (ts->tv_sec * (double)MSEC_PER_SEC) +
> +	       (ts->tv_nsec / (double)NSEC_PER_MSEC);
> +}
> +
> +static void sleep_ms(unsigned int len)
> +{
> +	struct timespec time = {
> +		.tv_nsec = len * NSEC_PER_MSEC,
> +	};
> +
> +	nanosleep(&time, NULL);
> +}
> +
> +static void outcome(const char *str)
> +{
> +	fprintf(outcome_fp, "%s\n", str);
> +	fflush(outcome_fp);
> +}
> +
> +static void *reader_fn(void *arg)
> +{
> +	struct timespec now;
> +	struct readinfo *ri = arg;
> +	char *buf = malloc(ri->blocksize);
> +	loff_t pos = 0;
> +	ssize_t copied;
> +	int ret;
> +
> +	if (!buf) {
> +		perror("alloc buffer");
> +		goto kill_error;
> +	}
> +
> +	/* Wait for the FICLONE to start */
> +	pthread_mutex_lock(&lock);
> +	while (clone_start < program_start) {
> +		pthread_mutex_unlock(&lock);
> +#ifdef DEBUG
> +		printf("%s read waiting for clone to start; cs=%.2f ps=%.2f\n",
> +				ri->descr, clone_start, program_start);
> +		fflush(stdout);
> +#endif
> +		sleep_ms(1);
> +		pthread_mutex_lock(&lock);
> +	}
> +	pthread_mutex_unlock(&lock);
> +	sleep_ms(EARLIEST_READ_MS);
> +
> +	/* Read from the file... */
> +	while ((copied = read(ri->fd, buf, ri->blocksize)) > 0) {
> +		double read_completion;
> +
> +		ret = clock_gettime(CLOCK_MONOTONIC, &now);
> +		if (ret) {
> +			perror("clock_gettime");
> +			goto kill_error;
> +		}
> +		read_completion = timespec_to_msec(&now);
> +
> +		/*
> +		 * If clone_end is still zero, the FICLONE is still running.
> +		 * If the read completion occurred a safe duration after the
> +		 * start of the ioctl, then report that as an early read
> +		 * completion.
> +		 */
> +		pthread_mutex_lock(&lock);
> +		if (clone_end < program_start &&
> +		    read_completion > clone_start + EARLIEST_READ_MS) {
> +			pthread_mutex_unlock(&lock);
> +
> +			/* clone still running... */
> +			printf("finished %s read early at %.2fms\n",
> +					ri->descr,
> +					read_completion - program_start);
> +			fflush(stdout);
> +			outcome("finished read early");
> +			goto kill_done;
> +		}
> +		pthread_mutex_unlock(&lock);
> +
> +		sleep_ms(1);
> +		pos += copied;
> +	}
> +	if (copied < 0) {
> +		perror("read");
> +		goto kill_error;
> +	}
> +
> +	return NULL;
> +kill_error:
> +	outcome("reader error");
> +kill_done:
> +	kill(mypid, SIGTERM);
> +	return NULL;
> +}
> +
> +static void *clone_fn(void *arg)
> +{
> +	struct timespec now;
> +	struct cloneinfo *ci = arg;
> +	int ret;
> +
> +	/* Record the approximate start time of this thread */
> +	ret = clock_gettime(CLOCK_MONOTONIC, &now);
> +	if (ret) {
> +		perror("clock_gettime clone start");
> +		goto kill_error;
> +	}
> +	pthread_mutex_lock(&lock);
> +	clone_start = timespec_to_msec(&now);
> +	pthread_mutex_unlock(&lock);
> +
> +	printf("started clone at %.2fms\n", clone_start - program_start);
> +	fflush(stdout);
> +
> +	/* Kernel call, only killable with a fatal signal */
> +	ret = ioctl(ci->dst_fd, FICLONE, ci->src_fd);
> +	if (ret) {
> +		perror("FICLONE");
> +		goto kill_error;
> +	}
> +
> +	/* If the ioctl completes, note the completion time */
> +	ret = clock_gettime(CLOCK_MONOTONIC, &now);
> +	if (ret) {
> +		perror("clock_gettime clone end");
> +		goto kill_error;
> +	}
> +
> +	pthread_mutex_lock(&lock);
> +	clone_end = timespec_to_msec(&now);
> +	pthread_mutex_unlock(&lock);
> +
> +	printf("finished clone at %.2fms\n",
> +			clone_end - program_start);
> +	fflush(stdout);
> +
> +	/* Complain if we didn't take long enough to clone. */
> +	if (clone_end < clone_start + MINIMUM_CLONE_MS) {
> +		printf("clone did not take enough time (%.2fms)\n",
> +				clone_end - clone_start);
> +		fflush(stdout);
> +		outcome("clone too fast");
> +		goto kill_done;
> +	}
> +
> +	outcome("clone finished before any reads");
> +	goto kill_done;
> +
> +kill_error:
> +	outcome("clone error");
> +kill_done:
> +	kill(mypid, SIGTERM);
> +	return NULL;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct cloneinfo ci;
> +	struct readinfo bufio = { .descr = "buffered" };
> +	struct readinfo directio = { .descr = "directio" };
> +	struct timespec now;
> +	pthread_t clone_thread, bufio_thread, directio_thread;
> +	double clockres;
> +	int ret;
> +
> +	if (argc != 4) {
> +		printf("Usage: %s src_file dst_file outcome_file\n", argv[0]);
> +		return 1;
> +	}
> +
> +	mypid = getpid();
> +
> +	/* Check for sufficient clock precision. */
> +	ret = clock_getres(CLOCK_MONOTONIC, &now);
> +	if (ret) {
> +		perror("clock_getres MONOTONIC");
> +		return 2;
> +	}
> +	printf("CLOCK_MONOTONIC resolution: %llus %luns\n",
> +			(unsigned long long)now.tv_sec,
> +			(unsigned long)now.tv_nsec);
> +	fflush(stdout);
> +
> +	clockres = timespec_to_msec(&now);
> +	if (clockres > EARLIEST_READ_MS) {
> +		fprintf(stderr, "insufficient CLOCK_MONOTONIC resolution\n");
> +		return 2;
> +	}
> +
> +	/*
> +	 * Measure program start time since the MONOTONIC time base is not
> +	 * all that well defined.
> +	 */
> +	ret = clock_gettime(CLOCK_MONOTONIC, &now);
> +	if (ret) {
> +		perror("clock_gettime MONOTONIC");
> +		return 2;
> +	}
> +	if (now.tv_sec == 0 && now.tv_nsec == 0) {
> +		fprintf(stderr, "Uhoh, start time is zero?!\n");
> +		return 2;
> +	}
> +	program_start = timespec_to_msec(&now);
> +
> +	outcome_fp = fopen(argv[3], "w");
> +	if (!outcome_fp) {
> +		perror(argv[3]);
> +		return 2;
> +	}
> +
> +	/* Open separate fds for each thread */
> +	ci.src_fd = open(argv[1], O_RDONLY);
> +	if (ci.src_fd < 0) {
> +		perror(argv[1]);
> +		return 2;
> +	}
> +
> +	ci.dst_fd = open(argv[2], O_RDWR | O_CREAT, 0600);
> +	if (ci.dst_fd < 0) {
> +		perror(argv[2]);
> +		return 2;
> +	}
> +
> +	bufio.fd = open(argv[1], O_RDONLY);
> +	if (bufio.fd < 0) {
> +		perror(argv[1]);
> +		return 2;
> +	}
> +	bufio.blocksize = 37;
> +
> +	directio.fd = open(argv[1], O_RDONLY | O_DIRECT);
> +	if (directio.fd < 0) {
> +		perror(argv[1]);
> +		return 2;
> +	}
> +	directio.blocksize = 512;
> +
> +	/* Start threads */
> +	ret = pthread_create(&clone_thread, NULL, clone_fn, &ci);
> +	if (ret) {
> +		fprintf(stderr, "create clone thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	ret = pthread_create(&bufio_thread, NULL, reader_fn, &bufio);
> +	if (ret) {
> +		fprintf(stderr, "create bufio thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	ret = pthread_create(&directio_thread, NULL, reader_fn, &directio);
> +	if (ret) {
> +		fprintf(stderr, "create directio thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	/* Wait for threads */
> +	ret = pthread_join(clone_thread, NULL);
> +	if (ret) {
> +		fprintf(stderr, "join clone thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	ret = pthread_join(bufio_thread, NULL);
> +	if (ret) {
> +		fprintf(stderr, "join bufio thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	ret = pthread_join(directio_thread, NULL);
> +	if (ret) {
> +		fprintf(stderr, "join directio thread: %s\n", strerror(ret));
> +		return 2;
> +	}
> +
> +	printf("Program should have killed itself?\n");
> +	outcome("program failed to end early");
> +	return 0;
> +}
> diff --git a/tests/generic/1953 b/tests/generic/1953
> new file mode 100755
> index 0000000000..c14de7060d
> --- /dev/null
> +++ b/tests/generic/1953
> @@ -0,0 +1,75 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
> +#
> +# FS QA Test No. 1953
> +#
> +# Race file reads with a very slow reflink operation to see if the reads
> +# actually complete while the reflink is ongoing.  This is a functionality
> +# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
> +# concurrently".
> +#
> +. ./common/preamble
> +_begin_fstest auto clone punch
> +
> +# Import common functions.
> +. ./common/filter
> +. ./common/attr
> +. ./common/reflink
> +
> +# real QA test starts here
> +_require_scratch_reflink
> +_require_cp_reflink
> +_require_xfs_io_command "fpunch"
> +_require_test_program "punch-alternating"
> +_require_test_program "t_reflink_read_race"
> +_require_command "$TIMEOUT_PROG" timeout
> +
> +rm -f "$seqres.full"
> +
> +echo "Format and mount"
> +_scratch_mkfs > "$seqres.full" 2>&1
> +_scratch_mount >> "$seqres.full" 2>&1
> +
> +testdir="$SCRATCH_MNT/test-$seq"
> +mkdir "$testdir"
> +
> +calc_space() {
> +	blocks_needed=$(( 2 ** (fnr + 1) ))
> +	space_needed=$((blocks_needed * blksz * 5 / 4))
> +}
> +
> +# Figure out the number of blocks that we need to get the reflink runtime above
> +# 1 seconds
> +echo "Create a many-block file"
> +for ((fnr = 1; fnr < 40; fnr++)); do
> +	free_blocks=$(stat -f -c '%a' "$testdir")
> +	blksz=$(_get_file_block_size "$testdir")
> +	space_avail=$((free_blocks * blksz))
> +	calc_space
> +	test $space_needed -gt $space_avail && \
> +		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
> +
> +	off=$(( (2 ** fnr) * blksz))
> +	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
> +	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
> +
> +	$TIMEOUT_PROG 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
> +done
> +echo "fnr=$fnr" >> $seqres.full
> +
> +echo "Reflink the big file"
> +$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
> +	"$testdir/outcome" &>> $seqres.full
> +
> +if [ ! -e "$testdir/outcome" ]; then
> +	echo "Could not set up program"
> +elif grep -q "finished read early" "$testdir/outcome"; then
> +	echo "test completed successfully"
> +else
> +	cat "$testdir/outcome"
> +fi
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/1953.out b/tests/generic/1953.out
> new file mode 100644
> index 0000000000..8eaacb7ff0
> --- /dev/null
> +++ b/tests/generic/1953.out
> @@ -0,0 +1,6 @@
> +QA output created by 1953
> +Format and mount
> +Create a many-block file
> +Reflink the big file
> +Terminated
> +test completed successfully
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/1] generic: test reads racing with slow reflink operations
  2023-11-16 17:30 [PATCHSET v3 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
@ 2023-11-16 17:30 ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2023-11-16 17:30 UTC (permalink / raw)
  To: zlang, djwong
  Cc: Christoph Hellwig, Catherine Hoang, fstests, guan, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

XFS has a rather slow reflink operation.  While a reflink operation is
running, other programs cannot read the contents of the source file,
which is causing latency spikes.  Catherine Hoang wrote a patch to
permit reads, since the source file contents do not change.  This is a
functionality test for that patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Catherine Hoang <catherine.hoang@oracle.com>
Reviewed-by: Zorro Lang <zlang@redhat.com>
---
 configure.ac              |    1 
 include/builddefs.in      |    1 
 m4/package_libcdev.m4     |   13 ++
 src/Makefile              |    4 +
 src/t_reflink_read_race.c |  339 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1953        |   75 ++++++++++
 tests/generic/1953.out    |    6 +
 7 files changed, 439 insertions(+)
 create mode 100644 src/t_reflink_read_race.c
 create mode 100755 tests/generic/1953
 create mode 100644 tests/generic/1953.out


diff --git a/configure.ac b/configure.ac
index 4687d8a3c0..7333045330 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,6 +71,7 @@ AC_HAVE_BMV_OF_SHARED
 AC_HAVE_NFTW
 AC_HAVE_RLIMIT_NOFILE
 AC_HAVE_FIEXCHANGE
+AC_HAVE_FICLONE
 
 AC_CHECK_FUNCS([renameat2])
 AC_CHECK_FUNCS([reallocarray])
diff --git a/include/builddefs.in b/include/builddefs.in
index 969acf0da2..446350d5fc 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -73,6 +73,7 @@ HAVE_NFTW = @have_nftw@
 HAVE_BMV_OF_SHARED = @have_bmv_of_shared@
 HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@
 HAVE_FIEXCHANGE = @have_fiexchange@
+HAVE_FICLONE = @have_ficlone@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 7f73104405..0f4b8063f3 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -174,3 +174,16 @@ AC_DEFUN([AC_HAVE_FIEXCHANGE],
        AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
     AC_SUBST(have_fiexchange)
   ])
+
+# Check if we have FICLONE
+AC_DEFUN([AC_HAVE_FICLONE],
+  [ AC_MSG_CHECKING([for FICLONE])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+    ]], [[
+	 ioctl(-1, FICLONE, -1);
+    ]])],[have_ficlone=yes
+       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+    AC_SUBST(have_ficlone)
+  ])
diff --git a/src/Makefile b/src/Makefile
index 2815f919b2..49dd2f6c1e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -94,6 +94,10 @@ ifeq ($(HAVE_SEEK_DATA), yes)
  endif
 endif
 
+ifeq ($(HAVE_FICLONE),yes)
+     TARGETS += t_reflink_read_race
+endif
+
 CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS) fssum
 
diff --git a/src/t_reflink_read_race.c b/src/t_reflink_read_race.c
new file mode 100644
index 0000000000..00c19d7c05
--- /dev/null
+++ b/src/t_reflink_read_race.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Race reads with reflink to see if reads continue while we're cloning.
+ * Copyright 2023 Oracle.  All rights reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t mypid;
+
+static FILE *outcome_fp;
+
+/* Significant timestamps.  Initialized to zero */
+static double program_start, clone_start, clone_end;
+
+/* Coordinates access to timestamps */
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct readinfo {
+	int fd;
+	unsigned int blocksize;
+	char *descr;
+};
+
+struct cloneinfo {
+	int src_fd, dst_fd;
+};
+
+#define MSEC_PER_SEC	1000
+#define NSEC_PER_MSEC	1000000
+
+/*
+ * Assume that it shouldn't take longer than 100ms for the FICLONE ioctl to
+ * enter the kernel and take locks on an uncontended file.  This is also the
+ * required CLOCK_MONOTONIC granularity.
+ */
+#define EARLIEST_READ_MS	(MSEC_PER_SEC / 10)
+
+/*
+ * We want to be reasonably sure that the FICLONE takes long enough that any
+ * read initiated after clone operation locked the source file could have
+ * completed a disk IO before the clone finishes.  Therefore, we require that
+ * the clone operation must take at least 4x the maximum setup time.
+ */
+#define MINIMUM_CLONE_MS	(EARLIEST_READ_MS * 5)
+
+static double timespec_to_msec(const struct timespec *ts)
+{
+	return (ts->tv_sec * (double)MSEC_PER_SEC) +
+	       (ts->tv_nsec / (double)NSEC_PER_MSEC);
+}
+
+static void sleep_ms(unsigned int len)
+{
+	struct timespec time = {
+		.tv_nsec = len * NSEC_PER_MSEC,
+	};
+
+	nanosleep(&time, NULL);
+}
+
+static void outcome(const char *str)
+{
+	fprintf(outcome_fp, "%s\n", str);
+	fflush(outcome_fp);
+}
+
+static void *reader_fn(void *arg)
+{
+	struct timespec now;
+	struct readinfo *ri = arg;
+	char *buf = malloc(ri->blocksize);
+	loff_t pos = 0;
+	ssize_t copied;
+	int ret;
+
+	if (!buf) {
+		perror("alloc buffer");
+		goto kill_error;
+	}
+
+	/* Wait for the FICLONE to start */
+	pthread_mutex_lock(&lock);
+	while (clone_start < program_start) {
+		pthread_mutex_unlock(&lock);
+#ifdef DEBUG
+		printf("%s read waiting for clone to start; cs=%.2f ps=%.2f\n",
+				ri->descr, clone_start, program_start);
+		fflush(stdout);
+#endif
+		sleep_ms(1);
+		pthread_mutex_lock(&lock);
+	}
+	pthread_mutex_unlock(&lock);
+	sleep_ms(EARLIEST_READ_MS);
+
+	/* Read from the file... */
+	while ((copied = read(ri->fd, buf, ri->blocksize)) > 0) {
+		double read_completion;
+
+		ret = clock_gettime(CLOCK_MONOTONIC, &now);
+		if (ret) {
+			perror("clock_gettime");
+			goto kill_error;
+		}
+		read_completion = timespec_to_msec(&now);
+
+		/*
+		 * If clone_end is still zero, the FICLONE is still running.
+		 * If the read completion occurred a safe duration after the
+		 * start of the ioctl, then report that as an early read
+		 * completion.
+		 */
+		pthread_mutex_lock(&lock);
+		if (clone_end < program_start &&
+		    read_completion > clone_start + EARLIEST_READ_MS) {
+			pthread_mutex_unlock(&lock);
+
+			/* clone still running... */
+			printf("finished %s read early at %.2fms\n",
+					ri->descr,
+					read_completion - program_start);
+			fflush(stdout);
+			outcome("finished read early");
+			goto kill_done;
+		}
+		pthread_mutex_unlock(&lock);
+
+		sleep_ms(1);
+		pos += copied;
+	}
+	if (copied < 0) {
+		perror("read");
+		goto kill_error;
+	}
+
+	return NULL;
+kill_error:
+	outcome("reader error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+static void *clone_fn(void *arg)
+{
+	struct timespec now;
+	struct cloneinfo *ci = arg;
+	int ret;
+
+	/* Record the approximate start time of this thread */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone start");
+		goto kill_error;
+	}
+	pthread_mutex_lock(&lock);
+	clone_start = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("started clone at %.2fms\n", clone_start - program_start);
+	fflush(stdout);
+
+	/* Kernel call, only killable with a fatal signal */
+	ret = ioctl(ci->dst_fd, FICLONE, ci->src_fd);
+	if (ret) {
+		perror("FICLONE");
+		goto kill_error;
+	}
+
+	/* If the ioctl completes, note the completion time */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime clone end");
+		goto kill_error;
+	}
+
+	pthread_mutex_lock(&lock);
+	clone_end = timespec_to_msec(&now);
+	pthread_mutex_unlock(&lock);
+
+	printf("finished clone at %.2fms\n",
+			clone_end - program_start);
+	fflush(stdout);
+
+	/* Complain if we didn't take long enough to clone. */
+	if (clone_end < clone_start + MINIMUM_CLONE_MS) {
+		printf("clone did not take enough time (%.2fms)\n",
+				clone_end - clone_start);
+		fflush(stdout);
+		outcome("clone too fast");
+		goto kill_done;
+	}
+
+	outcome("clone finished before any reads");
+	goto kill_done;
+
+kill_error:
+	outcome("clone error");
+kill_done:
+	kill(mypid, SIGTERM);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct cloneinfo ci;
+	struct readinfo bufio = { .descr = "buffered" };
+	struct readinfo directio = { .descr = "directio" };
+	struct timespec now;
+	pthread_t clone_thread, bufio_thread, directio_thread;
+	double clockres;
+	int ret;
+
+	if (argc != 4) {
+		printf("Usage: %s src_file dst_file outcome_file\n", argv[0]);
+		return 1;
+	}
+
+	mypid = getpid();
+
+	/* Check for sufficient clock precision. */
+	ret = clock_getres(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_getres MONOTONIC");
+		return 2;
+	}
+	printf("CLOCK_MONOTONIC resolution: %llus %luns\n",
+			(unsigned long long)now.tv_sec,
+			(unsigned long)now.tv_nsec);
+	fflush(stdout);
+
+	clockres = timespec_to_msec(&now);
+	if (clockres > EARLIEST_READ_MS) {
+		fprintf(stderr, "insufficient CLOCK_MONOTONIC resolution\n");
+		return 2;
+	}
+
+	/*
+	 * Measure program start time since the MONOTONIC time base is not
+	 * all that well defined.
+	 */
+	ret = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (ret) {
+		perror("clock_gettime MONOTONIC");
+		return 2;
+	}
+	if (now.tv_sec == 0 && now.tv_nsec == 0) {
+		fprintf(stderr, "Uhoh, start time is zero?!\n");
+		return 2;
+	}
+	program_start = timespec_to_msec(&now);
+
+	outcome_fp = fopen(argv[3], "w");
+	if (!outcome_fp) {
+		perror(argv[3]);
+		return 2;
+	}
+
+	/* Open separate fds for each thread */
+	ci.src_fd = open(argv[1], O_RDONLY);
+	if (ci.src_fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+
+	ci.dst_fd = open(argv[2], O_RDWR | O_CREAT, 0600);
+	if (ci.dst_fd < 0) {
+		perror(argv[2]);
+		return 2;
+	}
+
+	bufio.fd = open(argv[1], O_RDONLY);
+	if (bufio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	bufio.blocksize = 37;
+
+	directio.fd = open(argv[1], O_RDONLY | O_DIRECT);
+	if (directio.fd < 0) {
+		perror(argv[1]);
+		return 2;
+	}
+	directio.blocksize = 512;
+
+	/* Start threads */
+	ret = pthread_create(&clone_thread, NULL, clone_fn, &ci);
+	if (ret) {
+		fprintf(stderr, "create clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&bufio_thread, NULL, reader_fn, &bufio);
+	if (ret) {
+		fprintf(stderr, "create bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_create(&directio_thread, NULL, reader_fn, &directio);
+	if (ret) {
+		fprintf(stderr, "create directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	/* Wait for threads */
+	ret = pthread_join(clone_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join clone thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(bufio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join bufio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	ret = pthread_join(directio_thread, NULL);
+	if (ret) {
+		fprintf(stderr, "join directio thread: %s\n", strerror(ret));
+		return 2;
+	}
+
+	printf("Program should have killed itself?\n");
+	outcome("program failed to end early");
+	return 0;
+}
diff --git a/tests/generic/1953 b/tests/generic/1953
new file mode 100755
index 0000000000..c14de7060d
--- /dev/null
+++ b/tests/generic/1953
@@ -0,0 +1,75 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023, Oracle and/or its affiliates.  All Rights Reserved.
+#
+# FS QA Test No. 1953
+#
+# Race file reads with a very slow reflink operation to see if the reads
+# actually complete while the reflink is ongoing.  This is a functionality
+# test for XFS commit f3ba4762fa56 "xfs: allow read IO and FICLONE to run
+# concurrently".
+#
+. ./common/preamble
+_begin_fstest auto clone punch
+
+# Import common functions.
+. ./common/filter
+. ./common/attr
+. ./common/reflink
+
+# real QA test starts here
+_require_scratch_reflink
+_require_cp_reflink
+_require_xfs_io_command "fpunch"
+_require_test_program "punch-alternating"
+_require_test_program "t_reflink_read_race"
+_require_command "$TIMEOUT_PROG" timeout
+
+rm -f "$seqres.full"
+
+echo "Format and mount"
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount >> "$seqres.full" 2>&1
+
+testdir="$SCRATCH_MNT/test-$seq"
+mkdir "$testdir"
+
+calc_space() {
+	blocks_needed=$(( 2 ** (fnr + 1) ))
+	space_needed=$((blocks_needed * blksz * 5 / 4))
+}
+
+# Figure out the number of blocks that we need to get the reflink runtime above
+# 1 seconds
+echo "Create a many-block file"
+for ((fnr = 1; fnr < 40; fnr++)); do
+	free_blocks=$(stat -f -c '%a' "$testdir")
+	blksz=$(_get_file_block_size "$testdir")
+	space_avail=$((free_blocks * blksz))
+	calc_space
+	test $space_needed -gt $space_avail && \
+		_notrun "Insufficient space for stress test; would only create $blocks_needed extents."
+
+	off=$(( (2 ** fnr) * blksz))
+	$XFS_IO_PROG -f -c "pwrite -S 0x61 -b 4194304 $off $off" "$testdir/file1" >> "$seqres.full"
+	"$here/src/punch-alternating" "$testdir/file1" >> "$seqres.full"
+
+	$TIMEOUT_PROG 1s cp --reflink=always "$testdir/file1" "$testdir/garbage" || break
+done
+echo "fnr=$fnr" >> $seqres.full
+
+echo "Reflink the big file"
+$here/src/t_reflink_read_race "$testdir/file1" "$testdir/file2" \
+	"$testdir/outcome" &>> $seqres.full
+
+if [ ! -e "$testdir/outcome" ]; then
+	echo "Could not set up program"
+elif grep -q "finished read early" "$testdir/outcome"; then
+	echo "test completed successfully"
+else
+	cat "$testdir/outcome"
+fi
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/1953.out b/tests/generic/1953.out
new file mode 100644
index 0000000000..8eaacb7ff0
--- /dev/null
+++ b/tests/generic/1953.out
@@ -0,0 +1,6 @@
+QA output created by 1953
+Format and mount
+Create a many-block file
+Reflink the big file
+Terminated
+test completed successfully


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-11-16 17:30 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-08 21:29 [PATCHSET 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
2023-11-08 21:29 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
2023-11-10 14:40   ` Zorro Lang
2023-11-11 22:20     ` Darrick J. Wong
2023-11-11 22:22   ` [PATCH v2 " Darrick J. Wong
2023-11-12  2:42     ` Zorro Lang
2023-11-13 16:46       ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2023-11-13 17:08 [PATCHSET v2 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
2023-11-13 17:08 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong
2023-11-16  7:31   ` Zorro Lang
2023-11-16 17:30 [PATCHSET v3 0/1] fstests: updates for Linux 6.7 Darrick J. Wong
2023-11-16 17:30 ` [PATCH 1/1] generic: test reads racing with slow reflink operations Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox