public inbox for fstests@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] generic/208: do not leave pending async ios behind during process exit
@ 2016-05-19 22:19 Tahsin Erdogan
  2016-05-20 14:40 ` Jeff Moyer
  2016-06-11 15:28 ` Eryu Guan
  0 siblings, 2 replies; 6+ messages in thread
From: Tahsin Erdogan @ 2016-05-19 22:19 UTC (permalink / raw)
  To: fstests; +Cc: Theodore Ts'o, Tahsin Erdogan

An inflight async io could keep the filesystem busy and cause umount
-EBUSY errors after process exit. When the async io process is killed
forcibly with SIGKILL, it doesn't get a chance to wait for ios to
complete.

With this patch, instead of killing the children processes, we let them
exit on their own after the timeout expires.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
 1 file changed, 54 insertions(+), 35 deletions(-)

diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
index 24f3e3c..474a83c 100644
--- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
+++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c
@@ -62,6 +62,26 @@ static unsigned char buf[GINORMOUS] __attribute((aligned (4096)));
 	exit(1);		\
 } while (0)
 
+
+volatile int timeout_reached = 0;
+
+static void alarm_handler(int signum)
+{
+	timeout_reached = 1;
+}
+
+void set_timeout(unsigned int seconds)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = alarm_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGALRM, &sa, NULL) == -1)
+		fail("sigaction: %d\n", errno);
+
+	alarm(seconds);
+}
+
 void spin_dio(int fd)
 {
 	io_context_t ctx;
@@ -70,18 +90,28 @@ void spin_dio(int fd)
 	struct io_event event;
 	int ret;
 
+	set_timeout(SECONDS);
+
         io_prep_pwrite(&iocb, fd, buf, GINORMOUS, 0);
 
 	ret = io_queue_init(1, &ctx);
 	if (ret)
 		fail("io_queue_init returned %d", ret);
 
-	while (1) {
+	while (!timeout_reached) {
 		ret = io_submit(ctx, 1, iocbs);
 		if (ret != 1)
 			fail("io_submit returned %d instead of 1", ret);
 
-		ret = io_getevents(ctx, 1, 1, &event, NULL);
+		/*
+		 * Retry io_getevents() if it gets interrupted by alarm signal.
+		 * We don't want to leave behind uncompleted async io requests
+		 * that could cause umount -EBUSY errors.
+		 */
+		do {
+			ret = io_getevents(ctx, 1, 1, &event, NULL);
+		} while (ret == -EINTR && timeout_reached);
+
 		if (ret != 1)
 			fail("io_getevents returned %d instead of 1", ret);
 
@@ -99,17 +129,15 @@ void spin_buffered(int fd)
 {
 	int ret;
 
-	while (1) {
+	set_timeout(SECONDS);
+
+	while (!timeout_reached) {
 		ret = pwrite(fd, buf, GINORMOUS, 0);
 		if (ret != GINORMOUS)
 			fail("buffered write returned %d", ret);
 	}
 }
 
-static void alarm_handler(int signum)
-{
-}
-
 int main(int argc, char **argv)
 {
 	pid_t buffered_pid;
@@ -118,7 +146,8 @@ int main(int argc, char **argv)
 	int fd;
 	int fd2;
 	int status;
-	struct sigaction sa;
+	int dio_exit;
+	int buffered_exit;
 
 	if (argc != 2)
 		fail("only arg should be file name");
@@ -152,38 +181,28 @@ int main(int argc, char **argv)
 		exit(0);
 	}
 
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = alarm_handler;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(SIGALRM, &sa, NULL) == -1)
-		fail("sigaction: %d\n", errno);
 
-	alarm(SECONDS);
+	/* Child processes will exit on their own when timeout expires. */
+	pid = waitpid(dio_pid, &status, 0);
+	printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
 
-	pid = wait(&status);
-	if (pid < 0 && errno == EINTR) {
-		/* if we timed out then we're done */
-		kill(buffered_pid, SIGKILL);
-		kill(dio_pid, SIGKILL);
+	dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
+			WEXITSTATUS(status) : 1;
 
-		waitpid(buffered_pid, NULL, 0);
-		waitpid(dio_pid, NULL, 0);
+	pid = waitpid(buffered_pid, &status, 0);
+	printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);
 
-		printf("ran for %d seconds without error, passing\n", SECONDS);
-		exit(0);
-	}
+	buffered_exit = (pid == buffered_pid && WIFEXITED(status)) ?
+			WEXITSTATUS(status) : 1;
 
-	if (pid == dio_pid) {
-		kill(buffered_pid, SIGKILL);
-		waitpid(buffered_pid, NULL, 0);
-	} else {
-		kill(dio_pid, SIGKILL);
-		waitpid(dio_pid, NULL, 0);
+	if (dio_exit || buffered_exit) {
+		/*
+		 * pass on the child's pass/fail return code or fail if the
+		 * child didn't exit cleanly.
+		 */
+		exit(dio_exit || buffered_exit);
 	}
 
-	/* 
-	 * pass on the child's pass/fail return code or fail if the child 
-	 * didn't exit cleanly.
-	 */
-	exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1);
+	printf("ran for %d seconds without error, passing\n", SECONDS);
+	exit(0);
 }
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
  2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
@ 2016-05-20 14:40 ` Jeff Moyer
  2016-05-20 17:00   ` Tahsin Erdogan
  2016-06-11 15:28 ` Eryu Guan
  1 sibling, 1 reply; 6+ messages in thread
From: Jeff Moyer @ 2016-05-20 14:40 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o

Tahsin Erdogan <tahsin@google.com> writes:

> An inflight async io could keep the filesystem busy and cause umount
> -EBUSY errors after process exit. When the async io process is killed
> forcibly with SIGKILL, it doesn't get a chance to wait for ios to
> complete.

exit_aio will wait for all outstanding I/O before it returns, so you
shouldn't run into this.

-Jeff

>
> With this patch, instead of killing the children processes, we let them
> exit on their own after the timeout expires.
>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
>  src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
>  1 file changed, 54 insertions(+), 35 deletions(-)
>
> diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> index 24f3e3c..474a83c 100644
> --- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
> +++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> @@ -62,6 +62,26 @@ static unsigned char buf[GINORMOUS] __attribute((aligned (4096)));
>  	exit(1);		\
>  } while (0)
>  
> +
> +volatile int timeout_reached = 0;
> +
> +static void alarm_handler(int signum)
> +{
> +	timeout_reached = 1;
> +}
> +
> +void set_timeout(unsigned int seconds)
> +{
> +	struct sigaction sa;
> +	memset(&sa, 0, sizeof(sa));
> +	sa.sa_handler = alarm_handler;
> +	sigemptyset(&sa.sa_mask);
> +	if (sigaction(SIGALRM, &sa, NULL) == -1)
> +		fail("sigaction: %d\n", errno);
> +
> +	alarm(seconds);
> +}
> +
>  void spin_dio(int fd)
>  {
>  	io_context_t ctx;
> @@ -70,18 +90,28 @@ void spin_dio(int fd)
>  	struct io_event event;
>  	int ret;
>  
> +	set_timeout(SECONDS);
> +
>          io_prep_pwrite(&iocb, fd, buf, GINORMOUS, 0);
>  
>  	ret = io_queue_init(1, &ctx);
>  	if (ret)
>  		fail("io_queue_init returned %d", ret);
>  
> -	while (1) {
> +	while (!timeout_reached) {
>  		ret = io_submit(ctx, 1, iocbs);
>  		if (ret != 1)
>  			fail("io_submit returned %d instead of 1", ret);
>  
> -		ret = io_getevents(ctx, 1, 1, &event, NULL);
> +		/*
> +		 * Retry io_getevents() if it gets interrupted by alarm signal.
> +		 * We don't want to leave behind uncompleted async io requests
> +		 * that could cause umount -EBUSY errors.
> +		 */
> +		do {
> +			ret = io_getevents(ctx, 1, 1, &event, NULL);
> +		} while (ret == -EINTR && timeout_reached);
> +
>  		if (ret != 1)
>  			fail("io_getevents returned %d instead of 1", ret);
>  
> @@ -99,17 +129,15 @@ void spin_buffered(int fd)
>  {
>  	int ret;
>  
> -	while (1) {
> +	set_timeout(SECONDS);
> +
> +	while (!timeout_reached) {
>  		ret = pwrite(fd, buf, GINORMOUS, 0);
>  		if (ret != GINORMOUS)
>  			fail("buffered write returned %d", ret);
>  	}
>  }
>  
> -static void alarm_handler(int signum)
> -{
> -}
> -
>  int main(int argc, char **argv)
>  {
>  	pid_t buffered_pid;
> @@ -118,7 +146,8 @@ int main(int argc, char **argv)
>  	int fd;
>  	int fd2;
>  	int status;
> -	struct sigaction sa;
> +	int dio_exit;
> +	int buffered_exit;
>  
>  	if (argc != 2)
>  		fail("only arg should be file name");
> @@ -152,38 +181,28 @@ int main(int argc, char **argv)
>  		exit(0);
>  	}
>  
> -	memset(&sa, 0, sizeof(sa));
> -	sa.sa_handler = alarm_handler;
> -	sigemptyset(&sa.sa_mask);
> -	if (sigaction(SIGALRM, &sa, NULL) == -1)
> -		fail("sigaction: %d\n", errno);
>  
> -	alarm(SECONDS);
> +	/* Child processes will exit on their own when timeout expires. */
> +	pid = waitpid(dio_pid, &status, 0);
> +	printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
>  
> -	pid = wait(&status);
> -	if (pid < 0 && errno == EINTR) {
> -		/* if we timed out then we're done */
> -		kill(buffered_pid, SIGKILL);
> -		kill(dio_pid, SIGKILL);
> +	dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
> +			WEXITSTATUS(status) : 1;
>  
> -		waitpid(buffered_pid, NULL, 0);
> -		waitpid(dio_pid, NULL, 0);
> +	pid = waitpid(buffered_pid, &status, 0);
> +	printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);
>  
> -		printf("ran for %d seconds without error, passing\n", SECONDS);
> -		exit(0);
> -	}
> +	buffered_exit = (pid == buffered_pid && WIFEXITED(status)) ?
> +			WEXITSTATUS(status) : 1;
>  
> -	if (pid == dio_pid) {
> -		kill(buffered_pid, SIGKILL);
> -		waitpid(buffered_pid, NULL, 0);
> -	} else {
> -		kill(dio_pid, SIGKILL);
> -		waitpid(dio_pid, NULL, 0);
> +	if (dio_exit || buffered_exit) {
> +		/*
> +		 * pass on the child's pass/fail return code or fail if the
> +		 * child didn't exit cleanly.
> +		 */
> +		exit(dio_exit || buffered_exit);
>  	}
>  
> -	/* 
> -	 * pass on the child's pass/fail return code or fail if the child 
> -	 * didn't exit cleanly.
> -	 */
> -	exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1);
> +	printf("ran for %d seconds without error, passing\n", SECONDS);
> +	exit(0);
>  }

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
  2016-05-20 14:40 ` Jeff Moyer
@ 2016-05-20 17:00   ` Tahsin Erdogan
  2016-05-20 17:21     ` Jeff Moyer
  0 siblings, 1 reply; 6+ messages in thread
From: Tahsin Erdogan @ 2016-05-20 17:00 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: fstests, Theodore Ts'o

On Fri, May 20, 2016 at 7:40 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
> exit_aio will wait for all outstanding I/O before it returns, so you
> shouldn't run into this.

Thanks Jeff.

I was seeing this when running generic/208 on an older kernel. I see
that it got fixed in more recent kernels:

commit 6098b45b32e6baeacc04790773ced9340601d511("aio: block exit_aio()
until all context requests are completed")

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
  2016-05-20 17:00   ` Tahsin Erdogan
@ 2016-05-20 17:21     ` Jeff Moyer
  0 siblings, 0 replies; 6+ messages in thread
From: Jeff Moyer @ 2016-05-20 17:21 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o

Tahsin Erdogan <tahsin@google.com> writes:

> On Fri, May 20, 2016 at 7:40 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
>> exit_aio will wait for all outstanding I/O before it returns, so you
>> shouldn't run into this.
>
> Thanks Jeff.
>
> I was seeing this when running generic/208 on an older kernel. I see
> that it got fixed in more recent kernels:
>
> commit 6098b45b32e6baeacc04790773ced9340601d511("aio: block exit_aio()
> until all context requests are completed")

OK, phew!  Thanks for the follow-up!

-Jeff

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
  2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
  2016-05-20 14:40 ` Jeff Moyer
@ 2016-06-11 15:28 ` Eryu Guan
  2016-06-11 19:02   ` Tahsin Erdogan
  1 sibling, 1 reply; 6+ messages in thread
From: Eryu Guan @ 2016-06-11 15:28 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o

On Thu, May 19, 2016 at 03:19:40PM -0700, Tahsin Erdogan wrote:
> An inflight async io could keep the filesystem busy and cause umount
> -EBUSY errors after process exit. When the async io process is killed
> forcibly with SIGKILL, it doesn't get a chance to wait for ios to
> complete.
> 
> With this patch, instead of killing the children processes, we let them
> exit on their own after the timeout expires.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
>  src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
>  1 file changed, 54 insertions(+), 35 deletions(-)
> 
> diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> index 24f3e3c..474a83c 100644
> --- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
> +++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c

[snip]

> @@ -152,38 +181,28 @@ int main(int argc, char **argv)
>  		exit(0);
>  	}
>  
> -	memset(&sa, 0, sizeof(sa));
> -	sa.sa_handler = alarm_handler;
> -	sigemptyset(&sa.sa_mask);
> -	if (sigaction(SIGALRM, &sa, NULL) == -1)
> -		fail("sigaction: %d\n", errno);
>  
> -	alarm(SECONDS);
> +	/* Child processes will exit on their own when timeout expires. */
> +	pid = waitpid(dio_pid, &status, 0);
> +	printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
>  
> -	pid = wait(&status);
> -	if (pid < 0 && errno == EINTR) {
> -		/* if we timed out then we're done */
> -		kill(buffered_pid, SIGKILL);
> -		kill(dio_pid, SIGKILL);
> +	dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
> +			WEXITSTATUS(status) : 1;
>  
> -		waitpid(buffered_pid, NULL, 0);
> -		waitpid(dio_pid, NULL, 0);
> +	pid = waitpid(buffered_pid, &status, 0);
> +	printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);

These two "printf" breaks generic/208

--- tests/generic/208.out       2016-06-02 12:38:40.111000000 +0800
+++ /root/workspace/xfstests/results//ext3_2k/generic/208.out.bad 2016-06-11 23:10:01.578000000 +0800
@@ -1,2 +1,4 @@
 QA output created by 208
 +dio_pid 17541, pid 17541, status 0
 +buffered_pid 17540, pid 17540, status 0
  ran for 200 seconds without error, passing

Thanks,
Eryu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
  2016-06-11 15:28 ` Eryu Guan
@ 2016-06-11 19:02   ` Tahsin Erdogan
  0 siblings, 0 replies; 6+ messages in thread
From: Tahsin Erdogan @ 2016-06-11 19:02 UTC (permalink / raw)
  To: Eryu Guan; +Cc: fstests, Theodore Ts'o

> These two "printf" breaks generic/208

You're right, 2 extra printf lines were left in patch by mistake.
I have abandoned this patch since latest kernels do not have this
problem, but in case someone needs this patch for an old kernel,
removing 2 extra printfs should fix the problem.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-06-11 19:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
2016-05-20 14:40 ` Jeff Moyer
2016-05-20 17:00   ` Tahsin Erdogan
2016-05-20 17:21     ` Jeff Moyer
2016-06-11 15:28 ` Eryu Guan
2016-06-11 19:02   ` Tahsin Erdogan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox