* [PATCH] generic/208: do not leave pending async ios behind during process exit
@ 2016-05-19 22:19 Tahsin Erdogan
2016-05-20 14:40 ` Jeff Moyer
2016-06-11 15:28 ` Eryu Guan
0 siblings, 2 replies; 6+ messages in thread
From: Tahsin Erdogan @ 2016-05-19 22:19 UTC (permalink / raw)
To: fstests; +Cc: Theodore Ts'o, Tahsin Erdogan
An inflight async io could keep the filesystem busy and cause umount
-EBUSY errors after process exit. When the async io process is killed
forcibly with SIGKILL, it doesn't get a chance to wait for ios to
complete.
With this patch, instead of killing the children processes, we let them
exit on their own after the timeout expires.
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
1 file changed, 54 insertions(+), 35 deletions(-)
diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
index 24f3e3c..474a83c 100644
--- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
+++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c
@@ -62,6 +62,26 @@ static unsigned char buf[GINORMOUS] __attribute((aligned (4096)));
exit(1); \
} while (0)
+
+volatile int timeout_reached = 0;
+
+static void alarm_handler(int signum)
+{
+ timeout_reached = 1;
+}
+
+void set_timeout(unsigned int seconds)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = alarm_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGALRM, &sa, NULL) == -1)
+ fail("sigaction: %d\n", errno);
+
+ alarm(seconds);
+}
+
void spin_dio(int fd)
{
io_context_t ctx;
@@ -70,18 +90,28 @@ void spin_dio(int fd)
struct io_event event;
int ret;
+ set_timeout(SECONDS);
+
io_prep_pwrite(&iocb, fd, buf, GINORMOUS, 0);
ret = io_queue_init(1, &ctx);
if (ret)
fail("io_queue_init returned %d", ret);
- while (1) {
+ while (!timeout_reached) {
ret = io_submit(ctx, 1, iocbs);
if (ret != 1)
fail("io_submit returned %d instead of 1", ret);
- ret = io_getevents(ctx, 1, 1, &event, NULL);
+ /*
+ * Retry io_getevents() if it gets interrupted by alarm signal.
+ * We don't want to leave behind uncompleted async io requests
+ * that could cause umount -EBUSY errors.
+ */
+ do {
+ ret = io_getevents(ctx, 1, 1, &event, NULL);
+ } while (ret == -EINTR && timeout_reached);
+
if (ret != 1)
fail("io_getevents returned %d instead of 1", ret);
@@ -99,17 +129,15 @@ void spin_buffered(int fd)
{
int ret;
- while (1) {
+ set_timeout(SECONDS);
+
+ while (!timeout_reached) {
ret = pwrite(fd, buf, GINORMOUS, 0);
if (ret != GINORMOUS)
fail("buffered write returned %d", ret);
}
}
-static void alarm_handler(int signum)
-{
-}
-
int main(int argc, char **argv)
{
pid_t buffered_pid;
@@ -118,7 +146,8 @@ int main(int argc, char **argv)
int fd;
int fd2;
int status;
- struct sigaction sa;
+ int dio_exit;
+ int buffered_exit;
if (argc != 2)
fail("only arg should be file name");
@@ -152,38 +181,28 @@ int main(int argc, char **argv)
exit(0);
}
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = alarm_handler;
- sigemptyset(&sa.sa_mask);
- if (sigaction(SIGALRM, &sa, NULL) == -1)
- fail("sigaction: %d\n", errno);
- alarm(SECONDS);
+ /* Child processes will exit on their own when timeout expires. */
+ pid = waitpid(dio_pid, &status, 0);
+ printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
- pid = wait(&status);
- if (pid < 0 && errno == EINTR) {
- /* if we timed out then we're done */
- kill(buffered_pid, SIGKILL);
- kill(dio_pid, SIGKILL);
+ dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
+ WEXITSTATUS(status) : 1;
- waitpid(buffered_pid, NULL, 0);
- waitpid(dio_pid, NULL, 0);
+ pid = waitpid(buffered_pid, &status, 0);
+ printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);
- printf("ran for %d seconds without error, passing\n", SECONDS);
- exit(0);
- }
+ buffered_exit = (pid == buffered_pid && WIFEXITED(status)) ?
+ WEXITSTATUS(status) : 1;
- if (pid == dio_pid) {
- kill(buffered_pid, SIGKILL);
- waitpid(buffered_pid, NULL, 0);
- } else {
- kill(dio_pid, SIGKILL);
- waitpid(dio_pid, NULL, 0);
+ if (dio_exit || buffered_exit) {
+ /*
+ * pass on the child's pass/fail return code or fail if the
+ * child didn't exit cleanly.
+ */
+ exit(dio_exit || buffered_exit);
}
- /*
- * pass on the child's pass/fail return code or fail if the child
- * didn't exit cleanly.
- */
- exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1);
+ printf("ran for %d seconds without error, passing\n", SECONDS);
+ exit(0);
}
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
@ 2016-05-20 14:40 ` Jeff Moyer
2016-05-20 17:00 ` Tahsin Erdogan
2016-06-11 15:28 ` Eryu Guan
1 sibling, 1 reply; 6+ messages in thread
From: Jeff Moyer @ 2016-05-20 14:40 UTC (permalink / raw)
To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o
Tahsin Erdogan <tahsin@google.com> writes:
> An inflight async io could keep the filesystem busy and cause umount
> -EBUSY errors after process exit. When the async io process is killed
> forcibly with SIGKILL, it doesn't get a chance to wait for ios to
> complete.
exit_aio will wait for all outstanding I/O before it returns, so you
shouldn't run into this.
-Jeff
>
> With this patch, instead of killing the children processes, we let them
> exit on their own after the timeout expires.
>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
> 1 file changed, 54 insertions(+), 35 deletions(-)
>
> diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> index 24f3e3c..474a83c 100644
> --- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
> +++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> @@ -62,6 +62,26 @@ static unsigned char buf[GINORMOUS] __attribute((aligned (4096)));
> exit(1); \
> } while (0)
>
> +
> +volatile int timeout_reached = 0;
> +
> +static void alarm_handler(int signum)
> +{
> + timeout_reached = 1;
> +}
> +
> +void set_timeout(unsigned int seconds)
> +{
> + struct sigaction sa;
> + memset(&sa, 0, sizeof(sa));
> + sa.sa_handler = alarm_handler;
> + sigemptyset(&sa.sa_mask);
> + if (sigaction(SIGALRM, &sa, NULL) == -1)
> + fail("sigaction: %d\n", errno);
> +
> + alarm(seconds);
> +}
> +
> void spin_dio(int fd)
> {
> io_context_t ctx;
> @@ -70,18 +90,28 @@ void spin_dio(int fd)
> struct io_event event;
> int ret;
>
> + set_timeout(SECONDS);
> +
> io_prep_pwrite(&iocb, fd, buf, GINORMOUS, 0);
>
> ret = io_queue_init(1, &ctx);
> if (ret)
> fail("io_queue_init returned %d", ret);
>
> - while (1) {
> + while (!timeout_reached) {
> ret = io_submit(ctx, 1, iocbs);
> if (ret != 1)
> fail("io_submit returned %d instead of 1", ret);
>
> - ret = io_getevents(ctx, 1, 1, &event, NULL);
> + /*
> + * Retry io_getevents() if it gets interrupted by alarm signal.
> + * We don't want to leave behind uncompleted async io requests
> + * that could cause umount -EBUSY errors.
> + */
> + do {
> + ret = io_getevents(ctx, 1, 1, &event, NULL);
> + } while (ret == -EINTR && timeout_reached);
> +
> if (ret != 1)
> fail("io_getevents returned %d instead of 1", ret);
>
> @@ -99,17 +129,15 @@ void spin_buffered(int fd)
> {
> int ret;
>
> - while (1) {
> + set_timeout(SECONDS);
> +
> + while (!timeout_reached) {
> ret = pwrite(fd, buf, GINORMOUS, 0);
> if (ret != GINORMOUS)
> fail("buffered write returned %d", ret);
> }
> }
>
> -static void alarm_handler(int signum)
> -{
> -}
> -
> int main(int argc, char **argv)
> {
> pid_t buffered_pid;
> @@ -118,7 +146,8 @@ int main(int argc, char **argv)
> int fd;
> int fd2;
> int status;
> - struct sigaction sa;
> + int dio_exit;
> + int buffered_exit;
>
> if (argc != 2)
> fail("only arg should be file name");
> @@ -152,38 +181,28 @@ int main(int argc, char **argv)
> exit(0);
> }
>
> - memset(&sa, 0, sizeof(sa));
> - sa.sa_handler = alarm_handler;
> - sigemptyset(&sa.sa_mask);
> - if (sigaction(SIGALRM, &sa, NULL) == -1)
> - fail("sigaction: %d\n", errno);
>
> - alarm(SECONDS);
> + /* Child processes will exit on their own when timeout expires. */
> + pid = waitpid(dio_pid, &status, 0);
> + printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
>
> - pid = wait(&status);
> - if (pid < 0 && errno == EINTR) {
> - /* if we timed out then we're done */
> - kill(buffered_pid, SIGKILL);
> - kill(dio_pid, SIGKILL);
> + dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
> + WEXITSTATUS(status) : 1;
>
> - waitpid(buffered_pid, NULL, 0);
> - waitpid(dio_pid, NULL, 0);
> + pid = waitpid(buffered_pid, &status, 0);
> + printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);
>
> - printf("ran for %d seconds without error, passing\n", SECONDS);
> - exit(0);
> - }
> + buffered_exit = (pid == buffered_pid && WIFEXITED(status)) ?
> + WEXITSTATUS(status) : 1;
>
> - if (pid == dio_pid) {
> - kill(buffered_pid, SIGKILL);
> - waitpid(buffered_pid, NULL, 0);
> - } else {
> - kill(dio_pid, SIGKILL);
> - waitpid(dio_pid, NULL, 0);
> + if (dio_exit || buffered_exit) {
> + /*
> + * pass on the child's pass/fail return code or fail if the
> + * child didn't exit cleanly.
> + */
> + exit(dio_exit || buffered_exit);
> }
>
> - /*
> - * pass on the child's pass/fail return code or fail if the child
> - * didn't exit cleanly.
> - */
> - exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1);
> + printf("ran for %d seconds without error, passing\n", SECONDS);
> + exit(0);
> }
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
2016-05-20 14:40 ` Jeff Moyer
@ 2016-05-20 17:00 ` Tahsin Erdogan
2016-05-20 17:21 ` Jeff Moyer
0 siblings, 1 reply; 6+ messages in thread
From: Tahsin Erdogan @ 2016-05-20 17:00 UTC (permalink / raw)
To: Jeff Moyer; +Cc: fstests, Theodore Ts'o
On Fri, May 20, 2016 at 7:40 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
> exit_aio will wait for all outstanding I/O before it returns, so you
> shouldn't run into this.
Thanks Jeff.
I was seeing this when running generic/208 on an older kernel. I see
that it got fixed in more recent kernels:
commit 6098b45b32e6baeacc04790773ced9340601d511("aio: block exit_aio()
until all context requests are completed")
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
2016-05-20 17:00 ` Tahsin Erdogan
@ 2016-05-20 17:21 ` Jeff Moyer
0 siblings, 0 replies; 6+ messages in thread
From: Jeff Moyer @ 2016-05-20 17:21 UTC (permalink / raw)
To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o
Tahsin Erdogan <tahsin@google.com> writes:
> On Fri, May 20, 2016 at 7:40 AM, Jeff Moyer <jmoyer@redhat.com> wrote:
>> exit_aio will wait for all outstanding I/O before it returns, so you
>> shouldn't run into this.
>
> Thanks Jeff.
>
> I was seeing this when running generic/208 on an older kernel. I see
> that it got fixed in more recent kernels:
>
> commit 6098b45b32e6baeacc04790773ced9340601d511("aio: block exit_aio()
> until all context requests are completed")
OK, phew! Thanks for the follow-up!
-Jeff
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
2016-05-20 14:40 ` Jeff Moyer
@ 2016-06-11 15:28 ` Eryu Guan
2016-06-11 19:02 ` Tahsin Erdogan
1 sibling, 1 reply; 6+ messages in thread
From: Eryu Guan @ 2016-06-11 15:28 UTC (permalink / raw)
To: Tahsin Erdogan; +Cc: fstests, Theodore Ts'o
On Thu, May 19, 2016 at 03:19:40PM -0700, Tahsin Erdogan wrote:
> An inflight async io could keep the filesystem busy and cause umount
> -EBUSY errors after process exit. When the async io process is killed
> forcibly with SIGKILL, it doesn't get a chance to wait for ios to
> complete.
>
> With this patch, instead of killing the children processes, we let them
> exit on their own after the timeout expires.
>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> src/aio-dio-regress/aio-dio-invalidate-failure.c | 89 ++++++++++++++----------
> 1 file changed, 54 insertions(+), 35 deletions(-)
>
> diff --git a/src/aio-dio-regress/aio-dio-invalidate-failure.c b/src/aio-dio-regress/aio-dio-invalidate-failure.c
> index 24f3e3c..474a83c 100644
> --- a/src/aio-dio-regress/aio-dio-invalidate-failure.c
> +++ b/src/aio-dio-regress/aio-dio-invalidate-failure.c
[snip]
> @@ -152,38 +181,28 @@ int main(int argc, char **argv)
> exit(0);
> }
>
> - memset(&sa, 0, sizeof(sa));
> - sa.sa_handler = alarm_handler;
> - sigemptyset(&sa.sa_mask);
> - if (sigaction(SIGALRM, &sa, NULL) == -1)
> - fail("sigaction: %d\n", errno);
>
> - alarm(SECONDS);
> + /* Child processes will exit on their own when timeout expires. */
> + pid = waitpid(dio_pid, &status, 0);
> + printf("dio_pid %d, pid %d, status %#x\n", dio_pid, pid, status);
>
> - pid = wait(&status);
> - if (pid < 0 && errno == EINTR) {
> - /* if we timed out then we're done */
> - kill(buffered_pid, SIGKILL);
> - kill(dio_pid, SIGKILL);
> + dio_exit = (pid == dio_pid && WIFEXITED(status)) ?
> + WEXITSTATUS(status) : 1;
>
> - waitpid(buffered_pid, NULL, 0);
> - waitpid(dio_pid, NULL, 0);
> + pid = waitpid(buffered_pid, &status, 0);
> + printf("buffered_pid %d, pid %d, status %#x\n", buffered_pid, pid, status);
These two "printf" breaks generic/208
--- tests/generic/208.out 2016-06-02 12:38:40.111000000 +0800
+++ /root/workspace/xfstests/results//ext3_2k/generic/208.out.bad 2016-06-11 23:10:01.578000000 +0800
@@ -1,2 +1,4 @@
QA output created by 208
+dio_pid 17541, pid 17541, status 0
+buffered_pid 17540, pid 17540, status 0
ran for 200 seconds without error, passing
Thanks,
Eryu
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] generic/208: do not leave pending async ios behind during process exit
2016-06-11 15:28 ` Eryu Guan
@ 2016-06-11 19:02 ` Tahsin Erdogan
0 siblings, 0 replies; 6+ messages in thread
From: Tahsin Erdogan @ 2016-06-11 19:02 UTC (permalink / raw)
To: Eryu Guan; +Cc: fstests, Theodore Ts'o
> These two "printf" breaks generic/208
You're right, 2 extra printf lines were left in patch by mistake.
I have abandoned this patch since latest kernels do not have this
problem, but in case someone needs this patch for an old kernel,
removing 2 extra printfs should fix the problem.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2016-06-11 19:02 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-19 22:19 [PATCH] generic/208: do not leave pending async ios behind during process exit Tahsin Erdogan
2016-05-20 14:40 ` Jeff Moyer
2016-05-20 17:00 ` Tahsin Erdogan
2016-05-20 17:21 ` Jeff Moyer
2016-06-11 15:28 ` Eryu Guan
2016-06-11 19:02 ` Tahsin Erdogan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox