From: "Ser, Simon" <simon.ser@intel.com>
To: "Hiler, Arkadiusz" <arkadiusz.hiler@intel.com>,
"igt-dev@lists.freedesktop.org" <igt-dev@lists.freedesktop.org>
Cc: "Latvala, Petri" <petri.latvala@intel.com>
Subject: Re: [igt-dev] [PATCH i-g-t 3/3] runner: Make sure that we are closing watchdogs on signals
Date: Thu, 18 Jul 2019 10:57:20 +0000 [thread overview]
Message-ID: <98bbef8de64288888e554de33526e0e7367fbe95.camel@intel.com> (raw)
In-Reply-To: <20190709122345.29180-3-arkadiusz.hiler@intel.com>
On Tue, 2019-07-09 at 15:23 +0300, Arkadiusz Hiler wrote:
> There are few short windows of opportunity when watchdogs are primed but
> there is no signal handling in place, so the process may exit without
> proper shutdown sequence.
>
> This patch rearranges the existing code so that we set up the signalfd
> and BLOCK the signals before setting up watchdogs and UNBLOCK only after
> the watchdogs are closed properly.
>
> If igt_runner exits due to signal, non-zero status code is returned.
>
> Cc: Petri Latvala <petri.latvala@intel.com>
> Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
> ---
> runner/executor.c | 100 +++++++++++++++++++++++++++++++++-------------
> 1 file changed, 73 insertions(+), 27 deletions(-)
>
> diff --git a/runner/executor.c b/runner/executor.c
> index 6463ab96..62303ff8 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -7,6 +7,7 @@
> #include <string.h>
> #include <sys/ioctl.h>
> #include <sys/select.h>
> +#include <sys/poll.h>
> #include <sys/signalfd.h>
> #include <sys/stat.h>
> #include <sys/time.h>
> @@ -604,7 +605,6 @@ static int monitor_output(pid_t child,
> close(outfd);
> close(errfd);
> close(kmsgfd);
> - close(sigfd);
> return -1;
> }
>
> @@ -776,9 +776,8 @@ static int monitor_output(pid_t child,
> *time_spent = time;
> }
>
> - close(sigfd);
> - sigfd = -1;
> child = 0;
> + sigfd = -1; /* we are dying, no signal handling for now */
> }
> }
>
> @@ -790,7 +789,6 @@ static int monitor_output(pid_t child,
> close(outfd);
> close(errfd);
> close(kmsgfd);
> - close(sigfd);
>
> if (aborting)
> return -1;
> @@ -908,13 +906,12 @@ static int execute_next_entry(struct execute_state *state,
> double *time_spent,
> struct settings *settings,
> struct job_list_entry *entry,
> - int testdirfd, int resdirfd)
> + int testdirfd, int resdirfd,
> + int sigfd, sigset_t *sigmask)
> {
> int dirfd;
> int outputs[_F_LAST];
> int kmsgfd;
> - int sigfd;
> - sigset_t mask;
> int outpipe[2] = { -1, -1 };
> int errpipe[2] = { -1, -1 };
> int outfd, errfd;
> @@ -954,21 +951,6 @@ static int execute_next_entry(struct execute_state *state,
> lseek(kmsgfd, 0, SEEK_END);
> }
>
> - sigemptyset(&mask);
> - sigaddset(&mask, SIGCHLD);
> - sigaddset(&mask, SIGINT);
> - sigaddset(&mask, SIGTERM);
> - sigaddset(&mask, SIGQUIT);
> - sigaddset(&mask, SIGHUP);
> - sigprocmask(SIG_BLOCK, &mask, NULL);
> - sigfd = signalfd(-1, &mask, O_CLOEXEC);
> -
> - if (sigfd < 0) {
> - /* TODO: Handle better */
> - fprintf(stderr, "Cannot monitor child process with signalfd\n");
> - result = -1;
> - goto out_kmsgfd;
> - }
>
> if (settings->log_level >= LOG_LEVEL_NORMAL) {
> char *displayname;
> @@ -1002,7 +984,7 @@ static int execute_next_entry(struct execute_state *state,
> close(outpipe[0]);
> close(errpipe[0]);
>
> - sigprocmask(SIG_UNBLOCK, &mask, NULL);
> + sigprocmask(SIG_UNBLOCK, sigmask, NULL);
>
> setenv("IGT_SENTINEL_ON_STDERR", "1", 1);
>
> @@ -1261,12 +1243,41 @@ static void oom_immortal(void)
> close(fd);
> }
>
> +static bool should_die_because_signal(int sigfd)
> +{
> + struct signalfd_siginfo siginfo;
> + int ret;
> +
> + struct pollfd sigpoll = { .fd = sigfd, .events = POLLIN | POLLRDBAND };
> +
> + if ((ret = poll(&sigpoll, 1, 0)) != 0) {
> + if (ret == -1) {
Seems like this is unintentionally left blank.
> + }
> +
> + ret = read(sigfd, &siginfo, sizeof(siginfo));
Error handling is missing (ret == sizeof(siginfo)).
> + if (siginfo.ssi_signo == SIGCHLD) {
> + fprintf(stderr, "Runner got stray SIGCHLD while not executing any tests.\n");
> +
Nit: extra blank line here
> + } else {
> + fprintf(stderr, "Runner is being killed by %s\n",
> + strsignal(siginfo.ssi_signo));
> + return true;
> + }
> +
> + }
> +
> + return false;
> +}
> +
> bool execute(struct execute_state *state,
> struct settings *settings,
> struct job_list *job_list)
> {
> struct utsname unamebuf;
> int resdirfd, testdirfd, unamefd, timefd;
> + sigset_t sigmask;
> + int sigfd;
> double time_spent = 0.0;
> bool status = true;
>
> @@ -1310,6 +1321,22 @@ bool execute(struct execute_state *state,
>
> oom_immortal();
>
> + sigemptyset(&sigmask);
> + sigaddset(&sigmask, SIGCHLD);
> + sigaddset(&sigmask, SIGINT);
> + sigaddset(&sigmask, SIGTERM);
> + sigaddset(&sigmask, SIGQUIT);
> + sigaddset(&sigmask, SIGHUP);
> + sigfd = signalfd(-1, &sigmask, O_CLOEXEC);
> + sigprocmask(SIG_BLOCK, &sigmask, NULL);
> +
> + if (sigfd < 0) {
> + /* TODO: Handle better */
> + fprintf(stderr, "Cannot mask signals\n");
> + status = -1;
This should probably be `status = false`. -1 is a truthy value.
> + goto end;
> + }
> +
> init_watchdogs(settings);
>
> if (!uname(&unamebuf)) {
> @@ -1345,12 +1372,18 @@ bool execute(struct execute_state *state,
> char *reason;
> int result;
>
> + if (should_die_because_signal(sigfd)) {
> + status = false;
Should we close_watchdogs at this point?
> + goto end;
> + }
> +
> result = execute_next_entry(state,
> job_list->size,
> &time_spent,
> settings,
> &job_list->entries[state->next],
> - testdirfd, resdirfd);
> + testdirfd, resdirfd,
> + sigfd, &sigmask);
The argument list is getting quite large. At some point it may be worth
it to put everything (or part of these) in a struct.
(In general I feel like this file could be improved a lot, these long
functions are hard to read.)
>
> if (result < 0) {
> status = false;
> @@ -1383,8 +1416,15 @@ bool execute(struct execute_state *state,
> if (result > 0) {
> double time_left = state->time_left;
>
> - close(testdirfd);
> close_watchdogs(settings);
> + sigprocmask(SIG_UNBLOCK, &sigmask, NULL);
> + /* make sure that we do not leave any signals unhandled */
> + if (should_die_because_signal(sigfd)){
Style nit: missing space between `)` and `{`.
> + status = false;
> + goto end_post_signal_restore;
> + }
> + close(sigfd);
> + close(testdirfd);
> initialize_execute_state_from_resume(resdirfd, state, settings, job_list);
> state->time_left = time_left;
> return execute(state, settings, job_list);
> @@ -1397,8 +1437,14 @@ bool execute(struct execute_state *state,
> }
>
> end:
> - close(testdirfd);
> - close(resdirfd);
> close_watchdogs(settings);
> + sigprocmask(SIG_UNBLOCK, &sigmask, NULL);
> + /* make sure that we do not leave any signals unhandled */
> + if (should_die_because_signal(sigfd))
> + status = false;
> + end_post_signal_restore:
> + close(sigfd);
> + close(testdirfd);
> + close(resdirfd);
> return status;
> }
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
next prev parent reply other threads:[~2019-07-18 10:57 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-09 12:23 [igt-dev] [PATCH i-g-t 1/3] runner: Make sure we don't close watchdogs twice Arkadiusz Hiler
2019-07-09 12:23 ` [igt-dev] [PATCH i-g-t 2/3] runner: Warn when watchdogs are being closed from the exit handler Arkadiusz Hiler
2019-07-10 8:20 ` Ser, Simon
2019-07-09 12:23 ` [igt-dev] [PATCH i-g-t 3/3] runner: Make sure that we are closing watchdogs on signals Arkadiusz Hiler
2019-07-18 10:57 ` Ser, Simon [this message]
2019-07-19 11:25 ` Arkadiusz Hiler
2019-07-19 11:31 ` Ser, Simon
2019-07-19 12:01 ` [igt-dev] [PATCH v2 " Arkadiusz Hiler
2019-07-19 12:04 ` Ser, Simon
2019-07-09 13:04 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/3] runner: Make sure we don't close watchdogs twice Patchwork
2019-07-10 0:24 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2019-07-10 8:18 ` [igt-dev] [PATCH i-g-t 1/3] " Ser, Simon
2019-07-19 13:31 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/3] runner: Make sure we don't close watchdogs twice (rev2) Patchwork
2019-07-19 16:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2019-07-22 10:24 ` [igt-dev] [PATCH i-g-t 1/3] runner: Make sure we don't close watchdogs twice Petri Latvala
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=98bbef8de64288888e554de33526e0e7367fbe95.camel@intel.com \
--to=simon.ser@intel.com \
--cc=arkadiusz.hiler@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=petri.latvala@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox