public inbox for util-linux@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] deadlock in script
@ 2014-05-30  6:30 Csaba Kos
  2014-05-30  9:15 ` Karel Zak
  0 siblings, 1 reply; 7+ messages in thread
From: Csaba Kos @ 2014-05-30  6:30 UTC (permalink / raw)
  To: util-linux

[-- Attachment #1: Type: text/plain, Size: 386 bytes --]

Hi,

I use the "script" command to save the output of certain jobs on a
heavily loaded Linux cluster. Every now and then the "script" command
hangs.

I made some modifications to util-linux 2.23 and have been using the
patched "script" command without problems for about a year.

I am attaching the patches (updated to the current master) for review.

Best regards,

Csaba Kos

[-- Attachment #2: 0001-script-fix-a-rare-deadlock-after-child-termination.patch --]
[-- Type: text/x-diff, Size: 3273 bytes --]

From fad482427ddd819a92d1e636e20bbf8adaf721dd Mon Sep 17 00:00:00 2001
From: Csaba Kos <csaba.kos@gmail.com>
Date: Fri, 30 May 2014 14:33:32 +0900
Subject: [PATCH 1/2] script: fix a rare deadlock after child termination

---
 term-utils/script.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/term-utils/script.c b/term-utils/script.c
index e5d239c..32906d0 100644
--- a/term-utils/script.c
+++ b/term-utils/script.c
@@ -36,6 +36,9 @@
  * - added Native Language Support
  *
  * 2000-07-30 Per Andreas Buer <per@linpro.no> - added "q"-option
+ *
+ * 2014-05-30 Csaba Kos <csaba.kos@gmail.com>
+ * - fixed a rare deadlock after child termination
  */
 
 /*
@@ -114,6 +117,8 @@ int	tflg = 0;
 int	forceflg = 0;
 int	isterm;
 
+sigset_t block_mask, unblock_mask;
+
 int die;
 int resized;
 
@@ -306,6 +311,7 @@ doinput(void) {
 	int errsv = 0;
 	ssize_t cc = 0;
 	char ibuf[BUFSIZ];
+	fd_set readfds;
 
 	/* close things irrelevant for this process */
 	if (fscript)
@@ -314,14 +320,27 @@ doinput(void) {
 		fclose(timingfd);
 	fscript = timingfd = NULL;
 
+	FD_ZERO(&readfds);
+
+	/* block SIGCHLD */
+	sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask);
+
 	while (die == 0) {
-		if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) {
-			if (write_all(master, ibuf, cc)) {
-				warn (_("write failed"));
-				fail();
+		FD_SET(STDIN_FILENO, &readfds);
+
+		/* wait for input or signal (including SIGCHLD) */
+		if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL,
+			&unblock_mask)) > 0) {
+
+			if ((cc = read(STDIN_FILENO, ibuf, BUFSIZ)) > 0) {
+				if (write_all(master, ibuf, cc)) {
+					warn (_("write failed"));
+					fail();
+				}
 			}
 		}
-		else if (cc < 0 && errno == EINTR && resized)
+
+		if (cc < 0 && errno == EINTR && resized)
 		{
 			/* transmit window change information to the child */
 			if (isterm) {
@@ -330,12 +349,15 @@ doinput(void) {
 			}
 			resized = 0;
 
-		} else {
+		} else if (cc <= 0) {
 			errsv = errno;
 			break;
 		}
 	}
 
+	/* unblock SIGCHLD */
+	sigprocmask(SIG_SETMASK, &unblock_mask, NULL);
+
 	/* To be sure that we don't miss any data */
 	wait_for_empty_fd(slave);
 	wait_for_empty_fd(master);
@@ -404,6 +426,7 @@ dooutput(void) {
 	struct timeval tv;
 	double oldtime=time(NULL), newtime;
 	int errsv = 0;
+	fd_set readfds;
 
 	close(STDIN_FILENO);
 #ifdef HAVE_LIBUTIL
@@ -416,6 +439,8 @@ dooutput(void) {
 	my_strftime(obuf, sizeof obuf, "%c\n", localtime(&tvec));
 	fprintf(fscript, _("Script started on %s"), obuf);
 
+	FD_ZERO(&readfds);
+
 	do {
 		if (die || errsv == EINTR) {
 			struct pollfd fds[] = {{ .fd = master, .events = POLLIN }};
@@ -423,10 +448,23 @@ dooutput(void) {
 				break;
 		}
 
+		/* block SIGCHLD */
+		sigprocmask(SIG_SETMASK, &block_mask, &unblock_mask);
+
+		FD_SET(master, &readfds);
 		errno = 0;
-		cc = read(master, obuf, sizeof (obuf));
+
+		/* wait for input or signal (including SIGCHLD) */
+		if ((cc = pselect(master+1, &readfds, NULL, NULL, NULL,
+			&unblock_mask)) > 0) {
+
+			cc = read(master, obuf, sizeof (obuf));
+		}
 		errsv = errno;
 
+		/* unblock SIGCHLD */
+		sigprocmask(SIG_SETMASK, &unblock_mask, NULL);
+
 		if (tflg)
 			gettimeofday(&tv, NULL);
 
-- 
1.8.5.rc3.2.gc302941


[-- Attachment #3: 0002-script-fix-spurious-exit-from-input-read-loop-on-EIN.patch --]
[-- Type: text/x-diff, Size: 891 bytes --]

From a2dd4df349f426c6605e4b151aafccce4b2ea8e7 Mon Sep 17 00:00:00 2001
From: Csaba Kos <csaba.kos@gmail.com>
Date: Fri, 30 May 2014 14:51:38 +0900
Subject: [PATCH 2/2] script: fix spurious exit from input read loop on EINTR.

---
 term-utils/script.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/term-utils/script.c b/term-utils/script.c
index 32906d0..63913c8 100644
--- a/term-utils/script.c
+++ b/term-utils/script.c
@@ -328,6 +328,7 @@ doinput(void) {
 	while (die == 0) {
 		FD_SET(STDIN_FILENO, &readfds);
 
+		errno = 0;
 		/* wait for input or signal (including SIGCHLD) */
 		if ((cc = pselect(STDIN_FILENO + 1, &readfds, NULL, NULL, NULL,
 			&unblock_mask)) > 0) {
@@ -349,7 +350,7 @@ doinput(void) {
 			}
 			resized = 0;
 
-		} else if (cc <= 0) {
+		} else if (cc <= 0 && errno != EINTR) {
 			errsv = errno;
 			break;
 		}
-- 
1.8.5.rc3.2.gc302941


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-06-02 10:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz; follow: Atom feed)
-- links below jump to the message on this page --
2014-05-30  6:30 [PATCH] deadlock in script Csaba Kos
2014-05-30  9:15 ` Karel Zak
2014-05-30  9:52   ` Csaba Kos
2014-06-02  9:08     ` Karel Zak
2014-06-02  9:37       ` Ruediger Meier
2014-06-02  9:57         ` Karel Zak
2014-06-02 10:03           ` Ruediger Meier

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.