All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kay Sievers <kay.sievers@vrfy.org>
To: linux-hotplug@vger.kernel.org
Subject: Re: Hanging udev process on nfs-mounted /dev
Date: Thu, 30 Sep 2004 02:11:35 +0000	[thread overview]
Message-ID: <20040930021135.GA23623@vrfy.org> (raw)
In-Reply-To: <415980BF.1020401@bio.ifi.lmu.de>

[-- Attachment #1: Type: text/plain, Size: 1972 bytes --]

On Thu, Sep 30, 2004 at 01:39:46AM +0200, Kay Sievers wrote:
> On Wed, 2004-09-29 at 10:18 -0700, Greg KH wrote:
> > On Tue, Sep 28, 2004 at 05:18:23PM +0200, Frank Steiner wrote:
> > > Hi,
> > > 
> > > I also sent this to the NFS list, because I'm not sure if this is an
> > > NFS or an udev problem. I hope it's ok to ask here!
> > > 
> > > 
> > > The issue:
> > > ==========
> > > From time to time some udev process goes mad and comsumes allmost all
> > > the CPU power, making the whole system terribly slow.
> > 
> > This isn't a NFS specific bug.  I've had a number of reports of this in
> > the past.  It traces itself back to a tdb "issue" that the internal
> > database links are getting messed up and looping on themselves wrongly.
> 
> Seems we have two different problems here, one that sounds like a loop
> consuming all the CPU and onother one, like the trace, which looks like
> a F_SETLKW deadlock.
> The traces are indicating a deadlock, where processes are simply waiting
> for each other for a write-lock on the udev.tdb to be released.

Here is a patch that implements a timeout for the hanging udev process. After
20 seconds the blocked lock system call is interrupted and the error/debug
output from tdb is logged to syslog. I needed to convert the sleep() and
nanosleep() calls to usleep(), because they are not compatible with alarm().

As I can't reproduce this on my box, I locked the complete database with a
simple test program. A deadlock in the db-open call now looks like this:

  udev: main: looking at '/block/hda'
  udev: error: timout reached, node probably not created, please report to <linux-hotplug-devel@lists.sourceforge.net>
  udev: tdb_brlock failed (fd=4) at offset 0 rw_type=1 lck_type=7
  udev: tdb_open_ex: failed to get global lock on /dev/.udev.tdb: Interrupted system call
  udev: udevdb_init: unable to initialize database at '/dev/.udev.tdb'
  udev: main: unable to initialize database

Maybe this will help to shed some light on the tdb failure.

Good luck,
Kay

[-- Attachment #2: udev-deadlock-debug-01.patch --]
[-- Type: text/plain, Size: 5874 bytes --]

===== namedev.c 1.146 vs edited =====
--- 1.146/namedev.c	2004-09-08 15:17:55 +02:00
+++ edited/namedev.c	2004-09-30 04:03:42 +02:00
@@ -29,7 +29,6 @@
 #include <ctype.h>
 #include <unistd.h>
 #include <errno.h>
-#include <time.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/sysinfo.h>
@@ -353,7 +352,6 @@ static struct bus_file {
 	{}
 };
 
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device)
 {
 	/* sleep until we see the file for this specific bus type show up this
@@ -367,14 +365,14 @@ static void wait_for_device_to_initializ
 	struct bus_file *b = &bus_files[0];
 	struct sysfs_attribute *tmpattr;
 	int found = 0;
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 
 	while (1) {
 		if (b->bus == NULL) {
 			if (!found)
 				break;
-			/* sleep to give the kernel a chance to create the file */
-			sleep(1);
+			/* give the kernel a chance to create the file */
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			--loop;
 			if (loop == 0)
 				break;
@@ -682,7 +680,6 @@ static struct sysfs_device *get_sysfs_de
 {
 	struct sysfs_device *sysfs_device;
 	struct sysfs_class_device *class_dev_parent;
-	struct timespec tspec;
 	int loop;
 
 	/* Figure out where the device symlink is at.  For char devices this will
@@ -698,16 +695,14 @@ static struct sysfs_device *get_sysfs_de
 	if (class_dev_parent != NULL) 
 		dbg("given class device has a parent, use this instead");
 
-	tspec.tv_sec = 0;
-	tspec.tv_nsec = 10000000;  /* sleep 10 millisec */
-	loop = 10;
+	loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	while (loop--) {
 		if (udev_sleep) {
 			if (whitelist_search(class_dev)) {
 				sysfs_device = NULL;
 				goto exit;
 			}
-			nanosleep(&tspec, NULL);
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 		}
 
 		if (class_dev_parent)
@@ -729,11 +724,9 @@ device_found:
 		if (sysfs_device->bus[0] != '\0')
 			goto bus_found;
 
-		loop = 10;
-		tspec.tv_nsec = 10000000;
 		while (loop--) {
 			if (udev_sleep)
-				nanosleep(&tspec, NULL);
+				usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			sysfs_get_device_bus(sysfs_device);
 			
 			if (sysfs_device->bus[0] != '\0')
===== udev-add.c 1.73 vs edited =====
--- 1.73/udev-add.c	2004-08-05 00:41:08 +02:00
+++ edited/udev-add.c	2004-09-30 02:18:31 +02:00
@@ -340,11 +340,10 @@ exit:
 /* wait for the "dev" file to show up in the directory in sysfs.
  * If it doesn't happen in about 10 seconds, give up.
  */
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static int sleep_for_file(const char *path, char* file)
 {
 	char filename[SYSFS_PATH_MAX + 6];
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	int retval;
 
 	strfieldcpy(filename, sysfs_path);
@@ -360,7 +359,7 @@ static int sleep_for_file(const char *pa
 			goto exit;
 
 		/* sleep to give the kernel a chance to create the dev file */
-		sleep(1);
+		usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 	}
 	retval = -ENODEV;
 exit:
===== udev.c 1.62 vs edited =====
--- 1.62/udev.c	2004-09-14 02:25:32 +02:00
+++ edited/udev.c	2004-09-30 03:52:06 +02:00
@@ -36,6 +36,9 @@
 #include "namedev.h"
 #include "udevdb.h"
 
+/* timeout flag for udevdb */
+extern sig_atomic_t gotalarm;
+
 /* global variables */
 char **main_argv;
 char **main_envp;
@@ -58,6 +61,11 @@ void log_message(int level, const char *
 asmlinkage static void sig_handler(int signum)
 {
 	switch (signum) {
+		case SIGALRM:
+			gotalarm = 1;
+			info("error: timout reached, node probably not created, "
+			     "please report to <linux-hotplug-devel@lists.sourceforge.net> ");
+			break;
 		case SIGINT:
 		case SIGTERM:
 			udevdb_exit();
@@ -147,14 +155,21 @@ int main(int argc, char *argv[], char *e
 	/* set signal handlers */
 	act.sa_handler = sig_handler;
 	sigemptyset (&act.sa_mask);
+
+	/* alarm should interrupt */
+	sigaction(SIGALRM, &act, NULL);
+
 	act.sa_flags = SA_RESTART;
 	sigaction(SIGINT, &act, NULL);
 	sigaction(SIGTERM, &act, NULL);
 
+	/* trigger timout */
+	alarm(20);
+
 	/* initialize udev database */
 	if (udevdb_init(UDEVDB_DEFAULT) != 0) {
 		dbg("unable to initialize database");
-		goto exit;
+		exit(1);
 	}
 
 	switch(act_type) {
===== udev.h 1.62 vs edited =====
--- 1.62/udev.h	2004-09-14 14:29:10 +02:00
+++ edited/udev.h	2004-09-30 02:46:13 +02:00
@@ -26,6 +26,8 @@
 #include <sys/param.h>
 #include "libsysfs/sysfs/libsysfs.h"
 
+#define WAIT_FOR_FILE_SECONDS		10
+#define WAIT_FOR_FILE_RETRY_FREQ	10
 #define COMMENT_CHARACTER		'#'
 
 #define NAME_SIZE			256
===== udevdb.c 1.30 vs edited =====
--- 1.30/udevdb.c	2004-06-29 14:51:35 +02:00
+++ edited/udevdb.c	2004-09-30 03:01:59 +02:00
@@ -42,7 +42,19 @@
 #include "tdb/tdb.h"
 
 static TDB_CONTEXT *udevdb;
+sig_atomic_t gotalarm;
 
+static void tdb_log(TDB_CONTEXT *tdb, int level, const char *format, ...)
+{
+	va_list args;
+
+	if (!udev_log)
+		return;
+
+	va_start(args, format);
+	vsyslog(level, format, args);
+	va_end(args);
+}
 
 int udevdb_add_dev(const char *path, const struct udevice *dev)
 {
@@ -121,7 +133,9 @@ int udevdb_init(int init_flag)
 	if (init_flag != UDEVDB_DEFAULT && init_flag != UDEVDB_INTERNAL)
 		return -EINVAL;
 
-	udevdb = tdb_open(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644);
+	tdb_set_lock_alarm(&gotalarm);
+
+	udevdb = tdb_open_ex(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644, tdb_log);
 	if (udevdb == NULL) {
 		if (init_flag == UDEVDB_INTERNAL)
 			dbg("unable to initialize in-memory database");
@@ -137,7 +151,7 @@ int udevdb_init(int init_flag)
  */
 int udevdb_open_ro(void)
 {
-	udevdb = tdb_open(udev_db_filename, 0, 0, O_RDONLY, 0);
+	udevdb = tdb_open_ex(udev_db_filename, 0, 0, O_RDONLY, 0, tdb_log);
 	if (udevdb == NULL) {
 		dbg("unable to open database at '%s'", udev_db_filename);
 		return -EACCES;

  parent reply	other threads:[~2004-09-30  2:11 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-28 15:18 Hanging udev process on nfs-mounted /dev Frank Steiner
2004-09-29 17:18 ` Greg KH
2004-09-29 23:39 ` Kay Sievers
2004-09-30  2:11 ` Kay Sievers [this message]
2004-09-30  6:18 ` Frank Steiner
2004-09-30  6:21 ` Frank Steiner
2004-09-30 14:07 ` Kay Sievers
2004-10-01  6:25 ` Frank Steiner
2004-10-01  7:36 ` Kay Sievers
2004-10-01  7:38 ` Frank Steiner
2004-10-01  7:55 ` Frank Steiner
2004-10-01  8:08 ` Kay Sievers
2004-10-01  9:43 ` Frank Steiner
2004-10-01  9:57 ` Kay Sievers
2004-10-01 10:43 ` Kay Sievers
2004-10-01 22:18 ` Kay Sievers
2004-10-03 21:10 ` Frank Steiner
2004-10-03 23:07 ` Kay Sievers
2004-10-04  6:15 ` Frank Steiner
2004-10-04 14:19 ` Kay Sievers
2004-10-04 14:53 ` Frank Steiner
2004-10-05 15:37 ` Kay Sievers
2004-10-06  6:06 ` Frank Steiner
2004-10-06 12:00 ` Kay Sievers
2004-10-06 12:29 ` Frank Steiner
2004-10-08  5:59 ` Frank Steiner
  -- strict thread matches above, loose matches on Subject: below --
2004-09-28  8:48 Frank Steiner
2004-09-28  9:15 ` Frank Steiner
2004-09-28 10:11 ` Frank Steiner
2004-09-28 11:59 ` Bernd Schubert
2004-09-28 14:36   ` Frank Steiner
2004-09-28 15:56     ` Bernd Schubert
2004-09-29  6:00       ` Frank Steiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040930021135.GA23623@vrfy.org \
    --to=kay.sievers@vrfy.org \
    --cc=linux-hotplug@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.