linux-hotplug.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kay Sievers <kay.sievers@vrfy.org>
To: linux-hotplug@vger.kernel.org
Subject: Re: Hanging udev process on nfs-mounted /dev
Date: Thu, 30 Sep 2004 02:11:35 +0000	[thread overview]
Message-ID: <20040930021135.GA23623@vrfy.org> (raw)
In-Reply-To: <415980BF.1020401@bio.ifi.lmu.de>

[-- Attachment #1: Type: text/plain, Size: 1972 bytes --]

On Thu, Sep 30, 2004 at 01:39:46AM +0200, Kay Sievers wrote:
> On Wed, 2004-09-29 at 10:18 -0700, Greg KH wrote:
> > On Tue, Sep 28, 2004 at 05:18:23PM +0200, Frank Steiner wrote:
> > > Hi,
> > > 
> > > I also sent this to the NFS list, because I'm not sure if this is an
> > > NFS or an udev problem. I hope it's ok to ask here!
> > > 
> > > 
> > > The issue:
> > > ==========
> > > From time to time some udev process goes mad and consumes almost all
> > > the CPU power, making the whole system terribly slow.
> > 
> > This isn't an NFS-specific bug.  I've had a number of reports of this in
> > the past.  It traces itself back to a tdb "issue" where the internal
> > database links are getting messed up and looping on themselves wrongly.
> 
> Seems we have two different problems here: one that sounds like a loop
> consuming all the CPU, and another one, like the trace, which looks like
> a F_SETLKW deadlock.
> The traces indicate a deadlock, where processes are simply waiting
> for each other for a write-lock on the udev.tdb to be released.

Here is a patch that implements a timeout for a deadlocked udev process. After
20 seconds the lock system call is interrupted and the debug error output from
tdb is logged to the syslog. I needed to port the sleep() calls to usleep(),
because they are not compatible with alarm().

As I can't reproduce this on my box, I locked the complete database with a
simple test program. A deadlock in the db-open call now looks like this:

  udev: main: looking at '/block/hda'
  udev: error: timout reached, node probably not created, please report to <linux-hotplug-devel@lists.sourceforge.net>
  udev: tdb_brlock failed (fd=4) at offset 0 rw_type=1 lck_type=7
  udev: tdb_open_ex: failed to get global lock on /dev/.udev.tdb: Interrupted system call
  udev: udevdb_init: unable to initialize database at '/dev/.udev.tdb'
  udev: main: unable to initialize database

Maybe this will help to bring some light into the tdb failure.

Good luck,
Kay

[-- Attachment #2: udev-deadlock-debug-01.patch --]
[-- Type: text/plain, Size: 5874 bytes --]

===== namedev.c 1.146 vs edited =====
--- 1.146/namedev.c	2004-09-08 15:17:55 +02:00
+++ edited/namedev.c	2004-09-30 04:03:42 +02:00
@@ -29,7 +29,6 @@
 #include <ctype.h>
 #include <unistd.h>
 #include <errno.h>
-#include <time.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/sysinfo.h>
@@ -353,7 +352,6 @@ static struct bus_file {
 	{}
 };
 
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device)
 {
 	/* sleep until we see the file for this specific bus type show up this
@@ -367,14 +365,14 @@ static void wait_for_device_to_initializ
 	struct bus_file *b = &bus_files[0];
 	struct sysfs_attribute *tmpattr;
 	int found = 0;
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 
 	while (1) {
 		if (b->bus == NULL) {
 			if (!found)
 				break;
-			/* sleep to give the kernel a chance to create the file */
-			sleep(1);
+			/* give the kernel a chance to create the file */
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			--loop;
 			if (loop == 0)
 				break;
@@ -682,7 +680,6 @@ static struct sysfs_device *get_sysfs_de
 {
 	struct sysfs_device *sysfs_device;
 	struct sysfs_class_device *class_dev_parent;
-	struct timespec tspec;
 	int loop;
 
 	/* Figure out where the device symlink is at.  For char devices this will
@@ -698,16 +695,14 @@ static struct sysfs_device *get_sysfs_de
 	if (class_dev_parent != NULL) 
 		dbg("given class device has a parent, use this instead");
 
-	tspec.tv_sec = 0;
-	tspec.tv_nsec = 10000000;  /* sleep 10 millisec */
-	loop = 10;
+	loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	while (loop--) {
 		if (udev_sleep) {
 			if (whitelist_search(class_dev)) {
 				sysfs_device = NULL;
 				goto exit;
 			}
-			nanosleep(&tspec, NULL);
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 		}
 
 		if (class_dev_parent)
@@ -729,11 +724,9 @@ device_found:
 		if (sysfs_device->bus[0] != '\0')
 			goto bus_found;
 
-		loop = 10;
-		tspec.tv_nsec = 10000000;
 		while (loop--) {
 			if (udev_sleep)
-				nanosleep(&tspec, NULL);
+				usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			sysfs_get_device_bus(sysfs_device);
 			
 			if (sysfs_device->bus[0] != '\0')
===== udev-add.c 1.73 vs edited =====
--- 1.73/udev-add.c	2004-08-05 00:41:08 +02:00
+++ edited/udev-add.c	2004-09-30 02:18:31 +02:00
@@ -340,11 +340,10 @@ exit:
 /* wait for the "dev" file to show up in the directory in sysfs.
  * If it doesn't happen in about 10 seconds, give up.
  */
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static int sleep_for_file(const char *path, char* file)
 {
 	char filename[SYSFS_PATH_MAX + 6];
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	int retval;
 
 	strfieldcpy(filename, sysfs_path);
@@ -360,7 +359,7 @@ static int sleep_for_file(const char *pa
 			goto exit;
 
 		/* sleep to give the kernel a chance to create the dev file */
-		sleep(1);
+		usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 	}
 	retval = -ENODEV;
 exit:
===== udev.c 1.62 vs edited =====
--- 1.62/udev.c	2004-09-14 02:25:32 +02:00
+++ edited/udev.c	2004-09-30 03:52:06 +02:00
@@ -36,6 +36,9 @@
 #include "namedev.h"
 #include "udevdb.h"
 
+/* timeout flag for udevdb */
+extern sig_atomic_t gotalarm;
+
 /* global variables */
 char **main_argv;
 char **main_envp;
@@ -58,6 +61,11 @@ void log_message(int level, const char *
 asmlinkage static void sig_handler(int signum)
 {
 	switch (signum) {
+		case SIGALRM:
+			gotalarm = 1;
+			info("error: timout reached, node probably not created, "
+			     "please report to <linux-hotplug-devel@lists.sourceforge.net> ");
+			break;
 		case SIGINT:
 		case SIGTERM:
 			udevdb_exit();
@@ -147,14 +155,21 @@ int main(int argc, char *argv[], char *e
 	/* set signal handlers */
 	act.sa_handler = sig_handler;
 	sigemptyset (&act.sa_mask);
+
+	/* alarm should interrupt */
+	sigaction(SIGALRM, &act, NULL);
+
 	act.sa_flags = SA_RESTART;
 	sigaction(SIGINT, &act, NULL);
 	sigaction(SIGTERM, &act, NULL);
 
+	/* trigger timout */
+	alarm(20);
+
 	/* initialize udev database */
 	if (udevdb_init(UDEVDB_DEFAULT) != 0) {
 		dbg("unable to initialize database");
-		goto exit;
+		exit(1);
 	}
 
 	switch(act_type) {
===== udev.h 1.62 vs edited =====
--- 1.62/udev.h	2004-09-14 14:29:10 +02:00
+++ edited/udev.h	2004-09-30 02:46:13 +02:00
@@ -26,6 +26,8 @@
 #include <sys/param.h>
 #include "libsysfs/sysfs/libsysfs.h"
 
+#define WAIT_FOR_FILE_SECONDS		10
+#define WAIT_FOR_FILE_RETRY_FREQ	10
 #define COMMENT_CHARACTER		'#'
 
 #define NAME_SIZE			256
===== udevdb.c 1.30 vs edited =====
--- 1.30/udevdb.c	2004-06-29 14:51:35 +02:00
+++ edited/udevdb.c	2004-09-30 03:01:59 +02:00
@@ -42,7 +42,19 @@
 #include "tdb/tdb.h"
 
 static TDB_CONTEXT *udevdb;
+sig_atomic_t gotalarm;
 
+static void tdb_log(TDB_CONTEXT *tdb, int level, const char *format, ...)
+{
+	va_list args;
+
+	if (!udev_log)
+		return;
+
+	va_start(args, format);
+	vsyslog(level, format, args);
+	va_end(args);
+}
 
 int udevdb_add_dev(const char *path, const struct udevice *dev)
 {
@@ -121,7 +133,9 @@ int udevdb_init(int init_flag)
 	if (init_flag != UDEVDB_DEFAULT && init_flag != UDEVDB_INTERNAL)
 		return -EINVAL;
 
-	udevdb = tdb_open(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644);
+	tdb_set_lock_alarm(&gotalarm);
+
+	udevdb = tdb_open_ex(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644, tdb_log);
 	if (udevdb == NULL) {
 		if (init_flag == UDEVDB_INTERNAL)
 			dbg("unable to initialize in-memory database");
@@ -137,7 +151,7 @@ int udevdb_init(int init_flag)
  */
 int udevdb_open_ro(void)
 {
-	udevdb = tdb_open(udev_db_filename, 0, 0, O_RDONLY, 0);
+	udevdb = tdb_open_ex(udev_db_filename, 0, 0, O_RDONLY, 0, tdb_log);
 	if (udevdb == NULL) {
 		dbg("unable to open database at '%s'", udev_db_filename);
 		return -EACCES;

  parent reply	other threads:[~2004-09-30  2:11 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-28 15:18 Hanging udev process on nfs-mounted /dev Frank Steiner
2004-09-29 17:18 ` Greg KH
2004-09-29 23:39 ` Kay Sievers
2004-09-30  2:11 ` Kay Sievers [this message]
2004-09-30  6:18 ` Frank Steiner
2004-09-30  6:21 ` Frank Steiner
2004-09-30 14:07 ` Kay Sievers
2004-10-01  6:25 ` Frank Steiner
2004-10-01  7:36 ` Kay Sievers
2004-10-01  7:38 ` Frank Steiner
2004-10-01  7:55 ` Frank Steiner
2004-10-01  8:08 ` Kay Sievers
2004-10-01  9:43 ` Frank Steiner
2004-10-01  9:57 ` Kay Sievers
2004-10-01 10:43 ` Kay Sievers
2004-10-01 22:18 ` Kay Sievers
2004-10-03 21:10 ` Frank Steiner
2004-10-03 23:07 ` Kay Sievers
2004-10-04  6:15 ` Frank Steiner
2004-10-04 14:19 ` Kay Sievers
2004-10-04 14:53 ` Frank Steiner
2004-10-05 15:37 ` Kay Sievers
2004-10-06  6:06 ` Frank Steiner
2004-10-06 12:00 ` Kay Sievers
2004-10-06 12:29 ` Frank Steiner
2004-10-08  5:59 ` Frank Steiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040930021135.GA23623@vrfy.org \
    --to=kay.sievers@vrfy.org \
    --cc=linux-hotplug@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).