linux-hotplug.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kay Sievers <kay.sievers@vrfy.org>
To: linux-hotplug@vger.kernel.org
Subject: Re: Hanging udev process on nfs-mounted /dev
Date: Fri, 01 Oct 2004 22:18:54 +0000	[thread overview]
Message-ID: <20041001221854.GA5589@vrfy.org> (raw)
In-Reply-To: <415980BF.1020401@bio.ifi.lmu.de>

[-- Attachment #1: Type: text/plain, Size: 1931 bytes --]

On Fri, Oct 01, 2004 at 12:43:47PM +0200, Kay Sievers wrote:
> On Fri, 2004-10-01 at 11:43 +0200, Frank Steiner wrote:
> 
> > A general question for understanding things better: Let's assume that
> > the errors indeed are caused by missing nfs locking on my /dev dir.
> > It sounds reasonable that udev must be able to rely on propper locking
> > for maintaining its database, so one should not expect it to work on
> > a fs without locking.
> > Would it be reasonable to issue a warning if udev detects it's running
> > on a fs without locking (if this is possible to detect)? Or, if in
> > case of missing locks the hangs cannot be prevented, udev could even
> > refuse to do any work. If one gets a message from udev "No locks available.
> > I will not create any devices until you give  me locks" this would
> > definitely help people doing stupid things like mounting /dev via NFS :-)
> 
> We should find the loop bug first. Then the alarm() should be sufficient
> to prevent a hanging udev. Yes, we may include a hint in the logged
> error.
> 
> We still can create nodes with udev, even with a corrupt database (I
> will change the alarm() patch to act like this later). Only the remove
> event will eventually fail, if a rule has set a custom name for the
> device.
> Other programs asking with udevinfo, may also not work correctly, but
> it's better to create the node without bookkeeping than to do nothing,
> I think.

Here is a new patch to try to recover from a corrupted udev database. udev
will now continue without database support in that case and log the failure
to syslog. So the node should be generated in any case, remove will obviously
not work for custom names. All tdb errors will be logged to syslog.

I've added two iteration limits to the tdb-code at the places I expect
the endless loop. In the case we try to find more than 100.000 db-entries
with the same hash, we better give up :)

Good luck,
Kay

[-- Attachment #2: udev-deadlock-debug-02.patch --]
[-- Type: text/plain, Size: 8377 bytes --]

===== namedev.c 1.146 vs edited =====
--- 1.146/namedev.c	2004-09-08 15:17:55 +02:00
+++ edited/namedev.c	2004-10-01 22:17:52 +02:00
@@ -29,7 +29,6 @@
 #include <ctype.h>
 #include <unistd.h>
 #include <errno.h>
-#include <time.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <sys/sysinfo.h>
@@ -353,7 +352,6 @@
 	{}
 };
 
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device)
 {
 	/* sleep until we see the file for this specific bus type show up this
@@ -367,14 +365,14 @@
 	struct bus_file *b = &bus_files[0];
 	struct sysfs_attribute *tmpattr;
 	int found = 0;
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 
 	while (1) {
 		if (b->bus == NULL) {
 			if (!found)
 				break;
-			/* sleep to give the kernel a chance to create the file */
-			sleep(1);
+			/* give the kernel a chance to create the file */
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			--loop;
 			if (loop == 0)
 				break;
@@ -394,7 +392,8 @@
 	}
 	if (!found)
 		dbg("did not find bus type '%s' on list of bus_id_files, "
-		    "contact greg@kroah.com", sysfs_device->bus);
+		    "please report to <linux-hotplug-devel@lists.sourceforge.net>",
+		    sysfs_device->bus);
 exit:
 	return; /* here to prevent compiler warning... */
 }
@@ -682,7 +681,6 @@
 {
 	struct sysfs_device *sysfs_device;
 	struct sysfs_class_device *class_dev_parent;
-	struct timespec tspec;
 	int loop;
 
 	/* Figure out where the device symlink is at.  For char devices this will
@@ -698,16 +696,14 @@
 	if (class_dev_parent != NULL) 
 		dbg("given class device has a parent, use this instead");
 
-	tspec.tv_sec = 0;
-	tspec.tv_nsec = 10000000;  /* sleep 10 millisec */
-	loop = 10;
+	loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	while (loop--) {
 		if (udev_sleep) {
 			if (whitelist_search(class_dev)) {
 				sysfs_device = NULL;
 				goto exit;
 			}
-			nanosleep(&tspec, NULL);
+			usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 		}
 
 		if (class_dev_parent)
@@ -729,11 +725,9 @@
 		if (sysfs_device->bus[0] != '\0')
 			goto bus_found;
 
-		loop = 10;
-		tspec.tv_nsec = 10000000;
 		while (loop--) {
 			if (udev_sleep)
-				nanosleep(&tspec, NULL);
+				usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 			sysfs_get_device_bus(sysfs_device);
 			
 			if (sysfs_device->bus[0] != '\0')
===== udev-add.c 1.73 vs edited =====
--- 1.73/udev-add.c	2004-08-05 00:41:08 +02:00
+++ edited/udev-add.c	2004-09-30 02:18:31 +02:00
@@ -340,11 +340,10 @@
 /* wait for the "dev" file to show up in the directory in sysfs.
  * If it doesn't happen in about 10 seconds, give up.
  */
-#define SECONDS_TO_WAIT_FOR_FILE	10
 static int sleep_for_file(const char *path, char* file)
 {
 	char filename[SYSFS_PATH_MAX + 6];
-	int loop = SECONDS_TO_WAIT_FOR_FILE;
+	int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
 	int retval;
 
 	strfieldcpy(filename, sysfs_path);
@@ -360,7 +359,7 @@
 			goto exit;
 
 		/* sleep to give the kernel a chance to create the dev file */
-		sleep(1);
+		usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
 	}
 	retval = -ENODEV;
 exit:
===== udev.c 1.62 vs edited =====
--- 1.62/udev.c	2004-09-14 02:25:32 +02:00
+++ edited/udev.c	2004-10-01 23:50:59 +02:00
@@ -36,6 +36,9 @@
 #include "namedev.h"
 #include "udevdb.h"
 
+/* timeout flag for udevdb */
+extern sig_atomic_t gotalarm;
+
 /* global variables */
 char **main_argv;
 char **main_envp;
@@ -58,6 +61,11 @@
 asmlinkage static void sig_handler(int signum)
 {
 	switch (signum) {
+		case SIGALRM:
+			gotalarm = 1;
+			info("error: timeout reached, event probably not handled correctly, "
+			     "please report to <linux-hotplug-devel@lists.sourceforge.net> ");
+			break;
 		case SIGINT:
 		case SIGTERM:
 			udevdb_exit();
@@ -94,7 +102,8 @@
 
 	dbg("version %s", UDEV_VERSION);
 
-	/* initialize our configuration */
+	init_logging("udev");
+
 	udev_init_config();
 
 	if (strstr(argv[0], "udevstart")) {
@@ -147,15 +156,20 @@
 	/* set signal handlers */
 	act.sa_handler = sig_handler;
 	sigemptyset (&act.sa_mask);
+
+	/* alarm should interrupt */
+	sigaction(SIGALRM, &act, NULL);
+
 	act.sa_flags = SA_RESTART;
 	sigaction(SIGINT, &act, NULL);
 	sigaction(SIGTERM, &act, NULL);
 
+	/* trigger timout to interrupt blocking syscalls */
+	alarm(ALARM_TIMEOUT);
+
 	/* initialize udev database */
-	if (udevdb_init(UDEVDB_DEFAULT) != 0) {
-		dbg("unable to initialize database");
-		goto exit;
-	}
+	if (udevdb_init(UDEVDB_DEFAULT) != 0)
+		info("error: unable to initialize database, continuing without database");
 
 	switch(act_type) {
 	case UDEVSTART:
===== udev.h 1.62 vs edited =====
--- 1.62/udev.h	2004-09-14 14:29:10 +02:00
+++ edited/udev.h	2004-10-01 22:20:37 +02:00
@@ -26,6 +26,9 @@
 #include <sys/param.h>
 #include "libsysfs/sysfs/libsysfs.h"
 
+#define ALARM_TIMEOUT			20
+#define WAIT_FOR_FILE_SECONDS		10
+#define WAIT_FOR_FILE_RETRY_FREQ	10
 #define COMMENT_CHARACTER		'#'
 
 #define NAME_SIZE			256
===== udevdb.c 1.30 vs edited =====
--- 1.30/udevdb.c	2004-06-29 14:51:35 +02:00
+++ edited/udevdb.c	2004-10-01 23:46:47 +02:00
@@ -42,13 +42,28 @@
 #include "tdb/tdb.h"
 
 static TDB_CONTEXT *udevdb;
+sig_atomic_t gotalarm;
 
+static void tdb_log(TDB_CONTEXT *tdb, int level, const char *format, ...)
+{
+	va_list args;
+
+	if (!udev_log)
+		return;
+
+	va_start(args, format);
+	vsyslog(level, format, args);
+	va_end(args);
+}
 
 int udevdb_add_dev(const char *path, const struct udevice *dev)
 {
 	TDB_DATA key, data;
 	char keystr[SYSFS_PATH_MAX];
 
+	if (udevdb == NULL)
+		return -1;
+
 	if ((path == NULL) || (dev == NULL))
 		return -ENODEV;
 
@@ -68,6 +83,9 @@
 {
 	TDB_DATA key, data;
 
+	if (udevdb == NULL)
+		return -1;
+
 	if (path == NULL)
 		return -ENODEV;
 
@@ -88,6 +106,9 @@
 	TDB_DATA key;
 	char keystr[SYSFS_PATH_MAX];
 
+	if (udevdb == NULL)
+		return -1;
+
 	if (path == NULL)
 		return -EINVAL;
 
@@ -121,7 +142,9 @@
 	if (init_flag != UDEVDB_DEFAULT && init_flag != UDEVDB_INTERNAL)
 		return -EINVAL;
 
-	udevdb = tdb_open(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644);
+	tdb_set_lock_alarm(&gotalarm);
+
+	udevdb = tdb_open_ex(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644, tdb_log);
 	if (udevdb == NULL) {
 		if (init_flag == UDEVDB_INTERNAL)
 			dbg("unable to initialize in-memory database");
@@ -137,7 +160,7 @@
  */
 int udevdb_open_ro(void)
 {
-	udevdb = tdb_open(udev_db_filename, 0, 0, O_RDONLY, 0);
+	udevdb = tdb_open_ex(udev_db_filename, 0, 0, O_RDONLY, 0, tdb_log);
 	if (udevdb == NULL) {
 		dbg("unable to open database at '%s'", udev_db_filename);
 		return -EACCES;
@@ -159,6 +182,9 @@
 int udevdb_call_foreach(int (*user_record_handler) (char *path, struct udevice *dev))
 {
 	int retval = 0;
+
+	if (udevdb == NULL)
+		return -1;
 
 	if (user_record_handler == NULL) {
 		dbg("invalid user record handling function");
===== tdb/tdb.c 1.3 vs edited =====
--- 1.3/tdb/tdb.c	2003-12-17 01:23:27 +01:00
+++ edited/tdb/tdb.c	2004-10-01 23:53:17 +02:00
@@ -980,12 +980,14 @@
 			struct list_struct *r)
 {
 	tdb_off rec_ptr;
-	
+	int maxloop;
+
 	/* read in the hash top */
 	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 		return 0;
 
 	/* keep looking until we find the right record */
+	maxloop = 100000;
 	while (rec_ptr) {
 		if (rec_read(tdb, rec_ptr, r) == -1)
 			return 0;
@@ -1005,6 +1007,12 @@
 			SAFE_FREE(k);
 		}
 		rec_ptr = r->next;
+
+		maxloop--;
+		if (maxloop == 0) {
+			TDB_LOG((tdb, 0, "tdb_find maxloop reached; corrupt database!\n"));
+			return TDB_ERRCODE(TDB_ERR_CORRUPT, 0);
+		}
 	}
 	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 }
@@ -1187,6 +1195,7 @@
 {
 	tdb_off last_ptr, i;
 	struct list_struct lastrec;
+	int maxloop;
 
 	if (tdb->read_only) return -1;
 
@@ -1201,9 +1210,18 @@
 	/* find previous record in hash chain */
 	if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 		return -1;
-	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
+
+	maxloop = 100000;
+	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next) {
 		if (rec_read(tdb, i, &lastrec) == -1)
 			return -1;
+
+		maxloop--;
+		if (maxloop == 0) {
+			TDB_LOG((tdb, 0, "(tdb)do_delete: maxloop reached; corrupt database!\n"));
+			return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
+		}
+	}
 
 	/* unlink it: next ptr is at start of record. */
 	if (last_ptr == 0)

  parent reply	other threads:[~2004-10-01 22:18 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-28 15:18 Hanging udev process on nfs-mounted /dev Frank Steiner
2004-09-29 17:18 ` Greg KH
2004-09-29 23:39 ` Kay Sievers
2004-09-30  2:11 ` Kay Sievers
2004-09-30  6:18 ` Frank Steiner
2004-09-30  6:21 ` Frank Steiner
2004-09-30 14:07 ` Kay Sievers
2004-10-01  6:25 ` Frank Steiner
2004-10-01  7:36 ` Kay Sievers
2004-10-01  7:38 ` Frank Steiner
2004-10-01  7:55 ` Frank Steiner
2004-10-01  8:08 ` Kay Sievers
2004-10-01  9:43 ` Frank Steiner
2004-10-01  9:57 ` Kay Sievers
2004-10-01 10:43 ` Kay Sievers
2004-10-01 22:18 ` Kay Sievers [this message]
2004-10-03 21:10 ` Frank Steiner
2004-10-03 23:07 ` Kay Sievers
2004-10-04  6:15 ` Frank Steiner
2004-10-04 14:19 ` Kay Sievers
2004-10-04 14:53 ` Frank Steiner
2004-10-05 15:37 ` Kay Sievers
2004-10-06  6:06 ` Frank Steiner
2004-10-06 12:00 ` Kay Sievers
2004-10-06 12:29 ` Frank Steiner
2004-10-08  5:59 ` Frank Steiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20041001221854.GA5589@vrfy.org \
    --to=kay.sievers@vrfy.org \
    --cc=linux-hotplug@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).