From: Kay Sievers <kay.sievers@vrfy.org>
To: linux-hotplug@vger.kernel.org
Subject: Re: Hanging udev process on nfs-mounted /dev
Date: Thu, 30 Sep 2004 02:11:35 +0000 [thread overview]
Message-ID: <20040930021135.GA23623@vrfy.org> (raw)
In-Reply-To: <415980BF.1020401@bio.ifi.lmu.de>
[-- Attachment #1: Type: text/plain, Size: 1972 bytes --]
On Thu, Sep 30, 2004 at 01:39:46AM +0200, Kay Sievers wrote:
> On Wed, 2004-09-29 at 10:18 -0700, Greg KH wrote:
> > On Tue, Sep 28, 2004 at 05:18:23PM +0200, Frank Steiner wrote:
> > > Hi,
> > >
> > > I also sent this to the NFS list, because I'm not sure if this is an
> > > NFS or an udev problem. I hope it's ok to ask here!
> > >
> > >
> > > The issue:
> > > ==========
> > > > From time to time some udev process goes mad and consumes almost all
> > > the CPU power, making the whole system terribly slow.
> >
> > This isn't a NFS specific bug. I've had a number of reports of this in
> > the past. It traces itself back to a tdb "issue" that the internal
> > database links are getting messed up and looping on themselves wrongly.
>
> Seems we have two different problems here, one that sounds like a loop
> > consuming all the CPU and another one, like the trace, which looks like
> a F_SETLKW deadlock.
> The traces are indicating a deadlock, where processes are simply waiting
> for each other for a write-lock on the udev.tdb to be released.
Here is a patch that implements a timeout for the dead udev process. After
20 seconds the lock system call is interrupted and the error debug from tdb
is logged to the syslog. I needed to port the sleep() calls to usleep(),
because they are not compatible with alarm().
As I can't reproduce this on my box, I locked the complete database with a
simple test program. A deadlock in the db-open call now looks like this:
udev: main: looking at '/block/hda'
udev: error: timout reached, node probably not created, please report to <linux-hotplug-devel@lists.sourceforge.net>
udev: tdb_brlock failed (fd=4) at offset 0 rw_type=1 lck_type=7
udev: tdb_open_ex: failed to get global lock on /dev/.udev.tdb: Interrupted system call
udev: udevdb_init: unable to initialize database at '/dev/.udev.tdb'
udev: main: unable to initialize database
Maybe this will help to shed some light on the tdb failure.
Good luck,
Kay
[-- Attachment #2: udev-deadlock-debug-01.patch --]
[-- Type: text/plain, Size: 5874 bytes --]
===== namedev.c 1.146 vs edited =====
--- 1.146/namedev.c 2004-09-08 15:17:55 +02:00
+++ edited/namedev.c 2004-09-30 04:03:42 +02:00
@@ -29,7 +29,6 @@
#include <ctype.h>
#include <unistd.h>
#include <errno.h>
-#include <time.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
@@ -353,7 +352,6 @@ static struct bus_file {
{}
};
-#define SECONDS_TO_WAIT_FOR_FILE 10
static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device)
{
/* sleep until we see the file for this specific bus type show up this
@@ -367,14 +365,14 @@ static void wait_for_device_to_initializ
struct bus_file *b = &bus_files[0];
struct sysfs_attribute *tmpattr;
int found = 0;
- int loop = SECONDS_TO_WAIT_FOR_FILE;
+ int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
while (1) {
if (b->bus == NULL) {
if (!found)
break;
- /* sleep to give the kernel a chance to create the file */
- sleep(1);
+ /* give the kernel a chance to create the file */
+ usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
--loop;
if (loop == 0)
break;
@@ -682,7 +680,6 @@ static struct sysfs_device *get_sysfs_de
{
struct sysfs_device *sysfs_device;
struct sysfs_class_device *class_dev_parent;
- struct timespec tspec;
int loop;
/* Figure out where the device symlink is at. For char devices this will
@@ -698,16 +695,14 @@ static struct sysfs_device *get_sysfs_de
if (class_dev_parent != NULL)
dbg("given class device has a parent, use this instead");
- tspec.tv_sec = 0;
- tspec.tv_nsec = 10000000; /* sleep 10 millisec */
- loop = 10;
+ loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
while (loop--) {
if (udev_sleep) {
if (whitelist_search(class_dev)) {
sysfs_device = NULL;
goto exit;
}
- nanosleep(&tspec, NULL);
+ usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
}
if (class_dev_parent)
@@ -729,11 +724,9 @@ device_found:
if (sysfs_device->bus[0] != '\0')
goto bus_found;
- loop = 10;
- tspec.tv_nsec = 10000000;
while (loop--) {
if (udev_sleep)
- nanosleep(&tspec, NULL);
+ usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
sysfs_get_device_bus(sysfs_device);
if (sysfs_device->bus[0] != '\0')
===== udev-add.c 1.73 vs edited =====
--- 1.73/udev-add.c 2004-08-05 00:41:08 +02:00
+++ edited/udev-add.c 2004-09-30 02:18:31 +02:00
@@ -340,11 +340,10 @@ exit:
/* wait for the "dev" file to show up in the directory in sysfs.
* If it doesn't happen in about 10 seconds, give up.
*/
-#define SECONDS_TO_WAIT_FOR_FILE 10
static int sleep_for_file(const char *path, char* file)
{
char filename[SYSFS_PATH_MAX + 6];
- int loop = SECONDS_TO_WAIT_FOR_FILE;
+ int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ;
int retval;
strfieldcpy(filename, sysfs_path);
@@ -360,7 +359,7 @@ static int sleep_for_file(const char *pa
goto exit;
/* sleep to give the kernel a chance to create the dev file */
- sleep(1);
+ usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ);
}
retval = -ENODEV;
exit:
===== udev.c 1.62 vs edited =====
--- 1.62/udev.c 2004-09-14 02:25:32 +02:00
+++ edited/udev.c 2004-09-30 03:52:06 +02:00
@@ -36,6 +36,9 @@
#include "namedev.h"
#include "udevdb.h"
+/* timeout flag for udevdb */
+extern sig_atomic_t gotalarm;
+
/* global variables */
char **main_argv;
char **main_envp;
@@ -58,6 +61,11 @@ void log_message(int level, const char *
asmlinkage static void sig_handler(int signum)
{
switch (signum) {
+ case SIGALRM:
+ gotalarm = 1;
+ info("error: timout reached, node probably not created, "
+ "please report to <linux-hotplug-devel@lists.sourceforge.net> ");
+ break;
case SIGINT:
case SIGTERM:
udevdb_exit();
@@ -147,14 +155,21 @@ int main(int argc, char *argv[], char *e
/* set signal handlers */
act.sa_handler = sig_handler;
sigemptyset (&act.sa_mask);
+
+ /* alarm should interrupt */
+ sigaction(SIGALRM, &act, NULL);
+
act.sa_flags = SA_RESTART;
sigaction(SIGINT, &act, NULL);
sigaction(SIGTERM, &act, NULL);
+ /* trigger timout */
+ alarm(20);
+
/* initialize udev database */
if (udevdb_init(UDEVDB_DEFAULT) != 0) {
dbg("unable to initialize database");
- goto exit;
+ exit(1);
}
switch(act_type) {
===== udev.h 1.62 vs edited =====
--- 1.62/udev.h 2004-09-14 14:29:10 +02:00
+++ edited/udev.h 2004-09-30 02:46:13 +02:00
@@ -26,6 +26,8 @@
#include <sys/param.h>
#include "libsysfs/sysfs/libsysfs.h"
+#define WAIT_FOR_FILE_SECONDS 10
+#define WAIT_FOR_FILE_RETRY_FREQ 10
#define COMMENT_CHARACTER '#'
#define NAME_SIZE 256
===== udevdb.c 1.30 vs edited =====
--- 1.30/udevdb.c 2004-06-29 14:51:35 +02:00
+++ edited/udevdb.c 2004-09-30 03:01:59 +02:00
@@ -42,7 +42,19 @@
#include "tdb/tdb.h"
static TDB_CONTEXT *udevdb;
+sig_atomic_t gotalarm;
+static void tdb_log(TDB_CONTEXT *tdb, int level, const char *format, ...)
+{
+ va_list args;
+
+ if (!udev_log)
+ return;
+
+ va_start(args, format);
+ vsyslog(level, format, args);
+ va_end(args);
+}
int udevdb_add_dev(const char *path, const struct udevice *dev)
{
@@ -121,7 +133,9 @@ int udevdb_init(int init_flag)
if (init_flag != UDEVDB_DEFAULT && init_flag != UDEVDB_INTERNAL)
return -EINVAL;
- udevdb = tdb_open(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644);
+ tdb_set_lock_alarm(&gotalarm);
+
+ udevdb = tdb_open_ex(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644, tdb_log);
if (udevdb == NULL) {
if (init_flag == UDEVDB_INTERNAL)
dbg("unable to initialize in-memory database");
@@ -137,7 +151,7 @@ int udevdb_init(int init_flag)
*/
int udevdb_open_ro(void)
{
- udevdb = tdb_open(udev_db_filename, 0, 0, O_RDONLY, 0);
+ udevdb = tdb_open_ex(udev_db_filename, 0, 0, O_RDONLY, 0, tdb_log);
if (udevdb == NULL) {
dbg("unable to open database at '%s'", udev_db_filename);
return -EACCES;
next prev parent reply other threads:[~2004-09-30 2:11 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-09-28 15:18 Hanging udev process on nfs-mounted /dev Frank Steiner
2004-09-29 17:18 ` Greg KH
2004-09-29 23:39 ` Kay Sievers
2004-09-30 2:11 ` Kay Sievers [this message]
2004-09-30 6:18 ` Frank Steiner
2004-09-30 6:21 ` Frank Steiner
2004-09-30 14:07 ` Kay Sievers
2004-10-01 6:25 ` Frank Steiner
2004-10-01 7:36 ` Kay Sievers
2004-10-01 7:38 ` Frank Steiner
2004-10-01 7:55 ` Frank Steiner
2004-10-01 8:08 ` Kay Sievers
2004-10-01 9:43 ` Frank Steiner
2004-10-01 9:57 ` Kay Sievers
2004-10-01 10:43 ` Kay Sievers
2004-10-01 22:18 ` Kay Sievers
2004-10-03 21:10 ` Frank Steiner
2004-10-03 23:07 ` Kay Sievers
2004-10-04 6:15 ` Frank Steiner
2004-10-04 14:19 ` Kay Sievers
2004-10-04 14:53 ` Frank Steiner
2004-10-05 15:37 ` Kay Sievers
2004-10-06 6:06 ` Frank Steiner
2004-10-06 12:00 ` Kay Sievers
2004-10-06 12:29 ` Frank Steiner
2004-10-08 5:59 ` Frank Steiner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040930021135.GA23623@vrfy.org \
--to=kay.sievers@vrfy.org \
--cc=linux-hotplug@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).