* Inotify scalability issue
@ 2012-06-19 13:51 Emil Renner Berthing
0 siblings, 0 replies; only message in thread
From: Emil Renner Berthing @ 2012-06-19 13:51 UTC (permalink / raw)
To: John McCutchan, Robert Love, Eric Paris
Cc: linux-kernel, Jesper Dahl Nyerup, Anders Saaby
[-- Attachment #1: Type: text/plain, Size: 2217 bytes --]
Hi,
We're running Dovecot mailservers and are experiencing problems similar
to what is described here:
http://old.nabble.com/Very-High-Load-on-Dovecot-2-and-Errors-in-mail.err.-tt33856207.html#a33856207
I've written two small programs to expose the problem.
watcher.c:
This program reads a filename from the commandline, creates a new
inotify handle and sets it up to watch IN_CLOSE_WRITE and IN_DELETE on
the file. It then writes a 'z' to stdout, and does a blocking read from
inotify. After receiving an event from inotify the program prints an 'x'
to stdout, closes the inotify handle and then prints a '.' to stdout
before exiting.
test.c:
This program creates 20 files and spawns 20 watchers to watch each of
them. For each watcher it waits between 1 and 2 seconds before touching
the file they watch (which should cause it to wake up and exit), and
then spawns a new watcher on the file, again waiting between 1 and 2
seconds before touching the file again etc.
On my dualcore workstation running the test program behaves as you'd
expect. That is it prints
zzzzzzzzzzzzzzzzzzzzx.zx.zx.zx.zx.zx.zx.zx.zx.zx.zx.zx.zx (etc.)
However on a 16-core server it behaves very differently:
zzzzzzzzzzzzzzzzzzzzxzxzxzxz.xzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxzxz......................................................................................................xzxzxzxzxz.xxzzxzxzxzxzxzxzxzxzxzxzxz.................xzxz.xz
(sorry about the long line)
That is watchers are spawned to watch their files, they're woken up by
inotify as they should be, but then they pile up in D-state waiting for
the close call to finish. Only at irregular intervals do they all return.
They seem to be sleeping on the syncronize_srcu() call in
fsnotify_destroy_group() of fs/notify/group.c.
We've tested this on various machines running kernels from 3.0 and up,
and the trend very clear: The more processors the worse it gets.
However, I also tried it on one 48-core server running an old 2.6.32
debian kernels, and here the processes don't pile up.
/Emil
[-- Attachment #2: watcher.c --]
[-- Type: text/x-c, Size: 1157 bytes --]
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/inotify.h>
#include <limits.h>
#define log(...) fprintf(stderr, __VA_ARGS__)
static void
put(int c)
{
putchar(c);
fflush(stdout);
}
int
main(int argc, char *argv[])
{
int ifd = -1;
int wd;
char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
if (argc != 2) {
log("I need a file..\n");
goto error;
}
ifd = inotify_init();
if (ifd < 0) {
if (errno == EMFILE)
put('!');
else
log("Error initializing inotify: %s\n",
strerror(errno));
goto error;
}
wd = inotify_add_watch(ifd, argv[1], IN_CLOSE_WRITE | IN_DELETE);
if (wd < 0) {
log("Error adding watch on '%s': %s\n",
argv[1], strerror(errno));
goto error;
}
put('z');
if (read(ifd, buf, sizeof(buf)) < 0) {
log("Error reading inotify event: %s\n",
strerror(errno));
goto error;
}
put('x');
if (close(ifd)) {
log("Error closing inotify: %s\n",
strerror(errno));
ifd = -1;
goto error;
}
put('.');
return EXIT_SUCCESS;
error:
if (ifd > 0)
close(ifd);
return EXIT_FAILURE;
}
[-- Attachment #3: test.c --]
[-- Type: text/x-c, Size: 2346 bytes --]
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include <signal.h>
#define log(...) fprintf(stderr, __VA_ARGS__)
#define WATCHER "watcher"
static struct proc {
uint64_t deadline;
char filename[32 - sizeof(uint64_t)];
} procs[NPROC];
static pid_t
spawn_watcher(char *filename)
{
char *argv[3] = { WATCHER, filename, NULL };
pid_t pid;
pid = fork();
if (pid == 0) {
if (execv(WATCHER, argv)) {
log("Error spawning '%s': %s\n",
WATCHER, strerror(errno));
exit(EXIT_FAILURE);
}
}
if (pid < 0)
log("Error forking: %s\n", strerror(errno));
return pid;
}
static int
touch(char *filename, int flags)
{
int fd = open(filename, flags, 0644);
if (fd < 0 || close(fd)) {
log("Error touching '%s': %s\n",
filename, strerror(errno));
return -1;
}
return 0;
}
static uint64_t
now(void)
{
struct timeval tv;
(void)gettimeofday(&tv, NULL);
return ((uint64_t)tv.tv_sec * 1000000) + (uint64_t)tv.tv_usec;
}
static uint64_t
random_delay(void)
{
return (uint64_t)DELAY_MIN + (random() / (RAND_MAX/(DELAY_MAX - DELAY_MIN)));
}
int
main()
{
unsigned int i;
struct proc *next;
if (signal(SIGCHLD, SIG_IGN)) {
log("Error setting SIGCHLD handler: %s\n",
strerror(errno));
return EXIT_FAILURE;
}
for (i = 0; i < NPROC; i++) {
(void)sprintf(procs[i].filename, "%03u.tmp", i);
if (touch(procs[i].filename, O_WRONLY | O_CREAT)) {
log("Error creating file '%s': %s\n",
procs[i].filename, strerror(errno));
return EXIT_FAILURE;
}
}
next = &procs[0];
for (i = 0; i < NPROC; i++) {
if (spawn_watcher(procs[i].filename) < 0)
goto error;
procs[i].deadline = now() + random_delay();
if (procs[i].deadline < next->deadline)
next = &procs[i];
}
while (1) {
uint64_t n = now();
if (next->deadline > n) {
usleep(next->deadline - n);
continue;
}
if (touch(next->filename, O_WRONLY))
goto error;
if (spawn_watcher(next->filename) < 0)
goto error;
next->deadline = now() + random_delay();
for (i = 0; i < NPROC; i++) {
if (procs[i].deadline < next->deadline)
next = &procs[i];
}
}
error:
for (i = 0; i < NPROC; i++)
(void)touch(procs[i].filename, O_WRONLY);
return EXIT_FAILURE;
}
[-- Attachment #4: Makefile --]
[-- Type: text/plain, Size: 362 bytes --]
NPROC = 20
DELAY_MIN = 1000000
DELAY_MAX = 2000000
CC = gcc
CFLAGS ?= -O2 -pipe -g
CFLAGS += -std=gnu99 -Wall -Wextra -pedantic
all: test watcher
test: test.c
$(CC) $(CFLAGS) -DNPROC=$(NPROC) -DDELAY_MIN=$(DELAY_MIN) -DDELAY_MAX=$(DELAY_MAX) $(LDFLAGS) $< -o $@
watcher: watcher.c
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
clean:
rm -rf test watcher *.tmp
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2012-06-19 14:00 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-19 13:51 Inotify scalability issue Emil Renner Berthing
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.