public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Theodore Tso <tytso@mit.edu>
To: Soeren Sandmann <sandmann@daimi.au.dk>
Cc: linux-kernel@vger.kernel.org
Subject: Re: stat benchmark
Date: Fri, 25 Apr 2008 15:48:46 -0400	[thread overview]
Message-ID: <20080425194845.GE21193@mit.edu> (raw)
In-Reply-To: <ye8wsmnezq9.fsf@camel05.daimi.au.dk>

[-- Attachment #1: Type: text/plain, Size: 964 bytes --]

On Thu, Apr 24, 2008 at 10:59:10PM +0200, Soeren Sandmann wrote:
> 
> Under the theory that disk seeks are killing us, one idea is to add a
> 'multistat' system call that would allow statting of many files at a
> time, which would give the disk scheduler more to work with. 

Why don't you try this version of your stat-benchmark first?  If you
give it the -s option, it will sort the files by inode number first.
I think you will find this should make a significant difference.

If it works, it would be really great if someone were to make a
generic library which could be used instead of readdir().  I have
something which works as an LD_PRELOAD, but apparently it's been
blowing up on 64-bit systems, and I haven't had time to debug it.
It's probably better to do it as a library which userspace
applications link against, anyway.  Would you or someone you know be
interested in maybe taking this idea and running with it?

Regards,

						- Ted


[-- Attachment #2: stat-benchmark.c --]
[-- Type: text/x-csrc, Size: 2821 bytes --]

#include <sys/time.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <getopt.h>

/*
 * Private copy of a directory entry.  The name is held through a
 * pointer to separately allocated storage, so an array of these can be
 * grown with realloc() and reordered with qsort().
 *
 * Within this program only d_ino and d_name are ever assigned.
 */
struct dirent_s {
	unsigned long long d_ino;	/* inode number; sort key for -s */
	long long d_off;		/* never assigned in this program */
	unsigned short int d_reclen;	/* never assigned in this program */
	unsigned char d_type;		/* never assigned in this program */
	char *d_name;			/* strdup()'d copy of the entry name */
};


/*
 * Fatal-error helper: print "<what> failed: <errno text>" on stderr and
 * terminate the process with exit status 1.  Does not return.
 */
static void
disaster (const char *what)
{
    const char *reason = strerror (errno);

    fprintf (stderr, "%s failed: %s\n", what, reason);
    exit (1);
}

/*
 * Write "3" to /proc/sys/vm/drop_caches.  Any failure to open, write or
 * close the file is fatal (reported through disaster()).
 */
static void
dump_caches (void)
{
    static const char level[] = "3";
    int fd;

    fd = open ("/proc/sys/vm/drop_caches", O_RDWR);
    if (fd < 0)
	disaster ("opening drop_caches");

    if (write (fd, level, strlen (level)) < 0)
	disaster ("writing drop_caches");

    if (close (fd) < 0)
	disaster ("closing drop_caches");
}

/*
 * qsort() comparator: orders struct dirent_s elements by ascending
 * inode number.
 *
 * Returns a negative value, zero, or a positive value when a's inode
 * number is respectively less than, equal to, or greater than b's.
 *
 * The inode numbers are compared at their full stored width and the
 * result is built from two comparisons.  Subtracting them and returning
 * the difference as an int gives the wrong sign whenever the gap does
 * not fit in an int, which hands qsort() an inconsistent ordering.
 */
static int ino_cmp(const void *a, const void *b)
{
	const struct dirent_s *ds_a = a;
	const struct dirent_s *ds_b = b;
	unsigned long long i_a = ds_a->d_ino;
	unsigned long long i_b = ds_b->d_ino;

	return (i_a > i_b) - (i_a < i_b);
}

/*
 * Convert a struct timeval to seconds expressed as a double
 * (whole seconds plus microseconds / 1e6).
 */
static double
tv_to_sec (const struct timeval *tv)
{
    double fraction = tv->tv_usec / 1000000.0;

    return tv->tv_sec + fraction;
}

/*
 * Elapsed time in seconds from *before to *after; negative when
 * *after is the earlier of the two.
 */
static double
time_diff (const struct timeval *before,
	   const struct timeval *after)
{
    double end = tv_to_sec (after);
    double start = tv_to_sec (before);

    return end - start;
}

/*
 * Return the smallest power of two that is strictly greater than n
 * (1 for any n below 1).
 */
static int
pot (int n)
{
    int power;

    for (power = 1; power <= n; power *= 2)
	;

    return power;
}

/*
 * Benchmark: time how long it takes to readdir() every entry of the
 * current directory, then how long it takes to stat() each of them.
 *
 * Options:
 *   -s   sort the entries by inode number before statting them
 *   -c   do not drop the kernel caches before starting
 *
 * Returns 0 on success.  Exits with status 1 after printing usage for
 * an unknown option, or through disaster() when opening or reading the
 * directory or allocating memory fails.
 */
int
main (int argc, char **argv)
{
    DIR *dir;
    struct dirent *ent;
    struct timeval before;
    struct timeval after;
    struct dirent_s *ds = NULL;
    int n_files = 0;
    int do_sort = 0;
    int drop_caches = 1;
    int i, c;

    while ((c = getopt (argc, argv, "sc")) != -1) {
	    switch (c) {
	    case 's':
		    do_sort++;
		    break;
	    case 'c':
		    drop_caches = 0;
		    break;
	    default:
		    /* Do not run the benchmark with options we did not
		     * understand. */
		    fprintf (stderr, "Usage: %s [-s] [-c]\n", argv[0]);
		    exit (1);
	    }
    }

    dir = opendir (".");
    if (!dir)
	disaster ("opendir");

    /* Dump caches */

    if (drop_caches)
	    dump_caches();

    sleep (1);

    /* Read directory */

    gettimeofday (&before, NULL);

    for (;;)
    {
	struct dirent_s *grown;

	/* readdir() returns NULL both at end of directory and on error;
	 * only a freshly cleared errno can tell the two apart. */
	errno = 0;
	ent = readdir (dir);
	if (!ent)
	    break;

	/* pot (n_files) is always greater than n_files, so slot
	 * n_files exists after this call. */
	grown = realloc (ds, sizeof (struct dirent_s) * pot (n_files));
	if (!grown)
	    disaster ("realloc");
	ds = grown;

	ds[n_files].d_name = strdup (ent->d_name);
	if (!ds[n_files].d_name)
	    disaster ("strdup");
	ds[n_files].d_ino = ent->d_ino;
	n_files++;
    }

    if (errno != 0)
	disaster ("readdir");

    gettimeofday (&after, NULL);

    printf ("Time to readdir(): %f s\n", time_diff (&before, &after));

    closedir (dir);

    if (do_sort && n_files > 0)
	    qsort(ds, n_files, sizeof(struct dirent_s), ino_cmp);

    /* Stat all the files */

    gettimeofday (&before, NULL);

    for (i = 0; i < n_files; ++i)
    {
	struct stat statbuf;

	/* The result is deliberately ignored: only the time taken is
	 * of interest, and an entry may legitimately fail to stat
	 * (e.g. a dangling symlink). */
	(void) stat (ds[i].d_name, &statbuf);
    }

    gettimeofday (&after, NULL);

    printf ("Time to stat %d files: %f s\n",
	    n_files, time_diff (&before, &after));

    for (i = 0; i < n_files; ++i)
	free (ds[i].d_name);
    free (ds);

    return 0;
}

[-- Attachment #3: spd_readdir.c --]
[-- Type: text/x-csrc, Size: 6551 bytes --]

/*
 * readdir accelerator
 *
 * (C) Copyright 2003, 2004 by Theodore Ts'o.
 *
 * Compile using the command:
 *
 * gcc -fPIC -o spd_readdir.so -shared spd_readdir.c -ldl
 *
 * Use it by setting the LD_PRELOAD environment variable:
 * 
 * export LD_PRELOAD=/usr/local/sbin/spd_readdir.so
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Public
 * License.
 * %End-Header%
 * 
 */

/* Number of entries by which the cached-entry array grows each time. */
#define ALLOC_STEPSIZE	100
/*
 * Initial value of max_dirsize.  A directory whose st_size exceeds
 * max_dirsize is not cached; a value of 0 disables that size check.
 */
#define MAX_DIRSIZE	0

/* Compile in the DEBUG_DIR() tracing hooks. */
#define DEBUG

/*
 * DEBUG_DIR(x) executes statement x only when the run-time flag
 * do_debug is non-zero.  Without DEBUG it expands to nothing.
 */
#ifdef DEBUG
#define DEBUG_DIR(x)	{if (do_debug) { x; }}
#else
#define DEBUG_DIR(x)
#endif

/* NOTE(review): presumably required for RTLD_NEXT and struct dirent64;
 * confirm against the C library in use. */
#define _GNU_SOURCE
#define __USE_LARGEFILE64

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <errno.h>
#include <dlfcn.h>

/*
 * One cached directory entry.  opendir() fills every field from the
 * struct dirent64 returned by the real readdir64(); the name lives in
 * its own malloc()'d buffer, which free_cached_dir() releases.
 */
struct dirent_s {
	unsigned long long d_ino;	/* inode number; sort key in ino_cmp() */
	long long d_off;		/* d_off copied from the real entry */
	unsigned short int d_reclen;	/* d_reclen copied from the real entry */
	unsigned char d_type;		/* d_type copied from the real entry */
	char *d_name;			/* heap copy of the entry name */
};

/*
 * The object this file hands back to callers in place of a real DIR *.
 *
 * It is in one of two modes:
 *   - pass-through: dir is non-NULL and every call is forwarded to the
 *     real implementation on that stream;
 *   - cached: dir is NULL and entries are served from the dp[] array.
 */
struct dir_s {
	DIR	*dir;		/* real stream in pass-through mode, else NULL */
	int	num;		/* number of entries stored in dp[] */
	int	max;		/* number of entries dp[] has been sized for */
	struct dirent_s *dp;	/* cached entries (sorted by opendir()) */
	int	pos;		/* index in dp[] of the next entry to return */
	int	fd;		/* dup() of the directory fd; assigned only on the cached path */
	struct dirent ret_dir;		/* buffer returned by readdir() */
	struct dirent64 ret_dir64;	/* buffer returned by readdir64() */
};

/*
 * Pointers to the next definitions (found with dlsym(RTLD_NEXT, ...))
 * of the functions this file interposes.  They are filled in by
 * setup_ptr() the first time opendir() is called.
 */
static int (*real_closedir)(DIR *dir) = NULL;
static DIR *(*real_opendir)(const char *name) = NULL;
static struct dirent *(*real_readdir)(DIR *dir) = NULL;
static struct dirent64 *(*real_readdir64)(DIR *dir) = NULL;
static off_t (*real_telldir)(DIR *dir) = NULL;
static void (*real_seekdir)(DIR *dir, off_t offset) = NULL;
static int (*real_dirfd)(DIR *dir) = NULL;
/* Directories with an st_size above this are not cached; 0 = no limit. */
static unsigned long max_dirsize = MAX_DIRSIZE;
/* Count of open directories; only maintained inside DEBUG_DIR() output.
 * Declared with an explicit type: implicit int is not valid C99. */
static int num_open = 0;
#ifdef DEBUG
/* Non-zero enables the DEBUG_DIR() tracing at run time. */
static int do_debug = 0;
#endif

/*
 * One-time initialisation: resolve the real directory functions that
 * this file interposes and read the configuration from the environment.
 *
 *   SPD_READDIR_MAX_SIZE  decimal size limit stored in max_dirsize
 *   SPD_READDIR_DEBUG     if set (DEBUG builds only), enables tracing
 *
 * The caller's errno is left unchanged.
 */
static void setup_ptr(void)
{
	char *cp;

	real_opendir = dlsym(RTLD_NEXT, "opendir");
	real_closedir = dlsym(RTLD_NEXT, "closedir");
	real_readdir = dlsym(RTLD_NEXT, "readdir");
	real_readdir64 = dlsym(RTLD_NEXT, "readdir64");
	real_telldir = dlsym(RTLD_NEXT, "telldir");
	real_seekdir = dlsym(RTLD_NEXT, "seekdir");
	real_dirfd = dlsym(RTLD_NEXT, "dirfd");
	if ((cp = getenv("SPD_READDIR_MAX_SIZE")) != NULL) {
		int saved_errno = errno;
		char *end;
		unsigned long val;

		/*
		 * strtoul() rather than atol(): out-of-range input is
		 * reported instead of being undefined, and a value with
		 * no digits (or a minus sign, which strtoul() would wrap
		 * to a huge number) leaves the built-in default in place.
		 */
		errno = 0;
		val = strtoul(cp, &end, 10);
		if (end != cp && errno == 0 && strchr(cp, '-') == NULL)
			max_dirsize = val;
		errno = saved_errno;
	}
#ifdef DEBUG
	if (getenv("SPD_READDIR_DEBUG"))
		do_debug++;
#endif
}

static void free_cached_dir(struct dir_s *dirstruct)
{
	int i;

	if (!dirstruct->dp)
		return;

	for (i=0; i < dirstruct->num; i++) {
		free(dirstruct->dp[i].d_name);
	}
	free(dirstruct->dp);
	dirstruct->dp = 0;
}	

/*
 * qsort() comparator for struct dirent_s: ascending inode number, with
 * "." forced to sort as key 0 and ".." as key 1.
 *
 * Returns a negative value, zero, or a positive value when a sorts
 * before, equal to, or after b.
 *
 * Keys are held at the full width of the stored d_ino and the result is
 * built from two comparisons.  Returning the unsigned difference as an
 * int can flip the sign, or yield 0 for two different inodes, whenever
 * the gap does not fit in an int.
 */
static int ino_cmp(const void *a, const void *b)
{
	const struct dirent_s *ds_a = a;
	const struct dirent_s *ds_b = b;
	unsigned long long i_a = ds_a->d_ino;
	unsigned long long i_b = ds_b->d_ino;

	if (ds_a->d_name[0] == '.') {
		if (ds_a->d_name[1] == 0)
			i_a = 0;
		else if ((ds_a->d_name[1] == '.') && (ds_a->d_name[2] == 0))
			i_a = 1;
	}
	if (ds_b->d_name[0] == '.') {
		if (ds_b->d_name[1] == 0)
			i_b = 0;
		else if ((ds_b->d_name[1] == '.') && (ds_b->d_name[2] == 0))
			i_b = 1;
	}

	return (i_a > i_b) - (i_a < i_b);
}


DIR *opendir(const char *name)
{
	DIR *dir;
	struct dir_s	*dirstruct;
	struct dirent_s *ds, *dnew;
	struct dirent64 *d;
	struct stat st;

	if (!real_opendir)
		setup_ptr();

	DEBUG_DIR(printf("Opendir(%s) (%d open)\n", name, num_open++));
	dir = (*real_opendir)(name);
	if (!dir)
		return NULL;

	dirstruct = malloc(sizeof(struct dir_s));
	if (!dirstruct) {
		(*real_closedir)(dir);
		errno = -ENOMEM;
		return NULL;
	}
	dirstruct->num = 0;
	dirstruct->max = 0;
	dirstruct->dp = 0;
	dirstruct->pos = 0;
	dirstruct->dir = 0;

	if (max_dirsize && (stat(name, &st) == 0) && 
	    (st.st_size > max_dirsize)) {
		DEBUG_DIR(printf("Directory size %ld, using direct readdir\n",
				 st.st_size));
		dirstruct->dir = dir;
		return (DIR *) dirstruct;
	}

	while ((d = (*real_readdir64)(dir)) != NULL) {
		if (dirstruct->num >= dirstruct->max) {
			dirstruct->max += ALLOC_STEPSIZE;
			DEBUG_DIR(printf("Reallocating to size %d\n", 
					 dirstruct->max));
			dnew = realloc(dirstruct->dp, 
				       dirstruct->max * sizeof(struct dir_s));
			if (!dnew)
				goto nomem;
			dirstruct->dp = dnew;
		}
		ds = &dirstruct->dp[dirstruct->num++];
		ds->d_ino = d->d_ino;
		ds->d_off = d->d_off;
		ds->d_reclen = d->d_reclen;
		ds->d_type = d->d_type;
		if ((ds->d_name = malloc(strlen(d->d_name)+1)) == NULL) {
			dirstruct->num--;
			goto nomem;
		}
		strcpy(ds->d_name, d->d_name);
		DEBUG_DIR(printf("readdir: %lu %s\n", 
				 (unsigned long) d->d_ino, d->d_name));
	}
	dirstruct->fd = dup((*real_dirfd)(dir));
	(*real_closedir)(dir);
	qsort(dirstruct->dp, dirstruct->num, sizeof(struct dirent_s), ino_cmp);
	return ((DIR *) dirstruct);
nomem:
	DEBUG_DIR(printf("No memory, backing off to direct readdir\n"));
	free_cached_dir(dirstruct);
	dirstruct->dir = dir;
	return ((DIR *) dirstruct);
}

/*
 * Interposed closedir().
 *
 * Closes whichever underlying resource the handle owns (the real
 * stream in pass-through mode, the dup()'d descriptor in cached mode),
 * frees the cache and the handle.
 *
 * Returns the result of that close: 0 on success, -1 with errno set on
 * failure.  A NULL handle yields -1 with errno = EBADF.
 */
int closedir(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;
	int ret = 0;
	int saved_errno = 0;

	if (!dirstruct) {
		errno = EBADF;
		return -1;
	}

	DEBUG_DIR(printf("Closedir (%d open)\n", --num_open));
	if (dirstruct->dir) {
		/* Pass-through mode: fd holds no descriptor of ours, so
		 * it must not be closed. */
		ret = (*real_closedir)(dirstruct->dir);
	} else if (dirstruct->fd >= 0) {
		ret = close(dirstruct->fd);
	}
	if (ret < 0)
		saved_errno = errno;

	free_cached_dir(dirstruct);
	free(dirstruct);

	/* Report the close failure, not anything free() did to errno. */
	if (ret < 0)
		errno = saved_errno;
	return ret;
}

/*
 * Interposed readdir().
 *
 * In pass-through mode the call is forwarded to the real readdir().
 * Otherwise the next cached entry is copied into the handle's ret_dir
 * buffer and a pointer to that buffer is returned; the buffer is
 * overwritten by the next readdir() on the same handle.
 *
 * Returns NULL once all cached entries have been returned.
 */
struct dirent *readdir(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;
	struct dirent_s *ds;
	struct dirent *out;
	size_t len;

	if (dirstruct->dir)
		return (*real_readdir)(dirstruct->dir);

	/* The lower bound keeps a bad seekdir() offset from indexing
	 * before the start of the array. */
	if (dirstruct->pos < 0 || dirstruct->pos >= dirstruct->num)
		return NULL;

	ds = &dirstruct->dp[dirstruct->pos++];
	out = &dirstruct->ret_dir;
	out->d_ino = ds->d_ino;
	out->d_off = ds->d_off;
	out->d_reclen = ds->d_reclen;
	out->d_type = ds->d_type;

	/* Bounded copy that always NUL-terminates; strncpy() leaves the
	 * buffer unterminated when the source fills it. */
	len = strlen(ds->d_name);
	if (len >= sizeof(out->d_name))
		len = sizeof(out->d_name) - 1;
	memcpy(out->d_name, ds->d_name, len);
	out->d_name[len] = '\0';

	return out;
}

struct dirent64 *readdir64(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;
	struct dirent_s *ds;

	if (dirstruct->dir)
		return (*real_readdir64)(dirstruct->dir);

	if (dirstruct->pos >= dirstruct->num)
		return NULL;

	ds = &dirstruct->dp[dirstruct->pos++];
	dirstruct->ret_dir64.d_ino = ds->d_ino;
	dirstruct->ret_dir64.d_off = ds->d_off;
	dirstruct->ret_dir64.d_reclen = ds->d_reclen;
	dirstruct->ret_dir64.d_type = ds->d_type;
	strncpy(dirstruct->ret_dir64.d_name, ds->d_name,
		sizeof(dirstruct->ret_dir64.d_name));

	return (&dirstruct->ret_dir64);
}

/*
 * Interposed telldir(): the real stream's position in pass-through
 * mode, otherwise the index of the next cached entry to be returned.
 */
off_t telldir(DIR *dir)
{
	struct dir_s	*handle = (struct dir_s *) dir;
	off_t where;

	if (handle->dir)
		where = (*real_telldir)(handle->dir);
	else
		where = (off_t) handle->pos;

	return where;
}

/*
 * Interposed seekdir().
 *
 * In pass-through mode the call is forwarded to the real seekdir().
 * Otherwise offset is taken as an index into the cached entries (the
 * kind of value telldir() returns for a cached handle) and becomes the
 * next read position.
 *
 * The offset is clamped to [0, num]: a negative value would make the
 * next read index before the array, and a value too large for an int
 * would be narrowed unpredictably when stored in pos.
 */
void seekdir(DIR *dir, off_t offset)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;

	if (dirstruct->dir) {
		(*real_seekdir)(dirstruct->dir, offset);
		return;
	}

	if (offset < 0)
		offset = 0;
	else if (offset > (off_t) dirstruct->num)
		offset = (off_t) dirstruct->num;

	dirstruct->pos = (int) offset;
}

/*
 * Interposed dirfd(): the real stream's descriptor in pass-through
 * mode, otherwise the descriptor stored in the handle's fd field.
 */
int dirfd(DIR *dir)
{
	struct dir_s	*handle = (struct dir_s *) dir;

	return handle->dir ? (*real_dirfd)(handle->dir) : handle->fd;
}

  parent reply	other threads:[~2008-04-27 17:13 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-24 20:59 stat benchmark Soeren Sandmann
2008-04-24 21:42 ` Carl Henrik Lunde
2008-04-24 21:44 ` Jan Engelhardt
2008-04-25  2:27   ` Justin Banks
2008-04-25  7:01   ` Christoph Hellwig
2008-04-25 19:48 ` Theodore Tso [this message]
2008-04-27 23:29   ` Soeren Sandmann
2008-04-28  0:13     ` Carl Henrik Lunde
2008-04-28 19:41       ` Alexander Larsson
2008-04-28  2:10     ` Theodore Tso
2008-04-27 22:40 ` Carl Henrik Lunde
2008-04-28 17:46   ` Zach Brown
2008-04-28  4:43 ` Ulrich Drepper
2008-04-28 11:53   ` Theodore Tso
2008-04-28 11:59     ` Avi Kivity
2008-04-28 13:31       ` Theodore Tso
2008-04-28 16:18     ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080425194845.GE21193@mit.edu \
    --to=tytso@mit.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sandmann@daimi.au.dk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox