public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "Theodore Ts'o" <tytso@mit.edu>
To: "H. Peter Anvin" <hpa@zytor.com>,
	Florian Weimer <fw@DENEB.ENYO.DE>, Andrew Morton <akpm@osdl.org>,
	Linus Torvalds <torvalds@osdl.org>,
	magnus.damm@gmail.com, mason@suse.com, mike.taht@timesys.com,
	mpm@selenic.com, linux-kernel@vger.kernel.org,
	git@vger.kernel.org
Subject: Re: Mercurial 0.3 vs git benchmarks
Date: Wed, 27 Apr 2005 15:57:53 -0400	[thread overview]
Message-ID: <20050427195753.GB7793@thunk.org> (raw)
In-Reply-To: <20050427190144.GA28848@cip.informatik.uni-erlangen.de>

On Wed, Apr 27, 2005 at 09:01:44PM +0200, Thomas Glanzmann wrote:
> Hello,
> 
> > Directory hashing slows down operations that do linear sweeps through 
> > the filesystem reading every single file, simply because without 
> > dir_index, there is likely to be a correlation between inode order and 
> > directory order, whereas with dir_index, readdir() returns entries in 
> > hash order.
> 
> thank you for the awareness training. Than mutt should be slower, too.
> Maybe I should repeat that tests.

If you are using the mutt in Debian unstable, it has the patch applied
which qsorts based on inode number returned from readdir(), which is
why you may not have been seeing the problem.

Or you can LD_PRELOAD the attached quick hack....

						- Ted

/*
 * readdir accelerator
 *
 * (C) Copyright 2003, 2004 by Theodore Ts'o.
 *
 * Compile using the command:
 *
 * gcc -o spd_readdir.so -shared spd_readdir.c -ldl
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Public
 * License.
 * %End-Header%
 * 
 */

#define ALLOC_STEPSIZE	100
#define MAX_DIRSIZE	0

#define DEBUG

#ifdef DEBUG
#define DEBUG_DIR(x)	{if (do_debug) { x; }}
#else
#define DEBUG_DIR(x)
#endif

#define _GNU_SOURCE
#define __USE_LARGEFILE64

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <errno.h>
#include <dlfcn.h>

struct dirent_s {
	unsigned long long d_ino;
	long long d_off;
	unsigned short int d_reclen;
	unsigned char d_type;
	char *d_name;
};

struct dir_s {
	DIR	*dir;
	int	num;
	int	max;
	struct dirent_s *dp;
	int	pos;
	int	fd;
	struct dirent ret_dir;
	struct dirent64 ret_dir64;
};

static int (*real_closedir)(DIR *dir) = 0;
static DIR *(*real_opendir)(const char *name) = 0;
static struct dirent *(*real_readdir)(DIR *dir) = 0;
static struct dirent64 *(*real_readdir64)(DIR *dir) = 0;
static off_t (*real_telldir)(DIR *dir) = 0;
static void (*real_seekdir)(DIR *dir, off_t offset) = 0;
static int (*real_dirfd)(DIR *dir) = 0;
static unsigned long max_dirsize = MAX_DIRSIZE;
static num_open = 0;
#ifdef DEBUG
static int do_debug = 0;
#endif

static void setup_ptr()
{
	char *cp;

	real_opendir = dlsym(RTLD_NEXT, "opendir");
	real_closedir = dlsym(RTLD_NEXT, "closedir");
	real_readdir = dlsym(RTLD_NEXT, "readdir");
	real_readdir64 = dlsym(RTLD_NEXT, "readdir64");
	real_telldir = dlsym(RTLD_NEXT, "telldir");
	real_seekdir = dlsym(RTLD_NEXT, "seekdir");
	real_dirfd = dlsym(RTLD_NEXT, "dirfd");
	if ((cp = getenv("SPD_READDIR_MAX_SIZE")) != NULL) {
		max_dirsize = atol(cp);
	}
#ifdef DEBUG
	if (getenv("SPD_READDIR_DEBUG"))
		do_debug++;
#endif
}

static void free_cached_dir(struct dir_s *dirstruct)
{
	int i;

	if (!dirstruct->dp)
		return;

	for (i=0; i < dirstruct->num; i++) {
		free(dirstruct->dp[i].d_name);
	}
	free(dirstruct->dp);
	dirstruct->dp = 0;
}	

static int ino_cmp(const void *a, const void *b)
{
	const struct dirent_s *ds_a = (const struct dirent_s *) a;
	const struct dirent_s *ds_b = (const struct dirent_s *) b;
	ino_t i_a, i_b;
	
	i_a = ds_a->d_ino;
	i_b = ds_b->d_ino;

	if (ds_a->d_name[0] == '.') {
		if (ds_a->d_name[1] == 0)
			i_a = 0;
		else if ((ds_a->d_name[1] == '.') && (ds_a->d_name[2] == 0))
			i_a = 1;
	}
	if (ds_b->d_name[0] == '.') {
		if (ds_b->d_name[1] == 0)
			i_b = 0;
		else if ((ds_b->d_name[1] == '.') && (ds_b->d_name[2] == 0))
			i_b = 1;
	}

	return (i_a - i_b);
}


DIR *opendir(const char *name)
{
	DIR *dir;
	struct dir_s	*dirstruct;
	struct dirent_s *ds, *dnew;
	struct dirent64 *d;
	struct stat st;

	if (!real_opendir)
		setup_ptr();

	DEBUG_DIR(printf("Opendir(%s) (%d open)\n", name, num_open++));
	dir = (*real_opendir)(name);
	if (!dir)
		return NULL;

	dirstruct = malloc(sizeof(struct dir_s));
	if (!dirstruct) {
		(*real_closedir)(dir);
		errno = -ENOMEM;
		return NULL;
	}
	dirstruct->num = 0;
	dirstruct->max = 0;
	dirstruct->dp = 0;
	dirstruct->pos = 0;
	dirstruct->dir = 0;

	if (max_dirsize && (stat(name, &st) == 0) && 
	    (st.st_size > max_dirsize)) {
		DEBUG_DIR(printf("Directory size %ld, using direct readdir\n",
				 st.st_size));
		dirstruct->dir = dir;
		return (DIR *) dirstruct;
	}

	while ((d = (*real_readdir64)(dir)) != NULL) {
		if (dirstruct->num >= dirstruct->max) {
			dirstruct->max += ALLOC_STEPSIZE;
			DEBUG_DIR(printf("Reallocating to size %d\n", 
					 dirstruct->max));
			dnew = realloc(dirstruct->dp, 
				       dirstruct->max * sizeof(struct dir_s));
			if (!dnew)
				goto nomem;
			dirstruct->dp = dnew;
		}
		ds = &dirstruct->dp[dirstruct->num++];
		ds->d_ino = d->d_ino;
		ds->d_off = d->d_off;
		ds->d_reclen = d->d_reclen;
		ds->d_type = d->d_type;
		if ((ds->d_name = malloc(strlen(d->d_name)+1)) == NULL) {
			dirstruct->num--;
			goto nomem;
		}
		strcpy(ds->d_name, d->d_name);
		DEBUG_DIR(printf("readdir: %lu %s\n", 
				 (unsigned long) d->d_ino, d->d_name));
	}
	dirstruct->fd = dup((*real_dirfd)(dir));
	(*real_closedir)(dir);
	qsort(dirstruct->dp, dirstruct->num, sizeof(struct dirent_s), ino_cmp);
	return ((DIR *) dirstruct);
nomem:
	DEBUG_DIR(printf("No memory, backing off to direct readdir\n"));
	free_cached_dir(dirstruct);
	dirstruct->dir = dir;
	return ((DIR *) dirstruct);
}

int closedir(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;

	DEBUG_DIR(printf("Closedir (%d open)\n", --num_open));
	if (dirstruct->dir)
		(*real_closedir)(dirstruct->dir);

	if (dirstruct->fd >= 0)
		close(dirstruct->fd);
	free_cached_dir(dirstruct);
	free(dirstruct);
	return 0;
}

struct dirent *readdir(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;
	struct dirent_s *ds;

	if (dirstruct->dir)
		return (*real_readdir)(dirstruct->dir);

	if (dirstruct->pos >= dirstruct->num)
		return NULL;

	ds = &dirstruct->dp[dirstruct->pos++];
	dirstruct->ret_dir.d_ino = ds->d_ino;
	dirstruct->ret_dir.d_off = ds->d_off;
	dirstruct->ret_dir.d_reclen = ds->d_reclen;
	dirstruct->ret_dir.d_type = ds->d_type;
	strncpy(dirstruct->ret_dir.d_name, ds->d_name,
		sizeof(dirstruct->ret_dir.d_name));

	return (&dirstruct->ret_dir);
}

struct dirent64 *readdir64(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;
	struct dirent_s *ds;

	if (dirstruct->dir)
		return (*real_readdir64)(dirstruct->dir);

	if (dirstruct->pos >= dirstruct->num)
		return NULL;

	ds = &dirstruct->dp[dirstruct->pos++];
	dirstruct->ret_dir64.d_ino = ds->d_ino;
	dirstruct->ret_dir64.d_off = ds->d_off;
	dirstruct->ret_dir64.d_reclen = ds->d_reclen;
	dirstruct->ret_dir64.d_type = ds->d_type;
	strncpy(dirstruct->ret_dir64.d_name, ds->d_name,
		sizeof(dirstruct->ret_dir64.d_name));

	return (&dirstruct->ret_dir64);
}

off_t telldir(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;

	if (dirstruct->dir)
		return (*real_telldir)(dirstruct->dir);

	return ((off_t) dirstruct->pos);
}

void seekdir(DIR *dir, off_t offset)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;

	if (dirstruct->dir) {
		(*real_seekdir)(dirstruct->dir, offset);
		return;
	}

	dirstruct->pos = offset;
}

int dirfd(DIR *dir)
{
	struct dir_s	*dirstruct = (struct dir_s *) dir;

	if (dirstruct->dir)
		return (*real_dirfd)(dirstruct->dir);

	return (dirstruct->fd);
}

  reply	other threads:[~2005-04-27 19:59 UTC|newest]

Thread overview: 101+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-26  0:41 Mercurial 0.3 vs git benchmarks Matt Mackall
2005-04-26  1:49 ` Daniel Phillips
2005-04-26  2:08 ` Linus Torvalds
2005-04-26  2:30   ` Mike Taht
2005-04-26  3:04     ` Linus Torvalds
2005-04-26  4:00       ` Linus Torvalds
2005-04-26 11:13         ` Chris Mason
2005-04-26 15:09           ` Magnus Damm
2005-04-26 15:38             ` Chris Mason
2005-04-26 16:23               ` Magnus Damm
2005-04-26 18:18                 ` Chris Mason
2005-04-26 20:56                 ` Andrew Morton
2005-04-26 21:07                   ` Linus Torvalds
2005-04-26 22:50                     ` H. Peter Anvin
2005-04-26 22:56                     ` Andrew Morton
2005-04-26 23:43                       ` H. Peter Anvin
2005-04-27 15:01                         ` Florian Weimer
2005-04-27 15:13                           ` Thomas Glanzmann
2005-04-27 18:54                             ` H. Peter Anvin
2005-04-27 19:01                               ` Thomas Glanzmann
2005-04-27 19:57                                 ` Theodore Ts'o [this message]
2005-04-27 20:06                                   ` Thomas Glanzmann
2005-04-27 20:35                                 ` H. Peter Anvin
2005-04-27 20:39                                   ` Thomas Glanzmann
2005-04-27 20:47                                   ` Florian Weimer
2005-04-27 20:55                                 ` Florian Weimer
2005-04-27 21:04                                   ` H. Peter Anvin
2005-04-27 21:06                                     ` Florian Weimer
2005-04-27 21:32                                       ` Theodore Ts'o
2005-04-27 19:55                       ` Theodore Ts'o
2005-04-27  6:34                   ` Ingo Molnar
2005-04-27 21:10                     ` Bill Davidsen
2005-04-27 21:39                       ` Linus Torvalds
2005-04-26 16:42           ` Linus Torvalds
2005-04-26 17:39             ` Chris Mason
2005-04-26 19:52               ` Chris Mason
2005-04-26 18:15         ` H. Peter Anvin
2005-04-26 20:30           ` Bill Davidsen
2005-04-26 16:11       ` Bill Davidsen
2005-04-26  4:01   ` Matt Mackall
2005-04-26  4:20     ` Linus Torvalds
2005-04-26  4:09   ` Chris Wedgwood
2005-04-26  4:22     ` Andreas Gal
2005-04-26  4:22     ` Linus Torvalds
2005-04-29  6:01   ` Mercurial 0.4b vs git patchbomb benchmark Matt Mackall
2005-04-29  6:40     ` Sean
2005-04-29  7:40       ` Matt Mackall
2005-04-29  8:40         ` Sean
2005-04-29 14:34         ` Linus Torvalds
2005-04-29 15:18           ` Morten Welinder
2005-04-29 16:52             ` Matt Mackall
2005-05-02 16:10               ` Bill Davidsen
2005-05-02 19:02                 ` Sean
2005-05-02 22:02                 ` Linus Torvalds
2005-05-02 22:30                   ` Matt Mackall
2005-05-02 22:49                     ` Linus Torvalds
2005-05-03  0:00                       ` Matt Mackall
2005-05-03  2:48                         ` Linus Torvalds
2005-05-03  3:29                           ` Matt Mackall
2005-05-03  4:18                             ` Linus Torvalds
2005-05-03  4:24                         ` Linus Torvalds
2005-05-03  4:27                           ` Matt Mackall
2005-05-03  8:45                           ` Chris Wedgwood
2005-04-29 15:44           ` Tom Lord
2005-04-29 15:58             ` Linus Torvalds
2005-04-29 17:34               ` Tom Lord
2005-04-29 17:56                 ` Linus Torvalds
2005-04-29 18:08                   ` Tom Lord
2005-04-29 18:33                     ` Sean
2005-04-29 18:54                       ` Tom Lord
2005-04-29 19:13                         ` Sean
2005-05-02 16:15                           ` Bill Davidsen
2005-04-29 16:37           ` Matt Mackall
2005-04-29 17:09             ` Linus Torvalds
2005-04-29 19:12               ` Matt Mackall
2005-04-29 19:50                 ` Linus Torvalds
2005-04-29 20:23                   ` Matt Mackall
2005-04-29 20:49                     ` Linus Torvalds
2005-04-29 21:20                       ` Matt Mackall
2005-04-29 16:46           ` Bill Davidsen
2005-04-29 20:19       ` Andrea Arcangeli
2005-04-29 22:30         ` Olivier Galibert
2005-04-29 22:47           ` Andrea Arcangeli
2005-04-29 20:30     ` Andrea Arcangeli
2005-04-29 20:39       ` Matt Mackall
2005-04-30  2:52         ` Andrea Arcangeli
2005-04-30 15:20           ` Matt Mackall
2005-04-30 16:37             ` Andrea Arcangeli
2005-05-02 15:49           ` Bill Davidsen
2005-05-02 16:14             ` Valdis.Kletnieks
2005-05-03 17:40               ` Bill Davidsen
2005-05-04  2:10                 ` Mercurial 0.4b vs git patchbomb benchmark (/usr/bin/env again) David A. Wheeler
2005-05-02 16:17             ` Mercurial 0.4b vs git patchbomb benchmark Andrea Arcangeli
2005-05-02 16:31             ` Linus Torvalds
2005-05-02 17:18               ` Daniel Jacobowitz
2005-05-02 17:32                 ` Linus Torvalds
2005-05-02 20:54                 ` Sam Ravnborg
2005-05-02 17:20               ` Ryan Anderson
2005-05-02 17:31                 ` Linus Torvalds
2005-05-02 21:17               ` Kyle Moffett
2005-05-03 17:43               ` Bill Davidsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050427195753.GB7793@thunk.org \
    --to=tytso@mit.edu \
    --cc=akpm@osdl.org \
    --cc=fw@DENEB.ENYO.DE \
    --cc=git@vger.kernel.org \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=magnus.damm@gmail.com \
    --cc=mason@suse.com \
    --cc=mike.taht@timesys.com \
    --cc=mpm@selenic.com \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox