public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Shrinks sizeof(files_struct) and better layout
       [not found]     ` <4373698F.9010608@cosmosbay.com>
@ 2006-01-04  0:06       ` Eric Dumazet
  2006-01-04  9:11         ` Jan Engelhardt
                           ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04  0:06 UTC (permalink / raw)
  To: Andrew Morton, Linux kernel

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

[PATCH] shrink the fdset and reorder fields to give better multi-threaded 
performance.


This patch :

1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits 
platforms, lowering kmalloc() allocated space by 50%.

2) Reduces the size of (files_struct), using a special 32 bits (or 64bits) 
embedded_fd_set, instead of a 1024 bits fd_set for the close_on_exec_init and 
open_fds_init fields. This saves some RAM (248 bytes per task) as most tasks 
don't open more than 32 files. D-Cache footprint for such tasks is also reduced 
to the minimum.

3) Reduces size of allocated fdset. Currently two full pages are allocated, 
that is 32768 bits on x86 for example, and way too much. The minimum is now 
L1_CACHE_BYTES.

UP and SMP should benefit from this patch, because most tasks will touch only 
one cache line when open()/close() stdin/stdout/stderr (0/1/2), (next_fd, 
close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the same cache line)

Thank you

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: shrink-fdset-2.6.15.patch --]
[-- Type: text/plain, Size: 6345 bytes --]

--- linux-2.6.15/include/linux/file.h	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/include/linux/file.h	2006-01-04 00:35:17.000000000 +0100
@@ -17,10 +17,17 @@
  */
 #define NR_OPEN_DEFAULT BITS_PER_LONG
 
+/*
+ * The embedded_fd_set is a small fd_set,
+ * suitable for most tasks (which open <= BITS_PER_LONG files)
+ */
+typedef struct {
+	unsigned long fds_bits[1];
+} embedded_fd_set;
+
 struct fdtable {
 	unsigned int max_fds;
 	int max_fdset;
-	int next_fd;
 	struct file ** fd;      /* current fd array */
 	fd_set *close_on_exec;
 	fd_set *open_fds;
@@ -33,13 +40,20 @@
  * Open file table structure
  */
 struct files_struct {
+  /*
+   * read mostly part
+   */
 	atomic_t count;
 	struct fdtable *fdt;
 	struct fdtable fdtab;
-	fd_set close_on_exec_init;
-	fd_set open_fds_init;
+  /*
+   * written part on a separate cache line in SMP
+   */
+	spinlock_t file_lock ____cacheline_aligned_in_smp;
+	int next_fd;
+	embedded_fd_set close_on_exec_init;
+	embedded_fd_set open_fds_init;
 	struct file * fd_array[NR_OPEN_DEFAULT];
-	spinlock_t file_lock;     /* Protects concurrent writers.  Nests inside tsk->alloc_lock */
 };
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
--- linux-2.6.15/include/linux/init_task.h	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/include/linux/init_task.h	2006-01-04 00:32:05.000000000 +0100
@@ -7,11 +7,10 @@
 #define INIT_FDTABLE \
 {							\
 	.max_fds	= NR_OPEN_DEFAULT, 		\
-	.max_fdset	= __FD_SETSIZE, 		\
-	.next_fd	= 0, 				\
+	.max_fdset	= 8 * sizeof(embedded_fd_set),	\
 	.fd		= &init_files.fd_array[0], 	\
-	.close_on_exec	= &init_files.close_on_exec_init, \
-	.open_fds	= &init_files.open_fds_init, 	\
+	.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, \
+	.open_fds	= (fd_set *)&init_files.open_fds_init, 	\
 	.rcu		= RCU_HEAD_INIT, 		\
 	.free_files	= NULL,		 		\
 	.next		= NULL,		 		\
@@ -20,9 +19,10 @@
 #define INIT_FILES \
 { 							\
 	.count		= ATOMIC_INIT(1), 		\
-	.file_lock	= SPIN_LOCK_UNLOCKED, 		\
 	.fdt		= &init_files.fdtab, 		\
 	.fdtab		= INIT_FDTABLE,			\
+	.file_lock	= SPIN_LOCK_UNLOCKED, 		\
+	.next_fd	= 0, 				\
 	.close_on_exec_init = { { 0, } }, 		\
 	.open_fds_init	= { { 0, } }, 			\
 	.fd_array	= { NULL, } 			\
--- linux-2.6.15/kernel/fork.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/kernel/fork.c	2006-01-04 00:32:05.000000000 +0100
@@ -581,12 +581,12 @@
 	atomic_set(&newf->count, 1);
 
 	spin_lock_init(&newf->file_lock);
+	newf->next_fd = 0;
 	fdt = &newf->fdtab;
-	fdt->next_fd = 0;
 	fdt->max_fds = NR_OPEN_DEFAULT;
-	fdt->max_fdset = __FD_SETSIZE;
-	fdt->close_on_exec = &newf->close_on_exec_init;
-	fdt->open_fds = &newf->open_fds_init;
+	fdt->max_fdset = 8 * sizeof(embedded_fd_set);
+	fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+	fdt->open_fds = (fd_set *)&newf->open_fds_init;
 	fdt->fd = &newf->fd_array[0];
 	INIT_RCU_HEAD(&fdt->rcu);
 	fdt->free_files = NULL;
--- linux-2.6.15/fs/open.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/fs/open.c	2006-01-04 00:32:05.000000000 +0100
@@ -928,7 +928,7 @@
 	fdt = files_fdtable(files);
  	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
 				fdt->max_fdset,
-				fdt->next_fd);
+				files->next_fd);
 
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
@@ -953,7 +953,7 @@
 
 	FD_SET(fd, fdt->open_fds);
 	FD_CLR(fd, fdt->close_on_exec);
-	fdt->next_fd = fd + 1;
+	files->next_fd = fd + 1;
 #if 1
 	/* Sanity check */
 	if (fdt->fd[fd] != NULL) {
@@ -974,8 +974,8 @@
 {
 	struct fdtable *fdt = files_fdtable(files);
 	__FD_CLR(fd, fdt->open_fds);
-	if (fd < fdt->next_fd)
-		fdt->next_fd = fd;
+	if (fd < files->next_fd)
+		files->next_fd = fd;
 }
 
 void fastcall put_unused_fd(unsigned int fd)
--- linux-2.6.15/fs/fcntl.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/fs/fcntl.c	2006-01-04 00:32:05.000000000 +0100
@@ -72,8 +72,8 @@
 	 * orig_start..fdt->next_fd
 	 */
 	start = orig_start;
-	if (start < fdt->next_fd)
-		start = fdt->next_fd;
+	if (start < files->next_fd)
+		start = files->next_fd;
 
 	newfd = start;
 	if (start < fdt->max_fdset) {
@@ -101,9 +101,8 @@
 	 * we reacquire the fdtable pointer and use it while holding
 	 * the lock, no one can free it during that time.
 	 */
-	fdt = files_fdtable(files);
-	if (start <= fdt->next_fd)
-		fdt->next_fd = newfd + 1;
+	if (start <= files->next_fd)
+		files->next_fd = newfd + 1;
 
 	error = newfd;
 	
--- linux-2.6.15/fs/file.c	2006-01-03 04:21:10.000000000 +0100
+++ linux-2.6.15-edum/fs/file.c	2006-01-04 00:32:05.000000000 +0100
@@ -125,7 +125,8 @@
 		kmem_cache_free(files_cachep, fdt->free_files);
 		return;
 	}
-	if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+	if (fdt->max_fdset <= 8 * sizeof(embedded_fd_set) &&
+		fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
 		 * The fdtable was embedded
 		 */
@@ -155,8 +156,9 @@
 
 void free_fdtable(struct fdtable *fdt)
 {
-	if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
-					fdt->max_fds > NR_OPEN_DEFAULT)
+	if (fdt->free_files ||
+		fdt->max_fdset > 8 * sizeof(embedded_fd_set) ||
+		fdt->max_fds > NR_OPEN_DEFAULT)
 		call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
 
@@ -199,7 +201,6 @@
 		       (nfdt->max_fds - fdt->max_fds) *
 					sizeof(struct file *));
 	}
-	nfdt->next_fd = fdt->next_fd;
 }
 
 /*
@@ -220,11 +221,9 @@
 
 void free_fdset(fd_set *array, int num)
 {
-	int size = num / 8;
-
-	if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
+	if (num <= 8 * sizeof(embedded_fd_set)) /* Don't free an embedded fdset */
 		return;
-	else if (size <= PAGE_SIZE)
+	else if (num <= 8 * PAGE_SIZE)
 		kfree(array);
 	else
 		vfree(array);
@@ -237,22 +236,17 @@
   	fd_set *new_openset = NULL, *new_execset = NULL;
 	struct file **new_fds;
 
-	fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
 	if (!fdt)
   		goto out;
-	memset(fdt, 0, sizeof(*fdt));
 
-	nfds = __FD_SETSIZE;
+	nfds = 8 * L1_CACHE_BYTES;
   	/* Expand to the max in easy steps */
-  	do {
-		if (nfds < (PAGE_SIZE * 8))
-			nfds = PAGE_SIZE * 8;
-		else {
-			nfds = nfds * 2;
-			if (nfds > NR_OPEN)
-				nfds = NR_OPEN;
-		}
-	} while (nfds <= nr);
+  	while (nfds <= nr) {
+		nfds = nfds * 2;
+		if (nfds > NR_OPEN)
+			nfds = NR_OPEN;
+	}
 
   	new_openset = alloc_fdset(nfds);
   	new_execset = alloc_fdset(nfds);

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04  0:06       ` [PATCH] Shrinks sizeof(files_struct) and better layout Eric Dumazet
@ 2006-01-04  9:11         ` Jan Engelhardt
  2006-01-04 10:12           ` Eric Dumazet
  2006-01-04 10:45         ` Andi Kleen
  2006-01-04 11:45         ` Andrew Morton
  2 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2006-01-04  9:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, Linux kernel

> 2) Reduces the size of (files_struct), using a special 32 bits (or 64bits)
> embedded_fd_set, instead of a 1024 bits fd_set for the close_on_exec_init and
> open_fds_init fields. This save some ram (248 bytes per task)


> as most tasks dont open more than 32 files.

How do you know, have you done some empirical testing?



Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04  9:11         ` Jan Engelhardt
@ 2006-01-04 10:12           ` Eric Dumazet
  2006-01-04 10:28             ` Folkert van Heusden
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04 10:12 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Andrew Morton, Linux kernel

Jan Engelhardt a écrit :
>> 2) Reduces the size of (files_struct), using a special 32 bits (or 64bits)
>> embedded_fd_set, instead of a 1024 bits fd_set for the close_on_exec_init and
>> open_fds_init fields. This save some ram (248 bytes per task)
> 
> 
>> as most tasks dont open more than 32 files.
> 
> How do you know, have you done some empirical testing?
> 
20 years working on Unix/linux machines yes :)

Just try this script on your linux machines :

for f in /proc/*/fd; do ls $f|wc -l;done

more than 95% of tasks have less than 32 concurrent files opened.

(I remember working on AT&T Unix in 1985, with a limit of 20 concurrent files 
per process : it was just fine)

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 10:12           ` Eric Dumazet
@ 2006-01-04 10:28             ` Folkert van Heusden
  0 siblings, 0 replies; 22+ messages in thread
From: Folkert van Heusden @ 2006-01-04 10:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jan Engelhardt, Andrew Morton, Linux kernel

> >>2) Reduces the size of (files_struct), using a special 32 bits (or 64bits)
> >>embedded_fd_set, instead of a 1024 bits fd_set for the close_on_exec_init 
> >>and
> >>open_fds_init fields. This save some ram (248 bytes per task)
> >>as most tasks dont open more than 32 files.
> >How do you know, have you done some empirical testing?
> 20 years working on Unix/linux machines yes :)
> Just try this script on your linux machines :
> for f in /proc/*/fd; do ls $f|wc -l;done
> more than 95% of tasks have less than 32 concurrent files opened.

0 root@muur:/home/folkert# for f in /proc/*/fd; do ls $f|wc -l;done | awk '{TOT+=$1; N++;} END{ print TOT / N, N; }'
13.7079 291

So on my system (running 291 processes (postfix, mysql, apache,
asterisk, spamassassin, clamav) it is on average 13.7 filehandles.
On an idle veritas netbackup server (130 processes): 4
On a system running 4 vmware systems (137 processes): 16
On a heavily used mailserver (130 processes, sendmail and MailScanner
package): 6,6


Folkert van Heusden

-- 
Try MultiTail! Multiple windows with logfiles, filtered with regular
expressions, colored output, etc. etc. www.vanheusden.com/multitail/
----------------------------------------------------------------------
Get your PGP/GPG key signed at www.biglumber.com!
----------------------------------------------------------------------
Phone: +31-6-41278122, PGP-key: 1F28D8AE, www.vanheusden.com

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04  0:06       ` [PATCH] Shrinks sizeof(files_struct) and better layout Eric Dumazet
  2006-01-04  9:11         ` Jan Engelhardt
@ 2006-01-04 10:45         ` Andi Kleen
  2006-01-04 11:13           ` Eric Dumazet
  2006-01-04 11:45         ` Andrew Morton
  2 siblings, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2006-01-04 10:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel

Eric Dumazet <dada1@cosmosbay.com> writes:
> 
> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
> platforms, lowering kmalloc() allocated space by 50%.

It should be probably a kmem_cache_alloc() instead of a kmalloc
in the first place anyways. This would reduce fragmentation.

> 2) Reduces the size of (files_struct), using a special 32 bits (or
> 64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
> close_on_exec_init and open_fds_init fields. This save some ram (248
> bytes per task) as most tasks dont open more than 32 files. D-Cache
> footprint for such tasks is also reduced to the minimum.
> 
> 3) Reduces size of allocated fdset. Currently two full pages are
> allocated, that is 32768 bits on x86 for example, and way too
> much. The minimum is now L1_CACHE_BYTES.
> 
> UP and SMP should benefit from this patch, because most tasks will
> touch only one cache line when open()/close() stdin/stdout/stderr
> (0/1/2), (next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2]
> being in the same cache line)

Looks mostly good to me.

> +   * read mostly part
> +   */
>  	atomic_t count;
>  	struct fdtable *fdt;
>  	struct fdtable fdtab;
> -	fd_set close_on_exec_init;
> -	fd_set open_fds_init;
> +  /*
> +   * written part on a separate cache line in SMP
> +   */
> +	spinlock_t file_lock ____cacheline_aligned_in_smp;
> +	int next_fd;
> +	embedded_fd_set close_on_exec_init;
> +	embedded_fd_set open_fds_init;

You didn't describe that change, but unless it's clear the separate cache lines
are a win I would not do it and save memory again. Was this split based on
actual measurements or more theoretical considerations? 

-Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 10:45         ` Andi Kleen
@ 2006-01-04 11:13           ` Eric Dumazet
  2006-01-04 11:15             ` Andi Kleen
  2006-01-06  3:01             ` David Lang
  0 siblings, 2 replies; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04 11:13 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen a écrit :
> Eric Dumazet <dada1@cosmosbay.com> writes:
>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
>> platforms, lowering kmalloc() allocated space by 50%.
> 
> It should be probably a kmem_cache_alloc() instead of a kmalloc
> in the first place anyways. This would reduce fragmentation.

Well in theory yes, if you really expect thousands of tasks running...
But for most machines, the number of concurrent tasks is < 200, and using a 
special cache for this is not a win.

> 
>> +   * read mostly part
>> +   */
>>  	atomic_t count;
>>  	struct fdtable *fdt;
>>  	struct fdtable fdtab;
>> -	fd_set close_on_exec_init;
>> -	fd_set open_fds_init;
>> +  /*
>> +   * written part on a separate cache line in SMP
>> +   */
>> +	spinlock_t file_lock ____cacheline_aligned_in_smp;
>> +	int next_fd;
>> +	embedded_fd_set close_on_exec_init;
>> +	embedded_fd_set open_fds_init;
> 
> You didn't describe that change, but unless it's clear the separate cache lines
> are a win I would not do it and save memory again. Was this split based on
> actual measurements or more theoretical considerations? 

As it is a refinement on a previous patch (that was integrated in 2.6.15) that 
put spin_lock after the array[] (so clearly using a separate cache line), I 
omitted to describe it.

Yes, this part is really important because some multi-threaded benchmarks get 
a nice speedup with this separation in two parts.

Threads that are doing read()/write() (reading the first part of files_struct) 
are not slowed by others that do open()/close() syscalls (writing the second 
part), no false sharing (and no locking thanks to RCU of course).

Big Apache 2.0 servers, database servers directly benefit from this.

Note that processes that tend to create/destroy a lot of threads per second are 
writing into the 'count' field and might dirty the 'read mostly' part, but we can 
expect that well-written high-performance programs won't do this.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:13           ` Eric Dumazet
@ 2006-01-04 11:15             ` Andi Kleen
  2006-01-04 11:19               ` Eric Dumazet
  2006-01-06  3:01             ` David Lang
  1 sibling, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2006-01-04 11:15 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel

On Wednesday 04 January 2006 12:13, Eric Dumazet wrote:
> Andi Kleen a écrit :
> > Eric Dumazet <dada1@cosmosbay.com> writes:
> >> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
> >> platforms, lowering kmalloc() allocated space by 50%.
> > 
> > It should be probably a kmem_cache_alloc() instead of a kmalloc
> > in the first place anyways. This would reduce fragmentation.
> 
> Well in theory yes, if you really expect thousand of tasks running...
> But for most machines, number of concurrent tasks is < 200, and using a 
> special cache for this is not a win.

It is because it avoids fragmentation, because objects with similar lifetimes
are clustered together. In general caches are a win
if the data is nearly a page or more.

> 
> > 
> >> +   * read mostly part
> >> +   */
> >>  	atomic_t count;
> >>  	struct fdtable *fdt;
> >>  	struct fdtable fdtab;
> >> -	fd_set close_on_exec_init;
> >> -	fd_set open_fds_init;
> >> +  /*
> >> +   * written part on a separate cache line in SMP
> >> +   */
> >> +	spinlock_t file_lock ____cacheline_aligned_in_smp;
> >> +	int next_fd;
> >> +	embedded_fd_set close_on_exec_init;
> >> +	embedded_fd_set open_fds_init;
> > 
> > You didn't describe that change, but unless it's clear the separate cache lines
> > are a win I would not do it and save memory again. Was this split based on
> > actual measurements or more theoretical considerations? 
> 
> As it is a refinement on a previous patch (that was integrated in 2.6.15) that 
> put spin_lock after the array[] (so cleary using a separate cache line), I 
> omited to describe it.

Ok, perhaps you should describe that too then

-Andi

 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:15             ` Andi Kleen
@ 2006-01-04 11:19               ` Eric Dumazet
  2006-01-04 11:22                 ` Andi Kleen
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04 11:19 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen a écrit :
> On Wednesday 04 January 2006 12:13, Eric Dumazet wrote:
>> Andi Kleen a écrit :
>>> Eric Dumazet <dada1@cosmosbay.com> writes:
>>>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
>>>> platforms, lowering kmalloc() allocated space by 50%.
>>> It should be probably a kmem_cache_alloc() instead of a kmalloc
>>> in the first place anyways. This would reduce fragmentation.
>> Well in theory yes, if you really expect thousand of tasks running...
>> But for most machines, number of concurrent tasks is < 200, and using a 
>> special cache for this is not a win.
> 
> It is because it avoids fragmentation because objects with similar livetimes
> are clustered together. In general caches are a win
> if the data is nearly a page or more.

I don't understand your last sentence. Do you mean 'if the object size is near 
PAGE_SIZE' ?

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:19               ` Eric Dumazet
@ 2006-01-04 11:22                 ` Andi Kleen
  2006-01-04 11:41                   ` Eric Dumazet
  0 siblings, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2006-01-04 11:22 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel

On Wednesday 04 January 2006 12:19, Eric Dumazet wrote:
> Andi Kleen a écrit :
> > On Wednesday 04 January 2006 12:13, Eric Dumazet wrote:
> >> Andi Kleen a écrit :
> >>> Eric Dumazet <dada1@cosmosbay.com> writes:
> >>>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
> >>>> platforms, lowering kmalloc() allocated space by 50%.
> >>> It should be probably a kmem_cache_alloc() instead of a kmalloc
> >>> in the first place anyways. This would reduce fragmentation.
> >> Well in theory yes, if you really expect thousand of tasks running...
> >> But for most machines, number of concurrent tasks is < 200, and using a 
> >> special cache for this is not a win.
> > 
> > It is because it avoids fragmentation because objects with similar livetimes
> > are clustered together. In general caches are a win
> > if the data is nearly a page or more.
> 
> I dont undertand your last sentence. Do you mean 'if the object size is near 
> PAGE_SIZE' ?

Total data of all objects together. That's because caches always get their
own pages and cannot share them with other caches. The overhead of the kmem_cache_t 
by itself is negligible.

-Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:22                 ` Andi Kleen
@ 2006-01-04 11:41                   ` Eric Dumazet
  2006-01-04 11:58                     ` Andi Kleen
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04 11:41 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen a écrit :
> 
> Total data of all objects together. That's because caches always get their
> own pages and cannot share them with other caches.

OK for this part.

> The overhead of the kmem_cache_t by itself is negligible.

This seems a common misconception among kernel devs (even the best ones Andi :) )

On SMP (and/or NUMA) machines : overhead of kmem_cache_t is *big*

See enable_cpucache in mm/slab.c for 'limit' determination :

         if (cachep->objsize > 131072)
                 limit = 1;
         else if (cachep->objsize > PAGE_SIZE)
                 limit = 8;
         else if (cachep->objsize > 1024)
                 limit = 24;
         else if (cachep->objsize > 256)
                 limit = 54;
         else
                 limit = 120;

On a 64 bits machines, 120*sizeof(void*) = 120*8 = 960

So for small objects (<= 256 bytes), you end with a sizeof(array_cache) = 1024 
bytes per cpu

If 16 CPUS : 16*1024 = 16 Kbytes + all other kmem_cache structures : (If you 
have a lot of Memory Nodes, then it can be *very* big too).

If you know that no more than 100 objects are used in 99% of setups, then a 
dedicated cache is overkill, even locking 100 pages because of extreme 
fragmentation is better.

Probability that a *lot* of tasks are created at once and killed at once is 
close to 0 during a machine lifetime.


Maybe we can introduce an ultra basic memory allocator for such objects 
(without CPU caches, node caches), so that the memory overhead is small. 
Hitting a spinlock at thread creation/deletion time is not that time critical.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04  0:06       ` [PATCH] Shrinks sizeof(files_struct) and better layout Eric Dumazet
  2006-01-04  9:11         ` Jan Engelhardt
  2006-01-04 10:45         ` Andi Kleen
@ 2006-01-04 11:45         ` Andrew Morton
  2006-01-04 13:14           ` Eric Dumazet
  2006-01-04 23:24           ` [2.6 patch] Define BITS_PER_BYTE Adrian Bunk
  2 siblings, 2 replies; 22+ messages in thread
From: Andrew Morton @ 2006-01-04 11:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel

Eric Dumazet <dada1@cosmosbay.com> wrote:
>
> [PATCH] shrink the fdset and reorder fields to give better multi-threaded 
>  performance.

Boy, this is going to need some elaborate performance testing..

We can't have that `8 * sizeof(embedded_fd_set)' splattered all over the
tree.  This? 

 fs/file.c                 |    6 +++---
 include/linux/file.h      |    5 +++++
 include/linux/init_task.h |    2 +-
 kernel/fork.c             |    2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff -puN fs/file.c~shrinks-sizeoffiles_struct-and-better-layout-tidy fs/file.c
--- devel/fs/file.c~shrinks-sizeoffiles_struct-and-better-layout-tidy	2006-01-04 03:45:10.000000000 -0800
+++ devel-akpm/fs/file.c	2006-01-04 03:45:10.000000000 -0800
@@ -125,7 +125,7 @@ static void free_fdtable_rcu(struct rcu_
 		kmem_cache_free(files_cachep, fdt->free_files);
 		return;
 	}
-	if (fdt->max_fdset <= 8 * sizeof(embedded_fd_set) &&
+	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
 		fdt->max_fds <= NR_OPEN_DEFAULT) {
 		/*
 		 * The fdtable was embedded
@@ -157,7 +157,7 @@ static void free_fdtable_rcu(struct rcu_
 void free_fdtable(struct fdtable *fdt)
 {
 	if (fdt->free_files ||
-		fdt->max_fdset > 8 * sizeof(embedded_fd_set) ||
+		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
 		fdt->max_fds > NR_OPEN_DEFAULT)
 		call_rcu(&fdt->rcu, free_fdtable_rcu);
 }
@@ -221,7 +221,7 @@ fd_set * alloc_fdset(int num)
 
 void free_fdset(fd_set *array, int num)
 {
-	if (num <= 8 * sizeof(embedded_fd_set)) /* Don't free an embedded fdset */
+	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
 		return;
 	else if (num <= 8 * PAGE_SIZE)
 		kfree(array);
diff -puN include/linux/file.h~shrinks-sizeoffiles_struct-and-better-layout-tidy include/linux/file.h
--- devel/include/linux/file.h~shrinks-sizeoffiles_struct-and-better-layout-tidy	2006-01-04 03:45:10.000000000 -0800
+++ devel-akpm/include/linux/file.h	2006-01-04 03:45:10.000000000 -0800
@@ -25,6 +25,11 @@ typedef struct {
 	unsigned long fds_bits[1];
 } embedded_fd_set;
 
+/*
+ * More than this number of fds: we use a separately allocated fd_set
+ */
+#define EMBEDDED_FD_SET_SIZE	(8 * sizeof(struct embedded_fd_set))
+
 struct fdtable {
 	unsigned int max_fds;
 	int max_fdset;
diff -puN include/linux/init_task.h~shrinks-sizeoffiles_struct-and-better-layout-tidy include/linux/init_task.h
--- devel/include/linux/init_task.h~shrinks-sizeoffiles_struct-and-better-layout-tidy	2006-01-04 03:45:10.000000000 -0800
+++ devel-akpm/include/linux/init_task.h	2006-01-04 03:45:10.000000000 -0800
@@ -7,7 +7,7 @@
 #define INIT_FDTABLE \
 {							\
 	.max_fds	= NR_OPEN_DEFAULT, 		\
-	.max_fdset	= 8 * sizeof(embedded_fd_set),	\
+	.max_fdset	= EMBEDDED_FD_SET_SIZE,		\
 	.fd		= &init_files.fd_array[0], 	\
 	.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, \
 	.open_fds	= (fd_set *)&init_files.open_fds_init, 	\
diff -puN kernel/fork.c~shrinks-sizeoffiles_struct-and-better-layout-tidy kernel/fork.c
--- devel/kernel/fork.c~shrinks-sizeoffiles_struct-and-better-layout-tidy	2006-01-04 03:45:10.000000000 -0800
+++ devel-akpm/kernel/fork.c	2006-01-04 03:45:10.000000000 -0800
@@ -584,7 +584,7 @@ static struct files_struct *alloc_files(
 	newf->next_fd = 0;
 	fdt = &newf->fdtab;
 	fdt->max_fds = NR_OPEN_DEFAULT;
-	fdt->max_fdset = 8 * sizeof(embedded_fd_set);
+	fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
 	fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
 	fdt->open_fds = (fd_set *)&newf->open_fds_init;
 	fdt->fd = &newf->fd_array[0];
_


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:41                   ` Eric Dumazet
@ 2006-01-04 11:58                     ` Andi Kleen
  0 siblings, 0 replies; 22+ messages in thread
From: Andi Kleen @ 2006-01-04 11:58 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: linux-kernel

On Wednesday 04 January 2006 12:41, Eric Dumazet wrote:

> > The overhead of the kmem_cache_t by itself is negligible.
> 
> This seems a common misconception among kernel devs (even the best ones Andi :) )

It used to be true at least at some point :/

> 
> On SMP (and/or NUMA) machines : overhead of kmem_cache_t is *big*
> 
> See enable_cpucache in mm/slab.c for 'limit' determination :
> 
>          if (cachep->objsize > 131072)
>                  limit = 1;
>          else if (cachep->objsize > PAGE_SIZE)
>                  limit = 8;
>          else if (cachep->objsize > 1024)
>                  limit = 24;
>          else if (cachep->objsize > 256)
>                  limit = 54;
>          else
>                  limit = 120;
> 
> On a 64 bits machines, 120*sizeof(void*) = 120*8 = 960
> 
> So for small objects (<= 256 bytes), you end with a sizeof(array_cache) = 1024 
> bytes per cpu

Hmm - in theory it could be tuned down for SMT siblings which really don't
care about sharing because they have shared caches. But I don't know
how many complications that would add to the slab code.

> 
> If 16 CPUS : 16*1024 = 16 Kbytes + all other kmem_cache structures : (If you 
> have a lot of Memory Nodes, then it can be *very* big too).
> 
> If you know that no more than 100 objects are used in 99% of setups, then a 
> dedicated cache is overkill, even locking 100 pages because of extreme 
> fragmentation is better.

A system with 16 memory nodes should have more than 100 processes, but ok.

> 
> Maybe we can introduce an ultra basic memory allocator for such objects 
> (without CPU caches, node caches), so that the memory overhead is small. 
> Hitting a spinlock at thread creation/deletion time is not that time critical.

Might be a good idea yes. There used to a "simp" allocator long ago for this,
but it was removed because it had other issues. But this was before even slab
got the per cpu/node support.

-Andi
 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:45         ` Andrew Morton
@ 2006-01-04 13:14           ` Eric Dumazet
  2006-01-04 23:24           ` [2.6 patch] Define BITS_PER_BYTE Adrian Bunk
  1 sibling, 0 replies; 22+ messages in thread
From: Eric Dumazet @ 2006-01-04 13:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton a écrit :
> Eric Dumazet <dada1@cosmosbay.com> wrote:
>> [PATCH] shrink the fdset and reorder fields to give better multi-threaded 
>>  performance.
> 
> Boy, this is going to need some elaborate performance testing..
> 
> We can't have that `8 * sizeof(embedded_fd_set)' splattered all over the
> tree.  This? 
>

Fine patch Andrew :)

I was a bit unsure of introducing a XXXX_SIZE macro expressed in bits instead 
of bytes (more natural unit at least for us humans)

Thank you
Eric




^ permalink raw reply	[flat|nested] 22+ messages in thread

* [2.6 patch] Define BITS_PER_BYTE
  2006-01-04 11:45         ` Andrew Morton
  2006-01-04 13:14           ` Eric Dumazet
@ 2006-01-04 23:24           ` Adrian Bunk
  2006-01-05  7:03             ` Jan Engelhardt
  1 sibling, 1 reply; 22+ messages in thread
From: Adrian Bunk @ 2006-01-04 23:24 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Eric Dumazet, linux-kernel, Bryan O'Sullivan

On Wed, Jan 04, 2006 at 03:45:34AM -0800, Andrew Morton wrote:
>...
> +/*
> + * More than this number of fds: we use a separately allocated fd_set
> + */
> +#define EMBEDDED_FD_SET_SIZE	(8 * sizeof(struct embedded_fd_set))
> +
>...

What about applying and using the patch below?

cu
Adrian


<--  snip  -->


This can make some arithmetic expressions clearer.


Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>

--- a/include/linux/types.h	Wed Dec 28 14:19:42 2005 -0800
+++ b/include/linux/types.h	Wed Dec 28 14:19:42 2005 -0800
@@ -8,6 +8,8 @@
 	(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
+
+#define BITS_PER_BYTE 8
 #endif
 
 #include <linux/posix_types.h>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [2.6 patch] Define BITS_PER_BYTE
  2006-01-04 23:24           ` [2.6 patch] Define BITS_PER_BYTE Adrian Bunk
@ 2006-01-05  7:03             ` Jan Engelhardt
  2006-01-05 15:18               ` Bryan O'Sullivan
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2006-01-05  7:03 UTC (permalink / raw)
  To: Adrian Bunk
  Cc: Andrew Morton, Eric Dumazet, linux-kernel, Bryan O'Sullivan

>What about applying and using the patch below?
>
>cu
>Adrian
>
>This can make some arithmetic expressions clearer.
>
>+
>+#define BITS_PER_BYTE 8

Oh no :( This sounds as uncommon as CHAR_BIT in C.



Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [2.6 patch] Define BITS_PER_BYTE
  2006-01-05  7:03             ` Jan Engelhardt
@ 2006-01-05 15:18               ` Bryan O'Sullivan
  2006-01-05 19:19                 ` H. Peter Anvin
  0 siblings, 1 reply; 22+ messages in thread
From: Bryan O'Sullivan @ 2006-01-05 15:18 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Adrian Bunk, Andrew Morton, Eric Dumazet, linux-kernel

On Thu, 2006-01-05 at 08:03 +0100, Jan Engelhardt wrote:

> Oh no :( This sounds as uncommon as CHAR_BIT in C.

CHAR_BIT is completely unclear.  BITS_PER_BYTE is self-evident, and
makes it a lot more obvious when you're doing arithmetic that involves
counting bits.

	<b


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [2.6 patch] Define BITS_PER_BYTE
  2006-01-05 15:18               ` Bryan O'Sullivan
@ 2006-01-05 19:19                 ` H. Peter Anvin
  0 siblings, 0 replies; 22+ messages in thread
From: H. Peter Anvin @ 2006-01-05 19:19 UTC (permalink / raw)
  To: linux-kernel

Followup to:  <1136474301.31922.1.camel@serpentine.pathscale.com>
By author:    "Bryan O'Sullivan" <bos@pathscale.com>
In newsgroup: linux.dev.kernel
>
> On Thu, 2006-01-05 at 08:03 +0100, Jan Engelhardt wrote:
> 
> > Oh no :( This sounds as uncommon as CHAR_BIT in C.
> 
> CHAR_BIT is completely unclear.  BITS_PER_BYTE is self-evident, and
> makes it a lot more obvious when you're doing arithmetic that involves
> counting bits.
> 

Tough cookies.  The standard name for this define is CHAR_BIT, and
anyone who doesn't know that "char" means byte in C doesn't know the C
language.  "char" certainly doesn't mean "character" in this day of
UTF-8 and friends.

	-hpa


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-04 11:13           ` Eric Dumazet
  2006-01-04 11:15             ` Andi Kleen
@ 2006-01-06  3:01             ` David Lang
  2006-01-06  6:35               ` Eric Dumazet
  1 sibling, 1 reply; 22+ messages in thread
From: David Lang @ 2006-01-06  3:01 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andi Kleen, linux-kernel

On Wed, 4 Jan 2006, Eric Dumazet wrote:

> Date: Wed, 04 Jan 2006 12:13:25 +0100
> From: Eric Dumazet <dada1@cosmosbay.com>
> To: Andi Kleen <ak@suse.de>
> Cc: linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] Shrinks sizeof(files_struct) and better layout
> 
> Andi Kleen a écrit :
>> Eric Dumazet <dada1@cosmosbay.com> writes:
>>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
>>> platforms, lowering kmalloc() allocated space by 50%.
>> 
>> It should be probably a kmem_cache_alloc() instead of a kmalloc
>> in the first place anyways. This would reduce fragmentation.
>
> Well in theory yes, if you really expect thousands of tasks running...
> But for most machines, number of concurrent tasks is < 200, and using a 
> special cache for this is not a win.

is it enough of a win on machines with thousands of concurrent tasks that 
it would be a useful config option?

David Lang

-- 
There are two ways of constructing a software design. One way is to make it so simple that there are obviously no deficiencies. And the other way is to make it so complicated that there are no obvious deficiencies.
  -- C.A.R. Hoare


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-06  3:01             ` David Lang
@ 2006-01-06  6:35               ` Eric Dumazet
  2006-01-06  7:26                 ` David Lang
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2006-01-06  6:35 UTC (permalink / raw)
  To: David Lang; +Cc: Andi Kleen, linux-kernel

David Lang a écrit :
> On Wed, 4 Jan 2006, Eric Dumazet wrote:
>> Andi Kleen a écrit :
>>> Eric Dumazet <dada1@cosmosbay.com> writes:
>>>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
>>>> platforms, lowering kmalloc() allocated space by 50%.
>>>
>>> It should be probably a kmem_cache_alloc() instead of a kmalloc
>>> in the first place anyways. This would reduce fragmentation.
>>
>> Well in theory yes, if you really expect thousand of tasks running...
>> But for most machines, number of concurrent tasks is < 200, and using 
>> a special cache for this is not a win.
> 
> is it enough of a win on machines with thousands of concurrent tasks 
> that it would be a useful config option?

Well..., not if NR_CPUS is big too. (We just saw a thread on lkml about 
raising NR_CPUS to 1024 on ia64).

On a 1024 CPUS machine, a kmem cache could use at least 1 MB for its internal 
structures, plus 1024 pages (PAGE_SIZE) for holding the caches (one cache per 
CPU), if you assume at least one task was created on behalf each cpu.

if PAGE_SIZE is 64KB, you end up with 65 MB of ram for the cache. Even with 
100.000 tasks running on the machine, it's not a win.

slab caches are very complex machinery that consume O(NR_CPUS) ram.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-06  6:35               ` Eric Dumazet
@ 2006-01-06  7:26                 ` David Lang
  2006-01-06  7:37                   ` Eric Dumazet
  0 siblings, 1 reply; 22+ messages in thread
From: David Lang @ 2006-01-06  7:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andi Kleen, linux-kernel

On Fri, 6 Jan 2006, Eric Dumazet wrote:
> David Lang a écrit :
>> On Wed, 4 Jan 2006, Eric Dumazet wrote:
>>> Andi Kleen a écrit :
>>>> Eric Dumazet <dada1@cosmosbay.com> writes:
>>>>> 1) Reduces the size of (struct fdtable) to exactly 64 bytes on 32bits
>>>>> platforms, lowering kmalloc() allocated space by 50%.
>>>> 
>>>> It should be probably a kmem_cache_alloc() instead of a kmalloc
>>>> in the first place anyways. This would reduce fragmentation.
>>> 
>>> Well in theory yes, if you really expect thousand of tasks running...
>>> But for most machines, number of concurrent tasks is < 200, and using a 
>>> special cache for this is not a win.
>> 
>> is it enough of a win on machines with thousands of concurrent tasks that 
>> it would be a useful config option?
>
> Well..., not if NR_CPUS is big too. (We just saw a thread on lkml about 
> raising NR_CPUS to 1024 on ia64).
>
> On a 1024 CPUS machine, a kmem cache could use at least 1 MB for its internal 
> structures, plus 1024 pages (PAGE_SIZE) for holding the caches (one cache per 
> CPU), if you assume at least one task was created on behalf each cpu.
>
> if PAGE_SIZE is 64KB, you end up with 65 MB of ram for the cache. Even with 
> 100.000 tasks running on the machine, its not a win.
>
> slab caches are very complex machinery that consume O(NR_CPUS) ram.

Ok, so if you have large numbers of CPU's and large page sizes it's not 
useful. however, what about a 2-4 cpu machine with 4k page 
sizes, 8-32G of ram (a not unreasonable Opteron system config) that will 
be running 5,000-20,000 processes/threads?

I know people argue that programs that do such things are bad (and I 
definitely agree that they aren't optimized), but the reality is that some 
workloads are like that. if a machine is being built for such uses 
configuring the kernel to better tolerate such use may be useful

David Lang


-- 
There are two ways of constructing a software design. One way is to make it so simple that there are obviously no deficiencies. And the other way is to make it so complicated that there are no obvious deficiencies.
  -- C.A.R. Hoare


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-06  7:26                 ` David Lang
@ 2006-01-06  7:37                   ` Eric Dumazet
  2006-01-06  8:28                     ` David Lang
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Dumazet @ 2006-01-06  7:37 UTC (permalink / raw)
  To: David Lang; +Cc: Andi Kleen, linux-kernel

David Lang a écrit :
> Ok, so if you have large numbers of CPU's and large page sizes it's not 
> useful. however, what about a 2-4 cpu machine with 4k page sizes, 8-32G 
> of ram (a not unreasonable Opteron system config) that will be running 
> 5,000-20,000 processes/threads?

Don't forget 'struct files_struct' is shared between threads of one process.

So you may benefit from this 'special cache' only if you plan to run 20.000 processes.

> 
> I know people argue that programs that do such things are bad (and I 
> definantly agree that they aren't optimized), but the reality is that 
> some workloads are like that. if a machine is being built for such uses 
> configuring the kernel to better tolorate such use may be useful

If 20.000 processes run on a machine, I doubt the main problem of the sysadmin is 
the 'struct files_struct' placement in memory :)

Eric


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Shrinks sizeof(files_struct) and better layout
  2006-01-06  7:37                   ` Eric Dumazet
@ 2006-01-06  8:28                     ` David Lang
  0 siblings, 0 replies; 22+ messages in thread
From: David Lang @ 2006-01-06  8:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andi Kleen, linux-kernel

On Fri, 6 Jan 2006, Eric Dumazet wrote:

> David Lang a écrit :
>> Ok, so if you have large numbers of CPU's and large page sizes it's not 
>> useful. however, what about a 2-4 cpu machine with 4k page sizes, 8-32G of 
>> ram (a not unreasonable Opteron system config) that will be running 
>> 5,000-20,000 processes/threads?
>
> Dont forget 'struct files_struct' are shared between threads of one process.
>
> So may benefit from this 'special cache' only if you plan to run 20.000 
> processes.

Ok, that's why I was asking

>> 
>> I know people argue that programs that do such things are bad (and I 
>> definantly agree that they aren't optimized), but the reality is that some 
>> workloads are like that. if a machine is being built for such uses 
>> configuring the kernel to better tolorate such use may be useful
>
> If 20.000 process runs on a machine, I doubt the main problem of sysadmin is 
> about the 'struct files_struct' placement in memory :)

I have some boxes that routinely sit with 3,500-4,000 processes (fork 
heavy workload, ~1400 forks/sec so far) that topple over when they go much 
over 10,000 processes.

I'm only running 32 bit kernels with 1G of ram available, (no himem), I've 
been assuming that ram was my limiting factor, but it hasn't been 
enough of an issue to really track down yet :-)

David Lang

-- 
There are two ways of constructing a software design. One way is to make it so simple that there are obviously no deficiencies. And the other way is to make it so complicated that there are no obvious deficiencies.
  -- C.A.R. Hoare


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2006-01-06  8:28 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20051108185349.6e86cec3.akpm@osdl.org>
     [not found] ` <437226B1.4040901@cosmosbay.com>
     [not found]   ` <20051109220742.067c5f3a.akpm@osdl.org>
     [not found]     ` <4373698F.9010608@cosmosbay.com>
2006-01-04  0:06       ` [PATCH] Shrinks sizeof(files_struct) and better layout Eric Dumazet
2006-01-04  9:11         ` Jan Engelhardt
2006-01-04 10:12           ` Eric Dumazet
2006-01-04 10:28             ` Folkert van Heusden
2006-01-04 10:45         ` Andi Kleen
2006-01-04 11:13           ` Eric Dumazet
2006-01-04 11:15             ` Andi Kleen
2006-01-04 11:19               ` Eric Dumazet
2006-01-04 11:22                 ` Andi Kleen
2006-01-04 11:41                   ` Eric Dumazet
2006-01-04 11:58                     ` Andi Kleen
2006-01-06  3:01             ` David Lang
2006-01-06  6:35               ` Eric Dumazet
2006-01-06  7:26                 ` David Lang
2006-01-06  7:37                   ` Eric Dumazet
2006-01-06  8:28                     ` David Lang
2006-01-04 11:45         ` Andrew Morton
2006-01-04 13:14           ` Eric Dumazet
2006-01-04 23:24           ` [2.6 patch] Define BITS_PER_BYTE Adrian Bunk
2006-01-05  7:03             ` Jan Engelhardt
2006-01-05 15:18               ` Bryan O'Sullivan
2006-01-05 19:19                 ` H. Peter Anvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox