public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: torvalds@osdl.org, akpm@osdl.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH] Use atomic ops for file_nr accounting, not spinlock+irq
Date: Tue, 11 Apr 2006 17:36:15 +0100	[thread overview]
Message-ID: <16476.1144773375@warthog.cambridge.redhat.com> (raw)


Make the kernel use atomic operations for files_stat.nr_files accounting
rather than using a spinlocked and interrupt-disabled critical section.  This
should improve reduce the latency of the critical section.

This patch is slightly problematic for a couple of reasons:

 (1) The sysctl code then implicitly casts the atomic value to an int, which
     I'm not sure is permissible.

 (2) linux/fs.h only includes asm/atomic.h if __KERNEL__ is defined, but the
     files_stat_struct is declared whether or not __KERNEL__ is defined.  To
     deal with this I've made the type of the nr_files member change to int if
     necessary.

     However, I'm not sure there's any need to present files_stat_struct to
     non-kernel code.

Additionally, I'm not sure that doing the accounting in the slab constructor
and destructor is a good idea: if ENFILE is returned, and then a whole bunch
of files are freed up thus permitting the next open to theoretically avoid
ENFILE, what's to say that the slab has actually recycled all those objects?

Under some conditions the destructors will only be invoked when the actual
slab pages are released, in which case the ENFILE condition won't necessarily
cease to be reported, even if it has actually gone away.

Consider where the files_cache has more than one object per slab, and after an
ENFILE condition a whole bunch of files are released, such as to leave all the
slab pages still allocated, with one struct file allocated in each...


Signed-Off-By: David Howells <dhowells@redhat.com>
---
warthog>diffstat -p1 /tmp/file-nr.diff
 fs/file_table.c    |   25 +++++++++----------------
 include/linux/fs.h |    8 +++++++-
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 1008050..67b0084 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -22,7 +22,8 @@
 
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
-	.max_files = NR_FILE
+	.max_files	= NR_FILE,
+	.nr_files	= ATOMIC_INIT(0),
 };
 
 EXPORT_SYMBOL(files_stat); /* Needed by unix.o */
@@ -33,26 +34,18 @@ EXPORT_SYMBOL(files_stat); /* Needed by 
 static DEFINE_SPINLOCK(filp_count_lock);
 
 /* slab constructors and destructors are called from arbitrary
- * context and must be fully threaded - use a local spinlock
- * to protect files_stat.nr_files
+ * context and must be fully threaded
  */
 void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags)
 {
 	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR) {
-		unsigned long flags;
-		spin_lock_irqsave(&filp_count_lock, flags);
-		files_stat.nr_files++;
-		spin_unlock_irqrestore(&filp_count_lock, flags);
-	}
+	    SLAB_CTOR_CONSTRUCTOR)
+		atomic_inc(&files_stat.nr_files);
 }
 
 void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&filp_count_lock, flags);
-	files_stat.nr_files--;
-	spin_unlock_irqrestore(&filp_count_lock, flags);
+	atomic_dec(&files_stat.nr_files);
 }
 
 static inline void file_free_rcu(struct rcu_head *head)
@@ -78,7 +71,7 @@ struct file *get_empty_filp(void)
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files >= files_stat.max_files &&
+	if (atomic_read(&files_stat.nr_files) >= files_stat.max_files &&
 				!capable(CAP_SYS_ADMIN))
 		goto over;
 
@@ -101,10 +94,10 @@ struct file *get_empty_filp(void)
 
 over:
 	/* Ran out of filps - report that */
-	if (files_stat.nr_files > old_max) {
+	if (atomic_read(&files_stat.nr_files) > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
 					files_stat.max_files);
-		old_max = files_stat.nr_files;
+		old_max = atomic_read(&files_stat.nr_files);
 	}
 	goto fail;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc9ecc0..533fd18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -9,6 +9,9 @@
 #include <linux/config.h>
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#ifdef __KERNEL__
+#include <asm/atomic.h>
+#endif
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -30,7 +33,11 @@
 
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
+#ifdef __KERNEL__
+	atomic_t nr_files;	/* read only */
+#else
 	int nr_files;		/* read only */
+#endif
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
@@ -218,7 +225,6 @@ extern int dir_notify_enable;
 #include <linux/sched.h>
 #include <linux/mutex.h>
 
-#include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include <asm/byteorder.h>
 

             reply	other threads:[~2006-04-11 16:36 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-11 16:36 David Howells [this message]
2006-04-11 22:05 ` [PATCH] Use atomic ops for file_nr accounting, not spinlock+irq Andrew Morton
2006-04-12 10:56   ` David Howells
2006-04-12 17:48     ` Trond Myklebust
2006-04-12 18:19       ` David Howells
2006-04-13  3:38         ` Trond Myklebust

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=16476.1144773375@warthog.cambridge.redhat.com \
    --to=dhowells@redhat.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox