* Re: [patchset] Lockfree fd lookup 0 of 5
2004-08-02 10:10 Ravikiran G Thirumalai
@ 2004-08-02 10:18 ` Ravikiran G Thirumalai
2004-08-02 16:56 ` viro
1 sibling, 0 replies; 12+ messages in thread
From: Ravikiran G Thirumalai @ 2004-08-02 10:18 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, Greg KH, dipankar, viro
The following patch adds kref_lf_xxx api and kref_lf_get_rcu to enable
refcounting of objects part of a lockfree collection.
Thanks,
Kiran
D:
D: kref-lf-2.6.7.patch:
D: This patch depends on the kref shrinkage patches. This provides
D: infrastructure for lockfree refcounting and adds kref_lf_xxx api
D: for lockfree refcounting. This patch also changes kref_xxx to
D: static inlines since kref_xxx are mostly one-two liners
D:
diff -ruN -X /home/kiran/dontdiff linux-2.6.7/include/linux/kref.h test-2.6.7/include/linux/kref.h
--- linux-2.6.7/include/linux/kref.h 2004-08-01 22:37:46.000000000 +0530
+++ test-2.6.7/include/linux/kref.h 2004-08-01 22:50:06.000000000 +0530
@@ -16,16 +16,264 @@
#define _KREF_H_
#include <linux/types.h>
+#include <linux/kernel.h>
#include <asm/atomic.h>
-
struct kref {
atomic_t refcount;
};
-void kref_init(struct kref *kref);
-struct kref *kref_get(struct kref *kref);
-void kref_put(struct kref *kref, void (*release) (struct kref *kref));
+/**
+ * kref_init - initialize object.
+ * @kref: object in question.
+ */
+static inline void kref_init(struct kref *kref)
+{
+ atomic_set(&kref->refcount, 1);
+}
+
+/**
+ * kref_get - increment refcount for object.
+ * @kref: object.
+ */
+static inline struct kref *kref_get(struct kref *kref)
+{
+ WARN_ON(!atomic_read(&kref->refcount));
+ atomic_inc(&kref->refcount);
+ return kref;
+}
+
+/**
+ * kref_put - decrement refcount for object.
+ * @kref: object.
+ * @release: pointer to the function that will clean up the object
+ * when the last reference to the object is released.
+ * This pointer is required.
+ *
+ * Decrement the refcount, and if 0, call release().
+ */
+static inline void
+kref_put(struct kref *kref, void (*release) (struct kref * kref))
+{
+ if (atomic_dec_and_test(&kref->refcount)) {
+ WARN_ON(release == NULL);
+ pr_debug("kref cleaning up\n");
+ release(kref);
+ }
+}
+
+/**
+ * kref_read - Return the refcount value.
+ * @kref: object.
+ */
+static inline int kref_read(struct kref *kref)
+{
+ return atomic_read(&kref->refcount);
+}
+
+/**
+ * kref_put_last - decrement refcount for object.
+ * @kref: object.
+ *
+ * Decrement the refcount, and if 0 return true.
+ * Returns false otherwise.
+ * Use this only if you cannot use kref_put -- when the
+ * release function of kref_put needs more than just the
+ * refcounted object. Use of kref_put_last when kref_put
+ * can do will be frowned upon.
+ */
+static inline int kref_put_last(struct kref *kref)
+{
+ return atomic_dec_and_test(&kref->refcount);
+}
+
+/*
+ * Refcounter framework for elements of lists/arrays protected by
+ * RCU.
+ *
+ * Refcounting on elements of lists which are protected by traditional
+ * reader/writer spinlocks or semaphores are straight forward as in:
+ *
+ * 1. 2.
+ * add() search_and_reference()
+ * { {
+ * alloc_object read_lock(&list_lock);
+ * ... search_for_element
+ * kref_init(&el->rc) kref_get(&el->rc);
+ * write_lock(&list_lock); ...
+ * add_element read_unlock(&list_lock);
+ * ... ...
+ * write_unlock(&list_lock); }
+ * }
+ *
+ * 3. 4.
+ * release_referenced() delete()
+ * { {
+ * ... write_lock(&list_lock);
+ * if (kref_put_last(&el->rc)) ...
+ * start_cleanup_object ...
+ * free_object delete_element
+ * ... write_unlock(&list_lock);
+ * } ...
+ * if (kref_put_last(&el->rc))
+ * start_cleanup_object
+ * free_object
+ * }
+ *
+ * If this list/array is made lock free using rcu as in changing the
+ * write_lock in add() and delete() to spin_lock and changing read_lock
+ * in search_and_reference to rcu_read_lock(), the kref_get in
+ * search_and_reference could potentially hold reference to an element which
+ * has already been deleted from the list/array. kref_lf_get_rcu takes
+ * care of this scenario. search_and_reference should look as;
+ * 2.
+ * search_and_reference()
+ * {
+ * rcu_read_lock();
+ * search_for_element
+ * if (!kref_lf_get_rcu(&el->rc)) {
+ * rcu_read_unlock();
+ * return FAIL;
+ * }
+ * ...
+ * ...
+ * rcu_read_unlock();
+ * }
+ *
+ * Of course, free_object after kref_put_last should be batched using call_rcu.
+ * Sometimes, reference to the element need to be obtained in the
+ * update (write) stream. In such cases, kref_lf_get_rcu might be an overkill
+ * since the spinlock serialising list updates are held. kref_lf_get
+ * is to be used in such cases.
+ * Note: Except for kref_lf_get_rcu, kref_lf_xxx api are the same as
+ * corresponding kref_xxx api for most arches. However, for arches which
+ * donot have cmpxchg kref_lf_xxx api use a hashed spinlock implementation to
+ * implement kref_lf_get_rcu, and acquire the same hashed spinlock for
+ * kref_lf_get etc to preserve atomicity.
+ * Note: Use kref_lf_xxx api only if you need to use kref_lf_get_rcu on the
+ * refcounter atleast at one place. Mixing kref_lf_xx and kref_xxx api
+ * might lead to races.
+ *
+ */
+
+#ifdef __HAVE_ARCH_CMPXCHG
+
+#define kref_lf_init kref_init
+#define kref_lf_get kref_get
+#define kref_lf_put kref_put
+#define kref_lf_put_last kref_put_last
+
+/*
+ * cmpxchg is needed on UP too, if deletions to the list/array can happen
+ * in interrupt context.
+ */
+
+/**
+ * kref_lf_get_rcu - Take reference to an object of a lockfree collection
+ * by traversing a lockfree list/array.
+ * @kref: object.
+ *
+ * Try and increment the refcount by 1. The increment might fail if
+ * the refcounted object has been through a 1 to 0 transition and
+ * is no longer part of the lockfree list.
+ * Returns non-zero on successful increment and zero otherwise.
+ */
+static inline int kref_lf_get_rcu(struct kref *kref)
+{
+ int c, old;
+ c = atomic_read(&kref->refcount);
+ while (c && (old = cmpxchg(&kref->refcount.counter, c, c + 1)) != c)
+ c = old;
+ return c;
+}
+
+#else /* !__HAVE_ARCH_CMPXCHG */
+
+#include <linux/spinlock.h>
+
+/*
+ * We use an array of spinlocks for the krefs -- similar to ones in sparc
+ * 32 bit atomic_t implementations, and a hash function similar to that
+ * for our refcounting needs.
+ * Can't help multiprocessors which donot have cmpxchg :(
+ */
+
+#ifdef CONFIG_SMP
+#define KREF_HASH_SIZE 4
+#define KREF_HASH(k) \
+ (&__kref_hash[(((unsigned long)k)>>8) & (KREF_HASH_SIZE-1)])
+#else
+#define KREF_HASH_SIZE 1
+#define KREF_HASH(k) &__kref_hash[0]
+#endif /* CONFIG_SMP */
+
+extern spinlock_t __kref_hash[KREF_HASH_SIZE];
+
+static inline void kref_lf_init(struct kref *kref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(KREF_HASH(kref), flags);
+ kref->refcount.counter = 1;
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+}
+
+static inline void kref_lf_get(struct kref *kref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(KREF_HASH(kref), flags);
+ kref->refcount.counter += 1;
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+}
+
+static inline void
+kref_lf_put(struct kref *kref, void (*release) (struct kref * kref))
+{
+ unsigned long flags;
+ spin_lock_irqsave(KREF_HASH(kref), flags);
+ kref->refcount.counter--;
+ if (!kref->refcount.counter) {
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+ WARN_ON(release == NULL);
+ pr_debug("kref cleaning up\n");
+ release(kref);
+ } else {
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+ }
+}
+
+static inline int kref_lf_put_last(struct kref *kref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(KREF_HASH(kref), flags);
+ kref->refcount.counter--;
+ if (!kref->refcount.counter) {
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+ return 1;
+ } else {
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+ return 0;
+ }
+}
+
+static inline int kref_lf_get_rcu(struct kref *kref)
+{
+ int ret;
+ unsigned long flags;
+ spin_lock_irqsave(KREF_HASH(kref), flags);
+ if (kref->refcount.counter)
+ ret = kref->refcount.counter++;
+ else
+ ret = 0;
+ spin_unlock_irqrestore(KREF_HASH(kref), flags);
+ return ret;
+}
+
+#endif /* __HAVE_ARCH_CMPXCHG */
+/* Spinlocks are not needed in this case, if atomic_read is used */
+static inline int kref_lf_read(struct kref *kref)
+{
+ return atomic_read(&kref->refcount);
+}
-#endif /* _KREF_H_ */
+#endif /* _KREF_H_ */
diff -ruN -X /home/kiran/dontdiff linux-2.6.7/lib/kref.c test-2.6.7/lib/kref.c
--- linux-2.6.7/lib/kref.c 2004-08-01 22:37:46.000000000 +0530
+++ test-2.6.7/lib/kref.c 2004-08-01 21:41:19.000000000 +0530
@@ -11,49 +11,12 @@
*
*/
-/* #define DEBUG */
-
#include <linux/kref.h>
#include <linux/module.h>
-/**
- * kref_init - initialize object.
- * @kref: object in question.
- */
-void kref_init(struct kref *kref)
-{
- atomic_set(&kref->refcount,1);
-}
-
-/**
- * kref_get - increment refcount for object.
- * @kref: object.
- */
-struct kref *kref_get(struct kref *kref)
-{
- WARN_ON(!atomic_read(&kref->refcount));
- atomic_inc(&kref->refcount);
- return kref;
-}
-
-/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object
- * when the last reference to the object is released.
- * This pointer is required.
- *
- * Decrement the refcount, and if 0, call release().
- */
-void kref_put(struct kref *kref, void (*release) (struct kref *kref))
-{
- WARN_ON(release == NULL);
- if (atomic_dec_and_test(&kref->refcount)) {
- pr_debug("kref cleaning up\n");
- release(kref);
- }
-}
-
-EXPORT_SYMBOL(kref_init);
-EXPORT_SYMBOL(kref_get);
-EXPORT_SYMBOL(kref_put);
+#ifndef __HAVE_ARCH_CMPXCHG
+spinlock_t __kref_hash[KREF_HASH_SIZE] = {
+ [0 ... (KREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+};
+EXPORT_SYMBOL(__kref_hash);
+#endif
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [patchset] Lockfree fd lookup 0 of 5
2004-08-02 10:10 Ravikiran G Thirumalai
2004-08-02 10:18 ` Ravikiran G Thirumalai
@ 2004-08-02 16:56 ` viro
2004-08-02 20:07 ` David S. Miller
2004-08-03 9:23 ` Ravikiran G Thirumalai
1 sibling, 2 replies; 12+ messages in thread
From: viro @ 2004-08-02 16:56 UTC (permalink / raw)
To: Ravikiran G Thirumalai; +Cc: Andrew Morton, linux-kernel, Greg KH, dipankar
On Mon, Aug 02, 2004 at 03:40:55PM +0530, Ravikiran G Thirumalai wrote:
> Here is a patchset to eliminate taking struct files_struct.file_lock on
> reader side using rcu and rcu based refcounting. These patches
> extend the kref api to include kref_lf_xxx api and kref_lf_get_rcu to
> do lockfree refcounting, and use the same. As posted earlier, since fd
> lookups (struct files_struct.fd[]) will be lock free with these patches,
> threaded workloads doing lots of io should see performance benefits
> due to this patchset. I have observed 13-15% improvement with tiobench
> on a 4 way xeon with this patchset.
How about this for comparison? That's just a dumb "convert to rwlock"
patch; we can be smarter in e.g. close_on_exec handling, but that's a
separate story.
diff -urN RC8-rc2-bk12/arch/mips/kernel/irixioctl.c RC8-rc2-bk12-current/arch/mips/kernel/irixioctl.c
--- RC8-rc2-bk12/arch/mips/kernel/irixioctl.c 2004-03-17 15:29:37.000000000 -0500
+++ RC8-rc2-bk12-current/arch/mips/kernel/irixioctl.c 2004-08-02 12:24:00.000000000 -0400
@@ -33,7 +33,7 @@
struct file *filp;
struct tty_struct *ttyp = NULL;
- spin_lock(¤t->files->file_lock);
+ read_lock(¤t->files->file_lock);
filp = fcheck(fd);
if(filp && filp->private_data) {
ttyp = (struct tty_struct *) filp->private_data;
@@ -41,7 +41,7 @@
if(ttyp->magic != TTY_MAGIC)
ttyp =NULL;
}
- spin_unlock(¤t->files->file_lock);
+ read_unlock(¤t->files->file_lock);
return ttyp;
}
diff -urN RC8-rc2-bk12/arch/sparc64/solaris/ioctl.c RC8-rc2-bk12-current/arch/sparc64/solaris/ioctl.c
--- RC8-rc2-bk12/arch/sparc64/solaris/ioctl.c 2004-08-02 12:19:34.000000000 -0400
+++ RC8-rc2-bk12-current/arch/sparc64/solaris/ioctl.c 2004-08-02 12:24:23.000000000 -0400
@@ -294,15 +294,15 @@
{
struct inode *ino;
/* I wonder which of these tests are superfluous... --patrik */
- spin_lock(¤t->files->file_lock);
+ read_lock(¤t->files->file_lock);
if (! current->files->fd[fd] ||
! current->files->fd[fd]->f_dentry ||
! (ino = current->files->fd[fd]->f_dentry->d_inode) ||
! ino->i_sock) {
- spin_unlock(¤t->files->file_lock);
+ read_unlock(¤t->files->file_lock);
return TBADF;
}
- spin_unlock(¤t->files->file_lock);
+ read_unlock(¤t->files->file_lock);
switch (cmd & 0xff) {
case 109: /* SI_SOCKPARAMS */
diff -urN RC8-rc2-bk12/drivers/char/tty_io.c RC8-rc2-bk12-current/drivers/char/tty_io.c
--- RC8-rc2-bk12/drivers/char/tty_io.c 2004-08-02 12:19:35.000000000 -0400
+++ RC8-rc2-bk12-current/drivers/char/tty_io.c 2004-08-02 12:24:52.139649424 -0400
@@ -1978,7 +1978,7 @@
}
task_lock(p);
if (p->files) {
- spin_lock(&p->files->file_lock);
+ read_lock(&p->files->file_lock);
for (i=0; i < p->files->max_fds; i++) {
filp = fcheck_files(p->files, i);
if (!filp)
@@ -1992,7 +1992,7 @@
break;
}
}
- spin_unlock(&p->files->file_lock);
+ read_unlock(&p->files->file_lock);
}
task_unlock(p);
}
diff -urN RC8-rc2-bk12/fs/exec.c RC8-rc2-bk12-current/fs/exec.c
--- RC8-rc2-bk12/fs/exec.c 2004-08-02 12:19:36.000000000 -0400
+++ RC8-rc2-bk12-current/fs/exec.c 2004-08-02 12:40:31.000000000 -0400
@@ -762,7 +762,7 @@
{
long j = -1;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
for (;;) {
unsigned long set, i;
@@ -774,16 +774,16 @@
if (!set)
continue;
files->close_on_exec->fds_bits[j] = 0;
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
sys_close(i);
}
}
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
}
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
}
int flush_old_exec(struct linux_binprm * bprm)
diff -urN RC8-rc2-bk12/fs/fcntl.c RC8-rc2-bk12-current/fs/fcntl.c
--- RC8-rc2-bk12/fs/fcntl.c 2004-07-18 09:08:41.000000000 -0400
+++ RC8-rc2-bk12-current/fs/fcntl.c 2004-08-02 12:39:55.000000000 -0400
@@ -22,21 +22,21 @@
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (flag)
FD_SET(fd, files->close_on_exec);
else
FD_CLR(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
}
static inline int get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
int res;
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
res = FD_ISSET(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
return res;
}
@@ -133,15 +133,15 @@
struct files_struct * files = current->files;
int fd;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
fd = locate_fd(files, file, start);
if (fd >= 0) {
FD_SET(fd, files->open_fds);
FD_CLR(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
fd_install(fd, file);
} else {
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
fput(file);
}
@@ -154,7 +154,7 @@
struct file * file, *tofree;
struct files_struct * files = current->files;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (!(file = fcheck(oldfd)))
goto out_unlock;
err = newfd;
@@ -185,7 +185,7 @@
files->fd[newfd] = file;
FD_SET(newfd, files->open_fds);
FD_CLR(newfd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
if (tofree)
filp_close(tofree, files);
@@ -193,11 +193,11 @@
out:
return err;
out_unlock:
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
goto out;
out_fput:
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
fput(file);
goto out;
}
diff -urN RC8-rc2-bk12/fs/file.c RC8-rc2-bk12-current/fs/file.c
--- RC8-rc2-bk12/fs/file.c 2004-07-18 09:08:41.000000000 -0400
+++ RC8-rc2-bk12-current/fs/file.c 2004-08-02 12:30:35.000000000 -0400
@@ -65,7 +65,7 @@
goto out;
nfds = files->max_fds;
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
/*
* Expand to the max in easy steps, and keep expanding it until
@@ -89,7 +89,7 @@
error = -ENOMEM;
new_fds = alloc_fd_array(nfds);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (!new_fds)
goto out;
@@ -110,15 +110,15 @@
memset(&new_fds[i], 0,
(nfds-i) * sizeof(struct file *));
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
free_fd_array(old_fds, i);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
}
} else {
/* Somebody expanded the array while we slept ... */
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
free_fd_array(new_fds, nfds);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
}
error = 0;
out:
@@ -167,7 +167,7 @@
goto out;
nfds = files->max_fdset;
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
/* Expand to the max in easy steps */
do {
@@ -183,7 +183,7 @@
error = -ENOMEM;
new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (!new_openset || !new_execset)
goto out;
@@ -208,21 +208,21 @@
nfds = xchg(&files->max_fdset, nfds);
new_openset = xchg(&files->open_fds, new_openset);
new_execset = xchg(&files->close_on_exec, new_execset);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
free_fdset (new_openset, nfds);
free_fdset (new_execset, nfds);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
return 0;
}
/* Somebody expanded the array while we slept ... */
out:
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
if (new_openset)
free_fdset(new_openset, nfds);
if (new_execset)
free_fdset(new_execset, nfds);
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
return error;
}
diff -urN RC8-rc2-bk12/fs/file_table.c RC8-rc2-bk12-current/fs/file_table.c
--- RC8-rc2-bk12/fs/file_table.c 2004-05-10 00:23:35.000000000 -0400
+++ RC8-rc2-bk12-current/fs/file_table.c 2004-08-02 12:31:01.000000000 -0400
@@ -197,11 +197,11 @@
struct file *file;
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (file)
get_file(file);
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
return file;
}
@@ -223,13 +223,13 @@
if (likely((atomic_read(&files->count) == 1))) {
file = fcheck_files(files, fd);
} else {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (file) {
get_file(file);
*fput_needed = 1;
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
return file;
}
diff -urN RC8-rc2-bk12/fs/open.c RC8-rc2-bk12-current/fs/open.c
--- RC8-rc2-bk12/fs/open.c 2004-06-16 10:26:24.000000000 -0400
+++ RC8-rc2-bk12-current/fs/open.c 2004-08-02 12:33:29.000000000 -0400
@@ -841,7 +841,7 @@
int fd, error;
error = -EMFILE;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
repeat:
fd = find_next_zero_bit(files->open_fds->fds_bits,
@@ -890,7 +890,7 @@
error = fd;
out:
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
return error;
}
@@ -906,9 +906,9 @@
void fastcall put_unused_fd(unsigned int fd)
{
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
__put_unused_fd(files, fd);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
}
EXPORT_SYMBOL(put_unused_fd);
@@ -929,11 +929,11 @@
void fastcall fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (unlikely(files->fd[fd] != NULL))
BUG();
files->fd[fd] = file;
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
}
EXPORT_SYMBOL(fd_install);
@@ -1024,7 +1024,7 @@
struct file * filp;
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ write_lock(&files->file_lock);
if (fd >= files->max_fds)
goto out_unlock;
filp = files->fd[fd];
@@ -1033,11 +1033,11 @@
files->fd[fd] = NULL;
FD_CLR(fd, files->close_on_exec);
__put_unused_fd(files, fd);
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
return filp_close(filp, files);
out_unlock:
- spin_unlock(&files->file_lock);
+ write_unlock(&files->file_lock);
return -EBADF;
}
diff -urN RC8-rc2-bk12/fs/proc/base.c RC8-rc2-bk12-current/fs/proc/base.c
--- RC8-rc2-bk12/fs/proc/base.c 2004-07-18 09:08:42.000000000 -0400
+++ RC8-rc2-bk12-current/fs/proc/base.c 2004-08-02 12:55:13.630740688 -0400
@@ -191,16 +191,16 @@
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (file) {
*mnt = mntget(file->f_vfsmnt);
*dentry = dget(file->f_dentry);
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
return 0;
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
}
return -ENOENT;
@@ -805,7 +805,7 @@
files = get_files_struct(p);
if (!files)
goto out;
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (fd = filp->f_pos-2;
fd < files->max_fds;
fd++, filp->f_pos++) {
@@ -813,7 +813,7 @@
if (!fcheck_files(files, fd))
continue;
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
j = NUMBUF;
i = fd;
@@ -825,12 +825,12 @@
ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
break;
}
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
}
out:
@@ -1003,9 +1003,9 @@
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
if (fcheck_files(files, fd)) {
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
if (task_dumpable(task)) {
inode->i_uid = task->euid;
@@ -1017,7 +1017,7 @@
security_task_to_inode(task, inode);
return 1;
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
}
d_drop(dentry);
@@ -1109,7 +1109,7 @@
if (!files)
goto out_unlock;
inode->i_mode = S_IFLNK;
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
goto out_unlock2;
@@ -1117,7 +1117,7 @@
inode->i_mode |= S_IRUSR | S_IXUSR;
if (file->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR;
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
@@ -1127,7 +1127,7 @@
return NULL;
out_unlock2:
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
put_files_struct(files);
out_unlock:
iput(inode);
diff -urN RC8-rc2-bk12/fs/select.c RC8-rc2-bk12-current/fs/select.c
--- RC8-rc2-bk12/fs/select.c 2003-10-09 17:34:47.000000000 -0400
+++ RC8-rc2-bk12-current/fs/select.c 2004-08-02 12:34:06.000000000 -0400
@@ -184,9 +184,9 @@
int retval, i;
long __timeout = *timeout;
- spin_lock(¤t->files->file_lock);
+ read_lock(¤t->files->file_lock);
retval = max_select_fd(n, fds);
- spin_unlock(¤t->files->file_lock);
+ read_unlock(¤t->files->file_lock);
if (retval < 0)
return retval;
diff -urN RC8-rc2-bk12/include/linux/file.h RC8-rc2-bk12-current/include/linux/file.h
--- RC8-rc2-bk12/include/linux/file.h 2004-05-10 00:23:41.000000000 -0400
+++ RC8-rc2-bk12-current/include/linux/file.h 2004-08-02 12:35:52.374278504 -0400
@@ -21,7 +21,7 @@
*/
struct files_struct {
atomic_t count;
- spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
+ rwlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
int max_fds;
int max_fdset;
int next_fd;
diff -urN RC8-rc2-bk12/include/linux/init_task.h RC8-rc2-bk12-current/include/linux/init_task.h
--- RC8-rc2-bk12/include/linux/init_task.h 2004-07-18 09:08:44.000000000 -0400
+++ RC8-rc2-bk12-current/include/linux/init_task.h 2004-08-02 12:35:37.000000000 -0400
@@ -6,7 +6,7 @@
#define INIT_FILES \
{ \
.count = ATOMIC_INIT(1), \
- .file_lock = SPIN_LOCK_UNLOCKED, \
+ .file_lock = RW_LOCK_UNLOCKED, \
.max_fds = NR_OPEN_DEFAULT, \
.max_fdset = __FD_SETSIZE, \
.next_fd = 0, \
diff -urN RC8-rc2-bk12/kernel/fork.c RC8-rc2-bk12-current/kernel/fork.c
--- RC8-rc2-bk12/kernel/fork.c 2004-08-02 12:19:37.000000000 -0400
+++ RC8-rc2-bk12-current/kernel/fork.c 2004-08-02 12:37:08.000000000 -0400
@@ -680,7 +680,7 @@
atomic_set(&newf->count, 1);
- newf->file_lock = SPIN_LOCK_UNLOCKED;
+ newf->file_lock = RW_LOCK_UNLOCKED;
newf->next_fd = 0;
newf->max_fds = NR_OPEN_DEFAULT;
newf->max_fdset = __FD_SETSIZE;
@@ -693,13 +693,13 @@
size = oldf->max_fdset;
if (size > __FD_SETSIZE) {
newf->max_fdset = 0;
- spin_lock(&newf->file_lock);
+ write_lock(&newf->file_lock);
error = expand_fdset(newf, size-1);
- spin_unlock(&newf->file_lock);
+ write_unlock(&newf->file_lock);
if (error)
goto out_release;
}
- spin_lock(&oldf->file_lock);
+ read_lock(&oldf->file_lock);
open_files = count_open_files(oldf, size);
@@ -710,15 +710,15 @@
*/
nfds = NR_OPEN_DEFAULT;
if (open_files > nfds) {
- spin_unlock(&oldf->file_lock);
+ read_unlock(&oldf->file_lock);
newf->max_fds = 0;
- spin_lock(&newf->file_lock);
+ write_lock(&newf->file_lock);
error = expand_fd_array(newf, open_files-1);
- spin_unlock(&newf->file_lock);
+ write_unlock(&newf->file_lock);
if (error)
goto out_release;
nfds = newf->max_fds;
- spin_lock(&oldf->file_lock);
+ read_lock(&oldf->file_lock);
}
old_fds = oldf->fd;
@@ -733,7 +733,7 @@
get_file(f);
*new_fds++ = f;
}
- spin_unlock(&oldf->file_lock);
+ read_unlock(&oldf->file_lock);
/* compute the remainder to be cleared */
size = (newf->max_fds - open_files) * sizeof(struct file *);
diff -urN RC8-rc2-bk12/net/ipv4/netfilter/ipt_owner.c RC8-rc2-bk12-current/net/ipv4/netfilter/ipt_owner.c
--- RC8-rc2-bk12/net/ipv4/netfilter/ipt_owner.c 2004-07-18 09:08:45.000000000 -0400
+++ RC8-rc2-bk12-current/net/ipv4/netfilter/ipt_owner.c 2004-08-02 12:37:58.000000000 -0400
@@ -35,17 +35,17 @@
task_lock(p);
files = p->files;
if(files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (i=0; i < files->max_fds; i++) {
if (fcheck_files(files, i) ==
skb->sk->sk_socket->file) {
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
task_unlock(p);
read_unlock(&tasklist_lock);
return 1;
}
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
task_unlock(p);
} while_each_thread(g, p);
@@ -67,17 +67,17 @@
task_lock(p);
files = p->files;
if(files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (i=0; i < files->max_fds; i++) {
if (fcheck_files(files, i) ==
skb->sk->sk_socket->file) {
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
task_unlock(p);
read_unlock(&tasklist_lock);
return 1;
}
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
task_unlock(p);
out:
@@ -101,14 +101,14 @@
task_lock(p);
files = p->files;
if (files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (i=0; i < files->max_fds; i++) {
if (fcheck_files(files, i) == file) {
found = 1;
break;
}
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
task_unlock(p);
if (found)
diff -urN RC8-rc2-bk12/net/ipv6/netfilter/ip6t_owner.c RC8-rc2-bk12-current/net/ipv6/netfilter/ip6t_owner.c
--- RC8-rc2-bk12/net/ipv6/netfilter/ip6t_owner.c 2004-07-18 09:08:45.000000000 -0400
+++ RC8-rc2-bk12-current/net/ipv6/netfilter/ip6t_owner.c 2004-08-02 12:38:12.000000000 -0400
@@ -34,16 +34,16 @@
task_lock(p);
files = p->files;
if(files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (i=0; i < files->max_fds; i++) {
if (fcheck_files(files, i) == skb->sk->sk_socket->file) {
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
task_unlock(p);
read_unlock(&tasklist_lock);
return 1;
}
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
task_unlock(p);
out:
@@ -67,14 +67,14 @@
task_lock(p);
files = p->files;
if (files) {
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (i=0; i < files->max_fds; i++) {
if (fcheck_files(files, i) == file) {
found = 1;
break;
}
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
task_unlock(p);
if (found)
diff -urN RC8-rc2-bk12/security/selinux/hooks.c RC8-rc2-bk12-current/security/selinux/hooks.c
--- RC8-rc2-bk12/security/selinux/hooks.c 2004-08-02 12:19:37.000000000 -0400
+++ RC8-rc2-bk12-current/security/selinux/hooks.c 2004-08-02 12:39:03.000000000 -0400
@@ -1794,7 +1794,7 @@
AVC_AUDIT_DATA_INIT(&ad,FS);
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
for (;;) {
unsigned long set, i;
int fd;
@@ -1806,7 +1806,7 @@
set = files->open_fds->fds_bits[j];
if (!set)
continue;
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
file = fget(i);
@@ -1838,10 +1838,10 @@
fput(file);
}
}
- spin_lock(&files->file_lock);
+ read_lock(&files->file_lock);
}
- spin_unlock(&files->file_lock);
+ read_unlock(&files->file_lock);
}
static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
^ permalink raw reply [flat|nested] 12+ messages in thread