All of lore.kernel.org
 help / color / mirror / Atom feed
From: Robert Love <rml@tech9.net>
To: torvalds@transmeta.com
Cc: viro@math.psu.edu, linux-kernel@vger.kernel.org
Subject: [PATCH] 2.5: push BKL out of llseek
Date: 29 Jan 2002 19:00:37 -0500	[thread overview]
Message-ID: <1012348838.817.50.camel@phantasy> (raw)

This patch pushes the BKL out of llseek() and into the individual llseek
methods.  For generic_file_llseek, I replaced it with the inode
semaphore.  The lock contention is noticeable even on 2-way systems. 
Since we simply push the BKL further down the call chain (it's the llseek
method's responsibility now), we aren't doing anything hackish or
unsafe.

I suspect some (Al) may consider this a suboptimal solution, and I
agree.  However, it is a first step -- tightening the locks -- toward a
better locking scheme, which is hopefully devoid of the BKL.

The best scores from a slew of dbench runs:

	(2.5.3-pre6 on 2-way Athlon)
	with patch	133.651	165.575	66.9876	37.5297	24.9436
	without patch	132.541	160.774	60.1174	33.2065	22.0126

Interestingly, the shorter lock times corresponded to an 8.9% reduction
in scheduling latency (under the above dbench load) with the preemptible
kernel.

	Robert Love

diff -urN linux-2.5.3-pre6/Documentation/filesystems/Locking linux/Documentation/filesystems/Locking
--- linux-2.5.3-pre6/Documentation/filesystems/Locking	Mon Jan 28 18:30:27 2002
+++ linux/Documentation/filesystems/Locking	Tue Jan 29 17:07:37 2002
@@ -219,7 +219,7 @@
 locking rules:
 	All except ->poll() may block.
 		BKL
-llseek:		yes
+llseek:		yes	(see below)
 read:		no
 write:		no
 readdir:	yes	(see below)
@@ -235,6 +235,10 @@
 readv:		no
 writev:		no
 
+->llseek() locking has moved from llseek to the individual llseek
+implementations.  If your fs is not using generic_file_llseek, you
+need to acquire and release the BKL in your ->llseek().
+
 ->open() locking is in-transit: big lock partially moved into the methods.
 The only exception is ->open() in the instances of file_operations that never
 end up in ->i_fop/->proc_fops, i.e. ones that belong to character devices
diff -urN linux-2.5.3-pre6/fs/block_dev.c linux/fs/block_dev.c
--- linux-2.5.3-pre6/fs/block_dev.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/block_dev.c	Tue Jan 29 16:49:52 2002
@@ -170,6 +170,8 @@
 	loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size;
 	loff_t retval;
 
+	lock_kernel();
+
 	switch (origin) {
 		case 2:
 			offset += size;
@@ -186,6 +188,7 @@
 		}
 		retval = offset;
 	}
+	unlock_kernel();
 	return retval;
 }
 	
diff -urN linux-2.5.3-pre6/fs/hfs/file_cap.c linux/fs/hfs/file_cap.c
--- linux-2.5.3-pre6/fs/hfs/file_cap.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/hfs/file_cap.c	Tue Jan 29 16:49:52 2002
@@ -91,6 +91,8 @@
 {
 	long long retval;
 
+	lock_kernel();
+
 	switch (origin) {
 		case 2:
 			offset += file->f_dentry->d_inode->i_size;
@@ -106,6 +108,7 @@
 		}
 		retval = offset;
 	}
+	unlock_kernel();
 	return retval;
 }
 
diff -urN linux-2.5.3-pre6/fs/hfs/file_hdr.c linux/fs/hfs/file_hdr.c
--- linux-2.5.3-pre6/fs/hfs/file_hdr.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/hfs/file_hdr.c	Tue Jan 29 16:49:52 2002
@@ -347,6 +347,8 @@
 {
 	long long retval;
 
+	lock_kernel();
+
 	switch (origin) {
 		case 2:
 			offset += file->f_dentry->d_inode->i_size;
@@ -362,6 +364,7 @@
 		}
 		retval = offset;
 	}
+	unlock_kernel();
 	return retval;
 }
 
diff -urN linux-2.5.3-pre6/fs/hpfs/dir.c linux/fs/hpfs/dir.c
--- linux-2.5.3-pre6/fs/hpfs/dir.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/hpfs/dir.c	Tue Jan 29 16:49:52 2002
@@ -29,6 +29,9 @@
 	struct inode *i = filp->f_dentry->d_inode;
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
 	struct super_block *s = i->i_sb;
+
+	lock_kernel();
+
 	/*printk("dir lseek\n");*/
 	if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
 	hpfs_lock_inode(i);
@@ -40,10 +43,12 @@
 	}
 	hpfs_unlock_inode(i);
 	ok:
+	unlock_kernel();
 	return filp->f_pos = new_off;
 	fail:
 	hpfs_unlock_inode(i);
 	/*printk("illegal lseek: %016llx\n", new_off);*/
+	unlock_kernel();
 	return -ESPIPE;
 }
 
diff -urN linux-2.5.3-pre6/fs/proc/generic.c linux/fs/proc/generic.c
--- linux-2.5.3-pre6/fs/proc/generic.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/proc/generic.c	Tue Jan 29 16:49:52 2002
@@ -16,6 +16,7 @@
 #include <linux/stat.h>
 #define __NO_VERSION__
 #include <linux/module.h>
+#include <linux/smp_lock.h>
 #include <asm/bitops.h>
 
 static ssize_t proc_file_read(struct file * file, char * buf,
@@ -140,22 +141,30 @@
 static loff_t
 proc_file_lseek(struct file * file, loff_t offset, int orig)
 {
+    lock_kernel();
+
     switch (orig) {
     case 0:
 	if (offset < 0)
-	    return -EINVAL;    
+	    goto out;
 	file->f_pos = offset;
+	unlock_kernel();
 	return(file->f_pos);
     case 1:
 	if (offset + file->f_pos < 0)
-	    return -EINVAL;    
+	    goto out;
 	file->f_pos += offset;
+	unlock_kernel();
 	return(file->f_pos);
     case 2:
-	return(-EINVAL);
+	goto out;
     default:
-	return(-EINVAL);
+	goto out;
     }
+
+out:
+    unlock_kernel();
+    return -EINVAL;
 }
 
 /*
diff -urN linux-2.5.3-pre6/fs/read_write.c linux/fs/read_write.c
--- linux-2.5.3-pre6/fs/read_write.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/read_write.c	Tue Jan 29 16:49:52 2002
@@ -29,6 +29,8 @@
 {
 	long long retval;
 
+	down(&file->f_dentry->d_inode->i_sem);
+
 	switch (origin) {
 		case 2:
 			offset += file->f_dentry->d_inode->i_size;
@@ -45,6 +47,7 @@
 		}
 		retval = offset;
 	}
+	up(&file->f_dentry->d_inode->i_sem);
 	return retval;
 }
 
@@ -57,6 +60,8 @@
 {
 	long long retval;
 
+	lock_kernel();
+
 	switch (origin) {
 		case 2:
 			offset += file->f_dentry->d_inode->i_size;
@@ -73,6 +78,7 @@
 		}
 		retval = offset;
 	}
+	unlock_kernel();
 	return retval;
 }
 
@@ -84,9 +90,7 @@
 	fn = default_llseek;
 	if (file->f_op && file->f_op->llseek)
 		fn = file->f_op->llseek;
-	lock_kernel();
 	retval = fn(file, offset, origin);
-	unlock_kernel();
 	return retval;
 }
 
diff -urN linux-2.5.3-pre6/fs/ufs/file.c linux/fs/ufs/file.c
--- linux-2.5.3-pre6/fs/ufs/file.c	Mon Jan 28 18:30:22 2002
+++ linux/fs/ufs/file.c	Tue Jan 29 16:49:52 2002
@@ -47,6 +47,8 @@
 	long long retval;
 	struct inode *inode = file->f_dentry->d_inode;
 
+	lock_kernel();
+
 	switch (origin) {
 		case 2:
 			offset += inode->i_size;
@@ -64,6 +66,7 @@
 		}
 		retval = offset;
 	}
+	unlock_kernel();
 	return retval;
 }


             reply	other threads:[~2002-01-30  0:08 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-01-30  0:00 Robert Love [this message]
2002-01-30  0:09 ` [PATCH] 2.5: push BKL out of llseek Linus Torvalds
2002-01-30  0:41   ` Robert Love
2002-01-30  0:52     ` Linus Torvalds
2002-01-30  2:24       ` Robert Love
2002-01-30  1:26     ` Andrew Morton
2002-01-30  2:16       ` Linus Torvalds
2002-01-30  2:20       ` Robert Love
2002-01-30  2:20         ` Andrew Morton
2002-01-30  2:21         ` Dave Jones
2002-01-30  2:37           ` Robert Love
2002-01-30  2:50         ` Nigel Gamble
2002-01-30  3:19           ` Andrew Morton
2002-01-30  9:34             ` Nigel Gamble
2002-01-30 10:36         ` Russell King
2002-01-30  4:54   ` Alexander Viro
2002-01-30  8:00     ` Trond Myklebust
2002-01-30 13:39       ` Robert Love
2002-01-30  4:50 ` Anton Blanchard
2002-01-30  5:03 ` Robert Love
  -- strict thread matches above, loose matches on Subject: below --
2002-01-30 21:14 Martin Wirth
2002-01-31 15:39 Martin Wirth
2002-01-31 21:06 ` Nigel Gamble
2002-02-01 19:29 John Hawkes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1012348838.817.50.camel@phantasy \
    --to=rml@tech9.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    --cc=viro@math.psu.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.