public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dipankar Sarma <dipankar@in.ibm.com>
To: viro@parcelfarce.linux.theplanet.co.uk
Cc: Andrew Morton <akpm@digeo.com>, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] reducing overheads in fget/fput
Date: Fri, 2 May 2003 22:47:26 +0530	[thread overview]
Message-ID: <20030502171726.GA1414@in.ibm.com> (raw)
In-Reply-To: <20030428195836.GD1105@in.ibm.com>

On Tue, Apr 29, 2003 at 01:28:36AM +0530, Dipankar Sarma wrote:
> On Mon, Apr 28, 2003 at 08:32:28PM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> > You are.  Have a process share file table at the time of call and
> > have its sibling die in the middle.  Oops - condition that had
> > been true at time of fget_light() (->files->count > 1) is false
> > at the time we fput_light().  Have fun - we had just leaked a
> > reference to struct file.
> 
> That shouldn't be very difficult to fix. For the fget_light/fput_light
> pair in a syscall, we make the files->count == 1 check only once at the 
> beginning. Do you see a problem with that ?

Here is a patch that fixes that. I re-did the measurements with
Andrew's experiment (dd if=/dev/zero of=foo bs=1 count=1M).
[4CPU P3 xeon with 1MB L2 cache and 512MB ram]

kernel           sys time     std-dev
------------     --------     -------
UP - vanilla     2.104        0.028
SMP - vanilla    2.976        0.023
UP - file        1.867        0.019
SMP - file       2.719        0.026

Thanks
Dipankar


diff -urN linux-2.5.66-base/fs/file_table.c linux-2.5.66-file/fs/file_table.c
--- linux-2.5.66-base/fs/file_table.c	2003-03-25 03:29:55.000000000 +0530
+++ linux-2.5.66-file/fs/file_table.c	2003-05-02 21:23:27.000000000 +0530
@@ -147,6 +147,13 @@
 		__fput(file);
 }
 
+void fput_light(struct file * file, int flag)
+{
+	if (unlikely(flag))
+		if (atomic_dec_and_test(&file->f_count))
+			__fput(file);
+}
+
 /* __fput is called from task context when aio completion releases the last
  * last use of a struct file *.  Do not use otherwise.
  */
@@ -190,6 +197,34 @@
 	return file;
 }
 
+/*
+ * Lightweight file lookup - no refcnt increment if fd table isn't shared. 
+ * You can use this only if it is guranteed that the current task already 
+ * holds a refcnt to that file. That check has to be done at fget() only
+ * and a flag is returned to be passed to the corresponding fput_light().
+ * There must not be a cloning between an fget_light/fput_light pair.
+ */
+struct file *fget_light(unsigned int fd, int *flag)
+{
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	*flag = 0;
+	if (likely((atomic_read(&files->count) == 1))) {
+		file = fcheck(fd);
+	} else {
+		read_lock(&files->file_lock);
+		file = fcheck(fd);
+		if (file) {
+			get_file(file);
+			*flag = 1;
+		}
+		read_unlock(&files->file_lock);
+	}
+	return file;
+}
+
+
 void put_filp(struct file *file)
 {
 	if (atomic_dec_and_test(&file->f_count)) {
diff -urN linux-2.5.66-base/fs/read_write.c linux-2.5.66-file/fs/read_write.c
--- linux-2.5.66-base/fs/read_write.c	2003-03-25 03:30:51.000000000 +0530
+++ linux-2.5.66-file/fs/read_write.c	2003-05-02 13:42:53.000000000 +0530
@@ -115,9 +115,10 @@
 {
 	off_t retval;
 	struct file * file;
+	int flag;
 
 	retval = -EBADF;
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (!file)
 		goto bad;
 
@@ -128,7 +129,7 @@
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
 	}
-	fput(file);
+	fput_light(file, flag);
 bad:
 	return retval;
 }
@@ -141,9 +142,10 @@
 	int retval;
 	struct file * file;
 	loff_t offset;
+	int flag;
 
 	retval = -EBADF;
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (!file)
 		goto bad;
 
@@ -161,7 +163,7 @@
 			retval = 0;
 	}
 out_putf:
-	fput(file);
+	fput_light(file, flag);
 bad:
 	return retval;
 }
@@ -251,11 +253,12 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_read(file, buf, count, &file->f_pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -265,11 +268,12 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_write(file, buf, count, &file->f_pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -280,14 +284,15 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_read(file, buf, count, &pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -298,14 +303,15 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_write(file, buf, count, &pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -479,11 +485,12 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_readv(file, vec, vlen, &file->f_pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -494,11 +501,12 @@
 {
 	struct file *file;
 	ssize_t ret = -EBADF;
+	int flag;
 
-	file = fget(fd);
+	file = fget_light(fd, &flag);
 	if (file) {
 		ret = vfs_writev(file, vec, vlen, &file->f_pos);
-		fput(file);
+		fput_light(file, flag);
 	}
 
 	return ret;
@@ -511,12 +519,13 @@
 	struct inode * in_inode, * out_inode;
 	loff_t pos;
 	ssize_t retval;
+	int flag_in, flag_out;
 
 	/*
 	 * Get input file, and verify that it is ok..
 	 */
 	retval = -EBADF;
-	in_file = fget(in_fd);
+	in_file = fget_light(in_fd, &flag_in);
 	if (!in_file)
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
@@ -539,7 +548,7 @@
 	 * Get output file, and verify that it is ok..
 	 */
 	retval = -EBADF;
-	out_file = fget(out_fd);
+	out_file = fget_light(out_fd, &flag_out);
 	if (!out_file)
 		goto fput_in;
 	if (!(out_file->f_mode & FMODE_WRITE))
@@ -579,9 +588,9 @@
 		retval = -EOVERFLOW;
 
 fput_out:
-	fput(out_file);
+	fput_light(out_file, flag_out);
 fput_in:
-	fput(in_file);
+	fput_light(in_file, flag_in);
 out:
 	return retval;
 }
diff -urN linux-2.5.66-base/include/linux/file.h linux-2.5.66-file/include/linux/file.h
--- linux-2.5.66-base/include/linux/file.h	2003-03-25 03:29:52.000000000 +0530
+++ linux-2.5.66-file/include/linux/file.h	2003-05-02 13:43:31.000000000 +0530
@@ -35,7 +35,9 @@
 
 extern void FASTCALL(__fput(struct file *));
 extern void FASTCALL(fput(struct file *));
+extern void FASTCALL(fput_light(struct file *, int));
 extern struct file * FASTCALL(fget(unsigned int fd));
+extern struct file * FASTCALL(fget_light(unsigned int fd, int *flag));
 extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag));
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);

  reply	other threads:[~2003-05-02 17:03 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-04-28 16:52 [PATCH] reducing overheads in fget/fput Dipankar Sarma
2003-04-28 19:32 ` viro
2003-04-28 19:58   ` Dipankar Sarma
2003-05-02 17:17     ` Dipankar Sarma [this message]
2003-05-02 20:54       ` Andrew Morton
2003-05-03  3:53         ` Dipankar Sarma
2003-05-03  4:00           ` Andrew Morton
2003-05-03  4:24             ` Dipankar Sarma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030502171726.GA1414@in.ibm.com \
    --to=dipankar@in.ibm.com \
    --cc=akpm@digeo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@parcelfarce.linux.theplanet.co.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox