All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: lkml <linux-kernel@vger.kernel.org>
Subject: [patch 11/13] don't hold i_sem during O_DIRECT writes to blockdevs
Date: Sun, 28 Jul 2002 00:33:23 -0700	[thread overview]
Message-ID: <3D439E43.5F2DEE3D@zip.com.au> (raw)



We're moving in the direction of deprecating the raw driver and
recommending that applications use O_DIRECT reads and writes against
blockdevs.

I don't know how acceptable this will be - there may be
operational/admin reasons for preferring the raw driver, and there are
certainly application porting issues.  So we should continue to support
the raw driver well in 2.6.

One weakness which writes to blockdevs have wrt the raw driver is that
they are serialised under i_sem.  There is no need for this.

This patch changes O_DIRECT writes to blockdevs so that they no longer
run under i_sem.



 filemap.c |   71 ++++++++++++++++++++++++++++++++++++++------------------------
 1 files changed, 44 insertions(+), 27 deletions(-)

--- 2.5.29/mm/filemap.c~o_direct-i_sem	Sat Jul 27 23:39:13 2002
+++ 2.5.29-akpm/mm/filemap.c	Sat Jul 27 23:48:58 2002
@@ -1934,6 +1934,7 @@ generic_file_write(struct file *file, co
 	struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode 	*inode = mapping->host;
 	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	int		isblk = S_ISBLK(inode->i_mode);
 	long		status = 0;
 	loff_t		pos;
 	struct page	*page;
@@ -1953,19 +1954,19 @@ generic_file_write(struct file *file, co
 	pos = *ppos;
 	if (unlikely(pos < 0)) {
 		err = -EINVAL;
-		goto out;
+		goto out_sem;
 	}
 
 	if (unlikely(file->f_error)) {
 		err = file->f_error;
 		file->f_error = 0;
-		goto out;
+		goto out_sem;
 	}
 
 	written = 0;
 
 	/* FIXME: this is for backwards compatibility with 2.4 */
-	if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+	if (!isblk && file->f_flags & O_APPEND)
 		pos = inode->i_size;
 
 	/*
@@ -1975,7 +1976,7 @@ generic_file_write(struct file *file, co
 		if (pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
 			err = -EFBIG;
-			goto out;
+			goto out_sem;
 		}
 		if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
 			/* send_sig(SIGXFSZ, current, 0); */
@@ -1991,7 +1992,7 @@ generic_file_write(struct file *file, co
 		if (pos >= MAX_NON_LFS) {
 			send_sig(SIGXFSZ, current, 0);
 			err = -EFBIG;
-			goto out;
+			goto out_sem;
 		}
 		if (count > MAX_NON_LFS - (u32)pos) {
 			/* send_sig(SIGXFSZ, current, 0); */
@@ -2006,12 +2007,12 @@ generic_file_write(struct file *file, co
 	 * exceeded without writing data we send a signal and return EFBIG.
 	 * Linus frestrict idea will clean these up nicely..
 	 */
-	if (likely(!S_ISBLK(inode->i_mode))) {
+	if (likely(!isblk)) {
 		if (unlikely(pos >= inode->i_sb->s_maxbytes)) {
 			if (count || pos > inode->i_sb->s_maxbytes) {
 				send_sig(SIGXFSZ, current, 0);
 				err = -EFBIG;
-				goto out;
+				goto out_sem;
 			}
 			/* zero-length writes at ->s_maxbytes are OK */
 		}
@@ -2021,12 +2022,12 @@ generic_file_write(struct file *file, co
 	} else {
 		if (bdev_read_only(inode->i_bdev)) {
 			err = -EPERM;
-			goto out;
+			goto out_sem;
 		}
 		if (pos >= inode->i_size) {
 			if (count || pos > inode->i_size) {
 				err = -ENOSPC;
-				goto out;
+				goto out_sem;
 			}
 		}
 
@@ -2036,7 +2037,7 @@ generic_file_write(struct file *file, co
 
 	err = 0;
 	if (count == 0)
-		goto out;
+		goto out_sem;
 
 	remove_suid(file->f_dentry);
 	time_now = CURRENT_TIME;
@@ -2047,25 +2048,40 @@ generic_file_write(struct file *file, co
 	}
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
-		written = generic_file_direct_IO(WRITE, inode,
+		if (isblk) {
+			up(&inode->i_sem);
+			written = generic_file_direct_IO(WRITE, inode,
 						(char *)buf, pos, count);
-		if (written > 0) {
-			loff_t end = pos + written;
-			if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
-				inode->i_size = end;
-				mark_inode_dirty(inode);
+			if (written > 0) {
+				if (mapping->nrpages)
+					invalidate_inode_pages2(mapping);
+				*ppos = pos + written;
 			}
-			*ppos = end;
-			if (mapping->nrpages)
-				invalidate_inode_pages2(mapping);
+			err = written;
+			goto out;
+		} else {
+			written = generic_file_direct_IO(WRITE, inode,
+						(char *)buf, pos, count);
+			if (written > 0) {
+				loff_t end = pos + written;
+				if (end > inode->i_size) {
+					inode->i_size = end;
+					mark_inode_dirty(inode);
+				}
+				*ppos = end;
+				/*
+				 * Sync the fs metadata but not the minor inode
+				 * changes and of course not the data as we did
+				 * direct DMA for the IO.
+				 */
+				if (file->f_flags & O_SYNC)
+					status = generic_osync_inode(inode,
+								OSYNC_METADATA);
+				if (mapping->nrpages)
+					invalidate_inode_pages2(mapping);
+			}
+			goto out_status;
 		}
-		/*
-		 * Sync the fs metadata but not the minor inode changes and
-		 * of course not the data as we did direct DMA for the IO.
-		 */
-		if (written >= 0 && file->f_flags & O_SYNC)
-			status = generic_osync_inode(inode, OSYNC_METADATA);
-		goto out_status;
 	}
 
 	do {
@@ -2152,7 +2168,8 @@ generic_file_write(struct file *file, co
 	
 out_status:	
 	err = written ? written : status;
-out:
+out_sem:
 	up(&inode->i_sem);
+out:
 	return err;
 }

.

             reply	other threads:[~2002-07-28  7:24 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-07-28  7:33 Andrew Morton [this message]
2002-07-28 11:06 ` [patch 11/13] don't hold i_sem during O_DIRECT writes to blockdevs Christoph Hellwig
2002-07-28 17:55   ` Andrew Morton
2002-07-28 18:05     ` Christoph Hellwig
2002-07-28 18:41       ` Christoph Hellwig
2002-07-29  0:04 ` Linus Torvalds
2002-07-29  0:39   ` Andrew Morton
2002-07-29  0:47     ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D439E43.5F2DEE3D@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.