[RFC PATCH 2/2] O_DIRECT locking via placeholders

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Chris Mason <chris.mason@oracle.com>
To: linux-fsdevel@vger.kernel.org
Cc: akpm@osdl.org, zach.brown@oracle.com
Subject: [RFC PATCH 2/2] O_DIRECT locking via placeholders
Date: Tue, 24 Oct 2006 15:40:30 -0400	[thread overview]
Message-ID: <20061024194030.GG12815@think.oraclecorp.com> (raw)
In-Reply-To: <20061024193435.GE12815@think.oraclecorp.com>

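This changes O_DIRECT to take page locks or insert placeholder pages to
lock regions under direct I/O.  It also removes the i_alloc_sem read lock
taken for DIO_LOCKING and the filemap_write_and_wait_range() call made
before direct reads.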

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff -r 18a9e9f5c707 fs/direct-io.c
--- a/fs/direct-io.c	Thu Oct 19 08:30:00 2006 +0700
+++ b/fs/direct-io.c	Tue Oct 24 15:10:48 2006 -0400
@@ -35,6 +35,7 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <asm/atomic.h>
+#include <linux/writeback.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -94,6 +95,14 @@ struct dio {
 	struct buffer_head map_bh;	/* last get_block() result */
 
 	/*
+	 * kernel page pinning
+	 */
+	struct page fake;
+	struct page *tmppages[DIO_PAGES];
+	unsigned long fspages_start_off;
+	unsigned long fspages_end_off;
+
+	/*
 	 * Deferred addition of a page to the dio.  These variables are
 	 * private to dio_send_cur_page(), submit_page_section() and
 	 * dio_bio_add_page().
@@ -190,6 +199,28 @@ out:
 	return ret;	
 }
 
+static void unlock_page_range(struct dio *dio, unsigned long start,
+			      unsigned long nr)
+{
+	remove_placeholder_pages(dio->inode->i_mapping, dio->tmppages,
+				 &dio->fake,
+				 start, start + nr,
+				 ARRAY_SIZE(dio->tmppages));
+}
+
+static int lock_page_range(struct dio *dio, unsigned long start,
+			   unsigned long nr)
+{
+	struct address_space *mapping = dio->inode->i_mapping;
+	struct page *fake = &dio->fake;
+	unsigned long end = start + nr;
+	return find_or_insert_placeholders(mapping, dio->tmppages, start, end,
+	                                  ARRAY_SIZE(dio->tmppages),
+					  GFP_KERNEL, fake,
+					  dio->rw == READ);
+}
+
+
 /*
  * Get another userspace page.  Returns an ERR_PTR on error.  Pages are
  * buffered inside the dio so that we can call get_user_pages() against a
@@ -219,9 +250,9 @@ static void dio_complete(struct dio *dio
 {
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
+	unlock_page_range(dio, dio->fspages_start_off,
+			  dio->fspages_end_off - dio->fspages_start_off);
+	dio->fspages_end_off = dio->fspages_start_off;
 }
 
 /*
@@ -517,6 +548,7 @@ static int get_more_blocks(struct dio *d
 	unsigned long fs_count;	/* Number of filesystem-sized blocks */
 	unsigned long dio_count;/* Number of dio_block-sized blocks */
 	unsigned long blkmask;
+	unsigned long index;
 	int create;
 
 	/*
@@ -544,7 +576,21 @@ static int get_more_blocks(struct dio *d
 		} else if (dio->lock_type == DIO_NO_LOCKING) {
 			create = 0;
 		}
-
+	        index = fs_startblk >> (PAGE_CACHE_SHIFT -
+		                        dio->inode->i_blkbits);
+		if (index >= dio->fspages_end_off) {
+			unsigned long end;
+			unsigned long nr;
+			end = (dio->final_block_in_request >>
+			       dio->blkfactor) >>
+			      (PAGE_CACHE_SHIFT - dio->inode->i_blkbits);
+			nr = min(end - index + 1, (unsigned long)DIO_PAGES);
+			ret = lock_page_range(dio, dio->fspages_end_off, nr);
+			if (ret)
+				goto error;
+			dio->fspages_end_off += nr;
+			BUG_ON(index >= dio->fspages_end_off);
+		}
 		/*
 		 * For writes inside i_size we forbid block creations: only
 		 * overwrites are permitted.  We fall back to buffered writes
@@ -554,6 +600,7 @@ static int get_more_blocks(struct dio *d
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
+error:
 	return ret;
 }
 
@@ -944,7 +991,7 @@ out:
 }
 
 /*
- * Releases both i_mutex and i_alloc_sem
+ * Releases i_mutex
  */
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
@@ -1191,7 +1238,6 @@ __blockdev_direct_IO(int rw, struct kioc
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	int release_i_mutex = 0;
 	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
@@ -1221,11 +1267,14 @@ __blockdev_direct_IO(int rw, struct kioc
 				goto out;
 		}
 	}
-
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	retval = -ENOMEM;
 	if (!dio)
 		goto out;
+
+	set_page_placeholder(&dio->fake);
+	dio->fspages_start_off = offset >> PAGE_CACHE_SHIFT;
+	dio->fspages_end_off = dio->fspages_start_off;
 
 	/*
 	 * For block device access DIO_NO_LOCKING is used,
@@ -1240,30 +1289,11 @@ __blockdev_direct_IO(int rw, struct kioc
 	if (dio_lock_type != DIO_NO_LOCKING) {
 		/* watch out for a 0 len io from a tricksy fs */
 		if (rw == READ && end > offset) {
-			struct address_space *mapping;
-
-			mapping = iocb->ki_filp->f_mapping;
-			if (dio_lock_type != DIO_OWN_LOCKING) {
-				mutex_lock(&inode->i_mutex);
-				release_i_mutex = 1;
-			}
-
-			retval = filemap_write_and_wait_range(mapping, offset,
-							      end - 1);
-			if (retval) {
-				kfree(dio);
-				goto out;
-			}
-
 			if (dio_lock_type == DIO_OWN_LOCKING) {
 				mutex_unlock(&inode->i_mutex);
 				acquire_i_mutex = 1;
 			}
 		}
-
-		if (dio_lock_type == DIO_LOCKING)
-			/* lockdep: not the owner will release it */
-			down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
@@ -1278,13 +1308,8 @@ __blockdev_direct_IO(int rw, struct kioc
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
 				nr_segs, blkbits, get_block, end_io, dio);
 
-	if (rw == READ && dio_lock_type == DIO_LOCKING)
-		release_i_mutex = 0;
-
 out:
-	if (release_i_mutex)
-		mutex_unlock(&inode->i_mutex);
-	else if (acquire_i_mutex)
+	if (acquire_i_mutex)
 		mutex_lock(&inode->i_mutex);
 	return retval;
 }

next prev parent reply	other threads:[~2006-10-24 19:40 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-10-20 18:32 [RFC PATCH 0/2] O_DIRECT locking rework Chris Mason
2006-10-20 18:38 ` [RFC PATCH 1/2] placeholder pages Chris Mason
2006-10-20 18:41 ` [RFC PATCH 1/2] page cache locking for O_DIRECT Chris Mason
2006-10-20 19:30 ` [RFC PATCH 0/2] O_DIRECT locking rework Andrew Morton
2006-10-20 20:03   ` Zach Brown
2006-10-20 20:12     ` Chris Mason
2006-10-20 20:05   ` Chris Mason
2006-10-20 20:23   ` Andi Kleen
2006-10-24 19:34 ` Chris Mason
2006-10-24 19:38   ` [RFC PATCH 1/2] placeholder pages Chris Mason
2006-10-24 19:40   ` Chris Mason [this message]
2006-10-24 20:28   ` [RFC PATCH 0/2] O_DIRECT locking rework Badari Pulavarty
2006-10-24 20:50     ` Chris Mason
2006-10-24 21:52       ` Badari Pulavarty
2006-10-24 22:22         ` Chris Mason
2006-10-24 22:47           ` Badari Pulavarty
2006-10-24 22:37       ` Russell Cattelan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061024194030.GG12815@think.oraclecorp.com \
    --to=chris.mason@oracle.com \
    --cc=akpm@osdl.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=zach.brown@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).