From: Chris Mason <chris.mason@oracle.com>
To: linux-fsdevel@vger.kernel.org
Cc: akpm@osdl.org, zach.brown@oracle.com,
Suparna Bhattacharya <suparna@in.ibm.com>
Subject: [PATCH 4 of 7] Turn the DIO lock_type parameter into a flags field
Date: Wed, 01 Nov 2006 11:08:06 -0400 [thread overview]
Message-ID: <f84d3216430d2b1aa58d.1162397286@opti.oraclecorp.com> (raw)
In-Reply-To: <patchbomb.1162397282@opti.oraclecorp.com>
This replaces the single lock_type argument with a number of flags so that
filesystems can control blockdev_direct_IO. It is based on code from Russell Cattelan.
The new flags are:
DIO_CREATE -- always pass create=1 to get_block on writes. This allows
DIO to fill holes in the file.
DIO_PLACEHOLDERS -- use placeholder pages to provide locking against buffered
io and truncates.
DIO_EXTEND -- use truncate to grow the file instead of falling back to
buffered io.
DIO_DROP_I_MUTEX -- drop i_mutex before starting the IO on writes
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff -r 3fa8c25ec60f -r f84d3216430d fs/direct-io.c
--- a/fs/direct-io.c Wed Nov 01 10:22:34 2006 -0500
+++ b/fs/direct-io.c Wed Nov 01 10:24:03 2006 -0500
@@ -53,13 +53,6 @@
*
* If blkfactor is zero then the user's request was aligned to the filesystem's
* blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks. If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
*/
struct dio {
@@ -68,7 +61,7 @@ struct dio {
struct inode *inode;
int rw;
loff_t i_size; /* i_size when submitted */
- int lock_type; /* doesn't change */
+ unsigned flags; /* doesn't change */
int reacquire_i_mutex; /* should we get i_mutex when done? */
unsigned blkbits; /* doesn't change */
unsigned blkfactor; /* When we're using an alignment which
@@ -203,7 +196,7 @@ static void unlock_page_range(struct dio
static void unlock_page_range(struct dio *dio, unsigned long start,
unsigned long nr)
{
- if (dio->lock_type != DIO_NO_LOCKING) {
+ if (dio->flags & DIO_PLACEHOLDERS) {
remove_placeholder_pages(dio->inode->i_mapping, dio->tmppages,
&dio->fake,
start, start + nr,
@@ -218,11 +211,13 @@ static int lock_page_range(struct dio *d
struct page *fake = &dio->fake;
unsigned long end = start + nr;
- if (dio->lock_type == DIO_NO_LOCKING)
- return 0;
- return find_or_insert_placeholders(mapping, dio->tmppages, start, end,
- ARRAY_SIZE(dio->tmppages),
- GFP_KERNEL, fake, 1);
+ if (dio->flags & DIO_PLACEHOLDERS) {
+ return find_or_insert_placeholders(mapping, dio->tmppages,
+ start, end,
+ ARRAY_SIZE(dio->tmppages),
+ GFP_KERNEL, fake, 1);
+ }
+ return 0;
}
@@ -556,6 +551,7 @@ static int get_more_blocks(struct dio *d
unsigned long dio_count;/* Number of dio_block-sized blocks */
unsigned long blkmask;
unsigned long index;
+ unsigned long end;
int create;
/*
@@ -575,8 +571,9 @@ static int get_more_blocks(struct dio *d
map_bh->b_state = 0;
map_bh->b_size = fs_count << dio->inode->i_blkbits;
- create = dio->rw & WRITE;
- if (dio->lock_type == DIO_NO_LOCKING)
+ if (dio->flags & DIO_CREATE)
+ create = dio->rw & WRITE;
+ else
create = 0;
index = fs_startblk >> (PAGE_CACHE_SHIFT -
dio->inode->i_blkbits);
@@ -1193,28 +1190,17 @@ direct_io_worker(int rw, struct kiocb *i
/*
* This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
- *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
- *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- * uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ * The flags parameter is a bitmask of:
+ *
+ * DIO_PLACEHOLDERS (use placeholder pages for locking)
+ * DIO_CREATE (pass create=1 to get_block for filling holes)
+ * DIO_DROP_I_MUTEX (drop inode->i_mutex during writes)
*/
ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
- int dio_lock_type)
+ unsigned flags)
{
int seg;
size_t size;
@@ -1225,7 +1211,6 @@ __blockdev_direct_IO(int rw, struct kioc
ssize_t retval = -EINVAL;
loff_t end = offset;
struct dio *dio;
- struct address_space *mapping = iocb->ki_filp->f_mapping;
if (rw & WRITE)
rw = WRITE_SYNC;
@@ -1271,9 +1256,14 @@ __blockdev_direct_IO(int rw, struct kioc
* For regular files using DIO_OWN_LOCKING,
* neither readers nor writers take any locks here
*/
- dio->lock_type = dio_lock_type;
-
- if (dio->lock_type == DIO_NO_LOCKING && end > offset) {
+ dio->flags = flags;
+
+ /*
+ * the placeholder code does filemap_write_and_wait, so if we
+ * aren't using placeholders we have to do it here
+ */
+ if (!(dio->flags & DIO_PLACEHOLDERS) && end > offset) {
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
retval = filemap_write_and_wait_range(mapping, offset, end - 1);
if (retval)
goto out;
@@ -1296,11 +1286,12 @@ __blockdev_direct_IO(int rw, struct kioc
* mmap'd writes using writepage to fill holes
*/
dio->reacquire_i_mutex = 0;
- if ((rw & WRITE) && dio_lock_type == DIO_LOCKING) {
+ if (rw & WRITE) {
/* if our write goes past i_size, do an expanding
* truncate to fill it before dropping i_mutex
*/
- if (end > i_size_read(inode) && iocb->ki_filp) {
+ if ((dio->flags & DIO_EXTEND) && end > i_size_read(inode) &&
+ iocb->ki_filp) {
struct iattr newattrs;
newattrs.ia_size = end;
newattrs.ia_file = iocb->ki_filp;
@@ -1310,7 +1301,7 @@ __blockdev_direct_IO(int rw, struct kioc
if (retval)
goto out;
}
- if (is_sync_kiocb(iocb)) {
+ if ((dio->flags & DIO_DROP_I_MUTEX) && is_sync_kiocb(iocb)) {
dio->reacquire_i_mutex = 1;
mutex_unlock(&inode->i_mutex);
}
diff -r 3fa8c25ec60f -r f84d3216430d include/linux/fs.h
--- a/include/linux/fs.h Wed Nov 01 10:22:34 2006 -0500
+++ b/include/linux/fs.h Wed Nov 01 10:24:03 2006 -0500
@@ -1801,21 +1801,32 @@ ssize_t __blockdev_direct_IO(int rw, str
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
- int lock_type);
-
-enum {
- DIO_LOCKING = 1, /* need locking between buffered and direct access */
- DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */
- DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
-};
+ unsigned int dio_flags);
+
+#define DIO_PLACEHOLDERS (1 << 0) /* insert placeholder pages */
+#define DIO_CREATE (1 << 1) /* pass create=1 to get_block when writing */
+#define DIO_DROP_I_MUTEX (1 << 2) /* drop i_mutex during writes */
+#define DIO_EXTEND (1 << 3) /* extend the file w/truncate if needed */
static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
struct inode *inode, struct block_device *bdev, const struct iovec *iov,
loff_t offset, unsigned long nr_segs, get_block_t get_block,
dio_iodone_t end_io)
{
+ /* locking is on, FS wants to fill holes w/get_block */
return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
- nr_segs, get_block, end_io, DIO_LOCKING);
+ nr_segs, get_block, end_io, DIO_PLACEHOLDERS |
+ DIO_CREATE | DIO_DROP_I_MUTEX | DIO_EXTEND);
+}
+
+static inline ssize_t blockdev_direct_IO_flags(int rw, struct kiocb *iocb,
+ struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs, get_block_t get_block,
+ dio_iodone_t end_io, unsigned int flags)
+{
+ /* file system dictates locking and create behavior */
+ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+ nr_segs, get_block, end_io, flags);
}
static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
@@ -1823,17 +1834,9 @@ static inline ssize_t blockdev_direct_IO
loff_t offset, unsigned long nr_segs, get_block_t get_block,
dio_iodone_t end_io)
{
+ /* locking is off, create is off */
return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
- nr_segs, get_block, end_io, DIO_NO_LOCKING);
-}
-
-static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
- struct inode *inode, struct block_device *bdev, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs, get_block_t get_block,
- dio_iodone_t end_io)
-{
- return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
- nr_segs, get_block, end_io, DIO_OWN_LOCKING);
+ nr_segs, get_block, end_io, 0);
}
#endif
next prev parent reply other threads:[~2006-11-01 16:25 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-01 15:08 [PATCH 0 of 7] O_DIRECT locking rework Chris Mason
2006-11-01 15:08 ` [PATCH 1 of 7] Introduce a place holder page for the pagecache Chris Mason
2006-11-01 15:08 ` [PATCH 2 of 7] Change O_DIRECT to use placeholders instead of i_mutex/i_alloc_sem locking Chris Mason
2006-11-01 22:44 ` David Chinner
2006-11-01 15:08 ` [PATCH 3 of 7] DIO: don't fall back to buffered writes Chris Mason
2006-11-01 15:08 ` Chris Mason [this message]
2006-11-01 22:58 ` [PATCH 4 of 7] Turn the DIO lock_type parameter into a flags field David Chinner
2006-11-02 1:02 ` Chris Mason
2006-11-02 2:16 ` David Chinner
2006-11-08 18:48 ` Chris Mason
2006-11-01 15:08 ` [PATCH 5 of 7] Make ext3 safe for the new DIO locking rules Chris Mason
2006-11-01 15:08 ` [PATCH 6 of 7] Make reiserfs safe for " Chris Mason
2006-11-01 15:08 ` [PATCH 7 of 7] Adapt XFS to the new blockdev_direct_IO calls Chris Mason
2006-11-01 23:00 ` David Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f84d3216430d2b1aa58d.1162397286@opti.oraclecorp.com \
--to=chris.mason@oracle.com \
--cc=akpm@osdl.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=suparna@in.ibm.com \
--cc=zach.brown@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).