linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chris Mason <chris.mason@fusionio.com>
To: Linux FS Devel <linux-fsdevel@vger.kernel.org>,
	Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 2/3] fs: Add O_ATOMIC support to direct IO
Date: Fri, 1 Nov 2013 17:29:44 -0400	[thread overview]
Message-ID: <20131101212944.10239.21905@localhost.localdomain> (raw)
In-Reply-To: <20131101212704.10239.73920@localhost.localdomain>

This adds the O_ATOMIC file flag (which requires O_DIRECT).  If
applications request atomic IO, the generic O_DIRECT code is changed to
build a list of bios to represent any single O_DIRECT write() call.  The
bios may span discontiguous areas of the drive if the file is fragmented.

The bios are sent to submit_bio as a single unit, and we expect the
storage to do one of three things:

Fail each bio individually if the list is too large for atomic
completion.

Fail each bio individually if there are any errors during any write.

Complete each bio with success if every write is fully stable
on media.

This works with any filesystem that uses the generic O_DIRECT code for
bio submission (almost everyone except Btrfs).

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
---
 fs/direct-io.c                   | 23 +++++++++++++++++++++--
 fs/fcntl.c                       | 14 +++++++++++---
 include/uapi/asm-generic/fcntl.h |  4 ++++
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 160a548..6837418 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -120,6 +120,7 @@ struct dio {
 	struct inode *inode;
 	loff_t i_size;			/* i_size when submitted */
 	dio_iodone_t *end_io;		/* IO completion function */
+	struct bio_list atomic_bio;
 
 	void *private;			/* copy from map_bh.b_private */
 
@@ -409,14 +410,30 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	if (sdio->submit_io)
 		sdio->submit_io(dio->rw, bio, dio->inode,
 			       sdio->logical_offset_in_bio);
-	else
-		submit_bio(dio->rw, bio);
+	else {
+		/* atomic writes are collected for submission together */
+		if (dio->rw != READ &&
+		    (dio->iocb->ki_filp->f_flags & O_ATOMIC)) {
+			bio->bi_rw |= (REQ_ATOMIC | dio->rw);
+			bio_list_add(&dio->atomic_bio, bio);
+		} else {
+			/* everything else is sent directly */
+			submit_bio(dio->rw, bio);
+		}
+	}
 
 	sdio->bio = NULL;
 	sdio->boundary = 0;
 	sdio->logical_offset_in_bio = 0;
 }
 
+static inline void dio_bio_atomic_submit(struct dio *dio)
+{
+	struct bio *bio = bio_list_get(&dio->atomic_bio);
+	if (bio)
+		submit_bio(dio->rw, bio);
+}
+
 /*
  * Release any resources in case of a failure
  */
@@ -1173,6 +1190,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * care to only zero out what's needed.
 	 */
 	memset(dio, 0, offsetof(struct dio, pages));
+	bio_list_init(&dio->atomic_bio);
 
 	dio->flags = flags;
 	if (dio->flags & DIO_LOCKING) {
@@ -1318,6 +1336,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);
 
+	dio_bio_atomic_submit(dio);
 	blk_finish_plug(&plug);
 
 	/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 65343c3..09f4c7a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,7 +26,8 @@
 #include <asm/siginfo.h>
 #include <asm/uaccess.h>
 
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | \
+		    O_DIRECT | O_NOATIME | O_ATOMIC)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
@@ -56,6 +57,12 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 				return -EINVAL;
 	}
 
+	/* O_ATOMIC requires O_DIRECT */
+	if (arg & O_ATOMIC) {
+		if (!((arg | filp->f_flags) & O_DIRECT))
+			return -EINVAL;
+	}
+
 	if (filp->f_op && filp->f_op->check_flags)
 		error = filp->f_op->check_flags(arg);
 	if (error)
@@ -730,14 +737,15 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
 		O_RDONLY	| O_WRONLY	| O_RDWR	|
 		O_CREAT		| O_EXCL	| O_NOCTTY	|
 		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
 		__O_SYNC	| O_DSYNC	| FASYNC	|
 		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
 		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
-		__FMODE_EXEC	| O_PATH	| __O_TMPFILE
+		__FMODE_EXEC	| O_PATH	| __O_TMPFILE	|
+		O_ATOMIC
 		));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 95e46c8..00259df 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -88,6 +88,10 @@
 #define __O_TMPFILE	020000000
 #endif
 
+#ifndef O_ATOMIC
+#define O_ATOMIC	040000000	/* set to do atomic O_DIRECT writes */
+#endif
+
 /* a horrid kludge trying to make sure that this will fail on old kernels */
 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
 #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)      
-- 
1.8.2


      parent reply	other threads:[~2013-11-01 21:29 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-01 21:27 [PATCH 0/2] Support for atomic IOs Chris Mason
2013-11-01 21:28 ` [PATCH 1/2] block: Add support for atomic writes Chris Mason
2013-11-01 21:47   ` Shaohua Li
2013-11-05 17:43   ` Jeff Moyer
2013-11-07 13:52     ` Chris Mason
2013-11-07 15:43       ` Jeff Moyer
2013-11-07 15:55         ` Chris Mason
2013-11-07 16:14           ` Jeff Moyer
2013-11-07 16:52             ` Chris Mason
2013-11-13 23:59             ` Dave Chinner
2013-11-12 15:11       ` Matthew Wilcox
2013-11-13 20:44         ` Chris Mason
2013-11-13 20:53           ` Howard Chu
2013-11-13 21:35           ` Matthew Wilcox
2013-11-01 21:29 ` Chris Mason [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131101212944.10239.21905@localhost.localdomain \
    --to=chris.mason@fusionio.com \
    --cc=axboe@kernel.dk \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).