linux-nvdimm.lists.01.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: linux-ext4@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, Dave Chinner <david@fromorbit.com>,
	Jan Kara <jack@suse.cz>, Ted Tso <tytso@mit.edu>,
	linux-nvdimm@lists.01.org
Subject: [PATCH 06/11] ext4: DAX iomap write support
Date: Tue,  1 Nov 2016 22:06:16 +0100	[thread overview]
Message-ID: <1478034381-19037-7-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1478034381-19037-1-git-send-email-jack@suse.cz>

Implement DAX writes using the new iomap infrastructure instead of
overloading the direct IO path.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/file.c  | 39 ++++++++++++++++++++++--
 fs/ext4/inode.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 28ebc2418dc2..d7ab0e90d1b8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -172,6 +172,39 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
 }
 
 static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+	bool overwrite = false;
+
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+	ret = file_remove_privs(iocb->ki_filp);
+	if (ret)
+		goto out;
+	ret = file_update_time(iocb->ki_filp);
+	if (ret)
+		goto out;
+
+	if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+		overwrite = true;
+		downgrade_write(&inode->i_rwsem);
+	}
+	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+out:
+	if (!overwrite)
+		inode_unlock(inode);
+	else
+		inode_unlock_shared(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
+static ssize_t
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -180,6 +213,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	int overwrite = 0;
 	ssize_t ret;
 
+	if (IS_DAX(inode))
+		return ext4_dax_write_iter(iocb, from);
+
 	inode_lock(inode);
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
@@ -199,8 +235,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	iocb->private = &overwrite;
 	/* Check whether we do a DIO overwrite or not */
-	if (((o_direct && !unaligned_aio) || IS_DAX(inode)) &&
-	    ext4_should_dioread_nolock(inode) &&
+	if ((o_direct && !unaligned_aio) && ext4_should_dioread_nolock(inode) &&
 	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
 		overwrite = 1;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ac26a390f14c..d07d003ebce2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3316,18 +3316,62 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	struct ext4_map_blocks map;
 	int ret;
 
-	if (flags & IOMAP_WRITE)
-		return -EIO;
-
 	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
 		return -ERANGE;
 
 	map.m_lblk = first_block;
 	map.m_len = last_block - first_block + 1;
 
-	ret = ext4_map_blocks(NULL, inode, &map, 0);
-	if (ret < 0)
-		return ret;
+	if (!(flags & IOMAP_WRITE)) {
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+	} else {
+		int dio_credits;
+		handle_t *handle;
+		int retries = 0;
+
+		/* Trim mapping request to maximum we can map at once for DIO */
+		if (map.m_len > DIO_MAX_BLOCKS)
+			map.m_len = DIO_MAX_BLOCKS;
+		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
+		/*
+		 * Either we allocate blocks and then we don't get unwritten
+		 * extent so we have reserved enough credits, or the blocks
+		 * are already allocated and unwritten and in that case
+		 * extent conversion fits in the credits as well.
+		 */
+		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+					    dio_credits);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+
+		ret = ext4_map_blocks(handle, inode, &map,
+				      EXT4_GET_BLOCKS_PRE_IO |
+				      EXT4_GET_BLOCKS_CREATE_ZERO);
+		if (ret < 0) {
+			ext4_journal_stop(handle);
+			if (ret == -ENOSPC &&
+			    ext4_should_retry_alloc(inode->i_sb, &retries))
+				goto retry;
+			return ret;
+		}
+		/* For DAX writes we need to zero out unwritten extents */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			/*
+			 * We are protected by i_mmap_sem or i_rwsem so we know
+			 * block cannot go away from under us even though we
+			 * dropped i_data_sem. Convert extent to written and
+			 * write zeros there.
+			 */
+			ret = ext4_map_blocks(handle, inode, &map,
+					      EXT4_GET_BLOCKS_CONVERT |
+					      EXT4_GET_BLOCKS_CREATE_ZERO);
+			if (ret < 0) {
+				ext4_journal_stop(handle);
+				return ret;
+			}
+		}
+	}
 
 	iomap->flags = 0;
 	iomap->bdev = inode->i_sb->s_bdev;
@@ -3355,8 +3399,46 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+			  ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (flags & IOMAP_WRITE) {
+		handle_t *handle = ext4_journal_current_handle();
+		ext4_lblk_t written_blk, end_blk;
+		int blkbits = inode->i_blkbits;
+		bool truncate = false;
+
+		if (ext4_update_inode_size(inode, offset + written))
+			ext4_mark_inode_dirty(handle, inode);
+		written_blk = (offset + written) >> blkbits;
+		end_blk = (offset + length) >> blkbits;
+		/*
+		 * We may need to truncate allocated but not written blocks
+		 * beyond EOF.
+		 */
+		if (written_blk < end_blk && offset + length > inode->i_size &&
+		    ext4_can_truncate(inode)) {
+			ext4_orphan_add(handle, inode);
+			truncate = true;
+		}
+		ext4_journal_stop(handle);
+		if (truncate) {
+			ext4_truncate_failed_write(inode);
+			/*
+			 * If truncate failed early the inode might still be
+			 * on the orphan list; we need to make sure the inode
+			 * is removed from the orphan list in that case.
+			 */
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+		}
+	}
+	return 0;
+}
+
 struct iomap_ops ext4_iomap_ops = {
 	.iomap_begin		= ext4_iomap_begin,
+	.iomap_end		= ext4_iomap_end,
 };
 
 #else
-- 
2.6.6

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

  parent reply	other threads:[~2016-11-01 21:22 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-01 21:06 [PATCH 0/11] ext4: Convert ext4 DAX IO to iomap framework Jan Kara
2016-11-01 21:06 ` [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter() Jan Kara
2016-11-03 21:04   ` Ross Zwisler
2016-11-04  4:20     ` Jan Kara
2016-11-01 21:06 ` [PATCH 02/11] ext4: Allow unaligned unlocked DAX IO Jan Kara
     [not found]   ` <1478034381-19037-3-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
2016-11-02 14:27     ` Christoph Hellwig
2016-11-03 23:55       ` Jan Kara
2016-11-01 21:06 ` [PATCH 03/11] ext4: Let S_DAX set only if DAX is really supported Jan Kara
2016-11-01 21:06 ` [PATCH 04/11] ext4: Convert DAX reads to iomap infrastructure Jan Kara
2016-11-01 21:06 ` [PATCH 05/11] ext4: Use iomap for zeroing blocks in DAX mode Jan Kara
2016-11-01 21:06 ` Jan Kara [this message]
2016-11-01 21:06 ` [PATCH 07/11] ext4: Avoid split extents for DAX writes Jan Kara
2016-11-01 21:06 ` [PATCH 08/11] ext4: Convert DAX faults to iomap infrastructure Jan Kara
2016-11-02 14:30   ` Christoph Hellwig
     [not found]     ` <20161102143006.GB18387-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2016-11-04  0:02       ` Jan Kara
2016-11-01 21:06 ` [PATCH 09/11] ext4: Rip out DAX handling from direct IO path Jan Kara
2016-11-01 21:06 ` [PATCH 10/11] ext2: Use iomap_zero_range() for zeroing truncated page in DAX path Jan Kara
2016-11-01 21:06 ` [PATCH 11/11] dax: Rip out get_block based IO support Jan Kara
     [not found]   ` <1478034381-19037-12-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
2016-11-02 14:31     ` Christoph Hellwig
2016-11-04  0:03       ` Jan Kara
2016-11-01 22:12 ` [PATCH 0/11] ext4: Convert ext4 DAX IO to iomap framework Dave Chinner
2016-11-01 22:45   ` Ross Zwisler
2016-11-01 23:09     ` Jan Kara
2016-11-02 13:03       ` Theodore Ts'o
2016-11-02 22:23         ` Dave Chinner
     [not found]         ` <20161102130353.nt3oqodrik4hnfk6-AKGzg7BKzIDYtjvyW6yDsg@public.gmane.org>
2016-11-04  0:55           ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478034381-19037-7-git-send-email-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=david@fromorbit.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).