From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-nvdimm@ml01.01.org
Cc: ross.zwisler@linux.intel.com
Subject: [PATCH 05/12] dax: provide an iomap based dax read/write path
Date: Fri, 16 Sep 2016 13:27:07 +0200 [thread overview]
Message-ID: <1474025234-13804-6-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1474025234-13804-1-git-send-email-hch@lst.de>
This is a much simpler implementation of the DAX read/write path that makes
use of the iomap infrastructure. It does not try to mirror the direct I/O
calling conventions and thus doesn't have to deal with i_dio_count or the
end_io handler, but instead leaves locking and filesystem-specific I/O
completion to the caller.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
fs/dax.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/dax.h | 4 ++
2 files changed, 118 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 84343ce..1f9f2d4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
/*
* We use lowest available bit in exceptional entry for locking, other two
@@ -1241,3 +1243,115 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ struct iov_iter *iter = data;
+ loff_t end = pos + length, done = 0;
+ ssize_t ret = 0;
+
+ if (iov_iter_rw(iter) == READ) {
+ end = min(end, i_size_read(inode));
+ if (pos >= end)
+ return 0;
+
+ if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+ return iov_iter_zero(min(length, end - pos), iter);
+ }
+
+ if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+ return -EIO;
+
+ while (pos < end) {
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ struct blk_dax_ctl dax = { 0 };
+ ssize_t map_len;
+
+ dax.sector = iomap->blkno +
+ (((pos & PAGE_MASK) - iomap->offset) >> 9);
+ dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+ map_len = dax_map_atomic(iomap->bdev, &dax);
+ if (map_len < 0) {
+ ret = map_len;
+ break;
+ }
+
+ dax.addr += offset;
+ map_len -= offset;
+ if (map_len > end - pos)
+ map_len = end - pos;
+
+ if (iov_iter_rw(iter) == WRITE)
+ map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+ else
+ map_len = copy_to_iter(dax.addr, map_len, iter);
+ dax_unmap_atomic(iomap->bdev, &dax);
+ if (map_len <= 0) {
+ ret = map_len ? map_len : -EFAULT;
+ break;
+ }
+
+ pos += map_len;
+ length -= map_len;
+ done += map_len;
+ }
+
+ return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb: The control block for this I/O
+ * @iter: The addresses to do I/O from or to
+ * @ops: iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory. The callers needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+ unsigned flags = 0;
+
+ if (iov_iter_rw(iter) == WRITE)
+ flags |= IOMAP_WRITE;
+
+ /*
+ * Yes, even DAX files can have page cache attached to them: A zeroed
+ * page is inserted into the pagecache when we have to serve a write
+ * fault on a hole. It should never be dirtied and can simply be
+ * dropped from the pagecache once we get real data for the page.
+ *
+ * XXX: This is racy against mmap, and there's nothing we can do about
+ * it. We'll eventually need to shift this down even further so that
+ * we can check if we allocated blocks over a hole first.
+ */
+ if (mapping->nrpages) {
+ ret = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT,
+ (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(ret);
+ }
+
+ while (iov_iter_count(iter)) {
+ ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+ iter, iomap_dax_actor);
+ if (ret <= 0)
+ break;
+ pos += ret;
+ done += ret;
+ }
+
+ iocb->ki_pos += done;
+ return done ? done : ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+#endif /* CONFIG_FS_IOMAP */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..a0595b4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -6,9 +6,13 @@
#include <linux/radix-tree.h>
#include <asm/pgtable.h>
+struct iomap_ops;
+
/* We use lowest available exceptional entry bit for locking */
#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+ struct iomap_ops *ops);
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
get_block_t, dio_iodone_t, int flags);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
--
2.1.4
next prev parent reply other threads:[~2016-09-16 11:27 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-16 11:27 iomap based DAX path V3 Christoph Hellwig
2016-09-16 11:27 ` [PATCH 01/12] iomap: add IOMAP_F_NEW flag Christoph Hellwig
2016-09-16 11:27 ` [PATCH 02/12] iomap: expose iomap_apply outside iomap.c Christoph Hellwig
2016-09-16 11:27 ` [PATCH 03/12] dax: don't pass buffer_head to dax_insert_mapping Christoph Hellwig
2016-09-16 11:27 ` [PATCH 04/12] dax: don't pass buffer_head to copy_user_dax Christoph Hellwig
2016-09-16 11:27 ` Christoph Hellwig [this message]
2016-09-16 11:27 ` [PATCH 06/12] dax: provide an iomap based fault handler Christoph Hellwig
2016-09-16 11:27 ` [PATCH 07/12] xfs: fix locking for DAX writes Christoph Hellwig
2016-09-16 11:27 ` [PATCH 08/12] xfs: take the ilock shared if possible in xfs_file_iomap_begin Christoph Hellwig
2016-09-16 11:27 ` [PATCH 09/12] xfs: refactor xfs_setfilesize Christoph Hellwig
2016-09-16 11:27 ` [PATCH 10/12] xfs: use iomap to implement DAX Christoph Hellwig
2016-09-16 11:27 ` [PATCH 11/12] ext2: stop passing buffer_head to ext2_get_blocks Christoph Hellwig
2016-09-16 11:27 ` [PATCH 12/12] ext2: use iomap to implement DAX Christoph Hellwig
2016-09-26 14:49 ` Jan Kara
-- strict thread matches above, loose matches on Subject: below --
2016-09-14 10:01 iomap based DAX path V2 Christoph Hellwig
2016-09-14 10:01 ` [PATCH 05/12] dax: provide an iomap based dax read/write path Christoph Hellwig
2016-09-14 17:26 ` Ross Zwisler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1474025234-13804-6-git-send-email-hch@lst.de \
--to=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nvdimm@ml01.01.org \
--cc=linux-xfs@vger.kernel.org \
--cc=ross.zwisler@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).