From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: linux-fsdevel@vger.kernel.org, Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Matthew Wilcox <willy@linux.intel.com>
Subject: [PATCH 3/5] block: Add support for DAX on block devices
Date: Mon, 29 Jun 2015 16:02:30 -0400 [thread overview]
Message-ID: <1435608152-6982-4-git-send-email-matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1435608152-6982-1-git-send-email-matthew.r.wilcox@intel.com>
From: Matthew Wilcox <willy@linux.intel.com>
Without this patch, accesses to a file on a filesystem on a block device
could be done without the page cache, but accessing the block device
itself would always go through the page cache.
With this patch, reads and writes to a DAX-capable block device always
bypass the page cache. Loads and stores to an mmapped block device bypass
the page cache only if the device was opened with O_DIRECT. This opt-in
from the user is necessary because DAX mappings are currently incompatible
with RDMA and with O_DIRECT I/O to non-DAX files.
Include support for the DIO_SKIP_DIO_COUNT flag in DAX, which is only
used by the block device driver.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
fs/block_dev.c | 38 ++++++++++++++++++++++++++++++++++++--
fs/dax.c | 6 ++++--
2 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f04c873..e3fab8c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -152,6 +152,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ if (IS_DAX(inode))
+ return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
+ NULL, DIO_SKIP_DIO_COUNT);
return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
blkdev_get_block, NULL, NULL,
DIO_SKIP_DIO_COUNT);
@@ -333,7 +336,37 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
mutex_unlock(&bd_inode->i_mutex);
return retval;
}
-
+
+#ifdef CONFIG_FS_DAX
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return dax_fault(vma, vmf, blkdev_get_block);
+}
+
+static int blkdev_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return dax_mkwrite(vma, vmf, blkdev_get_block);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+ .fault = blkdev_dax_fault,
+ .page_mkwrite = blkdev_dax_mkwrite,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((IS_DAX(file->f_mapping->host)) && (file->f_flags & O_DIRECT)) {
+ file_accessed(file);
+ vma->vm_ops = &blkdev_dax_vm_ops;
+ vma->vm_flags |= VM_MIXEDMAP;
+ return 0;
+ }
+ return generic_file_mmap(file, vma);
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *bd_inode = filp->f_mapping->host;
@@ -1170,6 +1203,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+ bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
if (!partno) {
ret = -ENXIO;
bdev->bd_part = disk_get_part(disk, partno);
@@ -1670,7 +1704,7 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/dax.c b/fs/dax.c
index 159f796..37a0c48 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
}
/* Protects against truncate */
- inode_dio_begin(inode);
+ if (!(flags & DIO_SKIP_DIO_COUNT))
+ inode_dio_begin(inode);
retval = dax_io(inode, iter, pos, end, get_block, &bh);
@@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
if ((retval > 0) && end_io)
end_io(iocb, pos, retval, bh.b_private);
- inode_dio_end(inode);
+ if (!(flags & DIO_SKIP_DIO_COUNT))
+ inode_dio_end(inode);
out:
return retval;
}
--
2.1.4
next prev parent reply other threads:[~2015-06-29 20:02 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-29 20:02 [PATCH 0/5] DAX updates for 4.2 Matthew Wilcox
2015-06-29 20:02 ` [PATCH 1/5] dax: Add block size note to documentation Matthew Wilcox
2015-06-29 20:02 ` [PATCH 2/5] dax: Use copy_from_iter_nocache Matthew Wilcox
2015-06-29 20:02 ` Matthew Wilcox [this message]
2015-06-30 11:19 ` [PATCH 3/5] block: Add support for DAX on block devices Christoph Hellwig
2015-06-30 19:56 ` Matthew Wilcox
2015-07-01 7:19 ` Christoph Hellwig
2015-06-29 20:02 ` [PATCH 4/5] ext4: Use ext4_get_block_write() for DAX Matthew Wilcox
2015-06-29 20:02 ` [PATCH 5/5] vfs: Allow truncate, chmod and chown to be interrupted by fatal signals Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1435608152-6982-4-git-send-email-matthew.r.wilcox@intel.com \
--to=matthew.r.wilcox@intel.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).