* [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter()
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
@ 2016-11-01 21:06 ` Jan Kara
2016-11-03 21:04 ` Ross Zwisler
2016-11-01 21:06 ` [PATCH 03/11] ext4: Let S_DAX set only if DAX is really supported Jan Kara
` (7 subsequent siblings)
8 siblings, 1 reply; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Factor out checks of 'from' and whether we are overwriting out of
ext4_file_write_iter() so that the function is easier to follow.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/file.c | 97 ++++++++++++++++++++++++++++++----------------------------
1 file changed, 50 insertions(+), 47 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2a822d30e73f..a6a7becb9465 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -88,6 +88,51 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
return 0;
}
+/* Is IO overwriting allocated and initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+{
+ struct ext4_map_blocks map;
+ unsigned int blkbits = inode->i_blkbits;
+ int err, blklen;
+
+ if (pos + len > i_size_read(inode))
+ return false;
+
+ map.m_lblk = pos >> blkbits;
+ map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
+ blklen = map.m_len;
+
+ err = ext4_map_blocks(NULL, inode, &map, 0);
+ /*
+ * 'err==len' means that all of blocks has been preallocated no matter
+ * they are initialized or not. For excluding unwritten extents, we
+ * need to check m_flags.
+ */
+ return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ return ret;
+ /*
+ * If we have encountered a bitmap-format file, the size limit
+ * is smaller than s_maxbytes, which is for extent-mapped files.
+ */
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
+ return -EFBIG;
+ iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
+ }
+ return iov_iter_count(from);
+}
+
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
@@ -98,7 +143,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
inode_lock(inode);
- ret = generic_write_checks(iocb, from);
+ ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -114,53 +159,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_unwritten_wait(inode);
}
- /*
- * If we have encountered a bitmap-format file, the size limit
- * is smaller than s_maxbytes, which is for extent-mapped files.
- */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
- if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
- ret = -EFBIG;
- goto out;
- }
- iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
- }
-
iocb->private = &overwrite;
- if (o_direct) {
- size_t length = iov_iter_count(from);
- loff_t pos = iocb->ki_pos;
-
- /* check whether we do a DIO overwrite or not */
- if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- pos + length <= i_size_read(inode)) {
- struct ext4_map_blocks map;
- unsigned int blkbits = inode->i_blkbits;
- int err, len;
-
- map.m_lblk = pos >> blkbits;
- map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits);
- len = map.m_len;
-
- err = ext4_map_blocks(NULL, inode, &map, 0);
- /*
- * 'err==len' means that all of blocks has
- * been preallocated no matter they are
- * initialized or not. For excluding
- * unwritten extents, we need to check
- * m_flags. There are two conditions that
- * indicate for initialized extents. 1) If we
- * hit extent cache, EXT4_MAP_MAPPED flag is
- * returned; 2) If we do a real lookup,
- * non-flags are returned. So we should check
- * these two conditions.
- */
- if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
- overwrite = 1;
- }
- }
+ /* Check whether we do a DIO overwrite or not */
+ if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
+ ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
+ overwrite = 1;
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter()
2016-11-01 21:06 ` [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter() Jan Kara
@ 2016-11-03 21:04 ` Ross Zwisler
[not found] ` <20161103210440.GA27351-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
0 siblings, 1 reply; 28+ messages in thread
From: Ross Zwisler @ 2016-11-03 21:04 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-ext4, linux-fsdevel, Dave Chinner, Ted Tso, linux-nvdimm
On Tue, Nov 01, 2016 at 10:06:11PM +0100, Jan Kara wrote:
> Factor out checks of 'from' and whether we are overwriting out of
> ext4_file_write_iter() so that the function is easier to follow.
>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
> fs/ext4/file.c | 97 ++++++++++++++++++++++++++++++----------------------------
> 1 file changed, 50 insertions(+), 47 deletions(-)
>
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 2a822d30e73f..a6a7becb9465 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -88,6 +88,51 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
> return 0;
> }
>
> +/* Is IO overwriting allocated and initialized blocks? */
> +static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
> +{
> + struct ext4_map_blocks map;
> + unsigned int blkbits = inode->i_blkbits;
> + int err, blklen;
> +
> + if (pos + len > i_size_read(inode))
> + return false;
> +
> + map.m_lblk = pos >> blkbits;
> + map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
> + blklen = map.m_len;
> +
> + err = ext4_map_blocks(NULL, inode, &map, 0);
> + /*
> + * 'err==len' means that all of blocks has been preallocated no matter
> + * they are initialized or not. For excluding unwritten extents, we
> + * need to check m_flags.
Does it make sense to clean up this comment, since you're moving it anyway?
i.e.:
'err==len' means that all of the blocks have been preallocated, regardless
of whether they have been initialized or not. To exclude unwritten extents
we need to check m_flags.
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 03/11] ext4: Let S_DAX set only if DAX is really supported
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
2016-11-01 21:06 ` [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter() Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 05/11] ext4: Use iomap for zeroing blocks in DAX mode Jan Kara
` (6 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Currently we have S_DAX set inode->i_flags for a regular file whenever
ext4 is mounted with dax mount option. However in some cases we cannot
really do DAX - e.g. when inode is marked to use data journalling, when
inode data is being encrypted, or when inode is stored inline. Make sure
S_DAX flag is appropriately set/cleared in these cases.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/inline.c | 10 ++++++++++
fs/ext4/inode.c | 9 ++++++++-
fs/ext4/super.c | 5 +++++
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f74d5ee2cdec..c29678965c3c 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -299,6 +299,11 @@ static int ext4_create_inline_data(handle_t *handle,
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+ /*
+ * Propagate changes to inode->i_flags as well - e.g. S_DAX may
+ * get cleared
+ */
+ ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -442,6 +447,11 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+ /*
+ * Propagate changes to inode->i_flags as well - e.g. S_DAX may
+ * get set.
+ */
+ ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9eb1f89201ed..4cbd1b24c237 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4350,7 +4350,9 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
+ !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
+ !ext4_encrypted_inode(inode))
new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -5618,6 +5620,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
+ /*
+ * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
+ * E.g. S_DAX may get cleared / set.
+ */
+ ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6db81fbcbaa6..c83d6f1cfab8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1125,6 +1125,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
+ /*
+ * Update inode->i_flags - e.g. S_DAX may get disabled
+ */
+ ext4_set_inode_flags(inode);
}
return res;
}
@@ -1139,6 +1143,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+ /* Update inode->i_flags - e.g. S_DAX may get disabled */
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 05/11] ext4: Use iomap for zeroing blocks in DAX mode
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
2016-11-01 21:06 ` [PATCH 01/11] ext4: Factor out checks from ext4_file_write_iter() Jan Kara
2016-11-01 21:06 ` [PATCH 03/11] ext4: Let S_DAX set only if DAX is really supported Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 06/11] ext4: DAX iomap write support Jan Kara
` (5 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Use iomap infrastructure for zeroing blocks when in DAX mode.
ext4_iomap_begin() handles read requests just fine and that's all that
is needed for iomap_zero_range().
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/inode.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cc10145ea98b..ac26a390f14c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3844,8 +3844,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
if (length > max || length < 0)
length = max;
- if (IS_DAX(inode))
- return dax_zero_page_range(inode, from, length, ext4_get_block);
+ if (IS_DAX(inode)) {
+ return iomap_zero_range(inode, from, length, NULL,
+ &ext4_iomap_ops);
+ }
return __ext4_block_zero_page_range(handle, mapping, from, length);
}
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 06/11] ext4: DAX iomap write support
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (2 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 05/11] ext4: Use iomap for zeroing blocks in DAX mode Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 07/11] ext4: Avoid split extents for DAX writes Jan Kara
` (4 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Implement DAX writes using the new iomap infrastructure instead of
overloading the direct IO path.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/file.c | 39 ++++++++++++++++++++++--
fs/ext4/inode.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 125 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 28ebc2418dc2..d7ab0e90d1b8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -172,6 +172,39 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
}
static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+ bool overwrite = false;
+
+ inode_lock(inode);
+ ret = ext4_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+ ret = file_remove_privs(iocb->ki_filp);
+ if (ret)
+ goto out;
+ ret = file_update_time(iocb->ki_filp);
+ if (ret)
+ goto out;
+
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ overwrite = true;
+ downgrade_write(&inode->i_rwsem);
+ }
+ ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+out:
+ if (!overwrite)
+ inode_unlock(inode);
+ else
+ inode_unlock_shared(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+
+static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
@@ -180,6 +213,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
int overwrite = 0;
ssize_t ret;
+ if (IS_DAX(inode))
+ return ext4_dax_write_iter(iocb, from);
+
inode_lock(inode);
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
@@ -199,8 +235,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (((o_direct && !unaligned_aio) || IS_DAX(inode)) &&
- ext4_should_dioread_nolock(inode) &&
+ if ((o_direct && !unaligned_aio) && ext4_should_dioread_nolock(inode) &&
ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
overwrite = 1;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ac26a390f14c..d07d003ebce2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3316,18 +3316,62 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct ext4_map_blocks map;
int ret;
- if (flags & IOMAP_WRITE)
- return -EIO;
-
if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
return -ERANGE;
map.m_lblk = first_block;
map.m_len = last_block - first_block + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0)
- return ret;
+ if (!(flags & IOMAP_WRITE)) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ } else {
+ int dio_credits;
+ handle_t *handle;
+ int retries = 0;
+
+ /* Trim mapping request to maximum we can map at once for DIO */
+ if (map.m_len > DIO_MAX_BLOCKS)
+ map.m_len = DIO_MAX_BLOCKS;
+ dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
+ /*
+ * Either we allocate blocks and then we don't get unwritten
+ * extent so we have reserved enough credits, or the blocks
+ * are already allocated and unwritten and in that case
+ * extent conversion fits in the credits as well.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+ dio_credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ret = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_PRE_IO |
+ EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (ret < 0) {
+ ext4_journal_stop(handle);
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+ return ret;
+ }
+ /* For DAX writes we need to zero out unwritten extents */
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ /*
+ * We are protected by i_mmap_sem or i_rwsem so we know
+ * block cannot go away from under us even though we
+ * dropped i_data_sem. Convert extent to written and
+ * write zeros there.
+ */
+ ret = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CONVERT |
+ EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (ret < 0) {
+ ext4_journal_stop(handle);
+ return ret;
+ }
+ }
+ }
iomap->flags = 0;
iomap->bdev = inode->i_sb->s_bdev;
@@ -3355,8 +3399,46 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return 0;
}
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
+{
+ if (flags & IOMAP_WRITE) {
+ handle_t *handle = ext4_journal_current_handle();
+ ext4_lblk_t written_blk, end_blk;
+ int blkbits = inode->i_blkbits;
+ bool truncate = false;
+
+ if (ext4_update_inode_size(inode, offset + written))
+ ext4_mark_inode_dirty(handle, inode);
+ written_blk = (offset + written) >> blkbits;
+ end_blk = (offset + length) >> blkbits;
+ /*
+ * We may need to truncate allocated but not written blocks
+ * beyond EOF.
+ */
+ if (written_blk < end_blk && offset + length > inode->i_size &&
+ ext4_can_truncate(inode)) {
+ ext4_orphan_add(handle, inode);
+ truncate = true;
+ }
+ ext4_journal_stop(handle);
+ if (truncate) {
+ ext4_truncate_failed_write(inode);
+ /*
+ * If truncate failed early the inode might still be
+ * on the orphan list; we need to make sure the inode
+ * is removed from the orphan list in that case.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
+ }
+ }
+ return 0;
+}
+
struct iomap_ops ext4_iomap_ops = {
.iomap_begin = ext4_iomap_begin,
+ .iomap_end = ext4_iomap_end,
};
#else
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 07/11] ext4: Avoid split extents for DAX writes
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (3 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 06/11] ext4: DAX iomap write support Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 08/11] ext4: Convert DAX faults to iomap infrastructure Jan Kara
` (3 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Currently mapping of blocks for DAX writes happen with
EXT4_GET_BLOCKS_PRE_IO flag set. That has a result that each
ext4_map_blocks() call creates a separate written extent, although it
could be merged to the neighboring extents in the extent tree. The
reason for using this flag is that in case the extent is unwritten, we
need to convert it to written one and zero it out. However this "convert
mapped range to written" operation is already implemented by
ext4_map_blocks() for the case of data writes into unwritten extent. So
just use flags for that mode of operation, simplify the code, and avoid
unnecessary split extents.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/inode.c | 17 -----------------
1 file changed, 17 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d07d003ebce2..635518dde20e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3346,7 +3346,6 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return PTR_ERR(handle);
ret = ext4_map_blocks(handle, inode, &map,
- EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_CREATE_ZERO);
if (ret < 0) {
ext4_journal_stop(handle);
@@ -3355,22 +3354,6 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
goto retry;
return ret;
}
- /* For DAX writes we need to zero out unwritten extents */
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- /*
- * We are protected by i_mmap_sem or i_rwsem so we know
- * block cannot go away from under us even though we
- * dropped i_data_sem. Convert extent to written and
- * write zeros there.
- */
- ret = ext4_map_blocks(handle, inode, &map,
- EXT4_GET_BLOCKS_CONVERT |
- EXT4_GET_BLOCKS_CREATE_ZERO);
- if (ret < 0) {
- ext4_journal_stop(handle);
- return ret;
- }
- }
}
iomap->flags = 0;
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 08/11] ext4: Convert DAX faults to iomap infrastructure
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (4 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 07/11] ext4: Avoid split extents for DAX writes Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-02 14:30 ` Christoph Hellwig
2016-11-01 21:06 ` [PATCH 09/11] ext4: Rip out DAX handling from direct IO path Jan Kara
` (2 subsequent siblings)
8 siblings, 1 reply; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Convert DAX faults to use iomap infrastructure. We would not have to start
transaction in ext4_dax_fault() anymore since ext4_iomap_begin takes
care of that but so far we do that to avoid lock inversion of
transaction start with DAX entry lock which gets acquired in
dax_iomap_fault() before calling ->iomap_begin handler.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/ext4.h | 1 +
fs/ext4/file.c | 9 +++++----
fs/ext4/inode.c | 18 ++++++++++++++++++
3 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 098b39910001..2714eb6174ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3272,6 +3272,7 @@ static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len)
}
extern struct iomap_ops ext4_iomap_ops;
+extern struct iomap_ops ext4_iomap_fault_ops;
#endif /* __KERNEL__ */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d7ab0e90d1b8..da44e49c8276 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -273,7 +273,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = dax_fault(vma, vmf, ext4_dax_get_block);
+ result = dax_iomap_fault(vma, vmf, &ext4_iomap_fault_ops);
if (write) {
if (!IS_ERR(handle))
@@ -307,9 +307,10 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
- else
- result = dax_pmd_fault(vma, addr, pmd, flags,
- ext4_dax_get_block);
+ else {
+ result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+ &ext4_iomap_fault_ops);
+ }
if (write) {
if (!IS_ERR(handle))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 635518dde20e..001fef06ea97 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3419,11 +3419,29 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
return 0;
}
+/*
+ * For faults we don't allocate any blocks outside of isize and we don't want
+ * to change it so we use a dedicated function for it...
+ */
+static int ext4_iomap_fault_end(struct inode *inode, loff_t offset,
+ loff_t length, ssize_t written, unsigned flags,
+ struct iomap *iomap)
+{
+ if (flags & IOMAP_WRITE)
+ ext4_journal_stop(ext4_journal_current_handle());
+ return 0;
+}
+
struct iomap_ops ext4_iomap_ops = {
.iomap_begin = ext4_iomap_begin,
.iomap_end = ext4_iomap_end,
};
+struct iomap_ops ext4_iomap_fault_ops = {
+ .iomap_begin = ext4_iomap_begin,
+ .iomap_end = ext4_iomap_fault_end,
+};
+
#else
/* Just define empty function, it will never get called. */
int ext4_dax_get_block(struct inode *inode, sector_t iblock,
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 09/11] ext4: Rip out DAX handling from direct IO path
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (5 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 08/11] ext4: Convert DAX faults to iomap infrastructure Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 10/11] ext2: Use iomap_zero_range() for zeroing truncated page in DAX path Jan Kara
2016-11-01 21:06 ` [PATCH 11/11] dax: Rip out get_block based IO support Jan Kara
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Reads and writes for DAX inodes should no longer end up in direct IO
code. Rip out the support and add a warning.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext4/inode.c | 49 +++++++++++++++----------------------------------
1 file changed, 15 insertions(+), 34 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 001fef06ea97..e236b84fc079 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3571,19 +3571,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
iocb->private = NULL;
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
- else if (IS_DAX(inode)) {
- /*
- * We can avoid zeroing for aligned DAX writes beyond EOF. Other
- * writes need zeroing either because they can race with page
- * faults or because they use partial blocks.
- */
- if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
- ext4_aligned_io(inode, offset, count))
- get_block_func = ext4_dio_get_block;
- else
- get_block_func = ext4_dax_get_block;
- dio_flags = DIO_LOCKING;
- } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3597,14 +3585,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
#ifdef CONFIG_EXT4_FS_ENCRYPTION
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, get_block_func,
- ext4_end_io_dio, dio_flags);
- } else
- ret = __blockdev_direct_IO(iocb, inode,
- inode->i_sb->s_bdev, iter,
- get_block_func,
- ext4_end_io_dio, NULL, dio_flags);
+ ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ get_block_func, ext4_end_io_dio, NULL,
+ dio_flags);
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3673,6 +3656,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
ssize_t ret;
/*
@@ -3681,19 +3665,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
* we are protected against page writeback as well.
*/
inode_lock_shared(inode);
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
- } else {
- size_t count = iov_iter_count(iter);
-
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
- if (ret)
- goto out_unlock;
- ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, ext4_dio_get_block,
- NULL, NULL, 0);
- }
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ iocb->ki_pos + count);
+ if (ret)
+ goto out_unlock;
+ ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, ext4_dio_get_block, NULL, NULL, 0);
out_unlock:
inode_unlock_shared(inode);
return ret;
@@ -3722,6 +3699,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ext4_has_inline_data(inode))
return 0;
+ /* DAX uses iomap path now */
+ if (WARN_ON_ONCE(IS_DAX(inode)))
+ return 0;
+
trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == READ)
ret = ext4_direct_IO_read(iocb, iter);
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 10/11] ext2: Use iomap_zero_range() for zeroing truncated page in DAX path
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (6 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 09/11] ext4: Rip out DAX handling from direct IO path Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
2016-11-01 21:06 ` [PATCH 11/11] dax: Rip out get_block based IO support Jan Kara
8 siblings, 0 replies; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
Currently the last user of ext2_get_blocks() for DAX inodes was
dax_truncate_page(). Convert that to iomap_zero_range() so that all DAX
IO uses the iomap path.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/ext2/inode.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 335cd1e1f902..b723666124a7 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -846,6 +846,9 @@ struct iomap_ops ext2_iomap_ops = {
.iomap_begin = ext2_iomap_begin,
.iomap_end = ext2_iomap_end,
};
+#else
+/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
+struct iomap_ops ext2_iomap_ops;
#endif /* CONFIG_FS_DAX */
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -1289,9 +1292,11 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
- if (IS_DAX(inode))
- error = dax_truncate_page(inode, newsize, ext2_get_block);
- else if (test_opt(inode->i_sb, NOBH))
+ if (IS_DAX(inode)) {
+ error = iomap_zero_range(inode, newsize,
+ PAGE_ALIGN(newsize) - newsize, NULL,
+ &ext2_iomap_ops);
+ } else if (test_opt(inode->i_sb, NOBH))
error = nobh_truncate_page(inode->i_mapping,
newsize, ext2_get_block);
else
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 11/11] dax: Rip out get_block based IO support
[not found] ` <1478034381-19037-1-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
` (7 preceding siblings ...)
2016-11-01 21:06 ` [PATCH 10/11] ext2: Use iomap_zero_range() for zeroing truncated page in DAX path Jan Kara
@ 2016-11-01 21:06 ` Jan Kara
[not found] ` <1478034381-19037-12-git-send-email-jack-AlSwsSmVLrQ@public.gmane.org>
8 siblings, 1 reply; 28+ messages in thread
From: Jan Kara @ 2016-11-01 21:06 UTC (permalink / raw)
To: linux-ext4-u79uwXL29TY76Z2rM5mHXA
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Dave Chinner, Jan Kara,
Ted Tso, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
No one uses functions using the get_block callback anymore. Rip them
out.
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
fs/dax.c | 315 ----------------------------------------------------
include/linux/dax.h | 12 --
2 files changed, 327 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 1fdf4c091371..380ca8547e35 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -116,168 +116,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
return page;
}
-static bool buffer_written(struct buffer_head *bh)
-{
- return buffer_mapped(bh) && !buffer_unwritten(bh);
-}
-
-static sector_t to_sector(const struct buffer_head *bh,
- const struct inode *inode)
-{
- sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
-
- return sector;
-}
-
-static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
- loff_t start, loff_t end, get_block_t get_block,
- struct buffer_head *bh)
-{
- loff_t pos = start, max = start, bh_max = start;
- bool hole = false;
- struct block_device *bdev = NULL;
- int rw = iov_iter_rw(iter), rc;
- long map_len = 0;
- struct blk_dax_ctl dax = {
- .addr = ERR_PTR(-EIO),
- };
- unsigned blkbits = inode->i_blkbits;
- sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
- >> blkbits;
-
- if (rw == READ)
- end = min(end, i_size_read(inode));
-
- while (pos < end) {
- size_t len;
- if (pos == max) {
- long page = pos >> PAGE_SHIFT;
- sector_t block = page << (PAGE_SHIFT - blkbits);
- unsigned first = pos - (block << blkbits);
- long size;
-
- if (pos == bh_max) {
- bh->b_size = PAGE_ALIGN(end - pos);
- bh->b_state = 0;
- rc = get_block(inode, block, bh, rw == WRITE);
- if (rc)
- break;
- bh_max = pos - first + bh->b_size;
- bdev = bh->b_bdev;
- /*
- * We allow uninitialized buffers for writes
- * beyond EOF as those cannot race with faults
- */
- WARN_ON_ONCE(
- (buffer_new(bh) && block < file_blks) ||
- (rw == WRITE && buffer_unwritten(bh)));
- } else {
- unsigned done = bh->b_size -
- (bh_max - (pos - first));
- bh->b_blocknr += done >> blkbits;
- bh->b_size -= done;
- }
-
- hole = rw == READ && !buffer_written(bh);
- if (hole) {
- size = bh->b_size - first;
- } else {
- dax_unmap_atomic(bdev, &dax);
- dax.sector = to_sector(bh, inode);
- dax.size = bh->b_size;
- map_len = dax_map_atomic(bdev, &dax);
- if (map_len < 0) {
- rc = map_len;
- break;
- }
- dax.addr += first;
- size = map_len - first;
- }
- /*
- * pos + size is one past the last offset for IO,
- * so pos + size can overflow loff_t at extreme offsets.
- * Cast to u64 to catch this and get the true minimum.
- */
- max = min_t(u64, pos + size, end);
- }
-
- if (iov_iter_rw(iter) == WRITE) {
- len = copy_from_iter_pmem(dax.addr, max - pos, iter);
- } else if (!hole)
- len = copy_to_iter((void __force *) dax.addr, max - pos,
- iter);
- else
- len = iov_iter_zero(max - pos, iter);
-
- if (!len) {
- rc = -EFAULT;
- break;
- }
-
- pos += len;
- if (!IS_ERR(dax.addr))
- dax.addr += len;
- }
-
- dax_unmap_atomic(bdev, &dax);
-
- return (pos == start) ? rc : pos - start;
-}
-
-/**
- * dax_do_io - Perform I/O to a DAX file
- * @iocb: The control block for this I/O
- * @inode: The file which the I/O is directed at
- * @iter: The addresses to do I/O from or to
- * @get_block: The filesystem method used to translate file offsets to blocks
- * @end_io: A filesystem callback for I/O completion
- * @flags: See below
- *
- * This function uses the same locking scheme as do_blockdev_direct_IO:
- * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
- * caller for writes. For reads, we take and release the i_mutex ourselves.
- * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
- * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
- * is in progress.
- */
-ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
- struct iov_iter *iter, get_block_t get_block,
- dio_iodone_t end_io, int flags)
-{
- struct buffer_head bh;
- ssize_t retval = -EINVAL;
- loff_t pos = iocb->ki_pos;
- loff_t end = pos + iov_iter_count(iter);
-
- memset(&bh, 0, sizeof(bh));
- bh.b_bdev = inode->i_sb->s_bdev;
-
- if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
- inode_lock(inode);
-
- /* Protects against truncate */
- if (!(flags & DIO_SKIP_DIO_COUNT))
- inode_dio_begin(inode);
-
- retval = dax_io(inode, iter, pos, end, get_block, &bh);
-
- if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
- inode_unlock(inode);
-
- if (end_io) {
- int err;
-
- err = end_io(iocb, pos, retval, bh.b_private);
- if (err)
- retval = err;
- }
-
- if (!(flags & DIO_SKIP_DIO_COUNT))
- inode_dio_end(inode);
- return retval;
-}
-EXPORT_SYMBOL_GPL(dax_do_io);
-
/*
* DAX radix tree locking
*/
@@ -920,105 +758,6 @@ static int dax_insert_mapping(struct address_space *mapping,
}
/**
- * dax_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * When a page fault occurs, filesystems may call this helper in their
- * fault handler for DAX files. dax_fault() assumes the caller has done all
- * the necessary locking for the page fault to proceed successfully.
- */
-int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
-{
- struct file *file = vma->vm_file;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- void *entry;
- struct buffer_head bh;
- unsigned long vaddr = (unsigned long)vmf->virtual_address;
- unsigned blkbits = inode->i_blkbits;
- sector_t block;
- pgoff_t size;
- int error;
- int major = 0;
-
- /*
- * Check whether offset isn't beyond end of file now. Caller is supposed
- * to hold locks serializing us with truncate / punch hole so this is
- * a reliable test.
- */
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- return VM_FAULT_SIGBUS;
-
- memset(&bh, 0, sizeof(bh));
- block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
- bh.b_bdev = inode->i_sb->s_bdev;
- bh.b_size = PAGE_SIZE;
-
- entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
- if (IS_ERR(entry)) {
- error = PTR_ERR(entry);
- goto out;
- }
-
- error = get_block(inode, block, &bh, 0);
- if (!error && (bh.b_size < PAGE_SIZE))
- error = -EIO; /* fs corruption? */
- if (error)
- goto unlock_entry;
-
- if (vmf->cow_page) {
- struct page *new_page = vmf->cow_page;
- if (buffer_written(&bh))
- error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
- bh.b_size, new_page, vaddr);
- else
- clear_user_highpage(new_page, vaddr);
- if (error)
- goto unlock_entry;
- if (!radix_tree_exceptional_entry(entry)) {
- vmf->page = entry;
- return VM_FAULT_LOCKED;
- }
- vmf->entry = entry;
- return VM_FAULT_DAX_LOCKED;
- }
-
- if (!buffer_mapped(&bh)) {
- if (vmf->flags & FAULT_FLAG_WRITE) {
- error = get_block(inode, block, &bh, 1);
- count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
- major = VM_FAULT_MAJOR;
- if (!error && (bh.b_size < PAGE_SIZE))
- error = -EIO;
- if (error)
- goto unlock_entry;
- } else {
- return dax_load_hole(mapping, entry, vmf);
- }
- }
-
- /* Filesystem should not return unwritten buffers to us! */
- WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
- error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
- bh.b_size, &entry, vma, vmf);
- unlock_entry:
- put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
- if (error == -ENOMEM)
- return VM_FAULT_OOM | major;
- /* -EBUSY is fine, somebody else faulted on the same PTE */
- if ((error < 0) && (error != -EBUSY))
- return VM_FAULT_SIGBUS | major;
- return VM_FAULT_NOPAGE | major;
-}
-EXPORT_SYMBOL_GPL(dax_fault);
-
-/**
* dax_pfn_mkwrite - handle first write to DAX page
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
@@ -1078,60 +817,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
-/**
- * dax_zero_page_range - zero a range within a page of a DAX file
- * @inode: The file being truncated
- * @from: The file offset that is being truncated to
- * @length: The number of bytes to zero
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * This function can be called by a filesystem when it is zeroing part of a
- * page in a DAX file. This is intended for hole-punch operations. If
- * you are truncating a file, the helper function dax_truncate_page() may be
- * more convenient.
- */
-int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
- get_block_t get_block)
-{
- struct buffer_head bh;
- pgoff_t index = from >> PAGE_SHIFT;
- unsigned offset = from & (PAGE_SIZE-1);
- int err;
-
- /* Block boundary? Nothing to do */
- if (!length)
- return 0;
- if (WARN_ON_ONCE((offset + length) > PAGE_SIZE))
- return -EINVAL;
-
- memset(&bh, 0, sizeof(bh));
- bh.b_bdev = inode->i_sb->s_bdev;
- bh.b_size = PAGE_SIZE;
- err = get_block(inode, index, &bh, 0);
- if (err < 0 || !buffer_written(&bh))
- return err;
-
- return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode),
- offset, length);
-}
-EXPORT_SYMBOL_GPL(dax_zero_page_range);
-
-/**
- * dax_truncate_page - handle a partial page being truncated in a DAX file
- * @inode: The file being truncated
- * @from: The file offset that is being truncated to
- * @get_block: The filesystem method used to translate file offsets to blocks
- *
- * Similar to block_truncate_page(), this function can be called by a
- * filesystem when it is truncating a DAX file to handle the partial page.
- */
-int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
-{
- unsigned length = PAGE_ALIGN(from) - from;
- return dax_zero_page_range(inode, from, length, get_block);
-}
-EXPORT_SYMBOL_GPL(dax_truncate_page);
-
#ifdef CONFIG_FS_IOMAP
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8d1a5c47945f..0afade8bd3d7 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,13 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_ops *ops);
-ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
- get_block_t, dio_iodone_t, int flags);
-int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
-int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops);
-int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, void *entry, bool wake_all);
@@ -73,12 +68,6 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
}
#endif
-static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, unsigned int flags, get_block_t gb)
-{
- return VM_FAULT_FALLBACK;
-}
-
#ifdef CONFIG_FS_DAX_PMD
static inline unsigned int dax_radix_order(void *entry)
{
@@ -101,7 +90,6 @@ static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
}
#endif
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
-#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
--
2.6.6
^ permalink raw reply related [flat|nested] 28+ messages in thread