* [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO
@ 2014-10-11 12:29 WeiWei Wang
2014-10-15 23:42 ` Andrew Morton
2014-10-22 11:58 ` Joseph Qi
0 siblings, 2 replies; 4+ messages in thread
From: WeiWei Wang @ 2014-10-11 12:29 UTC (permalink / raw)
To: ocfs2-devel
Add the inode to orphan dir first, and then delete it once append
O_DIRECT finished.
This is to make sure block allocation and inode size are consistent.
Signed-off-by: Weiwei Wang <wangww631@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
---
fs/ocfs2/aops.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 172 insertions(+), 6 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4a231a1..81d95e1 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -28,6 +28,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
+#include <linux/blkdev.h>
#include <cluster/masklog.h>
@@ -47,6 +48,9 @@
#include "ocfs2_trace.h"
#include "buffer_head_io.h"
+#include "dir.h"
+#include "namei.h"
+#include "sysfile.h"
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -597,6 +601,159 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static int ocfs2_is_overwrite(struct ocfs2_super *osb,
+ struct inode *inode , loff_t offset)
+{
+ ssize_t ret = 0;
+ u32 v_cpos = 0;
+ u32 p_cpos = 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
+ return 1;
+
+ return 0;
+}
+
+static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset)
+{
+ ssize_t ret = 0;
+ int orphan = 0;
+ int is_overwrite = 0;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ size_t count = iter->count;
+ journal_t *journal = osb->journal->j_journal;
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
+ u32 zero_len = offset % (1 << osb->s_clustersize_bits);
+ int cluster_align = offset % (1 << osb->s_clustersize_bits) ? 0 : 1;
+ int append_write = offset >= i_size_read(inode) ? 1 : 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ loff_t final_size = offset + count;
+ /*
+ * when final_size > inode->i_size, inode->i_size will be
+ * updated after direct write, so add the inode to orphan
+ * dir first.
+ */
+ if (final_size > i_size_read(inode)) {
+ ret = ocfs2_add_inode_to_orphan(osb, inode);
+ if (ret < 0)
+ goto out;
+ orphan = 1;
+ }
+
+ if (append_write) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, offset);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh,
+ offset, offset);
+ if (ret < 0) {
+ mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
+ if (is_overwrite < 0) {
+ mlog_errno(is_overwrite);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ }
+
+ ret = blockdev_direct_IO(WRITE, iocb,
+ inode, iter, offset,
+ ocfs2_direct_IO_get_blocks);
+ if (unlikely(ret < 0)) {
+ loff_t isize = i_size_read(inode);
+
+ if (offset + count > isize) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+ if (isize == i_size_read(inode)) {
+ ret = ocfs2_truncate_file(inode, di_bh,
+ isize);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ }
+ }
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+ ret = jbd2_journal_force_commit(journal);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+ } else if (append_write && !is_overwrite && !cluster_align) {
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ p_cpos << (osb->s_clustersize_bits - 9),
+ zero_len >> 9,
+ GFP_KERNEL);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+clean_orphan:
+ if (orphan) {
+ int update_isize = ret > 0 ? 1 : 0;
+ loff_t end = update_isize ? (offset + ret) : 0;
+
+ ret = ocfs2_del_inode_from_orphan(osb, inode,
+ update_isize, end);
+ if (ret < 0)
+ goto out;
+
+ ret = jbd2_journal_force_commit(journal);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+out:
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO(int rw,
struct kiocb *iocb,
struct iov_iter *iter,
@@ -604,6 +761,9 @@ static ssize_t ocfs2_direct_IO(int rw,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int full_coherency = !(osb->s_mount_opt &
+ OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* Fallback to buffered I/O if we see an inode without
@@ -612,14 +772,20 @@ static ssize_t ocfs2_direct_IO(int rw,
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
- /* Fallback to buffered I/O if we are appending. */
- if (i_size_read(inode) <= offset)
+ /* Fallback to buffered I/O if we are appending and
+ * concurrent O_DIRECT writes are allowed.
+ */
+ if (i_size_read(inode) <= offset && !full_coherency)
return 0;
- return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
- iter, offset,
- ocfs2_direct_IO_get_blocks,
- ocfs2_dio_end_io, NULL, 0);
+ if (rw == READ)
+ return __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev,
+ iter, offset,
+ ocfs2_direct_IO_get_blocks,
+ ocfs2_dio_end_io, NULL, 0);
+ else
+ return ocfs2_direct_IO_write(iocb, iter, offset);
}
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
--
1.8.4.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO
2014-10-11 12:29 [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO WeiWei Wang
@ 2014-10-15 23:42 ` Andrew Morton
2014-10-15 23:55 ` Andrew Morton
2014-10-22 11:58 ` Joseph Qi
1 sibling, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2014-10-15 23:42 UTC (permalink / raw)
To: ocfs2-devel
On Sat, 11 Oct 2014 20:29:08 +0800 WeiWei Wang <wangww631@huawei.com> wrote:
> Add the inode to orphan dir first, and then delete it once append
> O_DIRECT finished.
> This is to make sure block allocation and inode size are consistent.
>
> ...
>
> +static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
> + struct iov_iter *iter,
> + loff_t offset)
> +{
> + ssize_t ret = 0;
> + int orphan = 0;
> + int is_overwrite = 0;
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> + struct buffer_head *di_bh = NULL;
> + size_t count = iter->count;
> + journal_t *journal = osb->journal->j_journal;
> + u32 p_cpos = 0;
> + u32 v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
> + u32 zero_len = offset % (1 << osb->s_clustersize_bits);
On i386 this generates a call to _moddi3, which doesn't exist. You'll
need to use do_div() or something similar.
> + int cluster_align = offset % (1 << osb->s_clustersize_bits) ? 0 : 1;
Similar here. Why not just do cluster_align = !!zero_len?
> + int append_write = offset >= i_size_read(inode) ? 1 : 0;
> + unsigned int num_clusters = 0;
> + unsigned int ext_flags = 0;
>
> ...
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO
2014-10-15 23:42 ` Andrew Morton
@ 2014-10-15 23:55 ` Andrew Morton
0 siblings, 0 replies; 4+ messages in thread
From: Andrew Morton @ 2014-10-15 23:55 UTC (permalink / raw)
To: ocfs2-devel
On Wed, 15 Oct 2014 16:42:44 -0700 Andrew Morton <akpm@linux-foundation.org> wrote:
> On Sat, 11 Oct 2014 20:29:08 +0800 WeiWei Wang <wangww631@huawei.com> wrote:
>
> > Add the inode to orphan dir first, and then delete it once append
> > O_DIRECT finished.
> > This is to make sure block allocation and inode size are consistent.
> >
> > ...
> >
> > +static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
> > + struct iov_iter *iter,
> > + loff_t offset)
> > +{
> > + ssize_t ret = 0;
> > + int orphan = 0;
> > + int is_overwrite = 0;
> > + struct file *file = iocb->ki_filp;
> > + struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
> > + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> > + struct buffer_head *di_bh = NULL;
> > + size_t count = iter->count;
> > + journal_t *journal = osb->journal->j_journal;
> > + u32 p_cpos = 0;
> > + u32 v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
> > + u32 zero_len = offset % (1 << osb->s_clustersize_bits);
>
> On i386 this generates a call to _moddi3, which doesn't exist. You'll
> need to use do_div() or something similar.
>
> > + int cluster_align = offset % (1 << osb->s_clustersize_bits) ? 0 : 1;
>
> Similar here. Why not just do cluster_align = !!zero_len?
>
Something like this...
--- a/fs/ocfs2/aops.c~ocfs2-add-and-remove-inode-in-orphan-dir-in-ocfs2_direct_io-fix
+++ a/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
#include <linux/mpage.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
+#include <linux/math64.h>
#include <cluster/masklog.h>
@@ -640,13 +641,20 @@ static ssize_t ocfs2_direct_IO_write(str
journal_t *journal = osb->journal->j_journal;
u32 p_cpos = 0;
u32 v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
- u32 zero_len = offset % (1 << osb->s_clustersize_bits);
- int cluster_align = offset % (1 << osb->s_clustersize_bits) ? 0 : 1;
+ u32 zero_len;
+ int cluster_align;
+ loff_t final_size = offset + count;
int append_write = offset >= i_size_read(inode) ? 1 : 0;
unsigned int num_clusters = 0;
unsigned int ext_flags = 0;
- loff_t final_size = offset + count;
+ {
+ loff_t o = offset;
+
+ zero_len = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align = !!zero_len;
+ }
+
/*
* when final_size > inode->i_size, inode->i_size will be
* updated after direct write, so add the inode to orphan
_
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO
2014-10-11 12:29 [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO WeiWei Wang
2014-10-15 23:42 ` Andrew Morton
@ 2014-10-22 11:58 ` Joseph Qi
1 sibling, 0 replies; 4+ messages in thread
From: Joseph Qi @ 2014-10-22 11:58 UTC (permalink / raw)
To: ocfs2-devel
This patch has some issues:
1) cannot use ocfs2_clusters_for_bytes to calculate v_cpos, we need the
floor, not roof;
2) the return value (bytes written) of blockdev_direct_IO will be
override, which is not right.
On 2014/10/11 20:29, WeiWei Wang wrote:
> Add the inode to orphan dir first, and then delete it once append
> O_DIRECT finished.
> This is to make sure block allocation and inode size are consistent.
>
> Signed-off-by: Weiwei Wang <wangww631@huawei.com>
> Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
> ---
> fs/ocfs2/aops.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 172 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 4a231a1..81d95e1 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -28,6 +28,7 @@
> #include <linux/pipe_fs_i.h>
> #include <linux/mpage.h>
> #include <linux/quotaops.h>
> +#include <linux/blkdev.h>
>
> #include <cluster/masklog.h>
>
> @@ -47,6 +48,9 @@
> #include "ocfs2_trace.h"
>
> #include "buffer_head_io.h"
> +#include "dir.h"
> +#include "namei.h"
> +#include "sysfile.h"
>
> static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
> struct buffer_head *bh_result, int create)
> @@ -597,6 +601,159 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
> return try_to_free_buffers(page);
> }
>
> +static int ocfs2_is_overwrite(struct ocfs2_super *osb,
> + struct inode *inode , loff_t offset)
> +{
> + ssize_t ret = 0;
> + u32 v_cpos = 0;
> + u32 p_cpos = 0;
> + unsigned int num_clusters = 0;
> + unsigned int ext_flags = 0;
> +
> + v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
> +
> + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
> + &num_clusters, &ext_flags);
> + if (ret < 0) {
> + mlog_errno(ret);
> + return ret;
> + }
> +
> + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
> + return 1;
> +
> + return 0;
> +}
> +
> +static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
> + struct iov_iter *iter,
> + loff_t offset)
> +{
> + ssize_t ret = 0;
> + int orphan = 0;
> + int is_overwrite = 0;
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> + struct buffer_head *di_bh = NULL;
> + size_t count = iter->count;
> + journal_t *journal = osb->journal->j_journal;
> + u32 p_cpos = 0;
> + u32 v_cpos = ocfs2_clusters_for_bytes(osb->sb, offset);
> + u32 zero_len = offset % (1 << osb->s_clustersize_bits);
> + int cluster_align = offset % (1 << osb->s_clustersize_bits) ? 0 : 1;
> + int append_write = offset >= i_size_read(inode) ? 1 : 0;
> + unsigned int num_clusters = 0;
> + unsigned int ext_flags = 0;
> +
> + loff_t final_size = offset + count;
> + /*
> + * when final_size > inode->i_size, inode->i_size will be
> + * updated after direct write, so add the inode to orphan
> + * dir first.
> + */
> + if (final_size > i_size_read(inode)) {
> + ret = ocfs2_add_inode_to_orphan(osb, inode);
> + if (ret < 0)
> + goto out;
> + orphan = 1;
> + }
> +
> + if (append_write) {
> + ret = ocfs2_inode_lock(inode, &di_bh, 1);
> + if (ret < 0) {
> + mlog_errno(ret);
> + goto clean_orphan;
> + }
> +
> + if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
> + ret = ocfs2_zero_extend(inode, di_bh, offset);
> + else
> + ret = ocfs2_extend_no_holes(inode, di_bh,
> + offset, offset);
> + if (ret < 0) {
> + mlog_errno(ret);
> + ocfs2_inode_unlock(inode, 1);
> + brelse(di_bh);
> + goto clean_orphan;
> + }
> +
> + is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
> + if (is_overwrite < 0) {
> + mlog_errno(is_overwrite);
> + ocfs2_inode_unlock(inode, 1);
> + brelse(di_bh);
> + goto clean_orphan;
> + }
> +
> + ocfs2_inode_unlock(inode, 1);
> + brelse(di_bh);
> + }
> +
> + ret = blockdev_direct_IO(WRITE, iocb,
> + inode, iter, offset,
> + ocfs2_direct_IO_get_blocks);
> + if (unlikely(ret < 0)) {
> + loff_t isize = i_size_read(inode);
> +
> + if (offset + count > isize) {
> + ret = ocfs2_inode_lock(inode, &di_bh, 1);
> + if (ret < 0) {
> + mlog_errno(ret);
> + goto clean_orphan;
> + }
> + if (isize == i_size_read(inode)) {
> + ret = ocfs2_truncate_file(inode, di_bh,
> + isize);
> + if (ret < 0) {
> + if (ret != -ENOSPC)
> + mlog_errno(ret);
> + }
> + }
> + ocfs2_inode_unlock(inode, 1);
> + brelse(di_bh);
> +
> + ret = jbd2_journal_force_commit(journal);
> + if (ret < 0)
> + mlog_errno(ret);
> + }
> + } else if (append_write && !is_overwrite && !cluster_align) {
> + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
> + &num_clusters, &ext_flags);
> + if (ret < 0) {
> + mlog_errno(ret);
> + goto clean_orphan;
> + }
> +
> + BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
> +
> + ret = blkdev_issue_zeroout(osb->sb->s_bdev,
> + p_cpos << (osb->s_clustersize_bits - 9),
> + zero_len >> 9,
> + GFP_KERNEL);
> + if (ret < 0)
> + mlog_errno(ret);
> + }
> +
> +clean_orphan:
> + if (orphan) {
> + int update_isize = ret > 0 ? 1 : 0;
> + loff_t end = update_isize ? (offset + ret) : 0;
> +
> + ret = ocfs2_del_inode_from_orphan(osb, inode,
> + update_isize, end);
> + if (ret < 0)
> + goto out;
> +
> + ret = jbd2_journal_force_commit(journal);
> + if (ret < 0)
> + mlog_errno(ret);
> + }
> +
> +out:
> + return ret;
> +}
> +
> static ssize_t ocfs2_direct_IO(int rw,
> struct kiocb *iocb,
> struct iov_iter *iter,
> @@ -604,6 +761,9 @@ static ssize_t ocfs2_direct_IO(int rw,
> {
> struct file *file = iocb->ki_filp;
> struct inode *inode = file_inode(file)->i_mapping->host;
> + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> + int full_coherency = !(osb->s_mount_opt &
> + OCFS2_MOUNT_COHERENCY_BUFFERED);
>
> /*
> * Fallback to buffered I/O if we see an inode without
> @@ -612,14 +772,20 @@ static ssize_t ocfs2_direct_IO(int rw,
> if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
> return 0;
>
> - /* Fallback to buffered I/O if we are appending. */
> - if (i_size_read(inode) <= offset)
> + /* Fallback to buffered I/O if we are appending and
> + * concurrent O_DIRECT writes are allowed.
> + */
> + if (i_size_read(inode) <= offset && !full_coherency)
> return 0;
>
> - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
> - iter, offset,
> - ocfs2_direct_IO_get_blocks,
> - ocfs2_dio_end_io, NULL, 0);
> + if (rw == READ)
> + return __blockdev_direct_IO(rw, iocb, inode,
> + inode->i_sb->s_bdev,
> + iter, offset,
> + ocfs2_direct_IO_get_blocks,
> + ocfs2_dio_end_io, NULL, 0);
> + else
> + return ocfs2_direct_IO_write(iocb, iter, offset);
> }
>
> static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2014-10-22 11:58 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-10-11 12:29 [Ocfs2-devel] [PATCH 4/7 v4] ocfs2: add and remove inode in orphan dir in ocfs2_direct_IO WeiWei Wang
2014-10-15 23:42 ` Andrew Morton
2014-10-15 23:55 ` Andrew Morton
2014-10-22 11:58 ` Joseph Qi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.