From: alex chen <alex.chen@huawei.com>
To: ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [patch 11/11] ocfs2: nowait aio support
Date: Sat, 16 Dec 2017 16:37:49 +0800 [thread overview]
Message-ID: <5A34DB5D.7020800@huawei.com> (raw)
In-Reply-To: <5a20852b.Suoho2jIQTKYp+nU%akpm@linux-foundation.org>
Hi Gang,
On 2017/12/1 6:24, akpm at linux-foundation.org wrote:
> From: Gang He <ghe@suse.com>
> Subject: ocfs2: nowait aio support
>
> Return EAGAIN if any of the following checks fail for direct I/O:
>
> - Cannot get the related locks immediately
>
> - Blocks are not allocated at the write location, it will trigger
> block allocation and block IO operations.
>
> [ghe at suse.com: v2]
> Link: https://urldefense.proofpoint.com/v2/url?u=http-3A__lkml.kernel.org_r_1511944612-2D9629-2D4-2Dgit-2Dsend-2Demail-2Dghe-40suse.com&d=DwICAg&c=RoP1YumCXCgaWHvlZYR8PZh8Bv7qIrMUB65eapI_JnE&r=C7gAd4uDxlAvTdc0vmU6X8CMk6L2iDY8-HD0qT6Fo7Y&m=03axuTG8C4n44K1VY4yCWSg99FrknrLMs6_MlBfcfZU&s=bPQ5h9lbd8EtCwlBcfN_UJx6O1Fr6JiZUO7Ak2Igdds&e=
> Link: https://urldefense.proofpoint.com/v2/url?u=http-3A__lkml.kernel.org_r_1511775987-2D841-2D4-2Dgit-2Dsend-2Demail-2Dghe-40suse.com&d=DwICAg&c=RoP1YumCXCgaWHvlZYR8PZh8Bv7qIrMUB65eapI_JnE&r=C7gAd4uDxlAvTdc0vmU6X8CMk6L2iDY8-HD0qT6Fo7Y&m=03axuTG8C4n44K1VY4yCWSg99FrknrLMs6_MlBfcfZU&s=TX3FDWVXDkrHLxSD3HW3GW5igY6jogyX7Sf-OB5WrYk&e=
> Signed-off-by: Gang He <ghe@suse.com>
> Cc: Mark Fasheh <mfasheh@versity.com>
> Cc: Joel Becker <jlbec@evilplan.org>
> Cc: Junxiao Bi <junxiao.bi@oracle.com>
> Cc: Joseph Qi <jiangqi903@gmail.com>
> Cc: Changwei Ge <ge.changwei@h3c.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>
> fs/ocfs2/dir.c | 2
> fs/ocfs2/dlmglue.c | 20 ++++++--
> fs/ocfs2/dlmglue.h | 2
> fs/ocfs2/file.c | 95 ++++++++++++++++++++++++++++++---------
> fs/ocfs2/mmap.c | 2
> fs/ocfs2/ocfs2_trace.h | 10 ++--
> 6 files changed, 99 insertions(+), 32 deletions(-)
>
> diff -puN fs/ocfs2/dir.c~ocfs2-nowait-aio-support fs/ocfs2/dir.c
> --- a/fs/ocfs2/dir.c~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/dir.c
> @@ -1957,7 +1957,7 @@ int ocfs2_readdir(struct file *file, str
>
> trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
>
> - error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
> + error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
> if (lock_level && error >= 0) {
> /* We release EX lock which used to update atime
> * and get PR lock again to reduce contention
> diff -puN fs/ocfs2/dlmglue.c~ocfs2-nowait-aio-support fs/ocfs2/dlmglue.c
> --- a/fs/ocfs2/dlmglue.c~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/dlmglue.c
> @@ -2515,13 +2515,18 @@ int ocfs2_inode_lock_with_page(struct in
>
> int ocfs2_inode_lock_atime(struct inode *inode,
> struct vfsmount *vfsmnt,
> - int *level)
> + int *level, int wait)
> {
> int ret;
>
> - ret = ocfs2_inode_lock(inode, NULL, 0);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, NULL, 0);
> + else
> + ret = ocfs2_try_inode_lock(inode, NULL, 0);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> return ret;
> }
>
> @@ -2533,9 +2538,14 @@ int ocfs2_inode_lock_atime(struct inode
> struct buffer_head *bh = NULL;
>
> ocfs2_inode_unlock(inode, 0);
> - ret = ocfs2_inode_lock(inode, &bh, 1);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, &bh, 1);
> + else
> + ret = ocfs2_try_inode_lock(inode, &bh, 1);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> return ret;
> }
> *level = 1;
> diff -puN fs/ocfs2/dlmglue.h~ocfs2-nowait-aio-support fs/ocfs2/dlmglue.h
> --- a/fs/ocfs2/dlmglue.h~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/dlmglue.h
> @@ -123,7 +123,7 @@ int ocfs2_try_open_lock(struct inode *in
> void ocfs2_open_unlock(struct inode *inode);
> int ocfs2_inode_lock_atime(struct inode *inode,
> struct vfsmount *vfsmnt,
> - int *level);
> + int *level, int wait);
> int ocfs2_inode_lock_full_nested(struct inode *inode,
> struct buffer_head **ret_bh,
> int ex,
> diff -puN fs/ocfs2/file.c~ocfs2-nowait-aio-support fs/ocfs2/file.c
> --- a/fs/ocfs2/file.c~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/file.c
> @@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode
> spin_unlock(&oi->ip_lock);
> }
>
> + file->f_mode |= FMODE_NOWAIT;
> +
> leave:
> return status;
> }
> @@ -2132,12 +2134,12 @@ out:
> }
>
> static int ocfs2_prepare_inode_for_write(struct file *file,
> - loff_t pos,
> - size_t count)
> + loff_t pos, size_t count, int wait)
> {
> - int ret = 0, meta_level = 0;
> + int ret = 0, meta_level = 0, overwrite_io = 0;
> struct dentry *dentry = file->f_path.dentry;
> struct inode *inode = d_inode(dentry);
> + struct buffer_head *di_bh = NULL;
> loff_t end;
>
> /*
> @@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write
> * if we need to make modifications here.
> */
> for(;;) {
> - ret = ocfs2_inode_lock(inode, NULL, meta_level);
> + if (wait)
> + ret = ocfs2_inode_lock(inode, NULL, meta_level);
> + else
> + ret = ocfs2_try_inode_lock(inode,
> + overwrite_io ? NULL : &di_bh, meta_level);
> if (ret < 0) {
> meta_level = -1;
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> + /*
> + * Check if IO will overwrite allocated blocks in case
> + * IOCB_NOWAIT flag is set.
> + */
> + if (!wait && !overwrite_io) {
> + overwrite_io = 1;
> + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
> + ret = -EAGAIN;
> + goto out_unlock;
> + }
> +
Could we take 'ip_alloc_sem' inside ocfs2_overwrite_io() itself, rather than here in the caller?
BTW, should we also handle the blocking ocfs2_inode_lock() call in ocfs2_prepare_inode_for_refcount() for the nowait case?
> + ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
> + brelse(di_bh);
> + di_bh = NULL;
> + up_read(&OCFS2_I(inode)->ip_alloc_sem);
> + if (ret < 0) {
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> + goto out_unlock;
> + }
> + }
> +
> /* Clear suid / sgid if necessary. We do this here
> * instead of later in the write path because
> * remove_suid() calls ->setattr without any hint that
> @@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write
>
> out_unlock:
> trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
> - pos, count);
> + pos, count, wait);
> +
> + brelse(di_bh);
>
> if (meta_level >= 0)
> ocfs2_inode_unlock(inode, meta_level);
> @@ -2211,7 +2242,7 @@ out:
> static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
> struct iov_iter *from)
> {
> - int direct_io, rw_level;
> + int rw_level;
> ssize_t written = 0;
> ssize_t ret;
> size_t count = iov_iter_count(from);
> @@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(str
> void *saved_ki_complete = NULL;
> int append_write = ((iocb->ki_pos + count) >=
> i_size_read(inode) ? 1 : 0);
> + int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>
> trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
> (unsigned long long)OCFS2_I(inode)->ip_blkno,
> @@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(str
> file->f_path.dentry->d_name.name,
> (unsigned int)from->nr_segs); /* GRRRRR */
>
> + if (!direct_io && nowait)
> + return -EOPNOTSUPP;
> +
> if (count == 0)
> return 0;
>
> - direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
> -
> - inode_lock(inode);
> + if (direct_io && nowait) {
> + if (!inode_trylock(inode))
> + return -EAGAIN;
> + } else
> + inode_lock(inode);
>
> /*
> * Concurrent O_DIRECT writes are allowed with
> @@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(str
> */
> rw_level = (!direct_io || full_coherency || append_write);
>
> - ret = ocfs2_rw_lock(inode, rw_level);
> + if (direct_io && nowait)
> + ret = ocfs2_try_rw_lock(inode, rw_level);
> + else
> + ret = ocfs2_rw_lock(inode, rw_level);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out_mutex;
> }
>
> @@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(str
> * other nodes to drop their caches. Buffered I/O
> * already does this in write_begin().
> */
> - ret = ocfs2_inode_lock(inode, NULL, 1);
> + if (nowait)
> + ret = ocfs2_try_inode_lock(inode, NULL, 1);
> + else
> + ret = ocfs2_inode_lock(inode, NULL, 1);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> @@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(str
> }
> count = ret;
>
> - ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
> + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto out;
> }
>
> @@ -2355,6 +2402,7 @@ static ssize_t ocfs2_file_read_iter(stru
> int ret = 0, rw_level = -1, lock_level = 0;
> struct file *filp = iocb->ki_filp;
> struct inode *inode = file_inode(filp);
> + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
>
> trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
> (unsigned long long)OCFS2_I(inode)->ip_blkno,
> @@ -2374,9 +2422,14 @@ static ssize_t ocfs2_file_read_iter(stru
> * need locks to protect pending reads from racing with truncate.
> */
> if (iocb->ki_flags & IOCB_DIRECT) {
> - ret = ocfs2_rw_lock(inode, 0);
> + if (nowait)
> + ret = ocfs2_try_rw_lock(inode, 0);
> + else
> + ret = ocfs2_rw_lock(inode, 0);
> +
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto bail;
> }
> rw_level = 0;
> @@ -2393,9 +2446,11 @@ static ssize_t ocfs2_file_read_iter(stru
> * like i_size. This allows the checks down below
> * generic_file_aio_read() a chance of actually working.
> */
> - ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
> + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
> + !nowait);
> if (ret < 0) {
> - mlog_errno(ret);
> + if (ret != -EAGAIN)
> + mlog_errno(ret);
> goto bail;
> }
> ocfs2_inode_unlock(inode, lock_level);
> diff -puN fs/ocfs2/mmap.c~ocfs2-nowait-aio-support fs/ocfs2/mmap.c
> --- a/fs/ocfs2/mmap.c~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/mmap.c
> @@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct
> int ret = 0, lock_level = 0;
>
> ret = ocfs2_inode_lock_atime(file_inode(file),
> - file->f_path.mnt, &lock_level);
> + file->f_path.mnt, &lock_level, 1);
> if (ret < 0) {
> mlog_errno(ret);
> goto out;
> diff -puN fs/ocfs2/ocfs2_trace.h~ocfs2-nowait-aio-support fs/ocfs2/ocfs2_trace.h
> --- a/fs/ocfs2/ocfs2_trace.h~ocfs2-nowait-aio-support
> +++ a/fs/ocfs2/ocfs2_trace.h
> @@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_rem
>
> TRACE_EVENT(ocfs2_prepare_inode_for_write,
> TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
> - unsigned long count),
> - TP_ARGS(ino, saved_pos, count),
> + unsigned long count, int wait),
> + TP_ARGS(ino, saved_pos, count, wait),
> TP_STRUCT__entry(
> __field(unsigned long long, ino)
> __field(unsigned long long, saved_pos)
> __field(unsigned long, count)
> + __field(int, wait)
> ),
> TP_fast_assign(
> __entry->ino = ino;
> __entry->saved_pos = saved_pos;
> __entry->count = count;
> + __entry->wait = wait;
> ),
> - TP_printk("%llu %llu %lu", __entry->ino,
> - __entry->saved_pos, __entry->count)
> + TP_printk("%llu %llu %lu %d", __entry->ino,
> + __entry->saved_pos, __entry->count, __entry->wait)
> );
>
> DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
> _
>
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>
> .
>
next prev parent reply other threads:[~2017-12-16 8:37 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-30 22:24 [Ocfs2-devel] [patch 11/11] ocfs2: nowait aio support akpm at linux-foundation.org
2017-12-16 8:37 ` alex chen [this message]
2017-12-18 5:30 ` Gang He
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5A34DB5D.7020800@huawei.com \
--to=alex.chen@huawei.com \
--cc=ocfs2-devel@oss.oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.