From: Peng Tao <tao.peng@primarydata.com>
To: linux-nfs@vger.kernel.org
Cc: Trond Myklebust <trond.myklebust@primarydata.com>,
Peng Tao <tao.peng@primarydata.com>
Subject: [PATCH] nfs: fix dio deadlock when O_DIRECT flag is flipped
Date: Wed, 14 Jan 2015 22:09:03 +0800 [thread overview]
Message-ID: <1421244543-32539-1-git-send-email-tao.peng@primarydata.com> (raw)
Running xfstest generic/036, we'll get following VM_BUG_ON in
nfs_direct_IO(). 036 toggles O_DIRECT flag while IO is going on.
So the VM_BUG_ON should not exist there. However, we have a deadlock
in the code path as well, because inode->i_mutex is taken when calling
into ->direct_IO. And nfs_file_direct_write() would grab inode->i_mutex
again.
Meanwhile, nfs_file_direct_write() does a lot of things that is already
done by vfs before ->direct_IO is called. So skip those duplicates. One
exception is i_size_write. vfs does not take i_lock when setting i_size.
But nfs appears to need i_lock when setting i_size.
Signed-off-by: Peng Tao <tao.peng@primarydata.com>
---
fs/nfs/direct.c | 92 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 65 insertions(+), 27 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bfd9d49..9f88b13 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -105,6 +105,8 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
static void nfs_direct_write_schedule_work(struct work_struct *work);
+static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos);
static inline void get_dreq(struct nfs_direct_req *dreq)
{
@@ -257,11 +259,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
return -EINVAL;
#else
- VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
-
if (rw == READ)
return nfs_file_direct_read(iocb, iter, pos);
- return nfs_file_direct_write(iocb, iter, pos);
+ return nfs_direct_write(iocb, iter, pos);
#endif /* CONFIG_NFS_SWAP */
}
@@ -919,6 +919,66 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
return 0;
}
+static ssize_t _nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct inode *inode, struct nfs_direct_req **dreqp,
+ size_t count, loff_t pos)
+{
+ ssize_t result;
+ struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
+
+ task_io_account_write(count);
+
+ result = -ENOMEM;
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ goto out;
+
+ dreq->inode = inode;
+ dreq->bytes_left = count;
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
+ nfs_direct_req_release(dreq);
+ goto out;
+ }
+ dreq->l_ctx = l_ctx;
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+ *dreqp = dreq;
+
+out:
+ return result;
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct nfs_direct_req *uninitialized_var(dreq);
+ size_t count = iov_iter_count(iter);
+ ssize_t result;
+
+ result = _nfs_direct_write(iocb, iter, inode, &dreq, count, pos);
+ if (!result) {
+ result = nfs_direct_wait(dreq);
+ if (result > 0) {
+ iocb->ki_pos = pos + result;
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
+ }
+ nfs_direct_req_release(dreq);
+ return result;
+}
+
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
@@ -947,8 +1007,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- struct nfs_direct_req *dreq;
- struct nfs_lock_context *l_ctx;
+ struct nfs_direct_req *uninitialized_var(dreq);
loff_t end;
size_t count = iov_iter_count(iter);
end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -982,26 +1041,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
goto out_unlock;
}
- task_io_account_write(count);
-
- result = -ENOMEM;
- dreq = nfs_direct_req_alloc();
- if (!dreq)
- goto out_unlock;
-
- dreq->inode = inode;
- dreq->bytes_left = count;
- dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- l_ctx = nfs_get_lock_context(dreq->ctx);
- if (IS_ERR(l_ctx)) {
- result = PTR_ERR(l_ctx);
- goto out_release;
- }
- dreq->l_ctx = l_ctx;
- if (!is_sync_kiocb(iocb))
- dreq->iocb = iocb;
-
- result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+ result = _nfs_direct_write(iocb, iter, inode, &dreq, count, pos);
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
@@ -1025,8 +1065,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
nfs_direct_req_release(dreq);
return result;
-out_release:
- nfs_direct_req_release(dreq);
out_unlock:
mutex_unlock(&inode->i_mutex);
out:
--
1.9.1
next reply other threads:[~2015-01-14 14:09 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-14 14:09 Peng Tao [this message]
2015-01-14 15:48 ` [PATCH] nfs: fix dio deadlock when O_DIRECT flag is flipped Christoph Hellwig
[not found] ` <CAHQdGtQAqn9zh8UC-dn2s3nLmnRyyxeSNdk=+FZdYmj46ZEcXA@mail.gmail.com>
2015-01-14 16:21 ` Trond Myklebust
2015-01-14 16:55 ` Peng Tao
2015-01-15 15:17 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1421244543-32539-1-git-send-email-tao.peng@primarydata.com \
--to=tao.peng@primarydata.com \
--cc=linux-nfs@vger.kernel.org \
--cc=trond.myklebust@primarydata.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox