linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V8 23/33] nfs: add support for read_iter, write_iter
       [not found] <1374774659-13121-1-git-send-email-dave.kleikamp@oracle.com>
@ 2013-07-25 17:50 ` Dave Kleikamp
  2013-07-25 17:50 ` [PATCH V8 24/33] nfs: simplify swap Dave Kleikamp
  1 sibling, 0 replies; 2+ messages in thread
From: Dave Kleikamp @ 2013-07-25 17:50 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Andrew Morton, Maxim V. Patlasov, Zach Brown,
	Dave Kleikamp, Trond Myklebust, linux-nfs

This patch implements the read_iter and write_iter file operations, which
allow kernel code to initiate direct I/O. This allows the loop device to
read and write directly to the server, bypassing the page cache.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Zach Brown <zab@zabbo.net>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org
---
 fs/nfs/direct.c        | 247 +++++++++++++++++++++++++++++++++++++------------
 fs/nfs/file.c          |  33 ++++---
 fs/nfs/internal.h      |   4 +-
 fs/nfs/nfs4file.c      |   4 +-
 include/linux/nfs_fs.h |   6 +-
 5 files changed, 210 insertions(+), 84 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bceb47e..2b0ebcb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -90,6 +90,7 @@ struct nfs_direct_req {
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+#define NFS_ODIRECT_MARK_DIRTY		(4)	/* mark read pages dirty */
 	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
@@ -131,15 +132,13 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 
 	return -EINVAL;
 #else
-	const struct iovec *iov = iov_iter_iovec(iter);
-
 	VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iov, iter->nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iov, iter->nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
@@ -269,7 +268,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
 
-		if (!PageCompound(page) && bytes < hdr->good_bytes)
+		if ((dreq->flags & NFS_ODIRECT_MARK_DIRTY) &&
+		    !PageCompound(page) && bytes < hdr->good_bytes)
 			set_page_dirty(page);
 		bytes += req->wb_bytes;
 		nfs_list_remove_request(req);
@@ -401,24 +401,17 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      const struct iovec *iov,
-					      unsigned long nr_segs,
-					      loff_t pos, bool uio)
+static ssize_t nfs_direct_do_schedule_read_iovec(
+		struct nfs_pageio_descriptor *desc, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos, bool uio)
 {
-	struct nfs_pageio_descriptor desc;
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		result = nfs_direct_read_schedule_segment(desc, vec, pos, uio);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -426,6 +419,78 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			break;
 		pos += vec->iov_len;
 	}
+	if (requested_bytes)
+		return requested_bytes;
+
+	return result < 0 ? result : -EIO;
+}
+
+#ifdef CONFIG_BLOCK
+static ssize_t nfs_direct_do_schedule_read_bvec(
+		struct nfs_pageio_descriptor *desc,
+		struct bio_vec *bvec, unsigned long nr_segs, loff_t pos)
+{
+	struct nfs_direct_req *dreq = desc->pg_dreq;
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	unsigned long seg;
+	struct nfs_page *req;
+	unsigned int req_len;
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		result = -EIO;
+		req_len = bvec[seg].bv_len;
+		req = nfs_create_request(ctx, inode,
+					 bvec[seg].bv_page,
+					 bvec[seg].bv_offset, req_len);
+		if (IS_ERR(req)) {
+			result = PTR_ERR(req);
+			break;
+		}
+		req->wb_index = pos >> PAGE_SHIFT;
+		req->wb_offset = pos & ~PAGE_MASK;
+		if (!nfs_pageio_add_request(desc, req)) {
+			result = desc->pg_error;
+			nfs_release_request(req);
+			break;
+		}
+		requested_bytes += req_len;
+		pos += req_len;
+	}
+
+	if (requested_bytes)
+		return requested_bytes;
+
+	return result < 0 ? result : -EIO;
+}
+#endif /* CONFIG_BLOCK */
+
+static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq,
+					struct iov_iter *iter, loff_t pos,
+					bool uio)
+{
+	struct nfs_pageio_descriptor desc;
+	ssize_t result;
+
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+
+	if (iov_iter_has_iovec(iter)) {
+		if (uio)
+			dreq->flags = NFS_ODIRECT_MARK_DIRTY;
+		result = nfs_direct_do_schedule_read_iovec(&desc,
+				iov_iter_iovec(iter), iter->nr_segs, pos, uio);
+#ifdef CONFIG_BLOCK
+	} else if (iov_iter_has_bvec(iter)) {
+		result = nfs_direct_do_schedule_read_bvec(&desc,
+				iov_iter_bvec(iter), iter->nr_segs, pos);
+#endif
+	} else
+		BUG();
 
 	nfs_pageio_complete(&desc);
 
@@ -433,9 +498,9 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	 * If no bytes were started, return the error, and let the
 	 * generic layer handle the completion.
 	 */
-	if (requested_bytes == 0) {
+	if (result < 0) {
 		nfs_direct_req_release(dreq);
-		return result < 0 ? result : -EIO;
+		return result;
 	}
 
 	if (put_dreq(dreq))
@@ -443,8 +508,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	return 0;
 }
 
-static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos, bool uio)
+static ssize_t nfs_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+			       loff_t pos, bool uio)
 {
 	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -456,7 +521,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	dreq->inode = inode;
-	dreq->bytes_left = iov_length(iov, nr_segs);
+	dreq->bytes_left = iov_iter_count(iter);
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
 	if (IS_ERR(l_ctx)) {
@@ -467,8 +532,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	NFS_I(inode)->read_io += iov_length(iov, nr_segs);
-	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	NFS_I(inode)->read_io += iov_iter_count(iter);
+	result = nfs_direct_read_schedule(dreq, iter, pos, uio);
 	if (!result)
 		result = nfs_direct_wait(dreq);
 out_release:
@@ -802,27 +867,18 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
-static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
-					       const struct iovec *iov,
-					       unsigned long nr_segs,
-					       loff_t pos, bool uio)
+static ssize_t nfs_direct_do_schedule_write_iovec(
+		struct nfs_pageio_descriptor *desc, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos, bool uio)
 {
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = 0;
+	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
-			      &nfs_direct_write_completion_ops);
-	desc.pg_dreq = dreq;
-	get_dreq(dreq);
-	atomic_inc(&inode->i_dio_count);
-
-	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+		result = nfs_direct_write_schedule_segment(desc, vec,
+							   pos, uio);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -830,16 +886,92 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			break;
 		pos += vec->iov_len;
 	}
+
+	if (requested_bytes)
+		return requested_bytes;
+
+	return result < 0 ? result : -EIO;
+}
+
+#ifdef CONFIG_BLOCK
+static ssize_t nfs_direct_do_schedule_write_bvec(
+		struct nfs_pageio_descriptor *desc,
+		struct bio_vec *bvec, unsigned long nr_segs, loff_t pos)
+{
+	struct nfs_direct_req *dreq = desc->pg_dreq;
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = dreq->inode;
+	ssize_t result = 0;
+	size_t requested_bytes = 0;
+	unsigned long seg;
+	struct nfs_page *req;
+	unsigned int req_len;
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		req_len = bvec[seg].bv_len;
+
+		req = nfs_create_request(ctx, inode, bvec[seg].bv_page,
+					 bvec[seg].bv_offset, req_len);
+		if (IS_ERR(req)) {
+			result = PTR_ERR(req);
+			break;
+		}
+		nfs_lock_request(req);
+		req->wb_index = pos >> PAGE_SHIFT;
+		req->wb_offset = pos & ~PAGE_MASK;
+		if (!nfs_pageio_add_request(desc, req)) {
+			result = desc->pg_error;
+			nfs_unlock_and_release_request(req);
+			break;
+		}
+		requested_bytes += req_len;
+		pos += req_len;
+	}
+
+	if (requested_bytes)
+		return requested_bytes;
+
+	return result < 0 ? result : -EIO;
+}
+#endif /* CONFIG_BLOCK */
+
+static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq,
+					 struct iov_iter *iter, loff_t pos,
+					 bool uio)
+{
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = 0;
+
+	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+			      &nfs_direct_write_completion_ops);
+	desc.pg_dreq = dreq;
+	get_dreq(dreq);
+	atomic_inc(&inode->i_dio_count);
+
+	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
+
+	if (iov_iter_has_iovec(iter)) {
+		result = nfs_direct_do_schedule_write_iovec(&desc,
+				iov_iter_iovec(iter), iter->nr_segs, pos, uio);
+#ifdef CONFIG_BLOCK
+	} else if (iov_iter_has_bvec(iter)) {
+		result = nfs_direct_do_schedule_write_bvec(&desc,
+				iov_iter_bvec(iter), iter->nr_segs, pos);
+#endif
+	} else
+		BUG();
+
 	nfs_pageio_complete(&desc);
 
 	/*
 	 * If no bytes were started, return the error, and let the
 	 * generic layer handle the completion.
 	 */
-	if (requested_bytes == 0) {
+	if (result < 0) {
 		inode_dio_done(inode);
 		nfs_direct_req_release(dreq);
-		return result < 0 ? result : -EIO;
+		return result;
 	}
 
 	if (put_dreq(dreq))
@@ -847,9 +979,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	return 0;
 }
 
-static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos,
-				size_t count, bool uio)
+static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -861,7 +992,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	dreq->inode = inode;
-	dreq->bytes_left = count;
+	dreq->bytes_left = iov_iter_count(iter);
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
 	if (IS_ERR(l_ctx)) {
@@ -872,7 +1003,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	result = nfs_direct_write_schedule(dreq, iter, pos, uio);
 	if (!result)
 		result = nfs_direct_wait(dreq);
 out_release:
@@ -884,12 +1015,11 @@ out:
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
+ * @iter: vector of buffers into which to read data
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
- * generic_file_aio_read() in order to avoid gfar's check to see if
+ * generic_file_read_iter() in order to avoid its check to see if
  * the request starts before the end of the file.  For that check
  * to work, we must generate a GETATTR before each direct read, and
  * even then there is a window between the GETATTR and the subsequent
@@ -902,15 +1032,15 @@ out:
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+			     loff_t pos, bool uio)
 {
 	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_iter_count(iter);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
 	dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
@@ -928,7 +1058,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 
 	task_io_account_read(count);
 
-	retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
+	retval = nfs_direct_read(iocb, iter, pos, uio);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -939,12 +1069,11 @@ out:
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of buffers from which to write data
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
- * generic_file_aio_write() in order to avoid taking the inode
+ * generic_file_write_iter() in order to avoid taking the inode
  * semaphore and updating the i_size.  The NFS server will set
  * the new i_size and this client must read the updated size
  * back into its cache.  We let the server do generic write
@@ -958,15 +1087,15 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+			      loff_t pos, bool uio)
 {
 	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_iter_count(iter);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
 
 	dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
@@ -991,7 +1120,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	task_io_account_write(count);
 
-	retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
+	retval = nfs_direct_write(iocb, iter, pos, uio);
 	if (retval > 0) {
 		struct inode *inode = mapping->host;
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94e94bd..bbff2f9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -172,29 +172,28 @@ nfs_file_flush(struct file *file, fl_owner_t id)
 EXPORT_SYMBOL_GPL(nfs_file_flush);
 
 ssize_t
-nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+nfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_read(iocb, iter, pos, true);
 
-	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
+	dprintk("NFS: read_iter(%s/%s, %lu@%lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
-		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+		(unsigned long) iov_iter_count(iter), (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
-		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		result = generic_file_read_iter(iocb, iter, pos);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
 	return result;
 }
-EXPORT_SYMBOL_GPL(nfs_file_read);
+EXPORT_SYMBOL_GPL(nfs_file_read_iter);
 
 ssize_t
 nfs_file_splice_read(struct file *filp, loff_t *ppos,
@@ -250,7 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
  * disk, but it retrieves and clears ctx->error after synching, despite
  * the two being set at the same time in nfs_context_set_write_error().
  * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occurred, and hence cause it to
+ * nfs_file_write_iter() that a write error occurred, and hence cause it to
  * fall back to doing a synchronous write.
  */
 int
@@ -642,19 +641,19 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
 	return 0;
 }
 
-ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter,
+				   loff_t pos)
 {
 	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
 	unsigned long written = 0;
 	ssize_t result;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_iter_count(iter);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_write(iocb, iter, pos, true);
 
-	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
+	dprintk("NFS: write_iter(%s/%s, %lu@%lld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
@@ -674,7 +673,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!count)
 		goto out;
 
-	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	result = generic_file_write_iter(iocb, iter, pos);
 	if (result > 0)
 		written = result;
 
@@ -693,7 +692,7 @@ out_swapfile:
 	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
 	goto out;
 }
-EXPORT_SYMBOL_GPL(nfs_file_write);
+EXPORT_SYMBOL_GPL(nfs_file_write_iter);
 
 ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
 			      struct file *filp, loff_t *ppos,
@@ -953,8 +952,8 @@ const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.aio_read	= nfs_file_read,
-	.aio_write	= nfs_file_write,
+	.read_iter	= nfs_file_read_iter,
+	.write_iter	= nfs_file_write_iter,
 	.mmap		= nfs_file_mmap,
 	.open		= nfs_file_open,
 	.flush		= nfs_file_flush,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3c8373f..d689ca9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -286,11 +286,11 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
 int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 int nfs_file_flush(struct file *, fl_owner_t);
-ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_read_iter(struct kiocb *, struct iov_iter *, loff_t);
 ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
 			     size_t, unsigned int);
 int nfs_file_mmap(struct file *, struct vm_area_struct *);
-ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_write_iter(struct kiocb *, struct iov_iter *, loff_t);
 int nfs_file_release(struct inode *, struct file *);
 int nfs_lock(struct file *, int, struct file_lock *);
 int nfs_flock(struct file *, int, struct file_lock *);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e5b804d..e13bb02 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -121,8 +121,8 @@ const struct file_operations nfs4_file_operations = {
 	.llseek		= nfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-	.aio_read	= nfs_file_read,
-	.aio_write	= nfs_file_write,
+	.read_iter	= nfs_file_read_iter,
+	.write_iter	= nfs_file_write_iter,
 	.mmap		= nfs_file_mmap,
 	.open		= nfs4_file_open,
 	.flush		= nfs_file_flush,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a4b19d2..b2324be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -458,11 +458,9 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
  * linux/fs/nfs/direct.c
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 			loff_t pos, bool uio);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 			loff_t pos, bool uio);
 
 /*
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH V8 24/33] nfs: simplify swap
       [not found] <1374774659-13121-1-git-send-email-dave.kleikamp@oracle.com>
  2013-07-25 17:50 ` [PATCH V8 23/33] nfs: add support for read_iter, write_iter Dave Kleikamp
@ 2013-07-25 17:50 ` Dave Kleikamp
  1 sibling, 0 replies; 2+ messages in thread
From: Dave Kleikamp @ 2013-07-25 17:50 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Andrew Morton, Maxim V. Patlasov, Zach Brown,
	Dave Kleikamp, Mel Gorman, Trond Myklebust, linux-nfs

swap_writepage can now call nfs's write_iter f_op, eliminating the need to
implement the special-case direct_IO a_op. There is no longer a need to
pass the uio flag through the direct read and write paths.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org
---
 fs/nfs/direct.c           | 94 ++++++++++++++++-------------------------------
 fs/nfs/file.c             |  4 +-
 include/linux/blk_types.h |  2 -
 include/linux/fs.h        |  2 -
 include/linux/nfs_fs.h    |  4 +-
 mm/page_io.c              | 13 +++----
 6 files changed, 42 insertions(+), 77 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2b0ebcb..239c2fe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -118,29 +118,18 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
  * @nr_segs: size of iovec array
  *
  * The presence of this routine in the address space ops vector means
- * the NFS client supports direct I/O. However, for most direct IO, we
- * shunt off direct read and write requests before the VFS gets them,
- * so this method is only ever called for swap.
+ * the NFS client supports direct I/O. However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
  */
 ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		      loff_t pos)
 {
-#ifndef CONFIG_NFS_SWAP
 	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
 			iocb->ki_filp->f_path.dentry->d_name.name,
 			(long long) pos, iter->nr_segs);
 
 	return -EINVAL;
-#else
-	VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
-
-	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iter, pos,
-				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iter, pos,
-				rw == WRITE ? true : false);
-#endif /* CONFIG_NFS_SWAP */
 }
 
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -312,7 +301,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  */
 static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
 						const struct iovec *iov,
-						loff_t pos, bool uio)
+						loff_t pos)
 {
 	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
@@ -340,20 +329,12 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 					  GFP_KERNEL);
 		if (!pagevec)
 			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
 					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
+		up_read(&current->mm->mmap_sem);
+		if (result < 0)
+			break;
 		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
@@ -403,7 +384,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 
 static ssize_t nfs_direct_do_schedule_read_iovec(
 		struct nfs_pageio_descriptor *desc, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos, bool uio)
+		unsigned long nr_segs, loff_t pos)
 {
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
@@ -411,7 +392,7 @@ static ssize_t nfs_direct_do_schedule_read_iovec(
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(desc, vec, pos, uio);
+		result = nfs_direct_read_schedule_segment(desc, vec, pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -468,8 +449,7 @@ static ssize_t nfs_direct_do_schedule_read_bvec(
 #endif /* CONFIG_BLOCK */
 
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq,
-					struct iov_iter *iter, loff_t pos,
-					bool uio)
+					struct iov_iter *iter, loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	ssize_t result;
@@ -480,10 +460,8 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 	desc.pg_dreq = dreq;
 
 	if (iov_iter_has_iovec(iter)) {
-		if (uio)
-			dreq->flags = NFS_ODIRECT_MARK_DIRTY;
 		result = nfs_direct_do_schedule_read_iovec(&desc,
-				iov_iter_iovec(iter), iter->nr_segs, pos, uio);
+				iov_iter_iovec(iter), iter->nr_segs, pos);
 #ifdef CONFIG_BLOCK
 	} else if (iov_iter_has_bvec(iter)) {
 		result = nfs_direct_do_schedule_read_bvec(&desc,
@@ -509,7 +487,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 }
 
 static ssize_t nfs_direct_read(struct kiocb *iocb, struct iov_iter *iter,
-			       loff_t pos, bool uio)
+			       loff_t pos)
 {
 	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -533,7 +511,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += iov_iter_count(iter);
-	result = nfs_direct_read_schedule(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule(dreq, iter, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
 out_release:
@@ -698,7 +676,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
  */
 static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
 						 const struct iovec *iov,
-						 loff_t pos, bool uio)
+						 loff_t pos)
 {
 	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
@@ -726,19 +704,12 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 		if (!pagevec)
 			break;
 
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					npages, 0, 0, pagevec, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (result < 0)
+			break;
 
 		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
@@ -869,7 +840,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 
 static ssize_t nfs_direct_do_schedule_write_iovec(
 		struct nfs_pageio_descriptor *desc, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos, bool uio)
+		unsigned long nr_segs, loff_t pos)
 {
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
@@ -878,7 +849,7 @@ static ssize_t nfs_direct_do_schedule_write_iovec(
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
 		result = nfs_direct_write_schedule_segment(desc, vec,
-							   pos, uio);
+							   pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -936,8 +907,7 @@ static ssize_t nfs_direct_do_schedule_write_bvec(
 #endif /* CONFIG_BLOCK */
 
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq,
-					 struct iov_iter *iter, loff_t pos,
-					 bool uio)
+					 struct iov_iter *iter, loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
@@ -953,7 +923,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq,
 
 	if (iov_iter_has_iovec(iter)) {
 		result = nfs_direct_do_schedule_write_iovec(&desc,
-				iov_iter_iovec(iter), iter->nr_segs, pos, uio);
+				iov_iter_iovec(iter), iter->nr_segs, pos);
 #ifdef CONFIG_BLOCK
 	} else if (iov_iter_has_bvec(iter)) {
 		result = nfs_direct_do_schedule_write_bvec(&desc,
@@ -980,7 +950,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq,
 }
 
 static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
-				loff_t pos, bool uio)
+				loff_t pos)
 {
 	ssize_t result = -ENOMEM;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -1003,7 +973,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule(dreq, iter, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
 out_release:
@@ -1033,7 +1003,7 @@ out:
  * cache.
  */
 ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
-			     loff_t pos, bool uio)
+			     loff_t pos)
 {
 	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -1058,7 +1028,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 
 	task_io_account_read(count);
 
-	retval = nfs_direct_read(iocb, iter, pos, uio);
+	retval = nfs_direct_read(iocb, iter, pos);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -1088,7 +1058,7 @@ out:
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
 ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
-			      loff_t pos, bool uio)
+			      loff_t pos)
 {
 	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -1120,7 +1090,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 
 	task_io_account_write(count);
 
-	retval = nfs_direct_write(iocb, iter, pos, uio);
+	retval = nfs_direct_write(iocb, iter, pos);
 	if (retval > 0) {
 		struct inode *inode = mapping->host;
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index bbff2f9..3e210ca 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -179,7 +179,7 @@ nfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 	ssize_t result;
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, iter, pos, true);
+		return nfs_file_direct_read(iocb, iter, pos);
 
 	dprintk("NFS: read_iter(%s/%s, %lu@%lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -651,7 +651,7 @@ ssize_t nfs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter,
 	size_t count = iov_iter_count(iter);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, iter, pos, true);
+		return nfs_file_direct_write(iocb, iter, pos);
 
 	dprintk("NFS: write_iter(%s/%s, %lu@%lld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fa1abeb..1bea25f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -176,7 +176,6 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
-	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
 	__REQ_NR_BITS,		/* stops here */
 };
@@ -227,7 +226,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_KERNEL		(1 << __REQ_KERNEL)
 #define REQ_PM			(1 << __REQ_PM)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 26d9d8d4..06f2290 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -181,8 +181,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
-#define KERNEL_READ		(READ|REQ_KERNEL)
-#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
 
 #define READ_SYNC		(READ | REQ_SYNC)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b2324be..1f6a332 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -459,9 +459,9 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
-			loff_t pos, bool uio);
+			loff_t pos);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
-			loff_t pos, bool uio);
+			loff_t pos);
 
 /*
  * linux/fs/nfs/dir.c
diff --git a/mm/page_io.c b/mm/page_io.c
index 0c1db1a..21023df 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -258,14 +258,14 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	if (sis->flags & SWP_FILE) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
-		struct address_space *mapping = swap_file->f_mapping;
-		struct iovec iov = {
-			.iov_base = kmap(page),
-			.iov_len  = PAGE_SIZE,
+		struct bio_vec bvec = {
+			.bv_page = kmap(page),
+			.bv_len = PAGE_SIZE,
+			.bv_offset = 0,
 		};
 		struct iov_iter iter;
 
-		iov_iter_init(&iter, &iov, 1, PAGE_SIZE, 0);
+		iov_iter_init_bvec(&iter, &bvec, 1, PAGE_SIZE, 0);
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
@@ -274,8 +274,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->direct_IO(KERNEL_WRITE, &kiocb, &iter,
-						kiocb.ki_pos);
+		ret = swap_file->f_op->write_iter(&kiocb, &iter, kiocb.ki_pos);
 		kunmap(page);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2013-07-25 17:52 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1374774659-13121-1-git-send-email-dave.kleikamp@oracle.com>
2013-07-25 17:50 ` [PATCH V8 23/33] nfs: add support for read_iter, write_iter Dave Kleikamp
2013-07-25 17:50 ` [PATCH V8 24/33] nfs: simplify swap Dave Kleikamp

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).