All of lore.kernel.org
 help / color / mirror / Atom feed
From: majianpeng <majianpeng@gmail.com>
To: sage <sage@inktank.com>
Cc: "Yan, Zheng" <zheng.z.yan@intel.com>,
	ceph-devel <ceph-devel@vger.kernel.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 1/2] ceph: Implement readv/preadv for sync operation.
Date: Tue, 3 Sep 2013 16:52:10 +0800	[thread overview]
Message-ID: <201309031652087109940@gmail.com> (raw)

For readv/preadv sync operations, ceph only handles the first iov.
It does not consider the other iovs. Now implement this.

Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
---
 fs/ceph/file.c | 175 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 123 insertions(+), 52 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3de8982..7d6a3ee 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -408,51 +408,95 @@ more:
  *
  * If the read spans object boundary, just do multiple reads.
  */
-static ssize_t ceph_sync_read(struct file *file, char __user *data,
-			      unsigned len, loff_t *poff, int *checkeof)
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iovec *iov,
+			      unsigned long nr_segs, int *checkeof)
 {
+	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct page **pages;
-	u64 off = *poff;
-	int num_pages, ret;
+	u64 off = iocb->ki_pos;
+	int num_pages, ret, i;
 
-	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
+	dout("sync_read on file %p %llu~%u %s\n", file, off,
+	     (unsigned)iocb->ki_left,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
-
-	if (file->f_flags & O_DIRECT) {
-		num_pages = calc_pages_for((unsigned long)data, len);
-		pages = ceph_get_direct_page_vector(data, num_pages, true);
-	} else {
-		num_pages = calc_pages_for(off, len);
-		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-	}
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
 	/*
 	 * flush any page cache pages in this range.  this
 	 * will make concurrent normal and sync io slow,
 	 * but it will at least behave sensibly when they are
 	 * in sequence.
 	 */
-	ret = filemap_write_and_wait(inode->i_mapping);
+	ret = filemap_write_and_wait_range(inode->i_mapping, off,
+						off + iocb->ki_left);
 	if (ret < 0)
-		goto done;
-
-	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
-			   file->f_flags & O_DIRECT,
-			   (unsigned long)data & ~PAGE_MASK);
+		return ret;
 
-	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
-	if (ret >= 0)
-		*poff = off + ret;
+	if (file->f_flags & O_DIRECT) {
+		for (i = 0; i < nr_segs; i++) {
+			void __user *data = iov[i].iov_base;
+			size_t len = iov[i].iov_len;
+
+			num_pages = calc_pages_for((unsigned long)data, len);
+			pages = ceph_get_direct_page_vector(data,
+							    num_pages, true);
+			if (IS_ERR(pages))
+				return PTR_ERR(pages);
+
+			ret = striped_read(inode, off, len,
+					   pages, num_pages, checkeof,
+					   1, (unsigned long)data & ~PAGE_MASK);
+			ceph_put_page_vector(pages, num_pages, true);
+
+			if (ret <= 0)
+				break;
+			off += ret;
+			if (ret < len)
+				break;
+		}
+		if (off > iocb->ki_pos) {
+			ret = off - iocb->ki_pos;
+			iocb->ki_pos = off;
+			iocb->ki_left -= ret;
+		}
+	} else {
+		size_t len = iocb->ki_left;
 
-done:
-	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages, true);
-	else
+		num_pages = calc_pages_for(off, len);
+		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+		ret = striped_read(inode, off, len, pages,
+					num_pages, checkeof, 0, 0);
+		len = ret;
+		if (len) {
+			int i, l, k = 0;
+			size_t left = len;
+
+			for (i = 0; i < nr_segs && left; i++) {
+				void __user *data = iov[i].iov_base;
+				l = min(left, iov[i].iov_len);
+				ret = ceph_copy_page_vector_to_user(&pages[k],
+								    data, off,
+								    l);
+				if (ret > 0) {
+					left -= ret;
+					off += ret;
+					k = calc_pages_for(iocb->ki_pos,
+							   len - left + 1) - 1;
+					BUG_ON(k >= num_pages && left);
+				} else
+					break;
+			}
+
+			if (left == 0) {
+				iocb->ki_pos += len;
+				iocb->ki_left -= len;
+				ret = len;
+			}
+		}
 		ceph_release_page_vector(pages, num_pages);
+	}
+
 	dout("sync_read result %d\n", ret);
 	return ret;
 }
@@ -647,55 +691,82 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	loff_t *ppos = &iocb->ki_pos;
-	size_t len = iov->iov_len;
+	size_t len = iocb->ki_left;
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	void __user *base = iov->iov_base;
 	ssize_t ret;
 	int want, got = 0;
 	int checkeof = 0, read = 0;
 
+
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
-again:
+
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
 		want = CEPH_CAP_FILE_CACHE;
 	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
 	if (ret < 0)
-		goto out;
+		return ret;
+
 	dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len,
 	     ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (fi->flags & CEPH_F_SYNC))
+	    (fi->flags & CEPH_F_SYNC)) {
+		unsigned long curr_seg = 0;
+		struct iovec *iov_clone;
+
+		iov_clone = kmalloc(nr_segs * sizeof(struct iovec), GFP_KERNEL);
+		if (iov_clone == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(iov_clone, iov, nr_segs * sizeof(struct iovec));
+again:
 		/* hmm, this isn't really async... */
-		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
-	else
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		ret = ceph_sync_read(iocb, &iov_clone[curr_seg],
+					nr_segs - curr_seg, &checkeof);
+
+		if (checkeof && ret >= 0) {
+			int statret = ceph_do_getattr(inode,
+						      CEPH_STAT_CAP_SIZE);
+
+			/* hit EOF or hole? */
+			if (statret == 0 && iocb->ki_pos < inode->i_size &&
+				iocb->ki_left) {
+				size_t tmp = 0;
+				dout("%s sync_read hit hole, ppos %lld < size %lld"
+				     ", reading more\n", __func__, iocb->ki_pos,
+				     inode->i_size);
+
+				read += ret;
+				for (; curr_seg < nr_segs; curr_seg++) {
+					if ((tmp + iov_clone[curr_seg].iov_len)
+						> ret)
+						break;
+					tmp += iov_clone[curr_seg].iov_len;
+				}
+
+				BUG_ON(curr_seg == nr_segs);
+				iov_clone[curr_seg].iov_base += ret - tmp;
+				iov_clone[curr_seg].iov_len -= ret - tmp;
+				checkeof = 0;
+				goto again;
+			}
+		}
+		kfree(iov_clone);
 
+	} else
+		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 out:
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	ceph_put_cap_refs(ci, got);
 
-	if (checkeof && ret >= 0) {
-		int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
-
-		/* hit EOF or hole? */
-		if (statret == 0 && *ppos < inode->i_size) {
-			dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
-			read += ret;
-			base += ret;
-			len -= ret;
-			checkeof = 0;
-			goto again;
-		}
-	}
 	if (ret >= 0)
 		ret += read;
 
-- 
1.8.1.2

             reply	other threads:[~2013-09-03  8:52 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-03  8:52 majianpeng [this message]
2013-09-04 12:57 ` [PATCH 1/2] ceph: Implement readv/preadv for sync operation Yan, Zheng
2013-09-05  0:28   ` majianpeng
2013-09-05  2:51     ` Yan, Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201309031652087109940@gmail.com \
    --to=majianpeng@gmail.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sage@inktank.com \
    --cc=zheng.z.yan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.