From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
To: linux-btrfs@vger.kernel.org, clm@fb.com, jbacik@fb.com
Cc: Chandan Rajendra <chandan@linux.vnet.ibm.com>,
aneesh.kumar@linux.vnet.ibm.com
Subject: [RFC PATCH 1/8] Btrfs: subpagesize-blocksize: Get rid of whole page reads.
Date: Wed, 21 May 2014 15:11:11 +0530 [thread overview]
Message-ID: <1400665278-4091-2-git-send-email-chandan@linux.vnet.ibm.com> (raw)
In-Reply-To: <1400665278-4091-1-git-send-email-chandan@linux.vnet.ibm.com>
Based on original patch from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions
to track the file offset range operated on by the bio. Hence this patch adds
two new members to 'struct btrfs_io_bio' to track the file offset range.
This patch also brings back check_page_locked() to reliably unlock pages in
readpage's end bio function.
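As a minimal illustration of the idea (a userspace sketch, not kernel code; the names demo_io_range, page_fully_done and DEMO_PAGE_SIZE are made up for this example), the bio-private structure remembers the contiguous file range the bio covers, and the read completion path walks every page overlapping that range, unlocking a page only once no block inside it is still outstanding:

/* Simplified userspace model of the range tracking done by this patch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096ULL

/* Mirrors the two fields added to struct btrfs_io_bio below. */
struct demo_io_range {
	uint64_t start_offset;	/* file offset of the first block in the bio */
	uint64_t len;		/* number of contiguous bytes the bio covers */
};

/* Stand-in for "no locked block left inside this page". */
static bool page_fully_done(uint64_t page_start)
{
	(void)page_start;
	return true;		/* assume every block of the page has completed */
}

/* Walk each page overlapping the range, as the read end_io callback would. */
static void unlock_pages_in_range(const struct demo_io_range *r)
{
	uint64_t offset = r->start_offset & ~(DEMO_PAGE_SIZE - 1);

	while (offset < r->start_offset + r->len) {
		if (page_fully_done(offset))
			printf("unlock page at file offset %llu\n",
			       (unsigned long long)offset);
		offset += DEMO_PAGE_SIZE;
	}
}

int main(void)
{
	/* A bio covering three 1KiB blocks starting at file offset 4096. */
	struct demo_io_range r = { .start_offset = 4096, .len = 3 * 1024 };

	unlock_pages_in_range(&r);
	return 0;
}

The patch below does the equivalent with btrfs_io_bio->start_offset/len, test_range_bit() on EXTENT_LOCKED and unlock_page().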
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
fs/btrfs/extent_io.c | 120 +++++++++++++++++++++++++++++++++------------------
fs/btrfs/volumes.h | 3 ++
2 files changed, 80 insertions(+), 43 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fbe501d..fd6f011 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1943,15 +1943,29 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
* helper function to set a given page up to date if all the
* extents in the tree for that page are up to date
*/
-static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
+static void check_page_uptodate(struct extent_io_tree *tree, struct page *page,
+ struct extent_state *cached)
{
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
+ if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached))
SetPageUptodate(page);
}
/*
+ * helper function to unlock a page if all the extents in the tree
+ * for that page are unlocked
+ */
+static void check_page_locked(struct extent_io_tree *tree, struct page *page)
+{
+ u64 start = page_offset(page);
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+
+ if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
+ unlock_page(page);
+ }
+}
+
+/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
@@ -2414,16 +2428,33 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
bio_put(bio);
}
-static void
-endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
- int uptodate)
+static void unlock_extent_and_page(struct address_space *mapping,
+ struct extent_io_tree *tree,
+ struct btrfs_io_bio *io_bio)
{
- struct extent_state *cached = NULL;
- u64 end = start + len - 1;
+ pgoff_t index;
+ u64 offset, len;
+ /*
+ * This btrfs_io_bio may span multiple pages.
+ * We need to unlock a page covered by it only once the
+ * endio callback has been received for all the blocks in that page.
+ * A btrfs_io_bio also covers contiguous blocks of the file;
+ * see submit_extent_page for more details.
+ */
- if (uptodate && tree->track_uptodate)
- set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
- unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
+ offset = io_bio->start_offset;
+ len = io_bio->len;
+ unlock_extent(tree, offset, offset + len - 1);
+
+ index = offset >> PAGE_CACHE_SHIFT;
+ while (offset < io_bio->start_offset + len) {
+ struct page *page;
+ page = find_get_page(mapping, index);
+ check_page_locked(tree, page);
+ page_cache_release(page);
+ index++;
+ offset += PAGE_CACHE_SIZE;
+ }
}
/*
@@ -2443,13 +2474,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
struct extent_io_tree *tree;
+ struct extent_state *cached = NULL;
u64 offset = 0;
u64 start;
u64 end;
u64 len;
- u64 extent_start = 0;
- u64 extent_len = 0;
int mirror;
int ret;
@@ -2482,8 +2513,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
bvec->bv_offset, bvec->bv_len);
}
- start = page_offset(page);
- end = start + bvec->bv_offset + bvec->bv_len - 1;
+ start = page_offset(page) + bvec->bv_offset;
+ end = start + bvec->bv_len - 1;
len = bvec->bv_len;
if (++bvec <= bvec_end)
@@ -2540,40 +2571,24 @@ readpage_ok:
offset = i_size & (PAGE_CACHE_SIZE-1);
if (page->index == end_index && offset)
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
- SetPageUptodate(page);
+ if (tree->track_uptodate)
+ set_extent_uptodate(tree, start, end, &cached,
+ GFP_ATOMIC);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
- unlock_page(page);
- offset += len;
- if (unlikely(!uptodate)) {
- if (extent_len) {
- endio_readpage_release_extent(tree,
- extent_start,
- extent_len, 1);
- extent_start = 0;
- extent_len = 0;
- }
- endio_readpage_release_extent(tree, start,
- end - start + 1, 0);
- } else if (!extent_len) {
- extent_start = start;
- extent_len = end + 1 - start;
- } else if (extent_start + extent_len == start) {
- extent_len += end + 1 - start;
- } else {
- endio_readpage_release_extent(tree, extent_start,
- extent_len, uptodate);
- extent_start = start;
- extent_len = end + 1 - start;
- }
+ offset += len;
+ /*
+ * Check whether the page this bvec refers to can now be marked uptodate
+ */
+ check_page_uptodate(tree, page, cached);
} while (bvec <= bvec_end);
-
- if (extent_len)
- endio_readpage_release_extent(tree, extent_start, extent_len,
- uptodate);
+ /*
+ * Unlock the extent range covered by this btrfs_io_bio and its pages
+ */
+ unlock_extent_and_page(mapping, tree, io_bio);
if (io_bio->end_io)
io_bio->end_io(io_bio, err);
bio_put(bio);
@@ -2700,6 +2715,18 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
else
contig = bio_end_sector(bio) == sector;
+ if (contig) {
+ /*
+ * Check whether we are contiguous in file offsets.
+ * We mostly are for readpage/readpages.
+ * We need this check because we use btrfs_io_bio's
+ * start_offset and len to unlock in the endio routines.
+ */
+ if ((page_offset(page) + offset) !=
+ (btrfs_io_bio(bio)->start_offset +
+ btrfs_io_bio(bio)->len))
+ contig = 0;
+ }
if (prev_bio_flags != bio_flags || !contig ||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
@@ -2709,6 +2736,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
return ret;
bio = NULL;
} else {
+ /*
+ * Update btrfs_io_bio's len so that we can unlock
+ * correctly in the end_io callback.
+ */
+ btrfs_io_bio(bio)->len += page_size;
return 0;
}
}
@@ -2724,6 +2756,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ btrfs_io_bio(bio)->start_offset = page_offset(page) + offset;
+ btrfs_io_bio(bio)->len = page_size;
if (bio_ret)
*bio_ret = bio;
@@ -2914,7 +2948,7 @@ static int __do_readpage(struct extent_io_tree *tree,
/* the get_extent function already copied into the page */
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
- check_page_uptodate(tree, page);
+ check_page_uptodate(tree, page, NULL);
if (!parent_locked)
unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 80754f9..fb2dbdc 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -173,6 +173,9 @@ struct btrfs_io_bio {
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
u8 *csum_allocated;
btrfs_io_bio_end_io_t *end_io;
+ /* Track file offset range operated on by the bio.*/
+ u64 start_offset;
+ u64 len;
struct bio bio;
};
--
1.8.3.1
Thread overview: 9+ messages
2014-05-21 9:41 [RFC PATCH 0/8] Btrfs: Subpagesize-blocksize: Get rid of whole page I/O Chandan Rajendra
2014-05-21 9:41 ` Chandan Rajendra [this message]
2014-05-21 9:41 ` [RFC PATCH 2/8] Btrfs: subpagesize-blocksize: Get rid of whole page writes Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 3/8] Btrfs: subpagesize-blocksize: __btrfs_buffered_write: Reserve/release extents aligned to block size Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 4/8] Btrfs: subpagesize-blocksize: Define extent_buffer_head Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 5/8] Btrfs: subpagesize-blocksize: Read tree blocks whose size is <PAGE_CACHE_SIZE Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 6/8] Btrfs: subpagesize-blocksize: Write only dirty extent buffers belonging to a page Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 7/8] Btrfs: subpagesize-blocksize: Allow mounting filesystems where sectorsize != PAGE_SIZE Chandan Rajendra
2014-05-21 9:41 ` [RFC PATCH 8/8] Btrfs: subpagesize-blocksize: Compute and look up csums based on sectorsized blocks Chandan Rajendra