From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
To: clm@fb.com, jbacik@fb.com, dsterba@suse.cz, bo.li.liu@oracle.com
Cc: Chandan Rajendra <chandan@linux.vnet.ibm.com>,
linux-btrfs@vger.kernel.org, chandan@mykolab.com
Subject: [RFC PATCH V11 16/21] Btrfs: subpagesize-blocksize: Prevent writes to an extent buffer when PG_writeback flag is set.
Date: Mon, 1 Jun 2015 20:52:51 +0530 [thread overview]
Message-ID: <1433172176-8742-17-git-send-email-chandan@linux.vnet.ibm.com> (raw)
In-Reply-To: <1433172176-8742-1-git-send-email-chandan@linux.vnet.ibm.com>
In the non-subpagesize-blocksize scenario, the BTRFS_HEADER_FLAG_WRITTEN flag
prevents Btrfs code from writing into an extent buffer whose pages are under
writeback. This facility isn't sufficient for achieving the same in the
subpagesize-blocksize scenario, since more than one extent buffer can be
mapped to a page.
Hence this patch adds a new flag (i.e. EXTENT_BUFFER_HEAD_WRITEBACK) and
corresponding code to track the writeback status of the page and to prevent
writes to any of the extent buffers mapped to the page while writeback is
going on.
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
fs/btrfs/ctree.c | 21 ++++++-
fs/btrfs/extent-tree.c | 11 ++++
fs/btrfs/extent_io.c | 150 ++++++++++++++++++++++++++++++++++++++++---------
fs/btrfs/extent_io.h | 1 +
4 files changed, 155 insertions(+), 28 deletions(-)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b28f14d..ba6fbb0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1535,6 +1535,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct extent_buffer *parent, int parent_slot,
struct extent_buffer **cow_ret)
{
+ struct extent_buffer_head *ebh = eb_head(buf);
u64 search_start;
int ret;
@@ -1548,6 +1549,14 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
+ if (test_bit(EXTENT_BUFFER_HEAD_WRITEBACK, &ebh->bflags)) {
+ if (parent)
+ btrfs_set_lock_blocking(parent);
+ btrfs_set_lock_blocking(buf);
+ wait_on_bit_io(&ebh->bflags,
+ EXTENT_BUFFER_HEAD_WRITEBACK,
+ TASK_UNINTERRUPTIBLE);
+ }
*cow_ret = buf;
return 0;
}
@@ -2665,6 +2674,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow)
{
+ struct extent_buffer_head *ebh;
struct extent_buffer *b;
int slot;
int ret;
@@ -2767,8 +2777,17 @@ again:
* then we don't want to set the path blocking,
* so we test it here
*/
- if (!should_cow_block(trans, root, b))
+ if (!should_cow_block(trans, root, b)) {
+ ebh = eb_head(b);
+ if (test_bit(EXTENT_BUFFER_HEAD_WRITEBACK,
+ &ebh->bflags)) {
+ btrfs_set_path_blocking(p);
+ wait_on_bit_io(&ebh->bflags,
+ EXTENT_BUFFER_HEAD_WRITEBACK,
+ TASK_UNINTERRUPTIBLE);
+ }
goto cow_done;
+ }
/*
* must have write locks on this node and the
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b93a922..fc324b8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7435,14 +7435,25 @@ static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, int level)
{
+ struct extent_buffer_head *ebh;
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return ERR_PTR(-ENOMEM);
+
+ ebh = eb_head(buf);
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
+
+ if (test_bit(EXTENT_BUFFER_HEAD_WRITEBACK,
+ &ebh->bflags)) {
+ btrfs_set_lock_blocking(buf);
+ wait_on_bit_io(&ebh->bflags, EXTENT_BUFFER_HEAD_WRITEBACK,
+ TASK_UNINTERRUPTIBLE);
+ }
+
clean_tree_block(trans, root->fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->ebflags);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 55f900a..1ae1059 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3638,6 +3638,52 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
+static void lock_extent_buffers(struct extent_buffer_head *ebh,
+ struct extent_page_data *epd)
+{
+ struct extent_buffer *locked_eb = NULL;
+ struct extent_buffer *eb;
+again:
+ eb = &ebh->eb;
+ do {
+ if (eb == locked_eb)
+ continue;
+
+ if (!btrfs_try_tree_write_lock(eb))
+ goto backoff;
+
+ } while ((eb = eb->eb_next) != NULL);
+
+ return;
+
+backoff:
+ if (locked_eb && (locked_eb->start > eb->start))
+ btrfs_tree_unlock(locked_eb);
+
+ locked_eb = eb;
+
+ eb = &ebh->eb;
+ while (eb != locked_eb) {
+ btrfs_tree_unlock(eb);
+ eb = eb->eb_next;
+ }
+
+ flush_write_bio(epd);
+
+ btrfs_tree_lock(locked_eb);
+
+ goto again;
+}
+
+static void unlock_extent_buffers(struct extent_buffer_head *ebh)
+{
+ struct extent_buffer *eb = &ebh->eb;
+
+ do {
+ btrfs_tree_unlock(eb);
+ } while ((eb = eb->eb_next) != NULL);
+}
+
static void lock_extent_buffer_pages(struct extent_buffer_head *ebh,
struct extent_page_data *epd)
{
@@ -3657,21 +3703,17 @@ static void lock_extent_buffer_pages(struct extent_buffer_head *ebh,
}
static int noinline_for_stack
-lock_extent_buffer_for_io(struct extent_buffer *eb,
+mark_extent_buffer_writeback(struct extent_buffer *eb,
struct btrfs_fs_info *fs_info,
struct extent_page_data *epd)
{
+ struct extent_buffer_head *ebh = eb_head(eb);
+ struct extent_buffer *cur;
int dirty;
int ret = 0;
- if (!btrfs_try_tree_write_lock(eb)) {
- flush_write_bio(epd);
- btrfs_tree_lock(eb);
- }
-
if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->ebflags)) {
dirty = test_bit(EXTENT_BUFFER_DIRTY, &eb->ebflags);
- btrfs_tree_unlock(eb);
if (!epd->sync_io) {
if (!dirty)
return 1;
@@ -3679,15 +3721,23 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
return 2;
}
+ cur = &ebh->eb;
+ do {
+ btrfs_set_lock_blocking(cur);
+ } while ((cur = cur->eb_next) != NULL);
+
flush_write_bio(epd);
while (1) {
wait_on_extent_buffer_writeback(eb);
- btrfs_tree_lock(eb);
if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->ebflags))
break;
- btrfs_tree_unlock(eb);
}
+
+ cur = &ebh->eb;
+ do {
+ btrfs_clear_lock_blocking(cur);
+ } while ((cur = cur->eb_next) != NULL);
}
/*
@@ -3695,22 +3745,20 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
* under IO since we can end up having no IO bits set for a short period
* of time.
*/
- spin_lock(&eb_head(eb)->refs_lock);
+ spin_lock(&ebh->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->ebflags)) {
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->ebflags);
- spin_unlock(&eb_head(eb)->refs_lock);
+ spin_unlock(&ebh->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-eb->len,
fs_info->dirty_metadata_batch);
ret = 0;
} else {
- spin_unlock(&eb_head(eb)->refs_lock);
+ spin_unlock(&ebh->refs_lock);
ret = 1;
}
- btrfs_tree_unlock(eb);
-
return ret;
}
@@ -3856,8 +3904,8 @@ static void set_btree_ioerr(struct extent_buffer *eb, struct page *page)
static void end_bio_subpagesize_blocksize_ebh_writepage(struct bio *bio, int err)
{
- struct bio_vec *bvec;
struct extent_buffer *eb;
+ struct bio_vec *bvec;
int i, done;
bio_for_each_segment_all(bvec, bio, i) {
@@ -3887,6 +3935,15 @@ static void end_bio_subpagesize_blocksize_ebh_writepage(struct bio *bio, int err
end_extent_buffer_writeback(eb);
+ if (done) {
+ struct extent_buffer_head *ebh = eb_head(eb);
+
+ clear_bit(EXTENT_BUFFER_HEAD_WRITEBACK,
+ &ebh->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&ebh->bflags,
+ EXTENT_BUFFER_HEAD_WRITEBACK);
+ }
} while ((eb = eb->eb_next) != NULL);
}
@@ -3896,6 +3953,7 @@ static void end_bio_subpagesize_blocksize_ebh_writepage(struct bio *bio, int err
static void end_bio_regular_ebh_writepage(struct bio *bio, int err)
{
+ struct extent_buffer_head *ebh;
struct extent_buffer *eb;
struct bio_vec *bvec;
int i, done;
@@ -3906,7 +3964,9 @@ static void end_bio_regular_ebh_writepage(struct bio *bio, int err)
eb = (struct extent_buffer *)page->private;
BUG_ON(!eb);
- done = atomic_dec_and_test(&eb_head(eb)->io_bvecs);
+ ebh = eb_head(eb);
+
+ done = atomic_dec_and_test(&ebh->io_bvecs);
if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->ebflags)) {
ClearPageUptodate(page);
@@ -3919,6 +3979,10 @@ static void end_bio_regular_ebh_writepage(struct bio *bio, int err)
continue;
end_extent_buffer_writeback(eb);
+
+ clear_bit(EXTENT_BUFFER_HEAD_WRITEBACK, &ebh->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&ebh->bflags, EXTENT_BUFFER_HEAD_WRITEBACK);
}
bio_put(bio);
@@ -3960,8 +4024,14 @@ write_regular_ebh(struct extent_buffer_head *ebh,
set_btree_ioerr(eb, p);
end_page_writeback(p);
if (atomic_sub_and_test(num_pages - i,
- &eb_head(eb)->io_bvecs))
+ &ebh->io_bvecs)) {
end_extent_buffer_writeback(eb);
+ clear_bit(EXTENT_BUFFER_HEAD_WRITEBACK,
+ &ebh->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&ebh->bflags,
+ EXTENT_BUFFER_HEAD_WRITEBACK);
+ }
ret = -EIO;
break;
}
@@ -3995,6 +4065,7 @@ static int write_subpagesize_blocksize_ebh(struct extent_buffer_head *ebh,
unsigned long i;
unsigned long bio_flags = 0;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
+ int nr_eb_submitted = 0;
int ret = 0, err = 0;
eb = &ebh->eb;
@@ -4007,7 +4078,7 @@ static int write_subpagesize_blocksize_ebh(struct extent_buffer_head *ebh,
continue;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->ebflags);
- atomic_inc(&eb_head(eb)->io_bvecs);
+ atomic_inc(&ebh->io_bvecs);
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
bio_flags = EXTENT_BIO_TREE_LOG;
@@ -4025,6 +4096,8 @@ static int write_subpagesize_blocksize_ebh(struct extent_buffer_head *ebh,
atomic_dec(&eb_head(eb)->io_bvecs);
end_extent_buffer_writeback(eb);
err = -EIO;
+ } else {
+ ++nr_eb_submitted;
}
} while ((eb = eb->eb_next) != NULL);
@@ -4032,6 +4105,12 @@ static int write_subpagesize_blocksize_ebh(struct extent_buffer_head *ebh,
update_nr_written(p, wbc, 1);
}
+ if (!nr_eb_submitted) {
+ clear_bit(EXTENT_BUFFER_HEAD_WRITEBACK, &ebh->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&ebh->bflags, EXTENT_BUFFER_HEAD_WRITEBACK);
+ }
+
unlock_page(p);
return ret;
@@ -4143,24 +4222,31 @@ retry:
j = 0;
ebs_to_write = dirty_ebs = 0;
+
+ lock_extent_buffers(ebh, &epd);
+
+ set_bit(EXTENT_BUFFER_HEAD_WRITEBACK, &ebh->bflags);
+
eb = &ebh->eb;
do {
BUG_ON(j >= BITS_PER_LONG);
- ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+ ret = mark_extent_buffer_writeback(eb, fs_info,
+ &epd);
switch (ret) {
case 0:
/*
- EXTENT_BUFFER_DIRTY was set and we were able to
- clear it.
+ EXTENT_BUFFER_DIRTY was set and we were
+ able to clear it.
*/
set_bit(j, &ebs_to_write);
break;
case 2:
/*
- EXTENT_BUFFER_DIRTY was set, but we were unable
- to clear EXTENT_BUFFER_WRITEBACK that was set
- before we got the extent buffer locked.
+ EXTENT_BUFFER_DIRTY was set, but we were
+ unable to clear EXTENT_BUFFER_WRITEBACK
+ that was set before we got the extent
+ buffer locked.
*/
set_bit(j, &dirty_ebs);
default:
@@ -4174,22 +4260,32 @@ retry:
ret = 0;
+ unlock_extent_buffers(ebh);
+
if (!ebs_to_write) {
+ clear_bit(EXTENT_BUFFER_HEAD_WRITEBACK,
+ &ebh->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&ebh->bflags,
+ EXTENT_BUFFER_HEAD_WRITEBACK);
free_extent_buffer(&ebh->eb);
continue;
}
/*
- Now that we know that atleast one of the extent buffer
+ Now that we know that at least one of the extent buffers
belonging to the extent buffer head must be written to
the disk, lock the extent_buffer_head's pages.
*/
lock_extent_buffer_pages(ebh, &epd);
if (ebh->eb.len < PAGE_CACHE_SIZE) {
- ret = write_subpagesize_blocksize_ebh(ebh, fs_info, wbc, &epd, ebs_to_write);
+ ret = write_subpagesize_blocksize_ebh(ebh, fs_info,
+ wbc, &epd,
+ ebs_to_write);
if (dirty_ebs) {
- redirty_extent_buffer_pages_for_writepage(&ebh->eb, wbc);
+ redirty_extent_buffer_pages_for_writepage(&ebh->eb,
+ wbc);
}
} else {
ret = write_regular_ebh(ebh, fs_info, wbc, &epd);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c629e53..cbc7d73 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -42,6 +42,7 @@
#define EXTENT_BUFFER_DUMMY 9
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
+#define EXTENT_BUFFER_HEAD_WRITEBACK 12
/* these are flags for extent_clear_unlock_delalloc */
#define PAGE_UNLOCK (1 << 0)
--
2.1.0
next prev parent reply other threads:[~2015-06-01 15:26 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-01 15:22 [RFC PATCH V11 00/21] Btrfs: Subpagesize-blocksize: Allow I/O on blocks whose size is less than page size Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 01/21] Btrfs: subpagesize-blocksize: Fix whole page read Chandan Rajendra
2015-06-19 4:45 ` Liu Bo
2015-06-19 9:45 ` Chandan Rajendra
2015-06-23 8:37 ` Liu Bo
2016-02-10 10:44 ` David Sterba
2016-02-10 10:39 ` David Sterba
2016-02-11 5:42 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 02/21] Btrfs: subpagesize-blocksize: Fix whole page write Chandan Rajendra
2015-06-26 9:50 ` Liu Bo
2015-06-29 8:54 ` Chandan Rajendra
2015-07-01 14:27 ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 03/21] Btrfs: subpagesize-blocksize: __btrfs_buffered_write: Reserve/release extents aligned to block size Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 04/21] Btrfs: subpagesize-blocksize: Define extent_buffer_head Chandan Rajendra
2015-07-01 14:33 ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 05/21] Btrfs: subpagesize-blocksize: Read tree blocks whose size is < PAGE_SIZE Chandan Rajendra
2015-07-01 14:40 ` Liu Bo
2015-07-03 10:02 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 06/21] Btrfs: subpagesize-blocksize: Write only dirty extent buffers belonging to a page Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 07/21] Btrfs: subpagesize-blocksize: Allow mounting filesystems where sectorsize != PAGE_SIZE Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 08/21] Btrfs: subpagesize-blocksize: Compute and look up csums based on sectorsized blocks Chandan Rajendra
2015-07-01 14:37 ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 09/21] Btrfs: subpagesize-blocksize: Direct I/O read: Work " Chandan Rajendra
2015-07-01 14:45 ` Liu Bo
2015-07-03 10:05 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 10/21] Btrfs: subpagesize-blocksize: fallocate: Work with sectorsized units Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 11/21] Btrfs: subpagesize-blocksize: btrfs_page_mkwrite: Reserve space in " Chandan Rajendra
2015-07-06 3:18 ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 12/21] Btrfs: subpagesize-blocksize: Search for all ordered extents that could span across a page Chandan Rajendra
2015-07-01 14:47 ` Liu Bo
2015-07-03 10:08 ` Chandan Rajendra
2015-07-06 3:17 ` Liu Bo
2015-07-06 10:49 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 13/21] Btrfs: subpagesize-blocksize: Deal with partial ordered extent allocations Chandan Rajendra
2015-07-06 10:06 ` Liu Bo
2015-07-07 13:38 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 14/21] Btrfs: subpagesize-blocksize: Explicitly Track I/O status of blocks of an ordered extent Chandan Rajendra
2015-07-20 8:34 ` Liu Bo
2015-07-20 12:54 ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 15/21] Btrfs: subpagesize-blocksize: Revert commit fc4adbff823f76577ece26dcb88bf6f8392dbd43 Chandan Rajendra
2015-06-01 15:22 ` Chandan Rajendra [this message]
2015-06-01 15:22 ` [RFC PATCH V11 17/21] Btrfs: subpagesize-blocksize: Use (eb->start, seq) as search key for tree modification log Chandan Rajendra
2015-07-20 14:46 ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 18/21] Btrfs: subpagesize-blocksize: btrfs_submit_direct_hook: Handle map_length < bio vector length Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 19/21] Revert "btrfs: fix lockups from btrfs_clear_path_blocking" Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 20/21] Btrfs: subpagesize-blockssize: Limit inline extents to root->sectorsize Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 21/21] Btrfs: subpagesize-blocksize: Fix block size returned to user space Chandan Rajendra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1433172176-8742-17-git-send-email-chandan@linux.vnet.ibm.com \
--to=chandan@linux.vnet.ibm.com \
--cc=bo.li.liu@oracle.com \
--cc=chandan@mykolab.com \
--cc=clm@fb.com \
--cc=dsterba@suse.cz \
--cc=jbacik@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).