* [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-12 7:26 ` Christoph Hellwig
2015-08-11 22:49 ` [PATCH 2/8] xfs: io type needs to be part of the writepage context Dave Chinner
` (6 subsequent siblings)
7 siblings, 1 reply; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
xfs_vm_writepages() calls generic_writepages() to write back a range of
a file, but then xfs_vm_writepage() clusters pages itself as it does
not have any context it can pass between ->writepage calls from
write_cache_pages().
Introduce a writeback context for xfs_vm_writepages() and call
write_cache_pages() directly with our own writepage callback so that
we can pass that context to each writepage invocation.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 73 +++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 52 insertions(+), 21 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3859f5e..6dc1154 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,14 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
+/*
+ * structure owned by writepages passed to individual writepage calls
+ */
+struct xfs_writepage_ctx {
+ struct xfs_bmbt_irec imap;
+ bool imap_valid;
+};
+
void
xfs_count_page_state(
struct page *page,
@@ -334,7 +342,7 @@ xfs_map_blocks(
return 0;
}
-STATIC int
+STATIC bool
xfs_imap_valid(
struct inode *inode,
struct xfs_bmbt_irec *imap,
@@ -934,20 +942,21 @@ out_invalidate:
* For any other dirty buffer heads on the page we should flush them.
*/
STATIC int
-xfs_vm_writepage(
+xfs_do_writepage(
struct page *page,
- struct writeback_control *wbc)
+ struct writeback_control *wbc,
+ void *data)
{
+ struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
struct buffer_head *bh, *head;
- struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
ssize_t len;
- int err, imap_valid = 0, uptodate = 1;
+ int err, uptodate = 1;
int count = 0;
int nonblocking = 0;
@@ -1067,24 +1076,24 @@ xfs_vm_writepage(
* buffers covering holes here.
*/
if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- imap_valid = 0;
+ wpc->imap_valid = false;
continue;
}
if (buffer_unwritten(bh)) {
if (type != XFS_IO_UNWRITTEN) {
type = XFS_IO_UNWRITTEN;
- imap_valid = 0;
+ wpc->imap_valid = false;
}
} else if (buffer_delay(bh)) {
if (type != XFS_IO_DELALLOC) {
type = XFS_IO_DELALLOC;
- imap_valid = 0;
+ wpc->imap_valid = false;
}
} else if (buffer_uptodate(bh)) {
if (type != XFS_IO_OVERWRITE) {
type = XFS_IO_OVERWRITE;
- imap_valid = 0;
+ wpc->imap_valid = false;
}
} else {
if (PageUptodate(page))
@@ -1095,13 +1104,14 @@ xfs_vm_writepage(
* subsequent writeable buffers into a new
* ioend.
*/
- imap_valid = 0;
+ wpc->imap_valid = false;
continue;
}
- if (imap_valid)
- imap_valid = xfs_imap_valid(inode, &imap, offset);
- if (!imap_valid) {
+ if (wpc->imap_valid)
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
+ if (!wpc->imap_valid) {
/*
* If we didn't have a valid mapping then we need to
* put the new mapping into a separate ioend structure.
@@ -1111,16 +1121,17 @@ xfs_vm_writepage(
* time.
*/
new_ioend = 1;
- err = xfs_map_blocks(inode, offset, &imap, type,
+ err = xfs_map_blocks(inode, offset, &wpc->imap, type,
nonblocking);
if (err)
goto error;
- imap_valid = xfs_imap_valid(inode, &imap, offset);
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
}
- if (imap_valid) {
+ if (wpc->imap_valid) {
lock_buffer(bh);
if (type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &imap, offset);
+ xfs_map_at_offset(inode, bh, &wpc->imap, offset);
xfs_add_to_ioend(inode, bh, offset, type, &ioend,
new_ioend);
count++;
@@ -1147,10 +1158,10 @@ xfs_vm_writepage(
* completion path as we have marked the initial page as under writeback
* and unlocked it.
*/
- if (imap_valid) {
+ if (wpc->imap_valid) {
xfs_off_t end_index;
- end_index = imap.br_startoff + imap.br_blockcount;
+ end_index = wpc->imap.br_startoff + wpc->imap.br_blockcount;
/* to bytes */
end_index <<= inode->i_blkbits;
@@ -1162,7 +1173,7 @@ xfs_vm_writepage(
if (end_index > last_index)
end_index = last_index;
- xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+ xfs_cluster_write(inode, page->index + 1, &wpc->imap, &ioend,
wbc, end_index);
}
@@ -1188,6 +1199,8 @@ error:
xfs_aops_discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
+ mapping_set_error(page->mapping, err);
+ wpc->imap_valid = false;
return err;
redirty:
@@ -1197,12 +1210,30 @@ redirty:
}
STATIC int
+xfs_vm_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
+{
+ struct xfs_writepage_ctx wpc = {};
+
+ return xfs_do_writepage(page, wbc, &wpc);
+}
+
+STATIC int
xfs_vm_writepages(
struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct xfs_writepage_ctx wpc = {};
+ struct blk_plug plug;
+ int ret;
+
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- return generic_writepages(mapping, wbc);
+ blk_start_plug(&plug);
+ ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
+ blk_finish_plug(&plug);
+
+ return ret;
}
/*
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-11 22:49 ` [PATCH 1/8] xfs: Introduce writeback context for writepages Dave Chinner
@ 2015-08-12 7:26 ` Christoph Hellwig
2015-08-13 1:32 ` Dave Chinner
0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2015-08-12 7:26 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Introducing this separate from the actual users is a bit odd to follow.
Compare to my patch from a few years ago:
http://thread.gmane.org/gmane.comp.file-systems.xfs.general/39030/focus=39009
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-12 7:26 ` Christoph Hellwig
@ 2015-08-13 1:32 ` Dave Chinner
2015-08-13 6:52 ` Christoph Hellwig
0 siblings, 1 reply; 16+ messages in thread
From: Dave Chinner @ 2015-08-13 1:32 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Aug 12, 2015 at 12:26:35AM -0700, Christoph Hellwig wrote:
> Introducing this separate from the actual users is a bit odd to follow.
> Compare to my patch from a few years ago:
>
> http://thread.gmane.org/gmane.comp.file-systems.xfs.general/39030/focus=39009
I completely forgot you did this. I'll go back and look at it
and see what I can pull from it. Thanks for the reminder, Christoph.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-13 1:32 ` Dave Chinner
@ 2015-08-13 6:52 ` Christoph Hellwig
2015-08-14 1:53 ` Dave Chinner
0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2015-08-13 6:52 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, xfs
On Thu, Aug 13, 2015 at 11:32:44AM +1000, Dave Chinner wrote:
> > http://thread.gmane.org/gmane.comp.file-systems.xfs.general/39030/focus=39009
>
> I completely forgot you did this. I'll go back and look at it
> and see what I can pull from it. Thanks for the reminder, Christoph.
As far as I can tell your new version is superior in all aspects
except for the patch split :)
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-13 6:52 ` Christoph Hellwig
@ 2015-08-14 1:53 ` Dave Chinner
2015-08-15 13:23 ` Christoph Hellwig
0 siblings, 1 reply; 16+ messages in thread
From: Dave Chinner @ 2015-08-14 1:53 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Aug 12, 2015 at 11:52:58PM -0700, Christoph Hellwig wrote:
> On Thu, Aug 13, 2015 at 11:32:44AM +1000, Dave Chinner wrote:
> > > http://thread.gmane.org/gmane.comp.file-systems.xfs.general/39030/focus=39009
> >
> > I completely forgot you did this. I'll go back and look at it
> > and see what I can pull from it. Thanks for the reminder, Christoph.
>
> As far as I can tell your new version is superior in all aspects
> except for the patch split :)
Ok, I've gone back and had a look at this now and reminded myself of
the discussion we had and the problems found. The end results are
remarkably similar - maybe that's why I found it easy to write this
patch set. i.e. your patchset was buried somewhere in my brain, even
though I'd forgotten about it at the surface...
How would you like to see this patchset broken up into more
sensible/reviewable chunks? It's just folding and splitting patches,
so it's not a big deal to rework it...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-14 1:53 ` Dave Chinner
@ 2015-08-15 13:23 ` Christoph Hellwig
2015-08-15 22:57 ` Dave Chinner
0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2015-08-15 13:23 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, xfs
On Fri, Aug 14, 2015 at 11:53:09AM +1000, Dave Chinner wrote:
> Ok, I've gone back and had a look at this now and reminded myself of
> the discussion we had and the problems found. The end results are
> remarkably similar - maybe that's why I found it easy to write this
> patch set. i.e. your patchset was buried somewhere in my brain, even
> though I'd forgotten about it at the surface...
>
> How would you like to see this patchset broken up into more
> sensible/reviewable chunks? It's just folding and splitting patches,
> so it's not a big deal to rework it...
I think it should be less. Patch 3 should go first, and then 1,2,4 & 5
as a single one.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/8] xfs: Introduce writeback context for writepages
2015-08-15 13:23 ` Christoph Hellwig
@ 2015-08-15 22:57 ` Dave Chinner
0 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-15 22:57 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Sat, Aug 15, 2015 at 06:23:36AM -0700, Christoph Hellwig wrote:
> On Fri, Aug 14, 2015 at 11:53:09AM +1000, Dave Chinner wrote:
> > Ok, I've gone back and had a look at this now and reminded myself of
> > the discussion we had and the problems found. The end results are
> > remarkably similar - maybe that's why I found it easy to write this
> > patch set. i.e. your patchset was buried somewhere in my brain, even
> > though I'd forgotten about it at the surface...
> >
> > How would you like to see this patchset broken up into more
> > sensible/reviewable chunks? It's just folding and splitting patches,
> > so it's not a big deal to rework it...
>
> I think it should be less. Patch 3 should go first, and then 1,2,4 & 5
> as a single one.
No worries, I'll reorder and fold them, retest and repost. Thanks
Christoph!
-Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 2/8] xfs: io type needs to be part of the writepage context
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
2015-08-11 22:49 ` [PATCH 1/8] xfs: Introduce writeback context for writepages Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-11 22:49 ` [PATCH 3/8] xfs: remove nonblocking mode from xfs_vm_writepage Dave Chinner
` (5 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
If we don't pass the IO type we are mapping with the writepage
context, then the imap is recalculated on every delalloc page that
is passed to xfs_do_writepage(). This defeats the purpose of having
a cached imap between calls and increases the overhead of delalloc
writeback significantly.
Fix this by moving the io type into the writepage context structure
so that it moves with the cached imap through the stack.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 33 ++++++++++++++++++---------------
1 file changed, 18 insertions(+), 15 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6dc1154..4d5479d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -42,6 +42,7 @@
struct xfs_writepage_ctx {
struct xfs_bmbt_irec imap;
bool imap_valid;
+ unsigned int io_type;
};
void
@@ -952,7 +953,6 @@ xfs_do_writepage(
struct buffer_head *bh, *head;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
- unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
ssize_t len;
@@ -1056,7 +1056,6 @@ xfs_do_writepage(
bh = head = page_buffers(page);
offset = page_offset(page);
- type = XFS_IO_OVERWRITE;
if (wbc->sync_mode == WB_SYNC_NONE)
nonblocking = 1;
@@ -1081,18 +1080,18 @@ xfs_do_writepage(
}
if (buffer_unwritten(bh)) {
- if (type != XFS_IO_UNWRITTEN) {
- type = XFS_IO_UNWRITTEN;
+ if (wpc->io_type != XFS_IO_UNWRITTEN) {
+ wpc->io_type = XFS_IO_UNWRITTEN;
wpc->imap_valid = false;
}
} else if (buffer_delay(bh)) {
- if (type != XFS_IO_DELALLOC) {
- type = XFS_IO_DELALLOC;
+ if (wpc->io_type != XFS_IO_DELALLOC) {
+ wpc->io_type = XFS_IO_DELALLOC;
wpc->imap_valid = false;
}
} else if (buffer_uptodate(bh)) {
- if (type != XFS_IO_OVERWRITE) {
- type = XFS_IO_OVERWRITE;
+ if (wpc->io_type != XFS_IO_OVERWRITE) {
+ wpc->io_type = XFS_IO_OVERWRITE;
wpc->imap_valid = false;
}
} else {
@@ -1121,8 +1120,8 @@ xfs_do_writepage(
* time.
*/
new_ioend = 1;
- err = xfs_map_blocks(inode, offset, &wpc->imap, type,
- nonblocking);
+ err = xfs_map_blocks(inode, offset, &wpc->imap,
+ wpc->io_type, nonblocking);
if (err)
goto error;
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
@@ -1130,10 +1129,10 @@ xfs_do_writepage(
}
if (wpc->imap_valid) {
lock_buffer(bh);
- if (type != XFS_IO_OVERWRITE)
+ if (wpc->io_type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, &wpc->imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type, &ioend,
- new_ioend);
+ xfs_add_to_ioend(inode, bh, offset, wpc->io_type,
+ &ioend, new_ioend);
count++;
}
@@ -1214,7 +1213,9 @@ xfs_vm_writepage(
struct page *page,
struct writeback_control *wbc)
{
- struct xfs_writepage_ctx wpc = {};
+ struct xfs_writepage_ctx wpc = {
+ .io_type = XFS_IO_OVERWRITE,
+ };
return xfs_do_writepage(page, wbc, &wpc);
}
@@ -1224,7 +1225,9 @@ xfs_vm_writepages(
struct address_space *mapping,
struct writeback_control *wbc)
{
- struct xfs_writepage_ctx wpc = {};
+ struct xfs_writepage_ctx wpc = {
+ .io_type = XFS_IO_OVERWRITE,
+ };
struct blk_plug plug;
int ret;
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 3/8] xfs: remove nonblocking mode from xfs_vm_writepage
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
2015-08-11 22:49 ` [PATCH 1/8] xfs: Introduce writeback context for writepages Dave Chinner
2015-08-11 22:49 ` [PATCH 2/8] xfs: io type needs to be part of the writepage context Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-12 7:27 ` Christoph Hellwig
2015-08-11 22:49 ` [PATCH 4/8] xfs: add ioend and iohead to xfs_writepage_ctx Dave Chinner
` (4 subsequent siblings)
7 siblings, 1 reply; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Remove the nonblocking optimisation done for mapping lookups during
writeback. It's not clear that leaving a hole in the writeback range
just because we couldn't get a lock is really a win, as it makes us
do another small random IO later on rather than a large sequential
IO now.
As this gets in the way of sane error handling later on, just remove
it for the moment and we can re-introduce an equivalent optimisation in
future if we see problems due to extent map lock contention.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4d5479d..b718156 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -285,8 +285,7 @@ xfs_map_blocks(
struct inode *inode,
loff_t offset,
struct xfs_bmbt_irec *imap,
- int type,
- int nonblocking)
+ int type)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -302,12 +301,7 @@ xfs_map_blocks(
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
- if (nonblocking)
- return -EAGAIN;
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- }
-
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -958,7 +952,6 @@ xfs_do_writepage(
ssize_t len;
int err, uptodate = 1;
int count = 0;
- int nonblocking = 0;
trace_xfs_writepage(inode, page, 0, 0);
@@ -1057,9 +1050,6 @@ xfs_do_writepage(
bh = head = page_buffers(page);
offset = page_offset(page);
- if (wbc->sync_mode == WB_SYNC_NONE)
- nonblocking = 1;
-
do {
int new_ioend = 0;
@@ -1121,7 +1111,7 @@ xfs_do_writepage(
*/
new_ioend = 1;
err = xfs_map_blocks(inode, offset, &wpc->imap,
- wpc->io_type, nonblocking);
+ wpc->io_type);
if (err)
goto error;
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
@@ -1192,9 +1182,6 @@ error:
if (iohead)
xfs_cancel_ioend(iohead);
- if (err == -EAGAIN)
- goto redirty;
-
xfs_aops_discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/8] xfs: remove nonblocking mode from xfs_vm_writepage
2015-08-11 22:49 ` [PATCH 3/8] xfs: remove nonblocking mode from xfs_vm_writepage Dave Chinner
@ 2015-08-12 7:27 ` Christoph Hellwig
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2015-08-12 7:27 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
This error handling was the biggest obstacle last time we tried it,
so let's remove it for now. However:
On Wed, Aug 12, 2015 at 08:49:43AM +1000, Dave Chinner wrote:
> Remove the nonblocking optimisation done for mapping lookups during
> writeback. It's not clear that leaving a hole in the writeback range
> just because we couldn't get a lock is really a win, as it makes us
> do another small random IO later on rather than a large sequential
> IO now.
The fs-writeback.c will usually move to a different inode, so I don't
think it's actually that bad.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 4/8] xfs: add ioend and iohead to xfs_writepage_ctx
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
` (2 preceding siblings ...)
2015-08-11 22:49 ` [PATCH 3/8] xfs: remove nonblocking mode from xfs_vm_writepage Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-11 22:49 ` [PATCH 5/8] xfs: writepage context needs to handle discontiguous page ranges Dave Chinner
` (3 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Now we have a cross-writepage context, we don't need to submit IO at
the end of the writepage call - we can continue to aggregate ioends
across the entire writepages call if the iohead and ioend are held
in the writepage context.
This requires us to move the ioend submission up to the level where
the writepage context is declared. This does mean we do not submit
IO until we have packaged the entire writeback range, but with the block
plugging in the writepages call this is the way IO is submitted,
anyway.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 122 +++++++++++++++++++++++++++---------------------------
1 file changed, 60 insertions(+), 62 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index b718156..e4184f5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -43,6 +43,8 @@ struct xfs_writepage_ctx {
struct xfs_bmbt_irec imap;
bool imap_valid;
unsigned int io_type;
+ struct xfs_ioend *iohead;
+ struct xfs_ioend *ioend;
};
void
@@ -525,38 +527,6 @@ xfs_submit_ioend(
}
/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too. Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *next;
- struct buffer_head *bh, *next_bh;
-
- do {
- next = ioend->io_list;
- bh = ioend->io_buffer_head;
- do {
- next_bh = bh->b_private;
- clear_buffer_async_write(bh);
- /*
- * The unwritten flag is cleared when added to the
- * ioend. We're not submitting for I/O so mark the
- * buffer unwritten again for next time around.
- */
- if (ioend->io_type == XFS_IO_UNWRITTEN)
- set_buffer_unwritten(bh);
- unlock_buffer(bh);
- } while ((bh = next_bh) != NULL);
-
- mempool_free(ioend, xfs_ioend_pool);
- } while ((ioend = next) != NULL);
-}
-
-/*
* Test to see if we've been building up a completion structure for
* earlier buffers -- if so, we try to append to this ioend if we
* can, otherwise we finish off any current ioend and start another.
@@ -928,6 +898,27 @@ out_invalidate:
return;
}
+static int
+xfs_writepage_submit(
+ struct xfs_writepage_ctx *wpc,
+ struct writeback_control *wbc,
+ int status)
+{
+ struct blk_plug plug;
+
+ /* Reserve log space if we might write beyond the on-disk inode size. */
+ if (!status && wpc->ioend && wpc->ioend->io_type != XFS_IO_UNWRITTEN &&
+ xfs_ioend_is_append(wpc->ioend))
+ status = xfs_setfilesize_trans_alloc(wpc->ioend);
+
+ if (wpc->iohead) {
+ blk_start_plug(&plug);
+ xfs_submit_ioend(wbc, wpc->iohead, status);
+ blk_finish_plug(&plug);
+ }
+ return status;
+}
+
/*
* Write out a dirty page.
*
@@ -945,7 +936,6 @@ xfs_do_writepage(
struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
struct buffer_head *bh, *head;
- xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
__uint64_t end_offset;
pgoff_t end_index, last_index;
@@ -1122,12 +1112,12 @@ xfs_do_writepage(
if (wpc->io_type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, &wpc->imap, offset);
xfs_add_to_ioend(inode, bh, offset, wpc->io_type,
- &ioend, new_ioend);
+ &wpc->ioend, new_ioend);
count++;
}
- if (!iohead)
- iohead = ioend;
+ if (!wpc->iohead)
+ wpc->iohead = wpc->ioend;
} while (offset += len, ((bh = bh->b_this_page) != head));
@@ -1137,10 +1127,10 @@ xfs_do_writepage(
xfs_start_page_writeback(page, 1, count);
/* if there is no IO to be submitted for this page, we are done */
- if (!ioend)
+ if (!count)
return 0;
- ASSERT(iohead);
+ ASSERT(wpc->iohead);
/*
* Any errors from this point onwards need tobe reported through the IO
@@ -1162,31 +1152,37 @@ xfs_do_writepage(
if (end_index > last_index)
end_index = last_index;
- xfs_cluster_write(inode, page->index + 1, &wpc->imap, &ioend,
- wbc, end_index);
+ xfs_cluster_write(inode, page->index + 1, &wpc->imap,
+ &wpc->ioend, wbc, end_index);
}
-
- /*
- * Reserve log space if we might write beyond the on-disk inode size.
- */
- err = 0;
- if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
- err = xfs_setfilesize_trans_alloc(ioend);
-
- xfs_submit_ioend(wbc, iohead, err);
-
return 0;
error:
- if (iohead)
- xfs_cancel_ioend(iohead);
+ /*
+ * We have to fail the iohead here because we have buffers locked in the
+ * ioend chain. If we don't do this, we'll deadlock invalidating the
+ * page as that tries to lock the buffers on the page. Also, because we
+ * have set pages under writeback, we have to run IO completion to mark
+ * the error state of the IO appropriately, so we can't cancel the ioend
+ * directly here. That means we have to mark this page as under
+ * writeback if we included any buffers from it in the ioend chain.
+ */
+ if (count)
+ xfs_start_page_writeback(page, 0, count);
+ xfs_writepage_submit(wpc, wbc, err);
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
+ /*
+ * We can only discard the page we had the IO error on if we haven't
+ * included it in the ioend above. If it has already been errored out,
+ * then it is unlocked and we can't touch it here.
+ */
+ if (!count) {
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ }
mapping_set_error(page->mapping, err);
- wpc->imap_valid = false;
return err;
redirty:
@@ -1203,8 +1199,12 @@ xfs_vm_writepage(
struct xfs_writepage_ctx wpc = {
.io_type = XFS_IO_OVERWRITE,
};
+ int ret;
- return xfs_do_writepage(page, wbc, &wpc);
+ ret = xfs_do_writepage(page, wbc, &wpc);
+ if (ret)
+ return ret;
+ return xfs_writepage_submit(&wpc, wbc, ret);
}
STATIC int
@@ -1215,15 +1215,13 @@ xfs_vm_writepages(
struct xfs_writepage_ctx wpc = {
.io_type = XFS_IO_OVERWRITE,
};
- struct blk_plug plug;
int ret;
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- blk_start_plug(&plug);
ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
- blk_finish_plug(&plug);
-
- return ret;
+ if (ret)
+ return ret;
+ return xfs_writepage_submit(&wpc, wbc, ret);
}
/*
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 5/8] xfs: writepage context needs to handle discontiguous page ranges
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
` (3 preceding siblings ...)
2015-08-11 22:49 ` [PATCH 4/8] xfs: add ioend and iohead to xfs_writepage_ctx Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-11 22:49 ` [PATCH 6/8] xfs: xfs_cluster_write is redundant Dave Chinner
` (2 subsequent siblings)
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
If the pages sent down by write_cache_pages to the writepage
callback are discontiguous, we need to detect this and put each
discontiguous page range into individual ioends. This is needed to
ensure that the ioend accurately represents the range of the file
that it covers so that file size updates during IO completion set
the size correctly. Failure to take into account the discontiguous
ranges results in files being too small when writeback patterns are
non-sequential.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 82 ++++++++++++++++++++++---------------------------------
1 file changed, 32 insertions(+), 50 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e4184f5..93bf13c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -45,6 +45,7 @@ struct xfs_writepage_ctx {
unsigned int io_type;
struct xfs_ioend *iohead;
struct xfs_ioend *ioend;
+ sector_t last_block;
};
void
@@ -537,29 +538,27 @@ xfs_add_to_ioend(
struct inode *inode,
struct buffer_head *bh,
xfs_off_t offset,
- unsigned int type,
- xfs_ioend_t **result,
- int need_ioend)
+ struct xfs_writepage_ctx *wpc)
{
- xfs_ioend_t *ioend = *result;
-
- if (!ioend || need_ioend || type != ioend->io_type) {
- xfs_ioend_t *previous = *result;
-
- ioend = xfs_alloc_ioend(inode, type);
- ioend->io_offset = offset;
- ioend->io_buffer_head = bh;
- ioend->io_buffer_tail = bh;
- if (previous)
- previous->io_list = ioend;
- *result = ioend;
+ if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+ bh->b_blocknr != wpc->last_block + 1) {
+ struct xfs_ioend *new;
+
+ new = xfs_alloc_ioend(inode, wpc->io_type);
+ new->io_offset = offset;
+ new->io_buffer_head = bh;
+ new->io_buffer_tail = bh;
+ if (wpc->ioend)
+ wpc->ioend->io_list = new;
+ wpc->ioend = new;
} else {
- ioend->io_buffer_tail->b_private = bh;
- ioend->io_buffer_tail = bh;
+ wpc->ioend->io_buffer_tail->b_private = bh;
+ wpc->ioend->io_buffer_tail = bh;
}
bh->b_private = NULL;
- ioend->io_size += bh->b_size;
+ wpc->ioend->io_size += bh->b_size;
+ wpc->last_block = bh->b_blocknr;
}
STATIC void
@@ -656,17 +655,15 @@ xfs_convert_page(
struct inode *inode,
struct page *page,
loff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
+ struct xfs_writepage_ctx *wpc,
struct writeback_control *wbc)
{
struct buffer_head *bh, *head;
xfs_off_t end_offset;
unsigned long p_offset;
- unsigned int type;
int len, page_dirty;
int count = 0, done = 0, uptodate = 1;
- xfs_off_t offset = page_offset(page);
+ xfs_off_t offset = page_offset(page);
if (page->index != tindex)
goto fail;
@@ -676,7 +673,7 @@ xfs_convert_page(
goto fail_unlock_page;
if (page->mapping != inode->i_mapping)
goto fail_unlock_page;
- if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
+ if (!xfs_check_page_type(page, wpc->ioend->io_type, false))
goto fail_unlock_page;
/*
@@ -712,7 +709,7 @@ xfs_convert_page(
* writeback. Hence for more optimal IO patterns, we should always
* avoid partial page writeback due to multiple mappings on a page here.
*/
- if (!xfs_imap_valid(inode, imap, end_offset))
+ if (!xfs_imap_valid(inode, &wpc->imap, end_offset))
goto fail_unlock_page;
len = 1 << inode->i_blkbits;
@@ -744,23 +741,22 @@ xfs_convert_page(
if (buffer_unwritten(bh) || buffer_delay(bh) ||
buffer_mapped(bh)) {
if (buffer_unwritten(bh))
- type = XFS_IO_UNWRITTEN;
+ wpc->io_type = XFS_IO_UNWRITTEN;
else if (buffer_delay(bh))
- type = XFS_IO_DELALLOC;
+ wpc->io_type = XFS_IO_DELALLOC;
else
- type = XFS_IO_OVERWRITE;
+ wpc->io_type = XFS_IO_OVERWRITE;
/*
* imap should always be valid because of the above
* partial page end_offset check on the imap.
*/
- ASSERT(xfs_imap_valid(inode, imap, offset));
+ ASSERT(xfs_imap_valid(inode, &wpc->imap, offset));
lock_buffer(bh);
- if (type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type,
- ioendp, done);
+ if (wpc->io_type != XFS_IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, wpc);
page_dirty--;
count++;
@@ -795,8 +791,7 @@ STATIC void
xfs_cluster_write(
struct inode *inode,
pgoff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
+ struct xfs_writepage_ctx *wpc,
struct writeback_control *wbc,
pgoff_t tlast)
{
@@ -812,7 +807,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc);
+ wpc, wbc);
if (done)
break;
}
@@ -1041,8 +1036,6 @@ xfs_do_writepage(
offset = page_offset(page);
do {
- int new_ioend = 0;
-
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
@@ -1091,15 +1084,6 @@ xfs_do_writepage(
wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
offset);
if (!wpc->imap_valid) {
- /*
- * If we didn't have a valid mapping then we need to
- * put the new mapping into a separate ioend structure.
- * This ensures non-contiguous extents always have
- * separate ioends, which is particularly important
- * for unwritten extent conversion at I/O completion
- * time.
- */
- new_ioend = 1;
err = xfs_map_blocks(inode, offset, &wpc->imap,
wpc->io_type);
if (err)
@@ -1111,8 +1095,7 @@ xfs_do_writepage(
lock_buffer(bh);
if (wpc->io_type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, &wpc->imap, offset);
- xfs_add_to_ioend(inode, bh, offset, wpc->io_type,
- &wpc->ioend, new_ioend);
+ xfs_add_to_ioend(inode, bh, offset, wpc);
count++;
}
@@ -1152,8 +1135,7 @@ xfs_do_writepage(
if (end_index > last_index)
end_index = last_index;
- xfs_cluster_write(inode, page->index + 1, &wpc->imap,
- &wpc->ioend, wbc, end_index);
+ xfs_cluster_write(inode, page->index + 1, wpc, wbc, end_index);
}
return 0;
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH 6/8] xfs: xfs_cluster_write is redundant
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
` (4 preceding siblings ...)
2015-08-11 22:49 ` [PATCH 5/8] xfs: writepage context needs to handle discontiguous page ranges Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-11 22:49 ` [PATCH 7/8] xfs: factor mapping out of xfs_do_writepage Dave Chinner
2015-08-11 22:49 ` [PATCH 8/8] xfs: bufferheads are not needed in ->writepage Dave Chinner
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
xfs_cluster_write() is not necessary now that xfs_vm_writepages()
aggregates writepage calls across a single mapping. This means we no
longer need to do page lookups in xfs_cluster_write, so writeback
only needs to look up the page cache once per page being written.
This also removes a large amount of mostly duplicate code between
xfs_do_writepage() and xfs_convert_page().
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 215 ++----------------------------------------------------
1 file changed, 6 insertions(+), 209 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 93bf13c..1fb1ec9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -644,179 +644,6 @@ xfs_check_page_type(
return false;
}
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
- struct inode *inode,
- struct page *page,
- loff_t tindex,
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc)
-{
- struct buffer_head *bh, *head;
- xfs_off_t end_offset;
- unsigned long p_offset;
- int len, page_dirty;
- int count = 0, done = 0, uptodate = 1;
- xfs_off_t offset = page_offset(page);
-
- if (page->index != tindex)
- goto fail;
- if (!trylock_page(page))
- goto fail;
- if (PageWriteback(page))
- goto fail_unlock_page;
- if (page->mapping != inode->i_mapping)
- goto fail_unlock_page;
- if (!xfs_check_page_type(page, wpc->ioend->io_type, false))
- goto fail_unlock_page;
-
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decremented as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- i_size_read(inode));
-
- /*
- * If the current map does not span the entire page we are about to try
- * to write, then give up. The only way we can write a page that spans
- * multiple mappings in a single writeback iteration is via the
- * xfs_vm_writepage() function. Data integrity writeback requires the
- * entire page to be written in a single attempt, otherwise the part of
- * the page we don't write here doesn't get written as part of the data
- * integrity sync.
- *
- * For normal writeback, we also don't attempt to write partial pages
- * here as it simply means that write_cache_pages() will see it under
- * writeback and ignore the page until some point in the future, at
- * which time this will be the only page in the file that needs
- * writeback. Hence for more optimal IO patterns, we should always
- * avoid partial page writeback due to multiple mappings on a page here.
- */
- if (!xfs_imap_valid(inode, &wpc->imap, end_offset))
- goto fail_unlock_page;
-
- len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
-
- /*
- * The moment we find a buffer that doesn't match our current type
- * specification or can't be written, abort the loop and start
- * writeback. As per the above xfs_imap_valid() check, only
- * xfs_vm_writepage() can handle partial page writeback fully - we are
- * limited here to the buffers that are contiguous with the current
- * ioend, and hence a buffer we can't write breaks that contiguity and
- * we have to defer the rest of the IO to xfs_vm_writepage().
- */
- bh = head = page_buffers(page);
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh))) {
- done = 1;
- break;
- }
-
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- buffer_mapped(bh)) {
- if (buffer_unwritten(bh))
- wpc->io_type = XFS_IO_UNWRITTEN;
- else if (buffer_delay(bh))
- wpc->io_type = XFS_IO_DELALLOC;
- else
- wpc->io_type = XFS_IO_OVERWRITE;
-
- /*
- * imap should always be valid because of the above
- * partial page end_offset check on the imap.
- */
- ASSERT(xfs_imap_valid(inode, &wpc->imap, offset));
-
- lock_buffer(bh);
- if (wpc->io_type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &wpc->imap, offset);
- xfs_add_to_ioend(inode, bh, offset, wpc);
-
- page_dirty--;
- count++;
- } else {
- done = 1;
- break;
- }
- } while (offset += len, (bh = bh->b_this_page) != head);
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- if (count) {
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE)
- done = 1;
- }
- xfs_start_page_writeback(page, !page_dirty, count);
-
- return done;
- fail_unlock_page:
- unlock_page(page);
- fail:
- return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
- struct inode *inode,
- pgoff_t tindex,
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc,
- pgoff_t tlast)
-{
- struct pagevec pvec;
- int done = 0, i;
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tlast) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- wpc, wbc);
- if (done)
- break;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
STATIC void
xfs_vm_invalidatepage(
struct page *page,
@@ -933,7 +760,7 @@ xfs_do_writepage(
struct buffer_head *bh, *head;
loff_t offset;
__uint64_t end_offset;
- pgoff_t end_index, last_index;
+ pgoff_t end_index;
ssize_t len;
int err, uptodate = 1;
int count = 0;
@@ -963,12 +790,9 @@ xfs_do_writepage(
if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
goto redirty;
- /* Is this page beyond the end of the file? */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_CACHE_SHIFT;
- last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-
/*
+ * Is this page beyond the end of the file?
+ *
* The page index is less than the end_index, adjust the end_offset
* to the highest offset that this page should represent.
* -----------------------------------------------------
@@ -979,6 +803,8 @@ xfs_do_writepage(
* | desired writeback range | see else |
* ---------------------------------^------------------|
*/
+ offset = i_size_read(inode);
+ end_index = offset >> PAGE_CACHE_SHIFT;
if (page->index < end_index)
end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
else {
@@ -1108,36 +934,7 @@ xfs_do_writepage(
SetPageUptodate(page);
xfs_start_page_writeback(page, 1, count);
-
- /* if there is no IO to be submitted for this page, we are done */
- if (!count)
- return 0;
-
- ASSERT(wpc->iohead);
-
- /*
- * Any errors from this point onwards need tobe reported through the IO
- * completion path as we have marked the initial page as under writeback
- * and unlocked it.
- */
- if (wpc->imap_valid) {
- xfs_off_t end_index;
-
- end_index = wpc->imap.br_startoff + wpc->imap.br_blockcount;
-
- /* to bytes */
- end_index <<= inode->i_blkbits;
-
- /* to pages */
- end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
- /* check against file size */
- if (end_index > last_index)
- end_index = last_index;
-
- xfs_cluster_write(inode, page->index + 1, wpc, wbc, end_index);
- }
-
+ ASSERT(wpc->iohead || !count);
return 0;
error:
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH 7/8] xfs: factor mapping out of xfs_do_writepage
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
` (5 preceding siblings ...)
2015-08-11 22:49 ` [PATCH 6/8] xfs: xfs_cluster_write is redundant Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
2015-08-11 22:49 ` [PATCH 8/8] xfs: bufferheads are not needed in ->writepage Dave Chinner
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Separate out the bufferhead based mapping from the writepage code so
that we have a clear separation of the page operations and the
bufferhead state.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 221 +++++++++++++++++++++++++++++-------------------------
1 file changed, 119 insertions(+), 102 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1fb1ec9..08a0205 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -741,6 +741,116 @@ xfs_writepage_submit(
return status;
}
+static int
+xfs_writepage_map(
+ struct xfs_writepage_ctx *wpc,
+ struct inode *inode,
+ struct page *page,
+ loff_t offset,
+ __uint64_t end_offset)
+{
+ struct buffer_head *bh, *head;
+ ssize_t len = 1 << inode->i_blkbits;
+ int error = 0;
+ int uptodate = 1;
+ int count = 0;
+
+ bh = head = page_buffers(page);
+ offset = page_offset(page);
+
+ do {
+ if (offset >= end_offset)
+ break;
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+
+ /*
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip
+ * buffers covering holes here.
+ */
+ if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+ wpc->imap_valid = false;
+ continue;
+ }
+
+ if (buffer_unwritten(bh)) {
+ if (wpc->io_type != XFS_IO_UNWRITTEN) {
+ wpc->io_type = XFS_IO_UNWRITTEN;
+ wpc->imap_valid = false;
+ }
+ } else if (buffer_delay(bh)) {
+ if (wpc->io_type != XFS_IO_DELALLOC) {
+ wpc->io_type = XFS_IO_DELALLOC;
+ wpc->imap_valid = false;
+ }
+ } else if (buffer_uptodate(bh)) {
+ if (wpc->io_type != XFS_IO_OVERWRITE) {
+ wpc->io_type = XFS_IO_OVERWRITE;
+ wpc->imap_valid = false;
+ }
+ } else {
+ if (PageUptodate(page))
+ ASSERT(buffer_mapped(bh));
+ /*
+ * This buffer is not uptodate and will not be
+ * written to disk. Ensure that we will put any
+ * subsequent writeable buffers into a new
+ * ioend.
+ */
+ wpc->imap_valid = false;
+ continue;
+ }
+
+ if (wpc->imap_valid)
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
+ if (!wpc->imap_valid) {
+ error = xfs_map_blocks(inode, offset, &wpc->imap,
+ wpc->io_type);
+ if (error)
+ goto out_error;
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
+ }
+ if (wpc->imap_valid) {
+ lock_buffer(bh);
+ if (wpc->io_type != XFS_IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, wpc);
+ count++;
+ }
+
+ if (!wpc->iohead)
+ wpc->iohead = wpc->ioend;
+
+ } while (offset += len, ((bh = bh->b_this_page) != head));
+
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ xfs_start_page_writeback(page, 1, count);
+ ASSERT(wpc->iohead || !count);
+ return 0;
+
+out_error:
+ /*
+ * We can only discard the page we had the IO error on if we haven't
+ * included it in the ioend above. If it has already been added to the
+ * ioend, then we can't touch it here and need to rely on IO submission
+ * to unlock it.
+ */
+ if (count)
+ xfs_start_page_writeback(page, 0, count);
+ else {
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ }
+ return error;
+}
+
/*
* Write out a dirty page.
*
@@ -757,13 +867,10 @@ xfs_do_writepage(
{
struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
- struct buffer_head *bh, *head;
loff_t offset;
__uint64_t end_offset;
pgoff_t end_index;
- ssize_t len;
- int err, uptodate = 1;
- int count = 0;
+ int error = 0;
trace_xfs_writepage(inode, page, 0, 0);
@@ -856,113 +963,23 @@ xfs_do_writepage(
end_offset = offset;
}
- len = 1 << inode->i_blkbits;
-
- bh = head = page_buffers(page);
- offset = page_offset(page);
-
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
-
- /*
- * set_page_dirty dirties all buffers in a page, independent
- * of their state. The dirty state however is entirely
- * meaningless for holes (!mapped && uptodate), so skip
- * buffers covering holes here.
- */
- if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- wpc->imap_valid = false;
- continue;
- }
-
- if (buffer_unwritten(bh)) {
- if (wpc->io_type != XFS_IO_UNWRITTEN) {
- wpc->io_type = XFS_IO_UNWRITTEN;
- wpc->imap_valid = false;
- }
- } else if (buffer_delay(bh)) {
- if (wpc->io_type != XFS_IO_DELALLOC) {
- wpc->io_type = XFS_IO_DELALLOC;
- wpc->imap_valid = false;
- }
- } else if (buffer_uptodate(bh)) {
- if (wpc->io_type != XFS_IO_OVERWRITE) {
- wpc->io_type = XFS_IO_OVERWRITE;
- wpc->imap_valid = false;
- }
- } else {
- if (PageUptodate(page))
- ASSERT(buffer_mapped(bh));
- /*
- * This buffer is not uptodate and will not be
- * written to disk. Ensure that we will put any
- * subsequent writeable buffers into a new
- * ioend.
- */
- wpc->imap_valid = false;
- continue;
- }
-
- if (wpc->imap_valid)
- wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
- offset);
- if (!wpc->imap_valid) {
- err = xfs_map_blocks(inode, offset, &wpc->imap,
- wpc->io_type);
- if (err)
- goto error;
- wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
- offset);
- }
- if (wpc->imap_valid) {
- lock_buffer(bh);
- if (wpc->io_type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &wpc->imap, offset);
- xfs_add_to_ioend(inode, bh, offset, wpc);
- count++;
- }
-
- if (!wpc->iohead)
- wpc->iohead = wpc->ioend;
-
- } while (offset += len, ((bh = bh->b_this_page) != head));
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- xfs_start_page_writeback(page, 1, count);
- ASSERT(wpc->iohead || !count);
+ error = xfs_writepage_map(wpc, inode, page, offset, end_offset);
+ if (error)
+ goto out_error;
return 0;
-error:
+out_error:
/*
* We have to fail the iohead here because we buffers locked in the
* ioend chain. If we don't do this, we'll deadlock invalidating the
* page as that tries to lock the buffers on the page. Also, because we
* have set pages under writeback, we have to run IO completion to mark
* the error state of the IO appropriately, so we can't cancel the ioend
- * directly here. That means we have to mark this page as under
- * writeback if we included any buffers from it in the ioend chain.
+ * directly here.
*/
- if (count)
- xfs_start_page_writeback(page, 0, count);
- xfs_writepage_submit(wpc, wbc, err);
-
- /*
- * We can only discard the page we had the IO error on if we haven't
- * included it in the ioend above. If it has already been errored out,
- * the it is unlocked and we can't touch it here.
- */
- if (!count) {
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
- }
- mapping_set_error(page->mapping, err);
- return err;
+ xfs_writepage_submit(wpc, wbc, error);
+ mapping_set_error(page->mapping, error);
+ return error;
redirty:
redirty_page_for_writepage(wbc, page);
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH 8/8] xfs: bufferheads are not needed in ->writepage
2015-08-11 22:49 [RFC, PATCH 0/8] xfs: get rid of xfs_cluster_write() Dave Chinner
` (6 preceding siblings ...)
2015-08-11 22:49 ` [PATCH 7/8] xfs: factor mapping out of xfs_do_writepage Dave Chinner
@ 2015-08-11 22:49 ` Dave Chinner
7 siblings, 0 replies; 16+ messages in thread
From: Dave Chinner @ 2015-08-11 22:49 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
To get rid of bufferheads from the writepage path, we have to get
rid of the bufferhead chaining that is done in the ioends to keep
track of the blocks under IO. We also mark the page clean indirectly
through bufferhead IO completion callbacks.
To move away from bufferheads, we need to track bios rather than
bufferheads, and on ioend completion we need to mark pages clean
directly. This makes it "interesting" for filesystems with sub-page
block size, because the bufferheads are used to track sub-page dirty
state. That is, only when all the bufferheads are clean is the page
marked clean. For now, we will ignore the sub-page block
size problem and address the block size = page size configuration
first. Once the bio/page handling infrastructure is in place we can
add support of sub-page block sizes.
Right now an xfs_ioend tracks a sequential region via a bufferhead
chain that is, at IO submission, converted to bios and then
submitted. A single xfs_ioend may require multiple bios to be
submitted, and so the ioend keeps a reference count of the number of
bios it needs completions from before it can process the IO
completion of the bufferhead chain across that region.
As such, we have a dual layer IO submission/completion process.
Assuming block size = page size, what we have is this:
pages +-+-+-+-+-+-+-+-+-+
bufferhead +-+-+-+-+-+-+-+-+-+
xfs_ioend +eeeeeee+eeeeeeeee+
bios +bbb+bbb+bbbbbb+bb+
So IO submission looks like:
- .writepage is given a page
- XFS creates an ioend or pulls the existing one from the
writepage context,
- XFS walks the bufferheads on the page and adds the
bufferheads to it.
- XFS will chain ioends together when some kind of IO
discontiguity occurs
- When all the page walks are complete, XFS "submits" the
ioend
- XFS walks the bufferheads, marking them as under async
writeback
- XFS walks the bufferheads again, building bios from the
pages backing the bufferheads. When bios are too large to
have more pages added to them or there is a discontinuity
in the IO mapping, the bio is submitted and a new one is
started.
On IO completion:
- xfs grabs the ioend from the bio, drops the bio and
decrements the reference count on the ioend.
- ioend reference count goes to zero, runs endio callbacks
(e.g. size update, unwritten extent conversion).
- ioend is destroyed
- destroy walks bufferhead chain on ioend, calling
bufferhead IO completion
- bufferhead IO completion calls page_end_writeback
appropriately.
IOWs, the xfs_ioend is really a mapping layer between bufferheads
and bios, and the bufferheads kind of hide us from pages in the
IO submission path.
To get rid of bufferheads, we have to get rid of the dependency on
bufferhead chaining for building bios and marking pages clean on IO
completion. What we really want is this:
pages +-+-+-+-+-+-+-+-+-+
xfs_ioend +eeeeeee+eeeeeeeee+
bios +bbb+bbb+bbbbbb+bb+
And for us to be able to hold on to the bios being completed until
they are all done before we start ioend processing. It looks like we
can use chaining via the bi_private field (i.e. a single linked
list) to attach all the bios to the ioend prior to submission, we
replace that with a reference count and a pointer to the ioend during
submission, and then rebuild the chain during IO completion. We
then don't drop the bio references until we destroy the ioend, after
we've walked all the pages held by the bios and ended writeback on
them.
This will also handle sub-page block sizes that may require multiple
bios to clean a page as long as submission always creates page
granularity ioends.
Hence IO submission should look like:
- .writepage is given a page
- XFS creates an ioend or pulls the existing one from the
writepage context
- XFS grabs the iomap from the wpc or gets a new one
- XFS checks page is adjacent to previous. Yes, checks
mapping is valid. No to either, grabs new iomap, create
new bio, chain bio to ioend. Then add page to bio,
mark page as under io.
- When all the page walks are complete, XFS "submits" the
ioend
- XFS walks the bio chain, removing them, taking references
to the ioend, bi_private = ioend, and then submitting
them in order.
On IO completion:
- xfs grabs the ioend for the bio, chains the bio back to
the ioend. Stashes the error in the ioend. drops the
reference to the ioend.
- ioend reference count goes to zero, runs endio callbacks
(e.g. size update, unwritten extent conversion).
- ioend is destroyed
- destroy walks the bio chain, calling page_end_writeback()
on the pages within, dropping bio references to free them.
Simples, yes?
In a few patches time, writepage will no longer have any bufferheads
in it. However, until we get rid of bufferheads completely, we still
need to make sure their state reflects the page state. Hence as a
stop-gap measure, the ioend bio submission and destruction will need
to walk the buffers on the pages and change their state
appropriately. This will be a wart on the side that will get removed
when bufferheads are removed from the other buffered IO paths in
XFS.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_aops.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 08a0205..e52eb0e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,7 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
+
/*
* structure owned by writepages passed to individual writepage calls
*/
--
2.5.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 16+ messages in thread