* [Cluster-devel] [PATCH v2] GFS2: directly write blocks past i_size
@ 2011-03-18 2:54 Benjamin Marzinski
2011-03-21 10:08 ` Steven Whitehouse
0 siblings, 1 reply; 2+ messages in thread
From: Benjamin Marzinski @ 2011-03-18 2:54 UTC (permalink / raw)
To: cluster-devel.redhat.com
GFS2 was relying on the writepage code to write out the zeroed data for
fallocate. However, with FALLOC_FL_KEEP_SIZE set, that data may lie past
i_size. If it does, the writepage code will ignore it. To work around this,
gfs2 now calls write_dirty_buffer directly on the buffer_heads when
FALLOC_FL_KEEP_SIZE is set and it is writing past i_size.
This version is just a cleanup of my last version.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
fs/gfs2/file.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 48 insertions(+), 10 deletions(-)
Index: gfs2-2.6-nmw/fs/gfs2/file.c
===================================================================
--- gfs2-2.6-nmw.orig/fs/gfs2/file.c
+++ gfs2-2.6-nmw/fs/gfs2/file.c
@@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struc
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
-static void empty_write_end(struct page *page, unsigned from,
- unsigned to)
+static int empty_write_end(struct page *page, unsigned from,
+ unsigned to, int mode)
{
- struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+ struct inode *inode = page->mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct buffer_head *bh;
+ unsigned offset, blksize = 1 << inode->i_blkbits;
+ pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
zero_user(page, from, to-from);
mark_page_accessed(page);
- if (!gfs2_is_writeback(ip))
- gfs2_page_add_databufs(ip, page, from, to);
+ if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
+ if (!gfs2_is_writeback(ip))
+ gfs2_page_add_databufs(ip, page, from, to);
+
+ block_commit_write(page, from, to);
+ return 0;
+ }
+
+ offset = 0;
+ bh = page_buffers(page);
+ while (offset < to) {
+ if (offset >= from) {
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ clear_buffer_new(bh);
+ write_dirty_buffer(bh, WRITE);
+ }
+ offset += blksize;
+ bh = bh->b_this_page;
+ }
- block_commit_write(page, from, to);
+ offset = 0;
+ bh = page_buffers(page);
+ while (offset < to) {
+ if (offset >= from) {
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ return -EIO;
+ }
+ offset += blksize;
+ bh = bh->b_this_page;
+ }
+ return 0;
}
static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +676,8 @@ static int needs_empty_write(sector_t bl
return !buffer_mapped(&bh_map);
}
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
+ int mode)
{
struct inode *inode = page->mapping->host;
unsigned start, end, next, blksize;
@@ -668,7 +702,9 @@ static int write_empty_blocks(struct pag
gfs2_block_map);
if (unlikely(ret))
return ret;
- empty_write_end(page, start, end);
+ ret = empty_write_end(page, start, end, mode);
+ if (unlikely(ret))
+ return ret;
end = 0;
}
start = next;
@@ -682,7 +718,9 @@ static int write_empty_blocks(struct pag
ret = __block_write_begin(page, start, end - start, gfs2_block_map);
if (unlikely(ret))
return ret;
- empty_write_end(page, start, end);
+ ret = empty_write_end(page, start, end, mode);
+ if (unlikely(ret))
+ return ret;
}
return 0;
@@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode
if (curr == end)
to = end_offset;
- error = write_empty_blocks(page, from, to);
+ error = write_empty_blocks(page, from, to, mode);
if (!error && offset + to > inode->i_size &&
!(mode & FALLOC_FL_KEEP_SIZE)) {
i_size_write(inode, offset + to);
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Cluster-devel] [PATCH v2] GFS2: directly write blocks past i_size
2011-03-18 2:54 [Cluster-devel] [PATCH v2] GFS2: directly write blocks past i_size Benjamin Marzinski
@ 2011-03-21 10:08 ` Steven Whitehouse
0 siblings, 0 replies; 2+ messages in thread
From: Steven Whitehouse @ 2011-03-21 10:08 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
That looks much better, thanks for fixing that. I've pushed it into the
-fixes tree.
Steve.
On Thu, 2011-03-17 at 21:54 -0500, Benjamin Marzinski wrote:
> GFS2 was relying on the writepage code to write out the zeroed data for
> fallocate. However, with FALLOC_FL_KEEP_SIZE set, this may be past i_size.
> If it is, it will be ignored. To work around this, gfs2 now calls
> write_dirty_buffer directly on the buffer_heads when FALLOC_FL_KEEP_SIZE
> is set, and it's writing past i_size.
>
> This version is just a cleanup of my last version
>
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
> fs/gfs2/file.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 48 insertions(+), 10 deletions(-)
>
> Index: gfs2-2.6-nmw/fs/gfs2/file.c
> ===================================================================
> --- gfs2-2.6-nmw.orig/fs/gfs2/file.c
> +++ gfs2-2.6-nmw/fs/gfs2/file.c
> @@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struc
> return generic_file_aio_write(iocb, iov, nr_segs, pos);
> }
>
> -static void empty_write_end(struct page *page, unsigned from,
> - unsigned to)
> +static int empty_write_end(struct page *page, unsigned from,
> + unsigned to, int mode)
> {
> - struct gfs2_inode *ip = GFS2_I(page->mapping->host);
> + struct inode *inode = page->mapping->host;
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct buffer_head *bh;
> + unsigned offset, blksize = 1 << inode->i_blkbits;
> + pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
>
> zero_user(page, from, to-from);
> mark_page_accessed(page);
>
> - if (!gfs2_is_writeback(ip))
> - gfs2_page_add_databufs(ip, page, from, to);
> + if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
> + if (!gfs2_is_writeback(ip))
> + gfs2_page_add_databufs(ip, page, from, to);
> +
> + block_commit_write(page, from, to);
> + return 0;
> + }
> +
> + offset = 0;
> + bh = page_buffers(page);
> + while (offset < to) {
> + if (offset >= from) {
> + set_buffer_uptodate(bh);
> + mark_buffer_dirty(bh);
> + clear_buffer_new(bh);
> + write_dirty_buffer(bh, WRITE);
> + }
> + offset += blksize;
> + bh = bh->b_this_page;
> + }
>
> - block_commit_write(page, from, to);
> + offset = 0;
> + bh = page_buffers(page);
> + while (offset < to) {
> + if (offset >= from) {
> + wait_on_buffer(bh);
> + if (!buffer_uptodate(bh))
> + return -EIO;
> + }
> + offset += blksize;
> + bh = bh->b_this_page;
> + }
> + return 0;
> }
>
> static int needs_empty_write(sector_t block, struct inode *inode)
> @@ -643,7 +676,8 @@ static int needs_empty_write(sector_t bl
> return !buffer_mapped(&bh_map);
> }
>
> -static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
> +static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
> + int mode)
> {
> struct inode *inode = page->mapping->host;
> unsigned start, end, next, blksize;
> @@ -668,7 +702,9 @@ static int write_empty_blocks(struct pag
> gfs2_block_map);
> if (unlikely(ret))
> return ret;
> - empty_write_end(page, start, end);
> + ret = empty_write_end(page, start, end, mode);
> + if (unlikely(ret))
> + return ret;
> end = 0;
> }
> start = next;
> @@ -682,7 +718,9 @@ static int write_empty_blocks(struct pag
> ret = __block_write_begin(page, start, end - start, gfs2_block_map);
> if (unlikely(ret))
> return ret;
> - empty_write_end(page, start, end);
> + ret = empty_write_end(page, start, end, mode);
> + if (unlikely(ret))
> + return ret;
> }
>
> return 0;
> @@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode
>
> if (curr == end)
> to = end_offset;
> - error = write_empty_blocks(page, from, to);
> + error = write_empty_blocks(page, from, to, mode);
> if (!error && offset + to > inode->i_size &&
> !(mode & FALLOC_FL_KEEP_SIZE)) {
> i_size_write(inode, offset + to);
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2011-03-21 10:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-18 2:54 [Cluster-devel] [PATCH v2] GFS2: directly write blocks past i_size Benjamin Marzinski
2011-03-21 10:08 ` Steven Whitehouse
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).