* [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification.
@ 2008-02-22 14:39 Aneesh Kumar K.V
2008-02-22 14:39 ` [PATCH] ext4: Fix fallocate error path Aneesh Kumar K.V
2008-02-22 18:10 ` [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Mingming Cao
0 siblings, 2 replies; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-02-22 14:39 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4, Aneesh Kumar K.V
We would like to get notified when a write is done on an mmap section.
This is needed with respect to preallocated areas. We split the preallocated
area into an initialized extent and an uninitialized extent in the callback. This
lets us handle ENOSPC better. Otherwise we get ENOSPC in writepage and
that would result in data loss. The changes are also needed to handle ENOSPC
when writing to an mmap section of files with holes.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/file.c | 19 ++++++++++++++-
fs/ext4/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/ext4_fs.h | 1 +
3 files changed, 79 insertions(+), 1 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 20507a2..77341c1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -123,6 +123,23 @@ force_commit:
return ret;
}
+static struct vm_operations_struct ext4_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = ext4_page_mkwrite,
+};
+
+static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct address_space *mapping = file->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+ file_accessed(file);
+ vma->vm_ops = &ext4_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+ return 0;
+}
+
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ext4_file_mmap,
.open = generic_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b5d63d..00af97d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3490,3 +3490,63 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return err;
}
+
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ unsigned long end;
+ loff_t size;
+ handle_t *handle;
+ int ret = -EINVAL, needed_blocks;
+ struct file *file = vma->vm_file;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ needed_blocks = ext4_writepage_trans_blocks(inode);
+ /* We need to take inode mutex to prevent parallel write */
+ mutex_lock(&inode->i_mutex);
+ lock_page(page);
+ size = i_size_read(inode);
+ if ((page->mapping != inode->i_mapping) ||
+ (page_offset(page) > size)) {
+ /* page got truncated out from underneath us */
+ goto out_unlock;
+ }
+
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+ end = size & ~PAGE_CACHE_MASK;
+ else
+ end = PAGE_CACHE_SIZE;
+
+ handle = ext4_journal_start(inode, needed_blocks);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out_unlock;
+ }
+ /* Will zero out the pages if buffer is marked new */
+ ret = block_prepare_write(page, 0, end, ext4_get_block);
+
+ if (!ret && ext4_should_journal_data(inode)) {
+ ret = walk_page_buffers(handle, page_buffers(page),
+ 0, end, NULL, do_journal_get_write_access);
+ if (!ret)
+ ret = walk_page_buffers(handle, page_buffers(page),
+ 0, end, NULL, write_end_fn);
+ /*
+ * we don't want to call block_commit_write in journalled mode
+ */
+ ext4_journal_stop(handle);
+ goto out_unlock;
+ }
+ if (!ret && ext4_should_order_data(inode)) {
+ ret = walk_page_buffers(handle, page_buffers(page),
+ 0, end, NULL, ext4_journal_dirty_data);
+ }
+ if (!ret)
+ ret = block_commit_write(page, 0, end);
+
+ ext4_journal_stop(handle);
+out_unlock:
+ unlock_page(page);
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 22810b1..8f5a563 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
--
1.5.4.1.97.g40aab-dirty
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH] ext4: Fix fallocate error path.
2008-02-22 14:39 [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Aneesh Kumar K.V
@ 2008-02-22 14:39 ` Aneesh Kumar K.V
2008-02-22 14:39 ` [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full Aneesh Kumar K.V
2008-02-22 18:10 ` [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Mingming Cao
1 sibling, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-02-22 14:39 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4, Aneesh Kumar K.V
Put the old extent details back if we fail to split the
uninitialized extent.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/extents.c | 26 ++++++++++++++++++++++++--
1 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 39d5315..d315cc1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2152,7 +2152,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_lblk_t iblock,
unsigned long max_blocks)
{
- struct ext4_extent *ex, newex;
+ struct ext4_extent *ex, newex, orig_ex;
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
@@ -2171,6 +2171,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
ex2 = ex;
+ orig_ex.ee_block = ex->ee_block;
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -2199,13 +2202,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
- if (err)
+ if (err) {
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
goto out;
+ }
/*
* The depth, and hence eh & ex might change
* as part of the insert above.
*/
newdepth = ext_depth(inode);
+ /*
+ * update the extent length after successfull insert of the
+ * split extent
+ */
+ orig_ex.ee_len = cpu_to_le16(ee_len -
+ ext4_ext_get_actual_len(ex3));
if (newdepth != depth) {
depth = newdepth;
ext4_ext_drop_refs(path);
@@ -2280,6 +2295,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex);
+ if (err) {
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ }
out:
return err ? err : allocated;
}
--
1.5.4.1.97.g40aab-dirty
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full
2008-02-22 14:39 ` [PATCH] ext4: Fix fallocate error path Aneesh Kumar K.V
@ 2008-02-22 14:39 ` Aneesh Kumar K.V
2008-02-22 15:07 ` Aneesh Kumar K.V
0 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-02-22 14:39 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4, Aneesh Kumar K.V
A write to a preallocated area causes the uninitialized extent to be split into an initialized
and an uninitialized extent. If we don't have space to add the new extent information, then instead
of returning an error, convert the existing uninitialized extent to an initialized one. We
need to zero out the blocks corresponding to the extent to prevent wrong data from reaching
userspace.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/extents.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 144 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d315cc1..cdc7dca 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2136,6 +2136,124 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
+static int extend_credit_for_zeroout(handle_t *handle, struct inode *inode)
+{
+ int retval = 0, needed;
+
+ if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ return 0;
+
+ /* number of filesytem blocks in one page */
+ needed = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ if (ext4_journal_extend(handle, needed) != 0)
+ retval = ext4_journal_restart(handle, needed);
+
+ return retval;
+}
+
+/* FIXME!! we need to try to merge to left or right after zerout */
+static int ext4_ext_zeroout(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock, struct ext4_extent *ex)
+{
+ ext4_lblk_t ee_block;
+ unsigned int ee_len, blkcount, blocksize;
+ loff_t pos;
+ pgoff_t index, skip_index;
+ unsigned long offset;
+ struct page *page;
+ struct address_space *mapping = inode->i_mapping;
+ struct buffer_head *head, *bh;
+ int err = 0;
+
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = blkcount = ext4_ext_get_actual_len(ex);
+ blocksize = inode->i_sb->s_blocksize;
+
+ /*
+ * find the skip index. We can't call __grab_cache_page for this
+ * because we are in the writeout of this page and we already have
+ * taken the lock on this page
+ */
+ pos = iblock << inode->i_blkbits;
+ skip_index = pos >> PAGE_CACHE_SHIFT;
+
+ while (blkcount) {
+ pos = (ee_block + ee_len - blkcount) << inode->i_blkbits;
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = (pos & (PAGE_CACHE_SIZE - 1));
+ if (index == skip_index) {
+ /* Page will already be locked via
+ * write_begin or writepage
+ */
+ read_lock_irq(&mapping->tree_lock);
+ page = radix_tree_lookup(&mapping->page_tree, index);
+ read_unlock_irq(&mapping->tree_lock);
+ if (page)
+ page_cache_get(page);
+ else
+ return -ENOMEM;
+ } else {
+ page = __grab_cache_page(mapping, index);
+ if (!page)
+ return -ENOMEM;
+ }
+
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+
+ /* extent the credit in the journal */
+ extend_credit_for_zeroout(handle, inode);
+
+ head = page_buffers(page);
+ /* Look for the buffer_head which map the block */
+ bh = head;
+ while (offset > 0) {
+ bh = bh->b_this_page;
+ offset -= blocksize;
+ }
+ offset = (pos & (PAGE_CACHE_SIZE - 1));
+
+ /* Now write all the buffer_heads in the page */
+ do {
+ set_buffer_uptodate(bh);
+ if (ext4_should_journal_data(inode)) {
+ err = ext4_journal_get_write_access(handle, bh);
+ if (err)
+ goto err_out;
+ }
+ zero_user(page, offset, blocksize);
+ offset += blocksize;
+ if (ext4_should_journal_data(inode)) {
+ err = ext4_journal_dirty_metadata(handle, bh);
+ if (err)
+ goto err_out;
+ } else {
+ if (ext4_should_order_data(inode)) {
+ err = ext4_journal_dirty_data(handle,
+ bh);
+ if (err)
+ goto err_out;
+ }
+ mark_buffer_dirty(bh);
+ }
+
+ bh = bh->b_this_page;
+ blkcount--;
+ } while ((bh != head) && (blkcount > 0));
+ /* only unlock if we have locked */
+ if (index != skip_index)
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ return 0;
+err_out:
+ unlock_page(page);
+ page_cache_release(page);
+ return err;
+}
+
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -2202,14 +2320,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
- if (err) {
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(handle, inode,
+ iblock, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
- goto out;
- }
+ return le16_to_cpu(ex->ee_len);
+
+ } else if (err)
+ goto fix_extent_len;
/*
* The depth, and hence eh & ex might change
* as part of the insert above.
@@ -2295,15 +2419,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex);
- if (err) {
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(handle, inode, iblock, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
- }
+ return le16_to_cpu(ex->ee_len);
+ } else if (err)
+ goto fix_extent_len;
out:
return err ? err : allocated;
+
+fix_extent_len:
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
}
/*
--
1.5.4.1.97.g40aab-dirty
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full
2008-02-22 14:39 ` [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full Aneesh Kumar K.V
@ 2008-02-22 15:07 ` Aneesh Kumar K.V
0 siblings, 0 replies; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-02-22 15:07 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4
Test results for the patch.
mmaptest simply maps a range and writes to it.
The length of the extent indicates whether it is initialized or not.
There is no space in the file system for another block,
so the uninitialized extent has to be converted to an initialized extent.
root@qemu-image:/ext4# /root/mmaptest testfile 0 100
mmaping 0 to 100
[kvaneesh@llm59 linux-2.6.25-rc2]$ ~/ext4migrate --display /home/kvaneesh/test-images/ext3.img testfile
extent: block=0-3 len=4 start=1153 start_hi=0
extent: block=4-32807 len=32804 start=1157 start_hi=0
root@qemu-image:/ext4# dd if=/root/a.c of=testfile seek=6 bs=4096 conv=notrunc
[kvaneesh@llm59 linux-2.6.25-rc2]$ ~/ext4migrate --display /home/kvaneesh/test-images/ext3.img testfile
extent: block=0-3 len=4 start=1153 start_hi=0
extent: block=4-32791 len=32788 start=1157 start_hi=0
extent: block=24-24 len=1 start=1177 start_hi=0
extent: block=25-32807 len=32783 start=1178 start_hi=0
root@qemu-image:/ext4# dd if=/root/a.c of=testfile seek=8 bs=4096 conv=notrunc
[kvaneesh@llm59 linux-2.6.25-rc2]$ ~/ext4migrate --display /home/kvaneesh/test-images/ext3.img testfile
extent: block=0-3 len=4 start=1153 start_hi=0
extent: block=4-32791 len=32788 start=1157 start_hi=0
extent: block=24-24 len=1 start=1177 start_hi=0
extent: block=25-39 len=15 start=1178 start_hi=0
root@qemu-image:/ext4# /root/mmaptest testfile 4096 5000
mmaping 4096 to 5000
[kvaneesh@llm59 linux-2.6.25-rc2]$ ~/ext4migrate --display /home/kvaneesh/test-images/ext3.img testfile
extent: block=0-3 len=4 start=1153 start_hi=0
extent: block=4-23 len=20 start=1157 start_hi=0
extent: block=24-24 len=1 start=1177 start_hi=0
extent: block=25-39 len=15 start=1178 start_hi=0
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification.
2008-02-22 14:39 [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Aneesh Kumar K.V
2008-02-22 14:39 ` [PATCH] ext4: Fix fallocate error path Aneesh Kumar K.V
@ 2008-02-22 18:10 ` Mingming Cao
2008-02-22 18:23 ` Aneesh Kumar K.V
1 sibling, 1 reply; 9+ messages in thread
From: Mingming Cao @ 2008-02-22 18:10 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: tytso, linux-ext4
On Fri, 2008-02-22 at 20:09 +0530, Aneesh Kumar K.V wrote:
> We would like to get notified when we are doing a write on mmap section.
> This is needed with respect to preallocated area. We split the preallocated
> area into initialzed extent and uninitialzed extent in the call back. This
> let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
> that would result in data loss. The changes are also needed to handle ENOSPC
> when writing to an mmap section of files with holes.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
> fs/ext4/file.c | 19 ++++++++++++++-
> fs/ext4/inode.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/ext4_fs.h | 1 +
> 3 files changed, 79 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 20507a2..77341c1 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -123,6 +123,23 @@ force_commit:
> return ret;
> }
>
> +static struct vm_operations_struct ext4_file_vm_ops = {
> + .fault = filemap_fault,
> + .page_mkwrite = ext4_page_mkwrite,
> +};
> +
> +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct address_space *mapping = file->f_mapping;
> +
> + if (!mapping->a_ops->readpage)
> + return -ENOEXEC;
> + file_accessed(file);
> + vma->vm_ops = &ext4_file_vm_ops;
> + vma->vm_flags |= VM_CAN_NONLINEAR;
> + return 0;
> +}
> +
> const struct file_operations ext4_file_operations = {
> .llseek = generic_file_llseek,
> .read = do_sync_read,
> @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = ext4_compat_ioctl,
> #endif
> - .mmap = generic_file_mmap,
> + .mmap = ext4_file_mmap,
> .open = generic_file_open,
> .release = ext4_release_file,
> .fsync = ext4_sync_file,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 5b5d63d..00af97d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3490,3 +3490,63 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>
> return err;
> }
> +
> +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> + unsigned long end;
> + loff_t size;
> + handle_t *handle;
> + int ret = -EINVAL, needed_blocks;
> + struct file *file = vma->vm_file;
> + struct inode *inode = file->f_path.dentry->d_inode;
> +
> + needed_blocks = ext4_writepage_trans_blocks(inode);
> + /* We need to take inode mutex to prevent parallel write */
> + mutex_lock(&inode->i_mutex);
> + lock_page(page);
> + size = i_size_read(inode);
> + if ((page->mapping != inode->i_mapping) ||
> + (page_offset(page) > size)) {
> + /* page got truncated out from underneath us */
> + goto out_unlock;
> + }
> +
> + /* page is wholly or partially inside EOF */
> + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
> + end = size & ~PAGE_CACHE_MASK;
> + else
> + end = PAGE_CACHE_SIZE;
> +
> + handle = ext4_journal_start(inode, needed_blocks);
> + if (IS_ERR(handle)) {
> + ret = PTR_ERR(handle);
> + goto out_unlock;
> + }
> + /* Will zero out the pages if buffer is marked new */
> + ret = block_prepare_write(page, 0, end, ext4_get_block);
> +
> + if (!ret && ext4_should_journal_data(inode)) {
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, do_journal_get_write_access);
> + if (!ret)
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, write_end_fn);
> + /*
> + * we don't want to call block_commit_write in journalled mode
> + */
> + ext4_journal_stop(handle);
> + goto out_unlock;
> + }
> + if (!ret && ext4_should_order_data(inode)) {
> + ret = walk_page_buffers(handle, page_buffers(page),
> + 0, end, NULL, ext4_journal_dirty_data);
> + }
> + if (!ret)
> + ret = block_commit_write(page, 0, end);
> +
Hmm, it seems weird to do commit_write when the page is about to become
writable, but maybe that's the way it needs to be?
Don't we need to update the i_size somewhere?
> + ext4_journal_stop(handle);
> +out_unlock:
> + unlock_page(page);
> + mutex_unlock(&inode->i_mutex);
> + return ret;
> +}
It seems this combines the prepare_write() code of the three journalling
modes here :(
Since prepare_write() and commit_write() are going to be sunset, why not
simply call mapping->a_ops->write_begin() and then write_end()? That
should take care of pretty much all of the journalling and the page operations,
no?
Mingming
> diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
> index 22810b1..8f5a563 100644
> --- a/include/linux/ext4_fs.h
> +++ b/include/linux/ext4_fs.h
> @@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
> extern int ext4_writepage_trans_blocks(struct inode *);
> extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
> struct address_space *mapping, loff_t from);
> +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification.
2008-02-22 18:10 ` [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Mingming Cao
@ 2008-02-22 18:23 ` Aneesh Kumar K.V
2008-02-22 19:28 ` Mingming Cao
0 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-02-22 18:23 UTC (permalink / raw)
To: Mingming Cao; +Cc: tytso, linux-ext4
On Fri, Feb 22, 2008 at 10:10:48AM -0800, Mingming Cao wrote:
> On Fri, 2008-02-22 at 20:09 +0530, Aneesh Kumar K.V wrote:
.....
> > + ext4_journal_stop(handle);
> > + goto out_unlock;
> > + }
> > + if (!ret && ext4_should_order_data(inode)) {
> > + ret = walk_page_buffers(handle, page_buffers(page),
> > + 0, end, NULL, ext4_journal_dirty_data);
> > + }
> > + if (!ret)
> > + ret = block_commit_write(page, 0, end);
> > +
> Hmm, it seems wired to do commit_write when the page is about becoming
> writable, but maybe that's the way it needs to?
>
> Don't we need to update the i_size somewhere?
block_commit_write simply iterates over the buffer_heads of the page and marks them
dirty. That is why we don't want to call it in data=journalled mode.
>
> > + ext4_journal_stop(handle);
> > +out_unlock:
> > + unlock_page(page);
> > + mutex_unlock(&inode->i_mutex);
> > + return ret;
> > +}
>
> It seems this combined the three journalling mode prepare_write() code
> here:(
>
> Since prepare_write() and commit_write() is going to sunset, why not
> simply calling mappings->a_ops->write_begin() and then write_end()? that
> should take care of pretty much the journalling and the page operations,
> no?
write_begin and write_end work with a user space buffer. In this case
we don't have one. Also, what ext4_page_mkwrite does is mostly what
write_begin/write_end do, except for the copy from the user space buffer.
-aneesh
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification.
2008-02-22 18:23 ` Aneesh Kumar K.V
@ 2008-02-22 19:28 ` Mingming Cao
0 siblings, 0 replies; 9+ messages in thread
From: Mingming Cao @ 2008-02-22 19:28 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: tytso, linux-ext4
On Fri, 2008-02-22 at 23:53 +0530, Aneesh Kumar K.V wrote:
> On Fri, Feb 22, 2008 at 10:10:48AM -0800, Mingming Cao wrote:
> > On Fri, 2008-02-22 at 20:09 +0530, Aneesh Kumar K.V wrote:
>
> .....
>
> > > + ext4_journal_stop(handle);
> > > + goto out_unlock;
> > > + }
> > > + if (!ret && ext4_should_order_data(inode)) {
> > > + ret = walk_page_buffers(handle, page_buffers(page),
> > > + 0, end, NULL, ext4_journal_dirty_data);
> > > + }
> > > + if (!ret)
> > > + ret = block_commit_write(page, 0, end);
> > > +
> > Hmm, it seems wired to do commit_write when the page is about becoming
> > writable, but maybe that's the way it needs to?
> >
> > Don't we need to update the i_size somewhere?
>
Ah, i_size doesn't change with mapped IO.
> block_commit_write simply iterate over buffer_head of page and mark them
> dirty. That is why we don't want to call that for data=journalled mode.
>
Right, but it still seems odd to mark the buffer_head dirty *before*
the write happens.
I am confused: if i_size is not changing, then what are we journalling?
Is it to keep journal ordering? But we haven't written anything yet....
Mingming
> >
> > > + ext4_journal_stop(handle);
> > > +out_unlock:
> > > + unlock_page(page);
> > > + mutex_unlock(&inode->i_mutex);
> > > + return ret;
> > > +}
> >
> > It seems this combined the three journalling mode prepare_write() code
> > here:(
> >
> > Since prepare_write() and commit_write() is going to sunset, why not
> > simply calling mappings->a_ops->write_begin() and then write_end()? that
> > should take care of pretty much the journalling and the page operations,
> > no?
>
> write_begin and write_end works with the user space buffer. In this case
> we don't have one. Also what ext4_page_mkwrite does is mostly what
> write_begin/write_end does except the copy of user space buffer.
>
>
> -aneesh
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full
@ 2008-03-04 12:43 Aneesh Kumar K.V
2008-03-05 0:47 ` Mingming Cao
0 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2008-03-04 12:43 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4, Aneesh Kumar K.V
A write to a preallocated area causes the uninitialized extent to be
split into an initialized and an uninitialized extent. If we don't have
space to add the new extent information, then instead of returning an error,
convert the existing uninitialized extent to an initialized one. We
need to zero out the blocks corresponding to the extent to prevent
wrong data from reaching userspace.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/extents.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 99 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d315cc1..839caf2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2136,6 +2136,80 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
+static void bi_complete(struct bio *bio, int error)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+/* FIXME!! we need to try to merge to left or right after zerout */
+static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+ int ret = -EIO;
+ struct bio *bio;
+ int blkbits, blocksize;
+ sector_t ee_pblock;
+ struct completion event;
+ unsigned int ee_len, len, done, offset;
+
+
+ blkbits = inode->i_blkbits;
+ blocksize = inode->i_sb->s_blocksize;
+ ee_len = ext4_ext_get_actual_len(ex);
+ ee_pblock = ext_pblock(ex);
+
+ /* convert ee_pblock in 512 byte sector */
+ ee_pblock = ee_pblock << (blkbits >> 9);
+
+
+ while (ee_len > 0) {
+
+ if (ee_len > BIO_MAX_PAGES)
+ len = BIO_MAX_PAGES;
+ else
+ len = ee_len;
+
+ bio = bio_alloc(GFP_NOIO, len);
+ if (!bio)
+ return -ENOMEM;
+ bio->bi_sector = ee_pblock;
+ bio->bi_bdev = inode->i_sb->s_bdev;
+
+ done = 0;
+ offset = 0;
+ while (done < len) {
+ ret = bio_add_page(bio, ZERO_PAGE(0),
+ blocksize, offset);
+ if (ret != blocksize) {
+ /* We can't add any more page because of
+ * hardware limitation. Start a new bio
+ */
+ break;
+ }
+ done++;
+ offset += blocksize;
+ if (offset >= PAGE_CACHE_SIZE)
+ offset = 0;
+ }
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(WRITE, bio);
+ wait_for_completion(&event);
+
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ ret = 0;
+ else {
+ ret = -EIO;
+ break;
+ }
+ bio_put(bio);
+ ee_len -= done;
+ ee_pblock += done << (blkbits - 9);
+ }
+ return ret;
+}
+
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -2202,14 +2276,19 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
- if (err) {
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
- goto out;
- }
+ return le16_to_cpu(ex->ee_len);
+
+ } else if (err)
+ goto fix_extent_len;
/*
* The depth, and hence eh & ex might change
* as part of the insert above.
@@ -2295,15 +2374,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex);
- if (err) {
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
- }
+ return le16_to_cpu(ex->ee_len);
+ } else if (err)
+ goto fix_extent_len;
out:
return err ? err : allocated;
+
+fix_extent_len:
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
}
/*
--
1.5.4.3.422.g34cd6.dirty
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full
2008-03-04 12:43 [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full Aneesh Kumar K.V
@ 2008-03-05 0:47 ` Mingming Cao
0 siblings, 0 replies; 9+ messages in thread
From: Mingming Cao @ 2008-03-05 0:47 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: tytso, linux-ext4
On Tue, 2008-03-04 at 18:13 +0530, Aneesh Kumar K.V wrote:
> A write to prealloc area cause the split of unititalized extent
> into a initialized and uninitialized extent. If we don't have
> space to add new extent information instead of returning error
> convert the existing uninitialized extent to initialized one. We
> need to zero out the blocks corresponding to the extent to prevent
> wrong data reaching userspace.
>
Looks good!
Added to patch queue. I modified the summary slightly.
Mingming
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
> fs/ext4/extents.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 files changed, 99 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index d315cc1..839caf2 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2136,6 +2136,80 @@ void ext4_ext_release(struct super_block *sb)
> #endif
> }
>
> +static void bi_complete(struct bio *bio, int error)
> +{
> + complete((struct completion *)bio->bi_private);
> +}
> +
> +/* FIXME!! we need to try to merge to left or right after zerout */
> +static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
> +{
> + int ret = -EIO;
> + struct bio *bio;
> + int blkbits, blocksize;
> + sector_t ee_pblock;
> + struct completion event;
> + unsigned int ee_len, len, done, offset;
> +
> +
> + blkbits = inode->i_blkbits;
> + blocksize = inode->i_sb->s_blocksize;
> + ee_len = ext4_ext_get_actual_len(ex);
> + ee_pblock = ext_pblock(ex);
> +
> + /* convert ee_pblock in 512 byte sector */
> + ee_pblock = ee_pblock << (blkbits >> 9);
> +
> +
> + while (ee_len > 0) {
> +
> + if (ee_len > BIO_MAX_PAGES)
> + len = BIO_MAX_PAGES;
> + else
> + len = ee_len;
> +
> + bio = bio_alloc(GFP_NOIO, len);
> + if (!bio)
> + return -ENOMEM;
> + bio->bi_sector = ee_pblock;
> + bio->bi_bdev = inode->i_sb->s_bdev;
> +
> + done = 0;
> + offset = 0;
> + while (done < len) {
> + ret = bio_add_page(bio, ZERO_PAGE(0),
> + blocksize, offset);
> + if (ret != blocksize) {
> + /* We can't add any more page because of
> + * hardware limitation. Start a new bio
> + */
> + break;
> + }
> + done++;
> + offset += blocksize;
> + if (offset >= PAGE_CACHE_SIZE)
> + offset = 0;
> + }
> +
> + init_completion(&event);
> + bio->bi_private = &event;
> + bio->bi_end_io = bi_complete;
> + submit_bio(WRITE, bio);
> + wait_for_completion(&event);
> +
> + if (test_bit(BIO_UPTODATE, &bio->bi_flags))
> + ret = 0;
> + else {
> + ret = -EIO;
> + break;
> + }
> + bio_put(bio);
> + ee_len -= done;
> + ee_pblock += done << (blkbits - 9);
> + }
> + return ret;
> +}
> +
> /*
> * This function is called by ext4_ext_get_blocks() if someone tries to write
> * to an uninitialized extent. It may result in splitting the uninitialized
> @@ -2202,14 +2276,19 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> ex3->ee_len = cpu_to_le16(allocated - max_blocks);
> ext4_ext_mark_uninitialized(ex3);
> err = ext4_ext_insert_extent(handle, inode, path, ex3);
> - if (err) {
> + if (err == -ENOSPC) {
> + err = ext4_ext_zeroout(inode, &orig_ex);
> + if (err)
> + goto fix_extent_len;
> + /* update the extent length and mark as initialized */
> ex->ee_block = orig_ex.ee_block;
> ex->ee_len = orig_ex.ee_len;
> ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> - ext4_ext_mark_uninitialized(ex);
> ext4_ext_dirty(handle, inode, path + depth);
> - goto out;
> - }
> + return le16_to_cpu(ex->ee_len);
> +
> + } else if (err)
> + goto fix_extent_len;
> /*
> * The depth, and hence eh & ex might change
> * as part of the insert above.
> @@ -2295,15 +2374,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> goto out;
> insert:
> err = ext4_ext_insert_extent(handle, inode, path, &newex);
> - if (err) {
> + if (err == -ENOSPC) {
> + err = ext4_ext_zeroout(inode, &orig_ex);
> + if (err)
> + goto fix_extent_len;
> + /* update the extent length and mark as initialized */
> ex->ee_block = orig_ex.ee_block;
> ex->ee_len = orig_ex.ee_len;
> ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> - ext4_ext_mark_uninitialized(ex);
> ext4_ext_dirty(handle, inode, path + depth);
> - }
> + return le16_to_cpu(ex->ee_len);
> + } else if (err)
> + goto fix_extent_len;
> out:
> return err ? err : allocated;
> +
> +fix_extent_len:
> + ex->ee_block = orig_ex.ee_block;
> + ex->ee_len = orig_ex.ee_len;
> + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> + ext4_ext_mark_uninitialized(ex);
> + ext4_ext_dirty(handle, inode, path + depth);
> + return err;
> }
>
> /*
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-03-05 0:47 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-22 14:39 [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Aneesh Kumar K.V
2008-02-22 14:39 ` [PATCH] ext4: Fix fallocate error path Aneesh Kumar K.V
2008-02-22 14:39 ` [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full Aneesh Kumar K.V
2008-02-22 15:07 ` Aneesh Kumar K.V
2008-02-22 18:10 ` [PATCH] ext4: Use page_mkwrite vma_operations to get mmap write notification Mingming Cao
2008-02-22 18:23 ` Aneesh Kumar K.V
2008-02-22 19:28 ` Mingming Cao
-- strict thread matches above, loose matches on Subject: below --
2008-03-04 12:43 [PATCH] ext4: Convert uninitialized extent to initialized extent in case of file system full Aneesh Kumar K.V
2008-03-05 0:47 ` Mingming Cao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).