linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data
@ 2009-12-02 19:16 Jan Kara
  2009-12-02 19:16 ` [PATCH 2/3] ext4: " Jan Kara
  2009-12-09 15:42 ` [PATCH 1/3] ext3: " saeed bishara
  0 siblings, 2 replies; 5+ messages in thread
From: Jan Kara @ 2009-12-02 19:16 UTC (permalink / raw)
  To: LKML; +Cc: Andrew Morton, Jan Kara, linux-ext4

When ext3_write_begin fails after allocating some blocks or
generic_perform_write fails to copy data to write, we truncate blocks already
instantiated beyond i_size. Although these blocks were never inside i_size, we
have to truncate pagecache of these blocks so that corresponding buffers get
unmapped. Otherwise subsequent __block_prepare_write (called because we are
retrying the write) will find the buffers mapped, not call ->get_block, and
thus the page will be backed by already freed blocks leading to filesystem and
data corruption.

CC: linux-ext4@vger.kernel.org
Reported-by: James Y Knight <foom@fuhm.net>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c |   18 ++++++++++++++----
 1 files changed, 14 insertions(+), 4 deletions(-)

I will take care of merging this patch. I'm just sending it for completeness...

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 354ed3b..f9d6937 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext3_truncate_failed_write(struct inode *inode)
+{
+	truncate_inode_pages(inode->i_mapping, inode->i_size);
+	ext3_truncate(inode);
+}
+
 static int ext3_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
@@ -1209,7 +1219,7 @@ write_begin_failed:
 		unlock_page(page);
 		page_cache_release(page);
 		if (pos + len > inode->i_size)
-			ext3_truncate(inode);
+			ext3_truncate_failed_write(inode);
 	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
@@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		ext3_truncate(inode);
+		ext3_truncate_failed_write(inode);
 	return ret ? ret : copied;
 }
 
@@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		ext3_truncate(inode);
+		ext3_truncate_failed_write(inode);
 	return ret ? ret : copied;
 }
 
@@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
 	page_cache_release(page);
 
 	if (pos + len > inode->i_size)
-		ext3_truncate(inode);
+		ext3_truncate_failed_write(inode);
 	return ret ? ret : copied;
 }
 
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] ext4: Fix data / filesystem corruption when write fails to copy data
  2009-12-02 19:16 [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data Jan Kara
@ 2009-12-02 19:16 ` Jan Kara
  2009-12-09  2:26   ` tytso
  2009-12-09 15:42 ` [PATCH 1/3] ext3: " saeed bishara
  1 sibling, 1 reply; 5+ messages in thread
From: Jan Kara @ 2009-12-02 19:16 UTC (permalink / raw)
  To: LKML; +Cc: Andrew Morton, Jan Kara, linux-ext4, tytso

When ext4_write_begin fails after allocating some blocks or
generic_perform_write fails to copy data to write, we truncate blocks already
instantiated beyond i_size. Although these blocks were never inside i_size, we
have to truncate pagecache of these blocks so that corresponding buffers get
unmapped. Otherwise subsequent __block_prepare_write (called because we are
retrying the write) will find the buffers mapped, not call ->get_block, and
thus the page will be backed by already freed blocks leading to filesystem and
data corruption.

CC: linux-ext4@vger.kernel.org
CC: tytso@mit.edu
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/inode.c |   20 +++++++++++++++-----
 1 files changed, 15 insertions(+), 5 deletions(-)

Ted, will you please merge this patch? Thanks.

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c8caa5..18b9416 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1534,6 +1534,16 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext4_truncate_failed_write(struct inode *inode)
+{
+	truncate_inode_pages(inode->i_mapping, inode->i_size);
+        ext4_truncate(inode);
+}
+
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
@@ -1599,7 +1609,7 @@ retry:
 
 		ext4_journal_stop(handle);
 		if (pos + len > inode->i_size) {
-			ext4_truncate(inode);
+			ext4_truncate_failed_write(inode);
 			/*
 			 * If truncate failed early the inode might
 			 * still be on the orphan list; we need to
@@ -1709,7 +1719,7 @@ static int ext4_ordered_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		ext4_truncate(inode);
+		ext4_truncate_failed_write(inode);
 		/*
 		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1761,7 @@ static int ext4_writeback_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		ext4_truncate(inode);
+		ext4_truncate_failed_write(inode);
 		/*
 		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1824,7 @@ static int ext4_journalled_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 	if (pos + len > inode->i_size) {
-		ext4_truncate(inode);
+		ext4_truncate_failed_write(inode);
 		/*
 		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
@@ -3091,7 +3101,7 @@ retry:
 		 * i_size_read because we hold i_mutex.
 		 */
 		if (pos + len > inode->i_size)
-			ext4_truncate(inode);
+			ext4_truncate_failed_write(inode);
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] ext4: Fix data / filesystem corruption when write fails to copy data
  2009-12-02 19:16 ` [PATCH 2/3] ext4: " Jan Kara
@ 2009-12-09  2:26   ` tytso
  0 siblings, 0 replies; 5+ messages in thread
From: tytso @ 2009-12-09  2:26 UTC (permalink / raw)
  To: Jan Kara; +Cc: LKML, Andrew Morton, linux-ext4

On Wed, Dec 02, 2009 at 08:16:48PM +0100, Jan Kara wrote:
> When ext4_write_begin fails after allocating some blocks or
> generic_perform_write fails to copy data to write, we truncate blocks already
> instantiated beyond i_size. Although these blocks were never inside i_size, we
> have to truncate pagecache of these blocks so that corresponding buffers get
> unmapped. Otherwise subsequent __block_prepare_write (called because we are
> retrying the write) will find the buffers mapped, not call ->get_block, and
> thus the page will be backed by already freed blocks leading to filesystem and
> data corruption.

Added to the ext4 patch queue.

						- Ted

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data
  2009-12-02 19:16 [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data Jan Kara
  2009-12-02 19:16 ` [PATCH 2/3] ext4: " Jan Kara
@ 2009-12-09 15:42 ` saeed bishara
  2009-12-09 16:07   ` Jan Kara
  1 sibling, 1 reply; 5+ messages in thread
From: saeed bishara @ 2009-12-09 15:42 UTC (permalink / raw)
  To: Jan Kara; +Cc: LKML, Andrew Morton, linux-ext4

Hi,
I came across a data corruption bug when using ext3; this patch fixed
it. The bug exists in 2.6.31 and 2.6.32.
saeed


On Wed, Dec 2, 2009 at 9:16 PM, Jan Kara <jack@suse.cz> wrote:
> When ext3_write_begin fails after allocating some blocks or
> generic_perform_write fails to copy data to write, we truncate blocks already
> instantiated beyond i_size. Although these blocks were never inside i_size, we
> have to truncate pagecache of these blocks so that corresponding buffers get
> unmapped. Otherwise subsequent __block_prepare_write (called because we are
> retrying the write) will find the buffers mapped, not call ->get_block, and
> thus the page will be backed by already freed blocks leading to filesystem and
> data corruption.
>
> CC: linux-ext4@vger.kernel.org
> Reported-by: James Y Knight <foom@fuhm.net>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/ext3/inode.c |   18 ++++++++++++++----
>  1 files changed, 14 insertions(+), 4 deletions(-)
>
> I will take care of merging this patch. I'm just sending it for completeness...
>
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 354ed3b..f9d6937 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
>        return ext3_journal_get_write_access(handle, bh);
>  }
>
> +/*
> + * Truncate blocks that were not used by write. We have to truncate the
> + * pagecache as well so that corresponding buffers get properly unmapped.
> + */
> +static void ext3_truncate_failed_write(struct inode *inode)
> +{
> +       truncate_inode_pages(inode->i_mapping, inode->i_size);
> +       ext3_truncate(inode);
> +}
> +
>  static int ext3_write_begin(struct file *file, struct address_space *mapping,
>                                loff_t pos, unsigned len, unsigned flags,
>                                struct page **pagep, void **fsdata)
> @@ -1209,7 +1219,7 @@ write_begin_failed:
>                unlock_page(page);
>                page_cache_release(page);
>                if (pos + len > inode->i_size)
> -                       ext3_truncate(inode);
> +                       ext3_truncate_failed_write(inode);
>        }
>        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
>                goto retry;
> @@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
>        page_cache_release(page);
>
>        if (pos + len > inode->i_size)
> -               ext3_truncate(inode);
> +               ext3_truncate_failed_write(inode);
>        return ret ? ret : copied;
>  }
>
> @@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
>        page_cache_release(page);
>
>        if (pos + len > inode->i_size)
> -               ext3_truncate(inode);
> +               ext3_truncate_failed_write(inode);
>        return ret ? ret : copied;
>  }
>
> @@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
>        page_cache_release(page);
>
>        if (pos + len > inode->i_size)
> -               ext3_truncate(inode);
> +               ext3_truncate_failed_write(inode);
>        return ret ? ret : copied;
>  }
>
> --
> 1.6.4.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data
  2009-12-09 15:42 ` [PATCH 1/3] ext3: " saeed bishara
@ 2009-12-09 16:07   ` Jan Kara
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Kara @ 2009-12-09 16:07 UTC (permalink / raw)
  To: saeed bishara; +Cc: Jan Kara, LKML, Andrew Morton, linux-ext4

  Hi,

On Wed 09-12-09 17:42:12, saeed bishara wrote:
> I came across a data corruption bug when using ext3; this patch fixed
> it. The bug exists in 2.6.31 and 2.6.32.
  Yes, I plan to send the fix to stable@kernel.org so that it gets fixed in
the stable releases for these kernels as well. Thanks for your notice.

									Honza

> On Wed, Dec 2, 2009 at 9:16 PM, Jan Kara <jack@suse.cz> wrote:
> > When ext3_write_begin fails after allocating some blocks or
> > generic_perform_write fails to copy data to write, we truncate blocks already
> > instantiated beyond i_size. Although these blocks were never inside i_size, we
> > have to truncate pagecache of these blocks so that corresponding buffers get
> > unmapped. Otherwise subsequent __block_prepare_write (called because we are
> > retrying the write) will find the buffers mapped, not call ->get_block, and
> > thus the page will be backed by already freed blocks leading to filesystem and
> > data corruption.
> >
> > CC: linux-ext4@vger.kernel.org
> > Reported-by: James Y Knight <foom@fuhm.net>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/ext3/inode.c |   18 ++++++++++++++----
> >  1 files changed, 14 insertions(+), 4 deletions(-)
> >
> > I will take care of merging this patch. I'm just sending it for completeness...
> >
> > diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> > index 354ed3b..f9d6937 100644
> > --- a/fs/ext3/inode.c
> > +++ b/fs/ext3/inode.c
> > @@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
> >        return ext3_journal_get_write_access(handle, bh);
> >  }
> >
> > +/*
> > + * Truncate blocks that were not used by write. We have to truncate the
> > + * pagecache as well so that corresponding buffers get properly unmapped.
> > + */
> > +static void ext3_truncate_failed_write(struct inode *inode)
> > +{
> > +       truncate_inode_pages(inode->i_mapping, inode->i_size);
> > +       ext3_truncate(inode);
> > +}
> > +
> >  static int ext3_write_begin(struct file *file, struct address_space *mapping,
> >                                loff_t pos, unsigned len, unsigned flags,
> >                                struct page **pagep, void **fsdata)
> > @@ -1209,7 +1219,7 @@ write_begin_failed:
> >                unlock_page(page);
> >                page_cache_release(page);
> >                if (pos + len > inode->i_size)
> > -                       ext3_truncate(inode);
> > +                       ext3_truncate_failed_write(inode);
> >        }
> >        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
> >                goto retry;
> > @@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > @@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > @@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > --
> > 1.6.4.2
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> >
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-12-09 16:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-02 19:16 [PATCH 1/3] ext3: Fix data / filesystem corruption when write fails to copy data Jan Kara
2009-12-02 19:16 ` [PATCH 2/3] ext4: " Jan Kara
2009-12-09  2:26   ` tytso
2009-12-09 15:42 ` [PATCH 1/3] ext3: " saeed bishara
2009-12-09 16:07   ` Jan Kara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).