linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: amir73il@users.sourceforge.net
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, Amir Goldstein <amir73il@users.sf.net>,
	Yongqiang Yang <xiaoqiangnk@gmail.com>
Subject: [PATCH RFC 06/30] ext4: snapshot hooks - move data blocks
Date: Mon,  9 May 2011 19:41:24 +0300	[thread overview]
Message-ID: <1304959308-11122-7-git-send-email-amir73il@users.sourceforge.net> (raw)
In-Reply-To: <1304959308-11122-1-git-send-email-amir73il@users.sourceforge.net>

From: Amir Goldstein <amir73il@users.sf.net>

Before every regular file data buffer write, the function
ext4_get_block() is called to map the buffer to disk. We add a
new function ext4_get_block_mow() which is called when we want to
snapshot the blocks. We use this hook to call the snapshot API
snapshot_get_move_access(), to optionally move the block
to the snapshot file.

Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
---
 fs/ext4/ext4.h      |   15 +++-
 fs/ext4/ext4_jbd2.h |   17 ++++
 fs/ext4/inode.c     |  242 +++++++++++++++++++++++++++++++++++++++++++++++----
 fs/ext4/mballoc.c   |   23 +++++
 4 files changed, 280 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4e9e46a..013eec2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -156,9 +156,10 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+#define EXT4_MAP_REMAP		(1 << BH_Remap)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_REMAP)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -525,6 +526,12 @@ struct ext4_new_group_data {
 	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+	/* Look up if mapped block is used by snapshot,
+	 * if so and EXT4_GET_BLOCKS_CREATE is set, move it to snapshot
+	 * and allocate a new block for new data.
+	 * if EXT4_GET_BLOCKS_CREATE is not set, return REMAP flags.
+	 */
+#define EXT4_GET_BLOCKS_MOVE_ON_WRITE		0x0100
 
 /*
  * Flags used by ext4_free_blocks
@@ -2128,10 +2135,16 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	  = BH_JBDPrivateStart,
+	BH_Remap,	/* Data block need to be remapped,
+			 * now used by snapshot to do mow
+			 */
+	BH_Partial_Write,	/* Buffer should be uptodate before write */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
+BUFFER_FNS(Remap, remap)
+BUFFER_FNS(Partial_Write, partial_write)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 707b810..1c119cc 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -360,5 +360,22 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
 }
 
 #ifdef CONFIG_EXT4_FS_SNAPSHOT
+/*
+ * check if @inode data blocks should be moved-on-write
+ */
+static inline int ext4_snapshot_should_move_data(struct inode *inode)
+{
+	if (!EXT4_SNAPSHOTS(inode->i_sb))
+		return 0;
+	if (EXT4_JOURNAL(inode) == NULL)
+		return 0;
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		return 0;
+	/* when a data block is journaled, it is already COWed as metadata */
+	if (ext4_should_journal_data(inode))
+		return 0;
+	return 1;
+}
+
 #endif
 #endif	/* _EXT4_JBD2_H */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b848072..3ed64bb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -78,7 +78,8 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
+static int ext4_bh_delay_or_unwritten_or_remap(handle_t *handle,
+		struct buffer_head *bh);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -987,6 +988,51 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 
 	partial = ext4_get_branch(inode, depth, offsets, chain, &err);
 
+	err = 0;
+	if (!partial && (flags & EXT4_GET_BLOCKS_MOVE_ON_WRITE)) {
+		BUG_ON(!ext4_snapshot_should_move_data(inode));
+		first_block = le32_to_cpu(chain[depth - 1].key);
+		if (!(flags & EXT4_GET_BLOCKS_CREATE)) {
+			/*
+			 * First call from ext4_map_blocks():
+			 * test if first_block should be moved to snapshot?
+			 */
+			err = ext4_snapshot_get_move_access(handle, inode,
+							    first_block,
+							    &map->m_len, 0);
+			if (err < 0) {
+				/* cleanup the whole chain and exit */
+				partial = chain + depth - 1;
+				goto cleanup;
+			}
+			if (err > 0) {
+				/*
+				 * Return EXT4_MAP_REMAP via map->m_flags
+				 * to tell ext4_map_blocks() that the
+				 * found block should be moved to snapshot.
+				 */
+				map->m_flags |= EXT4_MAP_REMAP;
+			}
+			/*
+			 * Set max. blocks to map to max. blocks, which
+			 * ext4_snapshot_get_move_access() allows us to handle
+			 * (move or not move) in one ext4_map_blocks() call.
+			 */
+			err = 0;
+		} else if (map->m_flags & EXT4_MAP_REMAP &&
+				map->m_pblk == first_block) {
+			/*
+			 * Second call from ext4_map_blocks():
+			 * If mapped block hasn't change, we can rely the
+			 * cached result from the first call.
+			 */
+			err = 1;
+		}
+	}
+	if (err)
+		/* do not map found block - it should be moved to snapshot */
+		partial = chain + depth - 1;
+
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
@@ -1021,8 +1067,12 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * Next look up the indirect map to count the totoal number of
 	 * direct blocks to allocate for this branch.
 	 */
-	count = ext4_blks_to_allocate(partial, indirect_blks,
-				      map->m_len, blocks_to_boundary);
+	if (map->m_flags & EXT4_MAP_REMAP) {
+		BUG_ON(indirect_blks != 0);
+		count = map->m_len;
+	} else
+		count = ext4_blks_to_allocate(partial, indirect_blks,
+					      map->m_len, blocks_to_boundary);
 	/*
 	 * Block out ext4_truncate while we alter the tree
 	 */
@@ -1030,6 +1080,23 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 				&count, goal,
 				offsets + (partial - chain), partial);
 
+	if (map->m_flags & EXT4_MAP_REMAP) {
+		map->m_len = count;
+		/* move old block to snapshot */
+		err = ext4_snapshot_get_move_access(handle, inode,
+						    le32_to_cpu(*(partial->p)),
+						    &map->m_len, 1);
+		if (err <= 0) {
+			/* failed to move to snapshot - abort! */
+			err = err ? : -EIO;
+			ext4_journal_abort_handle(__func__, __LINE__,
+					"ext4_snapshot_get_move_access", NULL,
+					handle, err);
+			goto cleanup;
+		}
+		/* block moved to snapshot - continue to splice new block */
+		err = 0;
+	}
 	/*
 	 * The ext4_splice_branch call will free and forget any buffers
 	 * on the new chain if there is a failure, but that risks using
@@ -1045,7 +1112,8 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 
 	map->m_flags |= EXT4_MAP_NEW;
 
-	ext4_update_inode_fsync_trans(handle, inode, 1);
+	if (!IS_COWING(handle))
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
 	map->m_flags |= EXT4_MAP_MAPPED;
 	map->m_pblk = le32_to_cpu(chain[depth-1].key);
@@ -1291,7 +1359,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		retval = ext4_ext_map_blocks(handle, inode, map, 0);
 	} else {
-		retval = ext4_ind_map_blocks(handle, inode, map, 0);
+		retval = ext4_ind_map_blocks(handle, inode, map,
+				flags & EXT4_GET_BLOCKS_MOVE_ON_WRITE);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -1312,7 +1381,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	 * ext4_ext_get_block() returns th create = 0
 	 * with buffer head unmapped.
 	 */
-	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED &&
+		!(map->m_flags & EXT4_MAP_REMAP))
 		return retval;
 
 	/*
@@ -1375,6 +1445,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
 	up_write((&EXT4_I(inode)->i_data_sem));
+	/* Clear EXT4_MAP_REMAP, it is not needed any more. */
+	map->m_flags &= ~EXT4_MAP_REMAP;
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
 		int ret = check_block_validity(inode, map);
 		if (ret != 0)
@@ -1383,6 +1455,41 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	return retval;
 }
 
+/*
+ * Block may need to be moved to snapshot and we need to writeback part of the
+ * existing block data to the new block, so make sure the buffer and page are
+ * uptodate before moving the existing block to snapshot.
+ */
+static int ext4_partial_write_begin(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh)
+{
+	struct ext4_map_blocks map;
+	int ret;
+
+	BUG_ON(!buffer_partial_write(bh));
+	BUG_ON(!bh->b_page || !PageLocked(bh->b_page));
+	map.m_lblk = iblock;
+	map.m_len = 1;
+
+	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	if (ret <= 0)
+		return ret;
+
+	if (!buffer_uptodate(bh) && !buffer_unwritten(bh)) {
+		/* map existing block for read */
+		map_bh(bh, inode->i_sb, map.m_pblk);
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		/* clear existing block mapping */
+		clear_buffer_mapped(bh);
+		if (!buffer_uptodate(bh))
+			return -EIO;
+	}
+	/* prevent zero out of page with BH_New flag in block_write_begin() */
+	SetPageUptodate(bh->b_page);
+	return 0;
+}
+
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
@@ -1405,11 +1512,18 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		handle = ext4_journal_start(inode, dio_credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
-			return ret;
+			goto out;
 		}
 		started = 1;
 	}
 
+	if ((flags & EXT4_GET_BLOCKS_MOVE_ON_WRITE) &&
+	     buffer_partial_write(bh)) {
+		/* Read existing block data before moving it to snapshot */
+		ret = ext4_partial_write_begin(inode, iblock, bh);
+		if (ret < 0)
+			goto out;
+	}
 	ret = ext4_map_blocks(handle, inode, &map, flags);
 	if (ret > 0) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
@@ -1417,11 +1531,30 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
+out:
 	if (started)
 		ext4_journal_stop(handle);
+	/*
+	 * BH_Partial_Write flags are only used to pass
+	 * hints to this function and should be cleared on exit.
+	 */
+	clear_buffer_partial_write(bh);
 	return ret;
 }
 
+/*
+ * ext4_get_block_mow is used when a block may be needed to be snapshotted.
+ */
+int ext4_get_block_mow(struct inode *inode, sector_t iblock,
+		   struct buffer_head *bh, int create)
+{
+	int flags = create ? EXT4_GET_BLOCKS_CREATE : 0;
+
+	if (ext4_snapshot_should_move_data(inode))
+		flags |= EXT4_GET_BLOCKS_MOVE_ON_WRITE;
+	return _ext4_get_block(inode, iblock, bh, flags);
+}
+
 int ext4_get_block(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh, int create)
 {
@@ -1600,6 +1733,45 @@ static void ext4_truncate_failed_write(struct inode *inode)
 	ext4_truncate(inode);
 }
 
+/*
+ * Prepare for snapshot.
+ * Clear mapped flag of buffers,
+ * Set partial write flag of buffers in non-delayed-mow case.
+ */
+static void ext4_snapshot_write_begin(struct inode *inode,
+				struct page *page, unsigned len, int delay)
+{
+	struct buffer_head *bh = NULL;
+	/*
+	 * XXX: We can also check ext4_snapshot_has_active() here and we don't
+	 * need to unmap the buffers is there is no active snapshot, but the
+	 * result must be valid throughout the writepage() operation and to
+	 * guarantee this we have to know that the transaction is not restarted.
+	 * Can we count on that?
+	 */
+	if (!ext4_snapshot_should_move_data(inode))
+		return;
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
+	/* snapshots only work when blocksize == pagesize */
+	bh = page_buffers(page);
+	/*
+	 * make sure that get_block() is called even if the buffer is
+	 * mapped, but not if it is already a part of any transaction.
+	 * in data=ordered,the only mode supported by ext4, all dirty
+	 * data buffers are flushed on snapshot take via freeze_fs()
+	 * API.
+	 */
+	if (!buffer_jbd(bh) && !buffer_delay(bh)) {
+		clear_buffer_mapped(bh);
+		/* explicitly request move-on-write */
+		if (!delay && len < PAGE_CACHE_SIZE)
+			/* read block before moving it to snapshot */
+			set_buffer_partial_write(bh);
+	}
+}
+
 static int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
@@ -1642,11 +1814,13 @@ retry:
 		goto out;
 	}
 	*pagep = page;
+	if (EXT4_SNAPSHOTS(inode->i_sb))
+		ext4_snapshot_write_begin(inode, page, len, 0);
 
 	if (ext4_should_dioread_nolock(inode))
 		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
 	else
-		ret = __block_write_begin(page, pos, len, ext4_get_block);
+		ret = __block_write_begin(page, pos, len, ext4_get_block_mow);
 
 	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -2114,6 +2288,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 						clear_buffer_delay(bh);
 						bh->b_blocknr = pblock;
 					}
+					if (buffer_remap(bh)) {
+						clear_buffer_remap(bh);
+						bh->b_blocknr = pblock;
+					}
 					if (buffer_unwritten(bh) ||
 					    buffer_mapped(bh))
 						BUG_ON(bh->b_blocknr != pblock);
@@ -2123,7 +2301,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 				}
 
 				/* skip page if block allocation undone */
-				if (buffer_delay(bh) || buffer_unwritten(bh))
+				if (buffer_delay(bh) || buffer_unwritten(bh) ||
+				    buffer_remap(bh))
 					skip_page = 1;
 				bh = bh->b_this_page;
 				block_start += bh->b_size;
@@ -2243,7 +2422,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 	if ((mpd->b_size == 0) ||
 	    ((mpd->b_state  & (1 << BH_Mapped)) &&
 	     !(mpd->b_state & (1 << BH_Delay)) &&
-	     !(mpd->b_state & (1 << BH_Unwritten))))
+	     !(mpd->b_state & (1 << BH_Unwritten)) &&
+		 !(mpd->b_state & (1 << BH_Remap))))
 		goto submit_io;
 
 	handle = ext4_journal_current_handle();
@@ -2274,6 +2454,9 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (mpd->b_state & (1 << BH_Delay))
 		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+	if (mpd->b_state & (1 << BH_Remap))
+		get_blocks_flags |= EXT4_GET_BLOCKS_MOVE_ON_WRITE |
+				EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
 	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
 	if (blks < 0) {
@@ -2357,7 +2540,7 @@ submit_io:
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
-		(1 << BH_Delay) | (1 << BH_Unwritten))
+		(1 << BH_Delay) | (1 << BH_Unwritten) | (1 << BH_Remap))
 
 /*
  * mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -2434,9 +2617,11 @@ flush_it:
 	return;
 }
 
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_delay_or_unwritten_or_remap(handle_t *handle,
+		struct buffer_head *bh)
 {
-	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
+	return ((buffer_delay(bh) || buffer_unwritten(bh)) &&
+		buffer_dirty(bh)) || buffer_remap(bh);
 }
 
 /*
@@ -2456,6 +2641,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 {
 	struct ext4_map_blocks map;
 	int ret = 0;
+	handle_t *handle = ext4_journal_current_handle();
+	int flags = 0;
 	sector_t invalid_block = ~((sector_t) 0xffff);
 
 	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
@@ -2467,12 +2654,15 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	map.m_lblk = iblock;
 	map.m_len = 1;
 
+	if (ext4_snapshot_should_move_data(inode))
+		flags |= EXT4_GET_BLOCKS_MOVE_ON_WRITE;
+
 	/*
 	 * first, we need to know whether the block is allocated already
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	ret = ext4_map_blocks(handle, inode, &map, flags);
 	if (ret < 0)
 		return ret;
 	if (ret == 0) {
@@ -2492,6 +2682,11 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		return 0;
 	}
 
+	if (map.m_flags & EXT4_MAP_REMAP) {
+		ret = ext4_da_reserve_space(inode, iblock);
+		if (ret < 0)
+			return ret;
+	}
 	map_bh(bh, inode->i_sb, map.m_pblk);
 	bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
 
@@ -2659,7 +2854,7 @@ static int ext4_writepage(struct page *page,
 	}
 	page_bufs = page_buffers(page);
 	if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-			      ext4_bh_delay_or_unwritten)) {
+			      ext4_bh_delay_or_unwritten_or_remap)) {
 		/*
 		 * We don't want to do block allocation, so redirty
 		 * the page and return.  We may reach here when we do
@@ -2840,7 +3035,8 @@ static int write_cache_pages_da(struct address_space *mapping,
 					 * Otherwise we won't make progress
 					 * with the page in ext4_writepage
 					 */
-					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+					if (ext4_bh_delay_or_unwritten_or_remap(
+								NULL, bh)) {
 						mpage_add_bh_to_extent(mpd, logical,
 								       bh->b_size,
 								       bh->b_state);
@@ -3175,6 +3371,8 @@ retry:
 		goto out;
 	}
 	*pagep = page;
+	if (EXT4_SNAPSHOTS(inode->i_sb))
+		ext4_snapshot_write_begin(inode, page, len, 1);
 
 	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
@@ -4002,6 +4200,18 @@ int ext4_block_truncate_page(handle_t *handle,
 			goto unlock;
 	}
 
+	/* check if block needs to be moved to snapshot before zeroing */
+	if (ext4_snapshot_should_move_data(inode)) {
+		err = ext4_get_block_mow(inode, iblock, bh, 1);
+		if (err)
+			goto unlock;
+		if (buffer_new(bh)) {
+			unmap_underlying_metadata(bh->b_bdev,
+					bh->b_blocknr);
+			clear_buffer_new(bh);
+		}
+	}
+
 	if (ext4_should_journal_data(inode)) {
 		BUFFER_TRACE(bh, "get write access");
 		err = ext4_journal_get_write_access(handle, bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3b1c6d1..5eced75 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3174,6 +3174,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	struct ext4_prealloc_space *pa, *cpa = NULL;
 	ext4_fsblk_t goal_block;
 
+	/*
+	 * All inode preallocations allocated before the time when the
+	 * active snapshot is taken need to be discarded, otherwise blocks
+	 * maybe used by both a regular file and the snapshot file that we
+	 * are taking in the below case.
+	 *
+	 * Case: An user take a snapshot when an inode has a preallocation
+	 * 12/512, of which 12/64 has been used by the inode. Here 12 is the
+	 * logical block number. After the snapshot is taken, an user issues
+	 * a write request on the 12th block, then an allocation on 12 is
+	 * needed and allocator will use blocks from the preallocations.As
+	 * a result, the event above happens.
+	 *
+	 *
+	 * For now, all preallocations are discarded.
+	 *
+	 * Please refer to code and comments about preallocation in
+	 * mballoc.c for more information.
+	 */
+	if (ext4_snapshot_active(EXT4_SB(ac->ac_inode->i_sb)) &&
+	    !ext4_snapshot_mow_in_tid(ac->ac_inode)) {
+		ext4_discard_preallocations(ac->ac_inode);
+	}
 	/* only data can be preallocated */
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
 		return 0;
-- 
1.7.0.4


  parent reply	other threads:[~2011-05-09 16:43 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-09 16:41 [PATCH RFC 00/30] Ext4 snapshots - core patches amir73il
2011-05-09 16:41 ` [PATCH RFC 01/30] ext4: EXT4 snapshots (Experimental) amir73il
2011-06-06 14:50   ` Lukas Czerner
2011-06-07  9:28     ` Amir G.
2011-06-07 10:42       ` Lukas Czerner
2011-06-07 13:20         ` Amir G.
2011-05-09 16:41 ` [PATCH RFC 02/30] ext4: snapshot debugging support amir73il
2011-06-06 15:08   ` Lukas Czerner
2011-06-07  9:59     ` Amir G.
2011-06-07 10:49       ` Lukas Czerner
2011-05-09 16:41 ` [PATCH RFC 03/30] ext4: snapshot hooks - inside JBD hooks amir73il
2011-06-06 15:53   ` Lukas Czerner
2011-06-06 16:08     ` Amir G.
2011-06-06 19:01     ` Amir G.
2011-05-09 16:41 ` [PATCH RFC 04/30] ext4: snapshot hooks - block bitmap access amir73il
2011-05-09 16:41 ` [PATCH RFC 05/30] ext4: snapshot hooks - delete blocks amir73il
2011-06-07 11:24   ` Lukas Czerner
2011-06-07 13:24     ` Amir G.
2011-06-07 13:32       ` Lukas Czerner
2011-05-09 16:41 ` amir73il [this message]
2011-05-09 16:41 ` [PATCH RFC 07/30] ext4: snapshot hooks - direct I/O amir73il
2011-05-09 16:41 ` [PATCH RFC 08/30] ext4: snapshot hooks - move extent file data blocks amir73il
2011-05-09 16:41 ` [PATCH RFC 09/30] ext4: snapshot file amir73il
2011-06-02 11:52   ` Amir G.
2011-05-09 16:41 ` [PATCH RFC 10/30] ext4: snapshot file - read through to block device amir73il
2011-05-09 16:41 ` [PATCH RFC 11/30] ext4: snapshot file - permissions amir73il
2011-05-09 16:41 ` [PATCH RFC 12/30] ext4: snapshot file - store on disk amir73il
2011-05-09 16:41 ` [PATCH RFC 13/30] ext4: snapshot file - increase maximum file size limit to 16TB amir73il
2011-06-02 11:47   ` Amir G.
2011-06-03  0:48     ` Ted Ts'o
2011-06-03  4:45       ` Amir G.
2011-05-09 16:41 ` [PATCH RFC 14/30] ext4: snapshot block operations amir73il
2011-05-09 16:41 ` [PATCH RFC 15/30] ext4: snapshot block operation - copy blocks to snapshot amir73il
2011-05-09 16:41 ` [PATCH RFC 16/30] ext4: snapshot block operation - move " amir73il
2011-05-09 16:41 ` [PATCH RFC 17/30] ext4: snapshot control amir73il
2011-05-09 16:41 ` [PATCH RFC 18/30] ext4: snapshot control - fix new snapshot amir73il
2011-05-09 16:41 ` [PATCH RFC 19/30] ext4: snapshot control - reserve disk space for snapshot amir73il
2011-05-09 16:41 ` [PATCH RFC 20/30] ext4: snapshot journaled - increase transaction credits amir73il
2011-05-09 16:41 ` [PATCH RFC 21/30] ext4: snapshot journaled - implement journal_release_buffer() amir73il
2011-05-09 16:41 ` [PATCH RFC 22/30] ext4: snapshot journaled - bypass to save credits amir73il
2011-05-09 16:41 ` [PATCH RFC 23/30] ext4: snapshot journaled - trace COW/buffer credits amir73il
2011-05-09 16:41 ` [PATCH RFC 24/30] ext4: snapshot list support amir73il
2011-05-09 16:41 ` [PATCH RFC 25/30] ext4: snapshot race conditions - concurrent COW operations amir73il
2011-05-09 16:41 ` [PATCH RFC 26/30] ext4: snapshot race conditions - tracked reads amir73il
2011-05-09 16:41 ` [PATCH RFC 27/30] ext4: snapshot exclude - the exclude bitmap amir73il
2011-05-09 16:41 ` [PATCH RFC 28/30] ext4: snapshot cleanup amir73il
2011-05-09 16:41 ` [PATCH RFC 29/30] ext4: snapshot cleanup - shrink deleted snapshots amir73il
2011-05-09 16:41 ` [PATCH RFC 30/30] ext4: snapshot rocompat - enable rw mount amir73il
2011-06-06 13:08 ` [PATCH RFC 00/30] Ext4 snapshots - core patches Lukas Czerner
2011-06-06 14:32   ` Amir G.
2011-06-06 15:31     ` Eric Sandeen
2011-06-06 16:05       ` Lukas Czerner
2011-06-06 20:40         ` Ted Ts'o
2011-06-07 13:59           ` Ric Wheeler
2011-06-07 15:37             ` Ted Ts'o
2011-06-06 16:33       ` Andreas Dilger
2011-06-06 16:42         ` Eric Sandeen
2011-06-06 19:58           ` Lukáš Czerner
2011-06-06 18:25         ` Amir G.
2011-06-06 20:55       ` Ted Ts'o
2011-06-07  5:17         ` Andreas Dilger
2011-06-07  5:58           ` Amir G.
2011-06-07 10:09             ` Lukas Czerner
2011-06-07 13:01               ` Amir G.
2011-06-07 13:50                 ` Ric Wheeler
2011-06-07 14:39                   ` Amir G.
2011-06-07  6:40         ` Amir G.
2011-06-07 15:26 ` Josef Bacik
2011-06-07 16:46   ` Amir G.
2011-06-07 16:54     ` Josef Bacik
2011-06-07 18:22       ` Amir G.
2011-06-07 17:14     ` Sunil Mushran
2011-06-07 17:30       ` Ted Ts'o
2011-06-07 17:54       ` Amir G.

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1304959308-11122-7-git-send-email-amir73il@users.sourceforge.net \
    --to=amir73il@users.sourceforge.net \
    --cc=amir73il@users.sf.net \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=xiaoqiangnk@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).