linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: amir73il@users.sourceforge.net
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, lczerner@redhat.com,
	Amir Goldstein <amir73il@users.sf.net>,
	Yongqiang Yang <xiaoqiangnk@gmail.com>
Subject: [PATCH v1 22/36] ext4: snapshot journaled - increase transaction credits
Date: Tue,  7 Jun 2011 18:07:49 +0300	[thread overview]
Message-ID: <1307459283-22130-23-git-send-email-amir73il@users.sourceforge.net> (raw)
In-Reply-To: <1307459283-22130-1-git-send-email-amir73il@users.sourceforge.net>

From: Amir Goldstein <amir73il@users.sf.net>

Snapshot operations are journaled as part of the running transaction.
The amount of requested credits is multiplied by a factor, to ensure
that enough buffer credits are reserved in the running transaction.
The new field h_base_credits stores the original credits request and
the new field h_user_credits counts the number of credits used by
non-COW operations.  They are especially useful when extending a large
transaction, which did not use the extra COW credits it requested.
In this case, only the missing extra credits are requested.


Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
---
 fs/ext4/ext4_jbd2.c |   21 +++++++
 fs/ext4/ext4_jbd2.h |  159 ++++++++++++++++++++++++++++++++++++++++++++++-----
 fs/ext4/resize.c    |    2 +-
 fs/ext4/snapshot.c  |   12 ++++
 fs/ext4/super.c     |   38 ++++++++++++-
 5 files changed, 214 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c44c362..015f727 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -131,6 +131,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 				 handle_t *handle, struct inode *inode,
 				 struct buffer_head *bh)
 {
+	struct super_block *sb;
 	int err = 0;
 
 	if (ext4_handle_valid(handle)) {
@@ -138,6 +139,26 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 		if (err)
 			ext4_journal_abort_handle(where, line, __func__,
 						  bh, handle, err);
+		if (err)
+			return err;
+		sb = handle->h_transaction->t_journal->j_private;
+		if (EXT4_SNAPSHOTS(sb) && !IS_COWING(handle)) {
+			struct journal_head *jh = bh2jh(bh);
+			jbd_lock_bh_state(bh);
+			/*
+			 * buffer_credits was decremented when buffer was
+			 * modified for the first time in the current
+			 * transaction, which may have been during a COW
+			 * operation.  We decrement user_credits and mark
+			 * b_modified = 2, on the first time that the buffer
+			 * is modified not during a COW operation (!h_cowing).
+			 */
+			if (jh->b_modified == 1) {
+				jh->b_modified = 2;
+				handle->h_user_credits--;
+			}
+			jbd_unlock_bh_state(bh);
+		}
 	} else {
 		if (inode)
 			mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 4af0bb5..2b0e1bd 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -83,6 +83,62 @@
  * one block, plus two quota updates.  Quota allocations are not
  * needed. */
 
+/* on block write we have to journal the block itself */
+#define EXT4_WRITE_CREDITS 1
+/* on snapshot block alloc we have to journal block group bitmap, exclude
+   bitmap and gdb */
+#define EXT4_ALLOC_CREDITS 3
+/* number of credits for COW bitmap operation (allocated blocks are not
+   journalled): alloc(dind+ind+cow) = 9 */
+#define EXT4_COW_BITMAP_CREDITS	(3*EXT4_ALLOC_CREDITS)
+/* number of credits for other block COW operations:
+   alloc(dind+ind+cow)+write(dind+ind) = 11 */
+#define EXT4_COW_BLOCK_CREDITS	(3*EXT4_ALLOC_CREDITS+2*EXT4_WRITE_CREDITS)
+/* number of credits for the first COW operation in the block group, which
+ * is not the first group in a flex group (alloc 2 dind blocks):
+   9+11 = 20 */
+#define EXT4_COW_CREDITS	(EXT4_COW_BLOCK_CREDITS +	\
+				 EXT4_COW_BITMAP_CREDITS)
+/* number of credits for snapshot operations counted once per transaction:
+   write(sb+inode+tind) = 3 */
+#define EXT4_SNAPSHOT_CREDITS	(3*EXT4_WRITE_CREDITS)
+/*
+ * in total, for N COW operations, we may have to journal 20N+3 blocks,
+ * and we also want to reserve 20+3 credits for the last COW operation,
+ * so we add 20(N-1)+3+(20+3) to the requested N buffer credits
+ * and request 21N+6 buffer credits.
+ * that's a lot of extra credits and much more than needed for the common
+ * case, but what can we do?
+ *
+ * we are going to need a bigger journal to accommodate the
+ * extra snapshot credits.
+ * mke2fs -j uses the following default formula for fs-size above 1G:
+ * journal-size = MIN(128M, fs-size/32)
+ * mke2fs -j -J big uses the following formula:
+ * journal-size = MIN(3G, fs-size/32)
+ */
+#define EXT4_SNAPSHOT_TRANS_BLOCKS(n) \
+	((n)*(1+EXT4_COW_CREDITS)+EXT4_SNAPSHOT_CREDITS)
+#define EXT4_SNAPSHOT_START_TRANS_BLOCKS(n) \
+	((n)*(1+EXT4_COW_CREDITS)+2*EXT4_SNAPSHOT_CREDITS)
+
+/*
+ * check for sufficient buffer and COW credits
+ */
+#define EXT4_SNAPSHOT_HAS_TRANS_BLOCKS(handle, n)			\
+	((handle)->h_buffer_credits >= EXT4_SNAPSHOT_TRANS_BLOCKS(n) && \
+	 (handle)->h_user_credits >= (n))
+
+#define EXT4_RESERVE_COW_CREDITS	(EXT4_COW_CREDITS +		\
+					 EXT4_SNAPSHOT_CREDITS)
+
+/*
+ * Ext4 is not designed for filesystems under 4G with journal size < 128M
+ * Recommended journal size is 3G (created with 'mke2fs -j -J big')
+ */
+#define EXT4_MIN_JOURNAL_BLOCKS	32768U
+#define EXT4_BIG_JOURNAL_BLOCKS	(24*EXT4_MIN_JOURNAL_BLOCKS)
+
 #define EXT4_RESERVE_TRANS_BLOCKS	12U
 
 #define EXT4_INDEX_EXTRA_TRANS_BLOCKS	8
@@ -176,7 +232,19 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 #define trace_cow_add(handle, name, num)
 #define trace_cow_inc(handle, name)
 
-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
+#define ext4_journal_trace(n, caller, handle, nblocks)
+
+handle_t *__ext4_journal_start(const char *where,
+		struct super_block *sb, int nblocks);
+
+#define ext4_journal_start_sb(sb, nblocks) \
+	__ext4_journal_start(__func__, \
+			(sb), (nblocks))
+
+#define ext4_journal_start(inode, nblocks) \
+	__ext4_journal_start(__func__, \
+			(inode)->i_sb, (nblocks))
+
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
 
 #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -212,16 +280,20 @@ static inline int ext4_handle_is_aborted(handle_t *handle)
 
 static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
 {
-	if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
+	struct super_block *sb;
+
+	if (!ext4_handle_valid(handle))
+		return 1;
+
+	sb = handle->h_transaction->t_journal->j_private;
+	if (EXT4_SNAPSHOTS(sb))
+		return EXT4_SNAPSHOT_HAS_TRANS_BLOCKS(handle, needed);
+	/* sb has no snapshot feature */
+	if (handle->h_buffer_credits < needed)
 		return 0;
 	return 1;
 }
 
-static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
-{
-	return ext4_journal_start_sb(inode->i_sb, nblocks);
-}
-
 #define ext4_journal_stop(handle) \
 	__ext4_journal_stop(__func__, __LINE__, (handle))
 
@@ -230,20 +302,77 @@ static inline handle_t *ext4_journal_current_handle(void)
 	return journal_current_handle();
 }
 
-static inline int ext4_journal_extend(handle_t *handle, int nblocks)
+/*
+ * Ext4 wrapper for journal_extend()
+ * When transaction runs out of buffer credits it is possible to try and
+ * extend the buffer credits without restarting the transaction.
+ * Ext4 wrapper for journal_start() has increased the user requested buffer
+ * credits to include the extra credits for COW operations.
+ * This wrapper checks the remaining user credits and how many COW credits
+ * are missing and then tries to extend the transaction.
+ */
+static inline int __ext4_journal_extend(const char *where,
+					handle_t *handle, int nblocks)
 {
-	if (ext4_handle_valid(handle))
-		return jbd2_journal_extend(handle, nblocks);
-	return 0;
+	int credits = 0;
+	int err = 0;
+	struct super_block *sb;
+
+	if (!ext4_handle_valid((handle_t *)handle))
+		return 0;
+
+	credits = nblocks;
+	sb = handle->h_transaction->t_journal->j_private;
+	if (EXT4_SNAPSHOTS(sb)) {
+		/* extend transaction to valid buffer/user credits ratio */
+		credits = EXT4_SNAPSHOT_TRANS_BLOCKS(handle->h_user_credits +
+			nblocks) - handle->h_buffer_credits;
+	}
+	if (credits > 0)
+		err = jbd2_journal_extend((handle_t *)handle, credits);
+	if (EXT4_SNAPSHOTS(sb) && !err) {
+		/* update base/user credits for future extends */
+		handle->h_base_credits += nblocks;
+		handle->h_user_credits += nblocks;
+		ext4_journal_trace(SNAP_WARN, where, handle, nblocks);
+	}
+	return err;
 }
 
-static inline int ext4_journal_restart(handle_t *handle, int nblocks)
+/*
+ * Ext4 wrapper for journal_restart()
+ * When transaction runs out of buffer credits and cannot be extended,
+ * the alternative is to restart it (start a new transaction).
+ * This wrapper increases the user requested buffer credits to include the
+ * extra credits for COW operations.
+ */
+static inline int __ext4_journal_restart(const char *where,
+					 handle_t *handle, int nblocks)
 {
-	if (ext4_handle_valid(handle))
-		return jbd2_journal_restart(handle, nblocks);
-	return 0;
+	int err = 0;
+	int credits = 0;
+	struct super_block *sb;
+
+	if (!ext4_handle_valid((handle_t *)handle))
+		return 0;
+
+	sb = handle->h_transaction->t_journal->j_private;
+	credits = EXT4_SNAPSHOTS(sb) ?
+		  EXT4_SNAPSHOT_START_TRANS_BLOCKS(nblocks) : nblocks;
+	err = jbd2_journal_restart((handle_t *)handle, credits);
+	if (EXT4_SNAPSHOTS(sb) && !err) {
+		handle->h_base_credits = nblocks;
+		handle->h_user_credits = nblocks;
+		ext4_journal_trace(SNAP_WARN, where, handle, nblocks);
+	}
+	return err;
 }
 
+#define ext4_journal_extend(handle, nblocks) \
+	__ext4_journal_extend(__func__, (handle), (nblocks))
+
+#define ext4_journal_restart(handle, nblocks) \
+	__ext4_journal_restart(__func__, (handle), (nblocks))
 static inline int ext4_journal_blocks_per_page(struct inode *inode)
 {
 	if (EXT4_JOURNAL(inode) != NULL)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 91f5473..d341a5c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -668,7 +668,7 @@ static void update_backups(struct super_block *sb,
 
 		/* Out of journal space, and can't get more - abort - so sad */
 		if (ext4_handle_valid(handle) &&
-		    handle->h_buffer_credits == 0 &&
+		    !ext4_handle_has_enough_credits(handle, 1) &&
 		    ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
 		    (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
 			break;
diff --git a/fs/ext4/snapshot.c b/fs/ext4/snapshot.c
index 9fb5c2f..e86dc42 100644
--- a/fs/ext4/snapshot.c
+++ b/fs/ext4/snapshot.c
@@ -405,6 +405,18 @@ __ext4_snapshot_trace_cow(const char *where, handle_t *handle,
  */
 static inline void ext4_snapshot_cow_begin(handle_t *handle)
 {
+	if (!ext4_handle_has_enough_credits(handle, 1)) {
+		/*
+		 * The test above is based on lower limit heuristics of
+		 * user_credits/buffer_credits, which is not always accurate,
+		 * so it is possible that there is no bug here, just another
+		 * false alarm.
+		 */
+		snapshot_debug_hl(1, "warning: insufficient buffer/user "
+				  "credits (%d/%d) for COW operation?\n",
+				  handle->h_buffer_credits,
+				  handle->h_user_credits);
+	}
 	snapshot_debug_hl(4, "{\n");
 	handle->h_cowing = 1;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a7be485..0d996be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -264,8 +264,10 @@ static void ext4_put_nojournal(handle_t *handle)
  * ext4 prevents a new handle from being started by s_frozen, which
  * is in an upper layer.
  */
-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
+handle_t *__ext4_journal_start(const char *where,
+		struct super_block *sb, int nblocks)
 {
+	int credits;
 	journal_t *journal;
 	handle_t  *handle;
 
@@ -296,7 +298,18 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 		ext4_abort(sb, "Detected aborted journal");
 		return ERR_PTR(-EROFS);
 	}
-	return jbd2_journal_start(journal, nblocks);
+
+	credits = EXT4_SNAPSHOTS(sb) ?
+		EXT4_SNAPSHOT_START_TRANS_BLOCKS(nblocks) : nblocks;
+	handle = jbd2_journal_start(journal, credits);
+	if (EXT4_SNAPSHOTS(sb) && !IS_ERR(handle)) {
+		if (handle->h_ref == 1) {
+			handle->h_base_credits = nblocks;
+			handle->h_user_credits = nblocks;
+		}
+		ext4_journal_trace(SNAP_WARN, where, handle, nblocks);
+	}
+	return handle;
 }
 
 /*
@@ -3874,6 +3887,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 		return NULL;
 	}
 
+	if (EXT4_SNAPSHOTS(sb) &&
+			(journal_inode->i_size >> EXT4_BLOCK_SIZE_BITS(sb)) <
+			EXT4_MIN_JOURNAL_BLOCKS) {
+		ext4_msg(sb, KERN_ERR,
+			"journal is too small (%lld < %u) for snapshots",
+			journal_inode->i_size >> EXT4_BLOCK_SIZE_BITS(sb),
+			EXT4_MIN_JOURNAL_BLOCKS);
+		iput(journal_inode);
+		return NULL;
+	}
+
+	if (EXT4_SNAPSHOTS(sb) &&
+			(journal_inode->i_size >> EXT4_BLOCK_SIZE_BITS(sb)) <
+			EXT4_BIG_JOURNAL_BLOCKS) {
+		snapshot_debug(1, "warning: journal is not big enough "
+			"(%lld < %u) - this might affect concurrent "
+			"filesystem writers performance!\n",
+			journal_inode->i_size >> EXT4_BLOCK_SIZE_BITS(sb),
+			EXT4_BIG_JOURNAL_BLOCKS);
+	}
+
 	journal = jbd2_journal_init_inode(journal_inode);
 	if (!journal) {
 		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
-- 
1.7.4.1


  parent reply	other threads:[~2011-06-07 15:09 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-07 15:07 [PATCH v1 00/30] Ext4 snapshots amir73il
2011-06-07 15:07 ` [PATCH v1 01/36] ext4: EXT4 snapshots (Experimental) amir73il
2011-06-07 15:07 ` [PATCH v1 02/36] ext4: snapshot debugging support amir73il
2011-06-07 15:07 ` [PATCH v1 03/36] ext4: snapshot hooks - inside JBD hooks amir73il
2011-06-07 15:07 ` [PATCH v1 04/36] ext4: snapshot hooks - block bitmap access amir73il
2011-06-07 15:07 ` [PATCH v1 05/36] ext4: snapshot hooks - delete blocks amir73il
2011-06-07 15:07 ` [PATCH v1 06/36] ext4: snapshot hooks - move data blocks amir73il
2011-06-07 15:07 ` [PATCH v1 07/36] ext4: snapshot hooks - direct I/O amir73il
2011-06-07 15:07 ` [PATCH v1 08/36] ext4: snapshot hooks - move extent file data blocks amir73il
2011-06-07 15:07 ` [PATCH v1 09/36] ext4: snapshot file amir73il
2011-06-07 15:07 ` [PATCH v1 10/36] ext4: snapshot file - read through to block device amir73il
2011-06-07 15:07 ` [PATCH v1 11/36] ext4: snapshot file - permissions amir73il
2011-06-07 15:07 ` [PATCH v1 12/36] ext4: snapshot file - store on disk amir73il
2011-06-07 15:07 ` [PATCH v1 13/36] ext4: snapshot file - increase maximum file size limit to 16TB amir73il
2011-06-07 15:07 ` [PATCH v1 14/36] ext4: snapshot block operations amir73il
2011-06-07 15:07 ` [PATCH v1 15/36] ext4: snapshot block operation - copy blocks to snapshot amir73il
2011-06-07 15:07 ` [PATCH v1 16/36] ext4: snapshot block operation - move " amir73il
2011-06-07 15:07 ` [PATCH v1 17/36] ext4: snapshot block operation - copy block bitmap " amir73il
2011-06-07 15:07 ` [PATCH v1 18/36] ext4: snapshot control amir73il
2011-06-07 15:07 ` [PATCH v1 19/36] ext4: snapshot control - init new snapshot amir73il
2011-06-07 15:07 ` [PATCH v1 20/36] ext4: snapshot control - fix " amir73il
2011-06-07 15:07 ` [PATCH v1 21/36] ext4: snapshot control - reserve disk space for snapshot amir73il
2011-06-07 15:07 ` amir73il [this message]
2011-06-07 15:07 ` [PATCH v1 23/36] ext4: snapshot journaled - implement journal_release_buffer() amir73il
2011-06-07 15:07 ` [PATCH v1 24/36] ext4: snapshot journaled - bypass to save credits amir73il
2011-06-07 15:07 ` [PATCH v1 25/36] ext4: snapshot journaled - cache last COW tid in journal_head amir73il
2011-06-07 15:07 ` [PATCH v1 26/36] ext4: snapshot journaled - trace COW/buffer credits amir73il
2011-06-07 15:07 ` [PATCH v1 27/36] ext4: snapshot list support amir73il
2011-06-07 15:07 ` [PATCH v1 28/36] ext4: snapshot list - read through to previous snapshot amir73il
2011-06-07 15:07 ` [PATCH v1 29/36] ext4: snapshot race conditions - concurrent COW bitmap operations amir73il
2011-06-07 15:07 ` [PATCH v1 30/36] ext4: snapshot race conditions - concurrent COW operations amir73il
2011-06-07 15:07 ` [PATCH v1 31/36] ext4: snapshot race conditions - tracked reads amir73il
2011-06-07 15:07 ` [PATCH v1 32/36] ext4: snapshot exclude - the exclude bitmap amir73il
2011-06-07 15:08 ` [PATCH v1 33/36] ext4: snapshot cleanup amir73il
2011-06-07 15:08 ` [PATCH v1 34/36] ext4: snapshot cleanup - shrink deleted snapshots amir73il
2011-06-07 15:08 ` [PATCH v1 35/36] ext4: snapshot cleanup - merge shrunk snapshots amir73il
2011-06-07 15:08 ` [PATCH v1 36/36] ext4: snapshot rocompat - enable rw mount amir73il
2011-06-07 15:56 ` [PATCH v1 00/30] Ext4 snapshots Lukas Czerner
2011-06-07 16:31   ` Amir G.
2011-06-08 10:09     ` Lukas Czerner
2011-06-08 14:04       ` Amir G.
2011-06-08 14:41         ` Eric Sandeen
2011-06-08 15:01           ` Amir G.
2011-06-08 15:22             ` Eric Sandeen
2011-06-08 15:33               ` Amir G.
2011-06-08 15:38         ` Lukas Czerner
2011-06-08 15:59           ` Amir G.
2011-06-08 16:19             ` Mike Snitzer
2011-06-09  1:59           ` Yongqiang Yang
2011-06-09  3:18             ` Amir G.
2011-06-09  3:51               ` Yongqiang Yang
2011-06-09  6:50                 ` Lukas Czerner
2011-06-09  7:57                   ` Amir G.
2011-06-09  8:13                     ` david
2011-06-09 10:06                       ` Amir G.
2011-06-09 10:17                         ` Lukas Czerner
2011-06-09  8:46                     ` Lukas Czerner
2011-06-09 10:54                       ` Amir G.
2011-06-09 12:59                         ` Lukas Czerner
2011-06-10  7:06                           ` Amir G.
2011-06-10  9:00                             ` Lukas Czerner
2011-06-10 12:02                               ` Amir G.
2011-06-13  9:56                               ` Amir G.
2011-06-13 10:54                                 ` Lukas Czerner
2011-06-13 12:56                                   ` Amir G.
2011-06-13 13:11                                     ` Lukas Czerner
2011-06-13 13:26                                       ` Amir G.
2011-06-13 13:50                                         ` Joe Thornber
2011-06-10 22:51                         ` Valdis.Kletnieks
2011-06-11  1:09                           ` Amir G.
2011-06-21 11:06 ` Amir G.
2011-06-21 15:45   ` Andreas Dilger
2011-06-22  6:38     ` Amir G.

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1307459283-22130-23-git-send-email-amir73il@users.sourceforge.net \
    --to=amir73il@users.sourceforge.net \
    --cc=amir73il@users.sf.net \
    --cc=lczerner@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=xiaoqiangnk@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).