- * [PATCH 2/6] jbd2: calculate statistics without holding j_state_lock and j_list_lock
  2014-03-09  6:05 [PATCH 1/6] jbd2: don't hold j_state_lock while calling wake_up() Theodore Ts'o
@ 2014-03-09  6:05 ` Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 3/6] jbd2: add transaction to checkpoint list earlier Theodore Ts'o
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Theodore Ts'o @ 2014-03-09  6:05 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Theodore Ts'o
The two hottest locks, and thus the biggest scalability bottlenecks,
in the jbd2 layer, are the j_list_lock and j_state_lock.  This has
inspired some people to do some truly unnatural things[1].
[1] https://www.usenix.org/system/files/conference/fast14/fast14-paper_kang.pdf
We don't need to be holding both j_state_lock and j_list_lock while
calculating the journal statistics, so move those calculations to the
very end of jbd2_journal_commit_transaction.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 765b31d..af36252 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1083,24 +1083,7 @@ restart_loop:
 		atomic_read(&commit_transaction->t_handle_count);
 	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
 			     commit_transaction->t_tid, &stats.run);
-
-	/*
-	 * Calculate overall stats
-	 */
-	spin_lock(&journal->j_history_lock);
-	journal->j_stats.ts_tid++;
-	if (commit_transaction->t_requested)
-		journal->j_stats.ts_requested++;
-	journal->j_stats.run.rs_wait += stats.run.rs_wait;
-	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
-	journal->j_stats.run.rs_running += stats.run.rs_running;
-	journal->j_stats.run.rs_locked += stats.run.rs_locked;
-	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
-	journal->j_stats.run.rs_logging += stats.run.rs_logging;
-	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
-	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
-	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
-	spin_unlock(&journal->j_history_lock);
+	stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
 
 	commit_transaction->t_state = T_COMMIT_CALLBACK;
 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
@@ -1157,4 +1140,21 @@ restart_loop:
 	spin_unlock(&journal->j_list_lock);
 	write_unlock(&journal->j_state_lock);
 	wake_up(&journal->j_wait_done_commit);
+
+	/*
+	 * Calculate overall stats
+	 */
+	spin_lock(&journal->j_history_lock);
+	journal->j_stats.ts_tid++;
+	journal->j_stats.ts_requested += stats.ts_requested;
+	journal->j_stats.run.rs_wait += stats.run.rs_wait;
+	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
+	journal->j_stats.run.rs_running += stats.run.rs_running;
+	journal->j_stats.run.rs_locked += stats.run.rs_locked;
+	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
+	journal->j_stats.run.rs_logging += stats.run.rs_logging;
+	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
+	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
+	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
+	spin_unlock(&journal->j_history_lock);
 }
-- 
1.9.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread
- * [PATCH 3/6] jbd2: add transaction to checkpoint list earlier
  2014-03-09  6:05 [PATCH 1/6] jbd2: don't hold j_state_lock while calling wake_up() Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 2/6] jbd2: calculate statistics without holding j_state_lock and j_list_lock Theodore Ts'o
@ 2014-03-09  6:05 ` Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 4/6] jbd2: check jh->b_transaction without taking j_list_lock Theodore Ts'o
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Theodore Ts'o @ 2014-03-09  6:05 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Theodore Ts'o
We don't otherwise need j_list_lock during the rest of commit phase
#7, so add the transaction to the checkpoint list at the very end of
commit phase #6.  This allows us to drop j_list_lock earlier, which is
a good thing since it is a super hot lock.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index af36252..5f26139 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1065,6 +1065,25 @@ restart_loop:
 		goto restart_loop;
 	}
 
+	/* Add the transaction to the checkpoint list
+	 * __journal_remove_checkpoint() can not destroy transaction
+	 * under us because it is not marked as T_FINISHED yet */
+	if (journal->j_checkpoint_transactions == NULL) {
+		journal->j_checkpoint_transactions = commit_transaction;
+		commit_transaction->t_cpnext = commit_transaction;
+		commit_transaction->t_cpprev = commit_transaction;
+	} else {
+		commit_transaction->t_cpnext =
+			journal->j_checkpoint_transactions;
+		commit_transaction->t_cpprev =
+			commit_transaction->t_cpnext->t_cpprev;
+		commit_transaction->t_cpnext->t_cpprev =
+			commit_transaction;
+		commit_transaction->t_cpprev->t_cpnext =
+				commit_transaction;
+	}
+	spin_unlock(&journal->j_list_lock);
+
 	/* Done with this transaction! */
 
 	jbd_debug(3, "JBD2: commit phase 7\n");
@@ -1103,24 +1122,6 @@ restart_loop:
 
 	write_unlock(&journal->j_state_lock);
 
-	if (journal->j_checkpoint_transactions == NULL) {
-		journal->j_checkpoint_transactions = commit_transaction;
-		commit_transaction->t_cpnext = commit_transaction;
-		commit_transaction->t_cpprev = commit_transaction;
-	} else {
-		commit_transaction->t_cpnext =
-			journal->j_checkpoint_transactions;
-		commit_transaction->t_cpprev =
-			commit_transaction->t_cpnext->t_cpprev;
-		commit_transaction->t_cpnext->t_cpprev =
-			commit_transaction;
-		commit_transaction->t_cpprev->t_cpnext =
-				commit_transaction;
-	}
-	spin_unlock(&journal->j_list_lock);
-	/* Drop all spin_locks because commit_callback may be block.
-	 * __journal_remove_checkpoint() can not destroy transaction
-	 * under us because it is not marked as T_FINISHED yet */
 	if (journal->j_commit_callback)
 		journal->j_commit_callback(journal, commit_transaction);
 
@@ -1131,7 +1132,7 @@ restart_loop:
 	write_lock(&journal->j_state_lock);
 	spin_lock(&journal->j_list_lock);
 	commit_transaction->t_state = T_FINISHED;
-	/* Recheck checkpoint lists after j_list_lock was dropped */
+	/* Check if the transaction can be dropped now that we are finished */
 	if (commit_transaction->t_checkpoint_list == NULL &&
 	    commit_transaction->t_checkpoint_io_list == NULL) {
 		__jbd2_journal_drop_transaction(journal, commit_transaction);
-- 
1.9.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread
- * [PATCH 4/6] jbd2: check jh->b_transaction without taking j_list_lock
  2014-03-09  6:05 [PATCH 1/6] jbd2: don't hold j_state_lock while calling wake_up() Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 2/6] jbd2: calculate statistics without holding j_state_lock and j_list_lock Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 3/6] jbd2: add transaction to checkpoint list earlier Theodore Ts'o
@ 2014-03-09  6:05 ` Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 5/6] jbd2: minimize region locked by j_list_lock in journal_get_create_access() Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 6/6] jbd2: minimize region locked by j_list_lock in jbd2_journal_forget() Theodore Ts'o
  4 siblings, 0 replies; 6+ messages in thread
From: Theodore Ts'o @ 2014-03-09  6:05 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Theodore Ts'o
jh->b_transaction is adequately protected for reading by the
jbd_lock_bh_state(bh), so we don't need to take j_list_lock in
__journal_try_to_free_buffer().
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 60bb365..78900a1 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1821,11 +1821,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 	if (buffer_locked(bh) || buffer_dirty(bh))
 		goto out;
 
-	if (jh->b_next_transaction != NULL)
+	if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
 		goto out;
 
 	spin_lock(&journal->j_list_lock);
-	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
+	if (jh->b_cp_transaction != NULL) {
 		/* written-back checkpointed metadata buffer */
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		__jbd2_journal_remove_checkpoint(jh);
-- 
1.9.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread
- * [PATCH 5/6] jbd2: minimize region locked by j_list_lock in journal_get_create_access()
  2014-03-09  6:05 [PATCH 1/6] jbd2: don't hold j_state_lock while calling wake_up() Theodore Ts'o
                   ` (2 preceding siblings ...)
  2014-03-09  6:05 ` [PATCH 4/6] jbd2: check jh->b_transaction without taking j_list_lock Theodore Ts'o
@ 2014-03-09  6:05 ` Theodore Ts'o
  2014-03-09  6:05 ` [PATCH 6/6] jbd2: minimize region locked by j_list_lock in jbd2_journal_forget() Theodore Ts'o
  4 siblings, 0 replies; 6+ messages in thread
From: Theodore Ts'o @ 2014-03-09  6:05 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Theodore Ts'o
It's not needed until we start trying to modifying fields in the
journal_head which are protected by j_list_lock.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 78900a1..357f3dc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1073,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	 * reused here.
 	 */
 	jbd_lock_bh_state(bh);
-	spin_lock(&journal->j_list_lock);
 	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
 		jh->b_transaction == NULL ||
 		(jh->b_transaction == journal->j_committing_transaction &&
@@ -1096,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 		jh->b_modified = 0;
 
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
+		spin_lock(&journal->j_list_lock);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 	} else if (jh->b_transaction == journal->j_committing_transaction) {
 		/* first access by this transaction */
 		jh->b_modified = 0;
 
 		JBUFFER_TRACE(jh, "set next transaction");
+		spin_lock(&journal->j_list_lock);
 		jh->b_next_transaction = transaction;
 	}
 	spin_unlock(&journal->j_list_lock);
-- 
1.9.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread
- * [PATCH 6/6] jbd2: minimize region locked by j_list_lock in jbd2_journal_forget()
  2014-03-09  6:05 [PATCH 1/6] jbd2: don't hold j_state_lock while calling wake_up() Theodore Ts'o
                   ` (3 preceding siblings ...)
  2014-03-09  6:05 ` [PATCH 5/6] jbd2: minimize region locked by j_list_lock in journal_get_create_access() Theodore Ts'o
@ 2014-03-09  6:05 ` Theodore Ts'o
  4 siblings, 0 replies; 6+ messages in thread
From: Theodore Ts'o @ 2014-03-09  6:05 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Theodore Ts'o
It's not needed until we start trying to modifying fields in the
journal_head which are protected by j_list_lock.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 357f3dc..d999b1f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1416,7 +1416,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 	BUFFER_TRACE(bh, "entry");
 
 	jbd_lock_bh_state(bh);
-	spin_lock(&journal->j_list_lock);
 
 	if (!buffer_jbd(bh))
 		goto not_jbd;
@@ -1469,6 +1468,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 		 * we know to remove the checkpoint after we commit.
 		 */
 
+		spin_lock(&journal->j_list_lock);
 		if (jh->b_cp_transaction) {
 			__jbd2_journal_temp_unlink_buffer(jh);
 			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
@@ -1481,6 +1481,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 				goto drop;
 			}
 		}
+		spin_unlock(&journal->j_list_lock);
 	} else if (jh->b_transaction) {
 		J_ASSERT_JH(jh, (jh->b_transaction ==
 				 journal->j_committing_transaction));
@@ -1492,7 +1493,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 
 		if (jh->b_next_transaction) {
 			J_ASSERT(jh->b_next_transaction == transaction);
+			spin_lock(&journal->j_list_lock);
 			jh->b_next_transaction = NULL;
+			spin_unlock(&journal->j_list_lock);
 
 			/*
 			 * only drop a reference if this transaction modified
@@ -1504,7 +1507,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 	}
 
 not_jbd:
-	spin_unlock(&journal->j_list_lock);
 	jbd_unlock_bh_state(bh);
 	__brelse(bh);
 drop:
-- 
1.9.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread