[PATCH v3 2/2] ext4: Make cache hits/misses per-cpu counts

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Waiman Long <Waiman.Long@hpe.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
	Andreas Dilger <adilger.kernel@dilger.ca>
Cc: linux-ext4@vger.kernel.org, linux-kernel@vger.kernel.org,
	Tejun Heo <tj@kernel.org>, Christoph Lameter <cl@linux.com>,
	Scott J Norton <scott.norton@hpe.com>,
	Douglas Hatch <doug.hatch@hpe.com>,
	Toshimitsu Kani <toshi.kani@hpe.com>,
	Waiman Long <Waiman.Long@hpe.com>
Subject: [PATCH v3 2/2] ext4: Make cache hits/misses per-cpu counts
Date: Tue, 12 Apr 2016 14:12:55 -0400	[thread overview]
Message-ID: <1460484775-33359-3-git-send-email-Waiman.Long@hpe.com> (raw)
In-Reply-To: <1460484775-33359-1-git-send-email-Waiman.Long@hpe.com>

This patch changes the es_stats_cache_hits and es_stats_cache_misses
statistics counts to per-cpu variables to reduce cacheline contention
issues whem multiple threads are trying to update those counts
simultaneously. It uses the new per-cpu stats APIs provided by the
percpu_stats.h header file.

With a 38-threads fio I/O test with 2 shared files (on DAX-mount
NVDIMM) running on a 4-socket Haswell-EX server with 4.6-rc1 kernel,
the aggregated bandwidths before and after the patch were:

  Test          W/O patch       With patch      % change
  ----          ---------       ----------      --------
  Read-only     10173MB/s       16141MB/s        +58.7%
  Read-write     2830MB/s        4315MB/s        +52.5%

Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
---
 fs/ext4/extents_status.c |   38 +++++++++++++++++++++++++++++---------
 fs/ext4/extents_status.h |    4 ++--
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e38b987..92ca56d 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -770,6 +770,15 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
 }
 
 /*
+ * For pure statistics count, use a large batch size to make sure that
+ * it does percpu update as much as possible.
+ */
+static inline void ext4_es_stats_inc(struct percpu_counter *fbc)
+{
+	__percpu_counter_add(fbc, 1, (1 << 30));
+}
+
+/*
  * ext4_es_lookup_extent() looks up an extent in extent status tree.
  *
  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
@@ -825,9 +834,9 @@ out:
 		es->es_pblk = es1->es_pblk;
 		if (!ext4_es_is_referenced(es1))
 			ext4_es_set_referenced(es1);
-		stats->es_stats_cache_hits++;
+		ext4_es_stats_inc(&stats->es_stats_cache_hits);
 	} else {
-		stats->es_stats_cache_misses++;
+		ext4_es_stats_inc(&stats->es_stats_cache_misses);
 	}
 
 	read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -1113,9 +1122,9 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
 		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
-	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
-		   es_stats->es_stats_cache_hits,
-		   es_stats->es_stats_cache_misses);
+	seq_printf(seq, "  %lld/%lld cache hits/misses\n",
+		   percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
+		   percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
 	if (inode_cnt)
 		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
@@ -1142,8 +1151,6 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	sbi->s_es_nr_inode = 0;
 	spin_lock_init(&sbi->s_es_lock);
 	sbi->s_es_stats.es_stats_shrunk = 0;
-	sbi->s_es_stats.es_stats_cache_hits = 0;
-	sbi->s_es_stats.es_stats_cache_misses = 0;
 	sbi->s_es_stats.es_stats_scan_time = 0;
 	sbi->s_es_stats.es_stats_max_scan_time = 0;
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
@@ -1153,15 +1160,26 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	if (err)
 		goto err1;
 
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0, GFP_KERNEL);
+	if (err)
+		goto err2;
+
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0, GFP_KERNEL);
+	if (err)
+		goto err3;
+
 	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
 	sbi->s_es_shrinker.count_objects = ext4_es_count;
 	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
 	err = register_shrinker(&sbi->s_es_shrinker);
 	if (err)
-		goto err2;
+		goto err4;
 
 	return 0;
-
+err4:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
+err3:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
 err2:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
@@ -1173,6 +1191,8 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f7aa24f..d537868 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -69,10 +69,10 @@ struct ext4_es_tree {
 
 struct ext4_es_stats {
 	unsigned long es_stats_shrunk;
-	unsigned long es_stats_cache_hits;
-	unsigned long es_stats_cache_misses;
 	u64 es_stats_scan_time;
 	u64 es_stats_max_scan_time;
+	struct percpu_counter es_stats_cache_hits;
+	struct percpu_counter es_stats_cache_misses;
 	struct percpu_counter es_stats_all_cnt;
 	struct percpu_counter es_stats_shk_cnt;
 };
-- 
1.7.1

     prev parent reply	other threads:[~2016-04-12 18:13 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-12 18:12 [PATCH v3 0/2] ext4: Improve parallel I/O performance on NVDIMM Waiman Long
2016-04-12 18:12 ` [PATCH v3 1/2] ext4: Pass in DIO_SKIP_DIO_COUNT flag if inode_dio_begin() called Waiman Long
2016-04-14  3:16   ` Dave Chinner
2016-04-14 16:21     ` Waiman Long
2016-04-15  8:17       ` Dave Chinner
2016-04-15 17:17         ` Waiman Long
2016-04-15 22:19           ` Dave Chinner
2016-04-18 19:46             ` Waiman Long
2016-04-19 23:01               ` Dave Chinner
2016-04-20 15:59                 ` Waiman Long
2016-04-20 20:58   ` Christoph Hellwig
2016-04-21 18:15     ` Waiman Long
2016-04-25 11:48       ` Christoph Hellwig
2016-04-26 16:32         ` Waiman Long
2016-04-12 18:12 ` Waiman Long [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e38b987 dfblob:92ca56d dfblob:f7aa24f dfblob:d537868 )
 OR (
bs:"[PATCH v3 2/2] ext4: Make cache hits/misses per-cpu counts" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460484775-33359-3-git-send-email-Waiman.Long@hpe.com \
    --to=waiman.long@hpe.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=cl@linux.com \
    --cc=doug.hatch@hpe.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=scott.norton@hpe.com \
    --cc=tj@kernel.org \
    --cc=toshi.kani@hpe.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).