From: Marcelo Tosatti <mtosatti@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Matthew Wilcox <willy@infradead.org>,
Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
Frederic Weisbecker <frederic@kernel.org>
Subject: [PATCH RFC] fs/buffer.c: update per-CPU bh_lru cache via RCU
Date: Thu, 5 Jan 2023 12:40:16 -0300 [thread overview]
Message-ID: <Y7bvYG7Vn/gYVq1S@tpad> (raw)
umount calls invalidate_bh_lrus(), which sends an IPI to each
CPU that has a non-empty per-CPU cache:
on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
This interrupts CPUs that might be executing code that is
sensitive to interference.
To avoid the IPI, free the per-CPU caches remotely via RCU.
Two bh_lru structures are allocated for each CPU: one is in
use (assigned to the per-CPU bh_lru pointer), and the other is
being freed (or is idle).
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/fs/buffer.c b/fs/buffer.c
index d9c6d1fbb6dd..9f9ed7fffec8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1203,7 +1203,21 @@ struct bh_lru {
struct buffer_head *bhs[BH_LRU_SIZE];
};
-static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
+
+/*
+ * Allocate two bh_lru structures for each CPU. bh_lru points to the
+ * one that is currently in use. Assuming cpu->bh_lru == bh_lrus[0],
+ * the update path does:
+ *
+ * cpu->bh_lru = bh_lrus[1]
+ * synchronize_rcu()
+ * free bh's in bh_lrus[0]
+ */
+unsigned int bh_lru_idx;
+static DEFINE_PER_CPU(struct bh_lru, bh_lrus[2]) = {{{ NULL }}, {{NULL}}};
+static DEFINE_PER_CPU(struct bh_lru, *bh_lru);
+
+static DEFINE_MUTEX(bh_lru_invalidate_mutex);
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
@@ -1245,16 +1259,19 @@ static void bh_lru_install(struct buffer_head *bh)
return;
}
- b = this_cpu_ptr(&bh_lrus);
+ rcu_read_lock();
+ b = rcu_dereference(per_cpu(bh_lru, smp_processor_id()));
for (i = 0; i < BH_LRU_SIZE; i++) {
swap(evictee, b->bhs[i]);
if (evictee == bh) {
+ rcu_read_unlock();
bh_lru_unlock();
return;
}
}
get_bh(bh);
+ rcu_read_unlock();
bh_lru_unlock();
brelse(evictee);
}
@@ -1266,28 +1283,32 @@ static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *ret = NULL;
+ struct bh_lru *lru;
unsigned int i;
check_irqs_on();
bh_lru_lock();
+ rcu_read_lock();
+
+ lru = rcu_dereference(per_cpu(bh_lru, smp_processor_id()));
for (i = 0; i < BH_LRU_SIZE; i++) {
- struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
+ struct buffer_head *bh = lru->bhs[i];
if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
bh->b_size == size) {
if (i) {
while (i) {
- __this_cpu_write(bh_lrus.bhs[i],
- __this_cpu_read(bh_lrus.bhs[i - 1]));
+ lru->bhs[i] = lru->bhs[i - 1];
i--;
}
- __this_cpu_write(bh_lrus.bhs[0], bh);
+ lru->bhs[0] = bh;
}
get_bh(bh);
ret = bh;
break;
}
}
+ rcu_read_unlock();
bh_lru_unlock();
return ret;
}
@@ -1381,35 +1402,56 @@ static void __invalidate_bh_lrus(struct bh_lru *b)
b->bhs[i] = NULL;
}
}
-/*
- * invalidate_bh_lrus() is called rarely - but not only at unmount.
- * This doesn't race because it runs in each cpu either in irq
- * or with preempt disabled.
- */
-static void invalidate_bh_lru(void *arg)
-{
- struct bh_lru *b = &get_cpu_var(bh_lrus);
-
- __invalidate_bh_lrus(b);
- put_cpu_var(bh_lrus);
-}
bool has_bh_in_lru(int cpu, void *dummy)
{
- struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
+ struct bh_lru *b;
int i;
-
+
+ rcu_read_lock();
+ b = rcu_dereference(per_cpu(bh_lru, cpu));
for (i = 0; i < BH_LRU_SIZE; i++) {
- if (b->bhs[i])
+ if (b->bhs[i]) {
+ rcu_read_unlock();
return true;
+ }
}
+ rcu_read_unlock();
return false;
}
+/*
+ * invalidate_bh_lrus() is called rarely - but not only at unmount.
+ */
void invalidate_bh_lrus(void)
{
- on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
+ int cpu, oidx, nidx;
+
+ mutex_lock(&bh_lru_invalidate_mutex);
+ oidx = bh_lru_idx;
+ bh_lru_idx++;
+ if (bh_lru_idx >= 2)
+ bh_lru_idx = 0;
+
+ nidx = bh_lru_idx;
+ /* Assign the per-CPU bh_lru pointer */
+ cpus_read_lock();
+ for_each_online_cpu(cpu)
+ rcu_assign_pointer(per_cpu(bh_lru, cpu), per_cpu_ptr(&bh_lrus[nidx], cpu));
+ cpus_read_unlock();
+ synchronize_rcu_expedited();
+
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct bh_lru *b = per_cpu_ptr(&bh_lrus[oidx], cpu);
+
+ bh_lru_lock();
+ __invalidate_bh_lrus(b);
+ bh_lru_unlock();
+ }
+ cpus_read_unlock();
+ mutex_unlock(&bh_lru_invalidate_mutex);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
@@ -1422,8 +1464,10 @@ void invalidate_bh_lrus_cpu(void)
struct bh_lru *b;
bh_lru_lock();
- b = this_cpu_ptr(&bh_lrus);
+ rcu_read_lock();
+ b = rcu_dereference(per_cpu(bh_lru, smp_processor_id()));
__invalidate_bh_lrus(b);
+ rcu_read_unlock();
bh_lru_unlock();
}
@@ -2923,12 +2967,15 @@ EXPORT_SYMBOL(free_buffer_head);
static int buffer_exit_cpu_dead(unsigned int cpu)
{
int i;
- struct bh_lru *b = &per_cpu(bh_lrus, cpu);
+ struct bh_lru *b;
+ rcu_read_lock();
+ b = rcu_dereference(per_cpu(bh_lru, cpu));
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
+ rcu_read_unlock();
this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
per_cpu(bh_accounting, cpu).nr = 0;
return 0;
@@ -3021,7 +3068,7 @@ EXPORT_SYMBOL(__bh_read_batch);
void __init buffer_init(void)
{
unsigned long nrpages;
- int ret;
+ int ret, cpu;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
@@ -3029,6 +3076,11 @@ void __init buffer_init(void)
SLAB_MEM_SPREAD),
NULL);
+ cpus_read_lock();
+ for_each_online_cpu(cpu)
+ per_cpu(bh_lru, cpu) = per_cpu_ptr(&bh_lrus[0], cpu);
+ cpus_read_unlock();
+
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
*/
next reply other threads:[~2023-01-05 15:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-05 15:40 Marcelo Tosatti [this message]
2023-01-06 1:03 ` [PATCH RFC] fs/buffer.c: update per-CPU bh_lru cache via RCU kernel test robot
2023-01-06 2:44 ` kernel test robot
2023-01-06 18:25 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y7bvYG7Vn/gYVq1S@tpad \
--to=mtosatti@redhat.com \
--cc=axboe@kernel.dk \
--cc=frederic@kernel.org \
--cc=hch@lst.de \
--cc=linux-kernel@vger.kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.