* [patch 2/21] reduced locking in buffer.c
@ 2002-08-11 7:38 Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-14 8:36 ` William Lee Irwin III
0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-11 7:38 UTC (permalink / raw)
To: Linus Torvalds; +Cc: lkml
Resend. Replace the buffer lru spinlock protection with
local_irq_disable and a cross-CPU call to invalidate them.
buffer.c | 75 ++++++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 48 insertions(+), 27 deletions(-)
--- 2.5.31/fs/buffer.c~buffer-lru-lock Sat Aug 10 23:27:26 2002
+++ 2.5.31-akpm/fs/buffer.c Sat Aug 10 23:27:26 2002
@@ -1277,15 +1277,32 @@ __bread_slow(struct block_device *bdev,
*
* This is a transparent caching front-end to sb_bread(), sb_getblk() and
* sb_find_get_block().
+ *
+ * The LRUs themselves only need locking against invalidate_bh_lrus. We use
+ * a local interrupt disable for that.
*/
-#define BH_LRU_SIZE 7
+#define BH_LRU_SIZE 8
static struct bh_lru {
- spinlock_t lock;
struct buffer_head *bhs[BH_LRU_SIZE];
} ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
+#ifdef CONFIG_SMP
+#define bh_lru_lock() local_irq_disable()
+#define bh_lru_unlock() local_irq_enable()
+#else
+#define bh_lru_lock() preempt_disable()
+#define bh_lru_unlock() preempt_enable()
+#endif
+
+static inline void check_irqs_on(void)
+{
+#ifdef irqs_disabled
+ BUG_ON(irqs_disabled());
+#endif
+}
+
/*
* The LRU management algorithm is dopey-but-simple. Sorry.
*/
@@ -1297,8 +1314,9 @@ static void bh_lru_install(struct buffer
if (bh == NULL)
return;
- lru = &bh_lrus[get_cpu()];
- spin_lock(&lru->lock);
+ check_irqs_on();
+ bh_lru_lock();
+ lru = &bh_lrus[smp_processor_id()];
if (lru->bhs[0] != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
int in;
@@ -1324,8 +1342,7 @@ static void bh_lru_install(struct buffer
bhs[out++] = NULL;
memcpy(lru->bhs, bhs, sizeof(bhs));
}
- spin_unlock(&lru->lock);
- put_cpu();
+ bh_lru_unlock();
if (evictee) {
touch_buffer(evictee);
@@ -1340,8 +1357,9 @@ lookup_bh(struct block_device *bdev, sec
struct bh_lru *lru;
int i;
- lru = &bh_lrus[get_cpu()];
- spin_lock(&lru->lock);
+ check_irqs_on();
+ bh_lru_lock();
+ lru = &bh_lrus[smp_processor_id()];
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = lru->bhs[i];
@@ -1359,8 +1377,7 @@ lookup_bh(struct block_device *bdev, sec
break;
}
}
- spin_unlock(&lru->lock);
- put_cpu();
+ bh_lru_unlock();
return ret;
}
@@ -1407,26 +1424,33 @@ __bread(struct block_device *bdev, secto
EXPORT_SYMBOL(__bread);
/*
- * This is called rarely - at unmount.
+ * invalidate_bh_lrus() is called rarely - at unmount. Because it is only for
+ * unmount it only needs to ensure that all buffers from the target device are
+ * invalidated on return and it doesn't need to worry about new buffers from
+ * that device being added - the unmount code has to prevent that.
*/
-static void invalidate_bh_lrus(void)
+static void invalidate_bh_lru(void *arg)
{
- int cpu_idx;
+ const int cpu = get_cpu();
+ int i;
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
- spin_lock(&bh_lrus[cpu_idx].lock);
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
- int i;
-
- for (i = 0; i < BH_LRU_SIZE; i++) {
- brelse(bh_lrus[cpu_idx].bhs[i]);
- bh_lrus[cpu_idx].bhs[i] = NULL;
- }
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ brelse(bh_lrus[cpu].bhs[i]);
+ bh_lrus[cpu].bhs[i] = NULL;
}
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
- spin_unlock(&bh_lrus[cpu_idx].lock);
+ put_cpu();
+}
+
+static void invalidate_bh_lrus(void)
+{
+ preempt_disable();
+ invalidate_bh_lru(NULL);
+ smp_call_function(invalidate_bh_lru, NULL, 1, 1);
+ preempt_enable();
}
+
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
@@ -2554,9 +2578,6 @@ void __init buffer_init(void)
{
int i;
- for (i = 0; i < NR_CPUS; i++)
- spin_lock_init(&bh_lrus[i].lock);
-
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
.
^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
@ 2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
2002-08-14 8:36 ` William Lee Irwin III
1 sibling, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2002-08-13 17:34 UTC (permalink / raw)
To: Andrew Morton; +Cc: lkml, David S. Miller
On Sun, 11 Aug 2002, Andrew Morton wrote:
>
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
This almost certainly breaks on sparc, where CPU cross-calls are
non-maskable, so local_irq_disable doesn't do anything for them.
Talk to Davem about this - there may be some workaround.
Linus
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
1 sibling, 0 replies; 7+ messages in thread
From: David S. Miller @ 2002-08-13 17:34 UTC (permalink / raw)
To: torvalds; +Cc: akpm, linux-kernel
From: Linus Torvalds <torvalds@transmeta.com>
Date: Tue, 13 Aug 2002 10:34:37 -0700 (PDT)
On Sun, 11 Aug 2002, Andrew Morton wrote:
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
This almost certainly breaks on sparc, where CPU cross-calls are
non-maskable, so local_irq_disable doesn't do anything for them.
Talk to Davem about this - there may be some workaround.
I told him it's OK in 2.5.x as I've discovered a way to implement
sparc32 (once we have it working again) such that local_irq_disable
blocks cross calls.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
@ 2002-08-13 17:53 ` Andrew Morton
2002-08-13 17:52 ` Christoph Hellwig
1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 17:53 UTC (permalink / raw)
To: Linus Torvalds; +Cc: lkml, David S. Miller
Linus Torvalds wrote:
>
> On Sun, 11 Aug 2002, Andrew Morton wrote:
> >
> > Resend. Replace the buffer lru spinlock protection with
> > local_irq_disable and a cross-CPU call to invalidate them.
>
> This almost certainly breaks on sparc, where CPU cross-calls are
> non-maskable, so local_irq_disable doesn't do anything for them.
>
> Talk to Davem about this - there may be some workaround.
I have discussed it with David - he said it's OK in 2.5, but
not in 2.4, and he has eyeballed the diff.
However there's another thing to think about:
local_irq_disable();
atomic_inc();
If the architecture implements atomic_inc with spinlocks, this will
schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
I can fix that with a preempt_disable() in there, but ick.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:53 ` Andrew Morton
@ 2002-08-13 17:52 ` Christoph Hellwig
2002-08-13 18:09 ` Andrew Morton
0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2002-08-13 17:52 UTC (permalink / raw)
To: Andrew Morton; +Cc: Linus Torvalds, lkml, David S. Miller
On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> I have discussed it with David - he said it's OK in 2.5, but
> not in 2.4, and he has eyeballed the diff.
>
> However there's another thing to think about:
>
> local_irq_disable();
> atomic_inc();
>
> If the architecture implements atomic_inc with spinlocks, this will
> schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
>
> I can fix that with a preempt_disable() in there, but ick.
Is there a reason you can't just use brlocks?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:52 ` Christoph Hellwig
@ 2002-08-13 18:09 ` Andrew Morton
0 siblings, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 18:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Linus Torvalds, lkml, David S. Miller
Christoph Hellwig wrote:
>
> On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> > I have discussed it with David - he said it's OK in 2.5, but
> > not in 2.4, and he has eyeballed the diff.
> >
> > However there's another thing to think about:
> >
> > local_irq_disable();
> > atomic_inc();
> >
> > If the architecture implements atomic_inc with spinlocks, this will
> > schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> >
> > I can fix that with a preempt_disable() in there, but ick.
>
> Is there a reason you can't just use brlocks?
I didn't use brlocks in the initial code because I wanted the lock
in the same cacheline as the data it's locking.
And this code removes the locking altogether.
I suspect the lock traffic is in the noise compared with all the
get_bh, brelse, set_bit and clear_bit operations but it's a start.
We don't have a tool to measure those other things ;)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-14 8:36 ` William Lee Irwin III
1 sibling, 0 replies; 7+ messages in thread
From: William Lee Irwin III @ 2002-08-14 8:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: Linus Torvalds, lkml
On Sun, Aug 11, 2002 at 12:38:27AM -0700, Andrew Morton wrote:
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
dbench 256 on a 16x/16G numaq:
Throughput 50.4801 MB/sec (NB=63.1001 MB/sec 504.801 MBit/sec) 256 procs
c013bf74 6612685 74.0731 .text.lock.highmem
c013b7d0 802362 8.98779 kunmap_high
c013b5dc 593514 6.64834 kmap_high
c012f260 107218 1.20102 generic_file_write
c012e53c 85566 0.958481 file_read_actor
c0114820 82810 0.92761 scheduler_tick
c0105394 68424 0.766463 default_idle
c0143c3c 42473 0.475768 block_prepare_write
c01113b8 36678 0.410855 smp_apic_timer_interrupt
c013564c 33728 0.37781 rmqueue
c013bcbc 32868 0.368176 blk_queue_bounce
c0142e38 23095 0.258702 create_empty_buffers
c01143d8 22315 0.249965 load_balance
c012dec0 20193 0.226195 unlock_page
c014325c 19229 0.215397 __block_prepare_write
c013b558 16795 0.188132 flush_all_zero_pkmaps
c0135d28 15512 0.17376 page_cache_release
c013429c 10994 0.123151 lru_cache_add
c0135b10 10675 0.119578 __alloc_pages
c013fa50 9353 0.104769 generic_file_llseek
c0140044 8889 0.0995716 vfs_write
c012dcb4 8417 0.0942844 add_to_page_cache
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2002-08-14 8:34 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
2002-08-13 17:52 ` Christoph Hellwig
2002-08-13 18:09 ` Andrew Morton
2002-08-14 8:36 ` William Lee Irwin III
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox