* [patch 2/21] reduced locking in buffer.c
@ 2002-08-11 7:38 Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-14 8:36 ` William Lee Irwin III
0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-11 7:38 UTC (permalink / raw)
To: Linus Torvalds; +Cc: lkml
Resend. Replace the buffer lru spinlock protection with
local_irq_disable and a cross-CPU call to invalidate them.
buffer.c | 75 ++++++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 48 insertions(+), 27 deletions(-)
--- 2.5.31/fs/buffer.c~buffer-lru-lock Sat Aug 10 23:27:26 2002
+++ 2.5.31-akpm/fs/buffer.c Sat Aug 10 23:27:26 2002
@@ -1277,15 +1277,32 @@ __bread_slow(struct block_device *bdev,
*
* This is a transparent caching front-end to sb_bread(), sb_getblk() and
* sb_find_get_block().
+ *
+ * The LRUs themselves only need locking against invalidate_bh_lrus. We use
+ * a local interrupt disable for that.
*/
-#define BH_LRU_SIZE 7
+#define BH_LRU_SIZE 8
static struct bh_lru {
- spinlock_t lock;
struct buffer_head *bhs[BH_LRU_SIZE];
} ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
+#ifdef CONFIG_SMP
+#define bh_lru_lock() local_irq_disable()
+#define bh_lru_unlock() local_irq_enable()
+#else
+#define bh_lru_lock() preempt_disable()
+#define bh_lru_unlock() preempt_enable()
+#endif
+
+static inline void check_irqs_on(void)
+{
+#ifdef irqs_disabled
+ BUG_ON(irqs_disabled());
+#endif
+}
+
/*
* The LRU management algorithm is dopey-but-simple. Sorry.
*/
@@ -1297,8 +1314,9 @@ static void bh_lru_install(struct buffer
if (bh == NULL)
return;
- lru = &bh_lrus[get_cpu()];
- spin_lock(&lru->lock);
+ check_irqs_on();
+ bh_lru_lock();
+ lru = &bh_lrus[smp_processor_id()];
if (lru->bhs[0] != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
int in;
@@ -1324,8 +1342,7 @@ static void bh_lru_install(struct buffer
bhs[out++] = NULL;
memcpy(lru->bhs, bhs, sizeof(bhs));
}
- spin_unlock(&lru->lock);
- put_cpu();
+ bh_lru_unlock();
if (evictee) {
touch_buffer(evictee);
@@ -1340,8 +1357,9 @@ lookup_bh(struct block_device *bdev, sec
struct bh_lru *lru;
int i;
- lru = &bh_lrus[get_cpu()];
- spin_lock(&lru->lock);
+ check_irqs_on();
+ bh_lru_lock();
+ lru = &bh_lrus[smp_processor_id()];
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = lru->bhs[i];
@@ -1359,8 +1377,7 @@ lookup_bh(struct block_device *bdev, sec
break;
}
}
- spin_unlock(&lru->lock);
- put_cpu();
+ bh_lru_unlock();
return ret;
}
@@ -1407,26 +1424,33 @@ __bread(struct block_device *bdev, secto
EXPORT_SYMBOL(__bread);
/*
- * This is called rarely - at unmount.
+ * invalidate_bh_lrus() is called rarely - at unmount. Because it is only for
+ * unmount it only needs to ensure that all buffers from the target device are
+ * invalidated on return and it doesn't need to worry about new buffers from
+ * that device being added - the unmount code has to prevent that.
*/
-static void invalidate_bh_lrus(void)
+static void invalidate_bh_lru(void *arg)
{
- int cpu_idx;
+ const int cpu = get_cpu();
+ int i;
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
- spin_lock(&bh_lrus[cpu_idx].lock);
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
- int i;
-
- for (i = 0; i < BH_LRU_SIZE; i++) {
- brelse(bh_lrus[cpu_idx].bhs[i]);
- bh_lrus[cpu_idx].bhs[i] = NULL;
- }
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ brelse(bh_lrus[cpu].bhs[i]);
+ bh_lrus[cpu].bhs[i] = NULL;
}
- for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
- spin_unlock(&bh_lrus[cpu_idx].lock);
+ put_cpu();
+}
+
+static void invalidate_bh_lrus(void)
+{
+ preempt_disable();
+ invalidate_bh_lru(NULL);
+ smp_call_function(invalidate_bh_lru, NULL, 1, 1);
+ preempt_enable();
}
+
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
@@ -2554,9 +2578,6 @@ void __init buffer_init(void)
{
int i;
- for (i = 0; i < NR_CPUS; i++)
- spin_lock_init(&bh_lrus[i].lock);
-
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
1 sibling, 0 replies; 7+ messages in thread
From: David S. Miller @ 2002-08-13 17:34 UTC (permalink / raw)
To: torvalds; +Cc: akpm, linux-kernel
From: Linus Torvalds <torvalds@transmeta.com>
Date: Tue, 13 Aug 2002 10:34:37 -0700 (PDT)
On Sun, 11 Aug 2002, Andrew Morton wrote:
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
This almost certainly breaks on sparc, where CPU cross-calls are
non-maskable, so local_irq_disable doesn't do anything for them.
Talk to Davem about this - there may be some workaround.
I told him it's OK in 2.5.x as I've discovered a way to implement
sparc32 (once we have it working again) such that local_irq_disable
blocks cross calls.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
@ 2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
2002-08-14 8:36 ` William Lee Irwin III
1 sibling, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2002-08-13 17:34 UTC (permalink / raw)
To: Andrew Morton; +Cc: lkml, David S. Miller
On Sun, 11 Aug 2002, Andrew Morton wrote:
>
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
This almost certainly breaks on sparc, where CPU cross-calls are
non-maskable, so local_irq_disable doesn't do anything for them.
Talk to Davem about this - there may be some workaround.
Linus
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:53 ` Andrew Morton
@ 2002-08-13 17:52 ` Christoph Hellwig
2002-08-13 18:09 ` Andrew Morton
0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2002-08-13 17:52 UTC (permalink / raw)
To: Andrew Morton; +Cc: Linus Torvalds, lkml, David S. Miller
On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> I have discussed it with David - he said it's OK in 2.5, but
> not in 2.4, and he has eyeballed the diff.
>
> However there's another thing to think about:
>
> local_irq_disable();
> atomic_inc();
>
> If the architecture implements atomic_inc with spinlocks, this will
> schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
>
> I can fix that with a preempt_disable() in there, but ick.
Is there a reason you can't just use brlocks?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
@ 2002-08-13 17:53 ` Andrew Morton
2002-08-13 17:52 ` Christoph Hellwig
1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 17:53 UTC (permalink / raw)
To: Linus Torvalds; +Cc: lkml, David S. Miller
Linus Torvalds wrote:
>
> On Sun, 11 Aug 2002, Andrew Morton wrote:
> >
> > Resend. Replace the buffer lru spinlock protection with
> > local_irq_disable and a cross-CPU call to invalidate them.
>
> This almost certainly breaks on sparc, where CPU cross-calls are
> non-maskable, so local_irq_disable doesn't do anything for them.
>
> Talk to Davem about this - there may be some workaround.
I have discussed it with David - he said it's OK in 2.5, but
not in 2.4, and he has eyeballed the diff.
However there's another thing to think about:
local_irq_disable();
atomic_inc();
If the architecture implements atomic_inc with spinlocks, this will
schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
I can fix that with a preempt_disable() in there, but ick.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-13 17:52 ` Christoph Hellwig
@ 2002-08-13 18:09 ` Andrew Morton
0 siblings, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 18:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Linus Torvalds, lkml, David S. Miller
Christoph Hellwig wrote:
>
> On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> > I have discussed it with David - he said it's OK in 2.5, but
> > not in 2.4, and he has eyeballed the diff.
> >
> > However there's another thing to think about:
> >
> > local_irq_disable();
> > atomic_inc();
> >
> > If the architecture implements atomic_inc with spinlocks, this will
> > schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> >
> > I can fix that with a preempt_disable() in there, but ick.
>
> Is there a reason you can't just use brlocks?
I didn't use brlocks in the initial code because I wanted the lock
in the same cacheline as the data it's locking.
And this code removes the locking altogether.
I suspect the lock traffic is in the noise compared with all the
get_bh, brelse, set_bit and clear_bit operations but it's a start.
We don't have a tool to measure those other things ;)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 2/21] reduced locking in buffer.c
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-14 8:36 ` William Lee Irwin III
1 sibling, 0 replies; 7+ messages in thread
From: William Lee Irwin III @ 2002-08-14 8:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: Linus Torvalds, lkml
On Sun, Aug 11, 2002 at 12:38:27AM -0700, Andrew Morton wrote:
> Resend. Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.
dbench 256 on a 16x/16G numaq:
Throughput 50.4801 MB/sec (NB=63.1001 MB/sec 504.801 MBit/sec) 256 procs
c013bf74 6612685 74.0731 .text.lock.highmem
c013b7d0 802362 8.98779 kunmap_high
c013b5dc 593514 6.64834 kmap_high
c012f260 107218 1.20102 generic_file_write
c012e53c 85566 0.958481 file_read_actor
c0114820 82810 0.92761 scheduler_tick
c0105394 68424 0.766463 default_idle
c0143c3c 42473 0.475768 block_prepare_write
c01113b8 36678 0.410855 smp_apic_timer_interrupt
c013564c 33728 0.37781 rmqueue
c013bcbc 32868 0.368176 blk_queue_bounce
c0142e38 23095 0.258702 create_empty_buffers
c01143d8 22315 0.249965 load_balance
c012dec0 20193 0.226195 unlock_page
c014325c 19229 0.215397 __block_prepare_write
c013b558 16795 0.188132 flush_all_zero_pkmaps
c0135d28 15512 0.17376 page_cache_release
c013429c 10994 0.123151 lru_cache_add
c0135b10 10675 0.119578 __alloc_pages
c013fa50 9353 0.104769 generic_file_llseek
c0140044 8889 0.0995716 vfs_write
c012dcb4 8417 0.0942844 add_to_page_cache
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2002-08-14 8:34 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-08-11 7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34 ` David S. Miller
2002-08-13 17:53 ` Andrew Morton
2002-08-13 17:52 ` Christoph Hellwig
2002-08-13 18:09 ` Andrew Morton
2002-08-14 8:36 ` William Lee Irwin III
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.