public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 2/21] reduced locking in buffer.c
@ 2002-08-11  7:38 Andrew Morton
  2002-08-13 17:34 ` Linus Torvalds
  2002-08-14  8:36 ` William Lee Irwin III
  0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-11  7:38 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: lkml



Resend.  Replace the buffer lru spinlock protection with
local_irq_disable and a cross-CPU call to invalidate them.



 buffer.c |   75 ++++++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 48 insertions(+), 27 deletions(-)

--- 2.5.31/fs/buffer.c~buffer-lru-lock	Sat Aug 10 23:27:26 2002
+++ 2.5.31-akpm/fs/buffer.c	Sat Aug 10 23:27:26 2002
@@ -1277,15 +1277,32 @@ __bread_slow(struct block_device *bdev, 
  *
  * This is a transparent caching front-end to sb_bread(), sb_getblk() and
  * sb_find_get_block().
+ *
+ * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
+ * a local interrupt disable for that.
  */
 
-#define BH_LRU_SIZE	7
+#define BH_LRU_SIZE	8
 
 static struct bh_lru {
-	spinlock_t lock;
 	struct buffer_head *bhs[BH_LRU_SIZE];
 } ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
 
+#ifdef CONFIG_SMP
+#define bh_lru_lock()	local_irq_disable()
+#define bh_lru_unlock()	local_irq_enable()
+#else
+#define bh_lru_lock()	preempt_disable()
+#define bh_lru_unlock()	preempt_enable()
+#endif
+
+static inline void check_irqs_on(void)
+{
+#ifdef irqs_disabled
+	BUG_ON(irqs_disabled());
+#endif
+}
+
 /*
  * The LRU management algorithm is dopey-but-simple.  Sorry.
  */
@@ -1297,8 +1314,9 @@ static void bh_lru_install(struct buffer
 	if (bh == NULL)
 		return;
 
-	lru = &bh_lrus[get_cpu()];
-	spin_lock(&lru->lock);
+	check_irqs_on();
+	bh_lru_lock();
+	lru = &bh_lrus[smp_processor_id()];
 	if (lru->bhs[0] != bh) {
 		struct buffer_head *bhs[BH_LRU_SIZE];
 		int in;
@@ -1324,8 +1342,7 @@ static void bh_lru_install(struct buffer
 			bhs[out++] = NULL;
 		memcpy(lru->bhs, bhs, sizeof(bhs));
 	}
-	spin_unlock(&lru->lock);
-	put_cpu();
+	bh_lru_unlock();
 
 	if (evictee) {
 		touch_buffer(evictee);
@@ -1340,8 +1357,9 @@ lookup_bh(struct block_device *bdev, sec
 	struct bh_lru *lru;
 	int i;
 
-	lru = &bh_lrus[get_cpu()];
-	spin_lock(&lru->lock);
+	check_irqs_on();
+	bh_lru_lock();
+	lru = &bh_lrus[smp_processor_id()];
 	for (i = 0; i < BH_LRU_SIZE; i++) {
 		struct buffer_head *bh = lru->bhs[i];
 
@@ -1359,8 +1377,7 @@ lookup_bh(struct block_device *bdev, sec
 			break;
 		}
 	}
-	spin_unlock(&lru->lock);
-	put_cpu();
+	bh_lru_unlock();
 	return ret;
 }
 
@@ -1407,26 +1424,33 @@ __bread(struct block_device *bdev, secto
 EXPORT_SYMBOL(__bread);
 
 /*
- * This is called rarely - at unmount.
+ * invalidate_bh_lrus() is called rarely - at unmount.  Because it is only for
+ * unmount it only needs to ensure that all buffers from the target device are
+ * invalidated on return and it doesn't need to worry about new buffers from
+ * that device being added - the unmount code has to prevent that.
  */
-static void invalidate_bh_lrus(void)
+static void invalidate_bh_lru(void *arg)
 {
-	int cpu_idx;
+	const int cpu = get_cpu();
+	int i;
 
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
-		spin_lock(&bh_lrus[cpu_idx].lock);
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
-		int i;
-
-		for (i = 0; i < BH_LRU_SIZE; i++) {
-			brelse(bh_lrus[cpu_idx].bhs[i]);
-			bh_lrus[cpu_idx].bhs[i] = NULL;
-		}
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		brelse(bh_lrus[cpu].bhs[i]);
+		bh_lrus[cpu].bhs[i] = NULL;
 	}
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
-		spin_unlock(&bh_lrus[cpu_idx].lock);
+	put_cpu();
+}
+	
+static void invalidate_bh_lrus(void)
+{
+	preempt_disable();
+	invalidate_bh_lru(NULL);
+	smp_call_function(invalidate_bh_lru, NULL, 1, 1);
+	preempt_enable();
 }
 
+
+
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
@@ -2554,9 +2578,6 @@ void __init buffer_init(void)
 {
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++)
-		spin_lock_init(&bh_lrus[i].lock);
-
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
 			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);

.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-13 17:34   ` David S. Miller
  2002-08-13 17:53   ` Andrew Morton
  1 sibling, 0 replies; 7+ messages in thread
From: David S. Miller @ 2002-08-13 17:34 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, linux-kernel

   From: Linus Torvalds <torvalds@transmeta.com>
   Date: Tue, 13 Aug 2002 10:34:37 -0700 (PDT)

   On Sun, 11 Aug 2002, Andrew Morton wrote:
   > Resend.  Replace the buffer lru spinlock protection with
   > local_irq_disable and a cross-CPU call to invalidate them.
   
   This almost certainly breaks on sparc, where CPU cross-calls are 
   non-maskable, so local_irq_disable doesn't do anything for them.
   
   Talk to Davem about this - there may be some workaround.

I told him it's OK in 2.5.x as I've discovered a way to implement
sparc32 (once we have it working again) such that local_irq_disable
blocks cross calls.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
@ 2002-08-13 17:34 ` Linus Torvalds
  2002-08-13 17:34   ` David S. Miller
  2002-08-13 17:53   ` Andrew Morton
  2002-08-14  8:36 ` William Lee Irwin III
  1 sibling, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2002-08-13 17:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: lkml, David S. Miller


On Sun, 11 Aug 2002, Andrew Morton wrote:
> 
> Resend.  Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.

This almost certainly breaks on sparc, where CPU cross-calls are 
non-maskable, so local_irq_disable doesn't do anything for them.

Talk to Davem about this - there may be some workaround.

		Linus


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:53   ` Andrew Morton
@ 2002-08-13 17:52     ` Christoph Hellwig
  2002-08-13 18:09       ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2002-08-13 17:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml, David S. Miller

On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> I have discussed it with David - he said it's OK in 2.5, but
> not in 2.4, and he has eyeballed the diff.
> 
> However there's another thing to think about:
> 
> 	local_irq_disable();
> 	atomic_inc();
> 
> If the architecture implements atomic_inc with spinlocks, this will
> schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> 
> I can fix that with a preempt_disable() in there, but ick.

Is there a reason you can't just use brlocks?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:34 ` Linus Torvalds
  2002-08-13 17:34   ` David S. Miller
@ 2002-08-13 17:53   ` Andrew Morton
  2002-08-13 17:52     ` Christoph Hellwig
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 17:53 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: lkml, David S. Miller

Linus Torvalds wrote:
> 
> On Sun, 11 Aug 2002, Andrew Morton wrote:
> >
> > Resend.  Replace the buffer lru spinlock protection with
> > local_irq_disable and a cross-CPU call to invalidate them.
> 
> This almost certainly breaks on sparc, where CPU cross-calls are
> non-maskable, so local_irq_disable doesn't do anything for them.
> 
> Talk to Davem about this - there may be some workaround.

I have discussed it with David - he said it's OK in 2.5, but
not in 2.4, and he has eyeballed the diff.

However there's another thing to think about:

	local_irq_disable();
	atomic_inc();

If the architecture implements atomic_inc with spinlocks, this will
schedule with interrupts off with CONFIG_PREEMPT=y, I expect.

I can fix that with a preempt_disable() in there, but ick.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:52     ` Christoph Hellwig
@ 2002-08-13 18:09       ` Andrew Morton
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 18:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Linus Torvalds, lkml, David S. Miller

Christoph Hellwig wrote:
> 
> On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> > I have discussed it with David - he said it's OK in 2.5, but
> > not in 2.4, and he has eyeballed the diff.
> >
> > However there's another thing to think about:
> >
> >       local_irq_disable();
> >       atomic_inc();
> >
> > If the architecture implements atomic_inc with spinlocks, this will
> > schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> >
> > I can fix that with a preempt_disable() in there, but ick.
> 
> Is there a reason you can't just use brlocks?

I didn't use brlocks in the initial code because I wanted the lock
in the same cacheline as the data it's locking.

And this code removes the locking altogether.

I suspect the lock traffic is in the noise compared with all the
get_bh, brelse, set_bit and clear_bit operations but it's a start.
We don't have a tool to measure those other things ;)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
  2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-14  8:36 ` William Lee Irwin III
  1 sibling, 0 replies; 7+ messages in thread
From: William Lee Irwin III @ 2002-08-14  8:36 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

On Sun, Aug 11, 2002 at 12:38:27AM -0700, Andrew Morton wrote:
> Resend.  Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.

dbench 256 on a 16x/16G numaq:

Throughput 50.4801 MB/sec (NB=63.1001 MB/sec  504.801 MBit/sec)  256 procs


c013bf74 6612685  74.0731     .text.lock.highmem
c013b7d0 802362   8.98779     kunmap_high
c013b5dc 593514   6.64834     kmap_high
c012f260 107218   1.20102     generic_file_write
c012e53c 85566    0.958481    file_read_actor
c0114820 82810    0.92761     scheduler_tick
c0105394 68424    0.766463    default_idle
c0143c3c 42473    0.475768    block_prepare_write
c01113b8 36678    0.410855    smp_apic_timer_interrupt
c013564c 33728    0.37781     rmqueue
c013bcbc 32868    0.368176    blk_queue_bounce
c0142e38 23095    0.258702    create_empty_buffers
c01143d8 22315    0.249965    load_balance
c012dec0 20193    0.226195    unlock_page
c014325c 19229    0.215397    __block_prepare_write
c013b558 16795    0.188132    flush_all_zero_pkmaps
c0135d28 15512    0.17376     page_cache_release
c013429c 10994    0.123151    lru_cache_add
c0135b10 10675    0.119578    __alloc_pages
c013fa50 9353     0.104769    generic_file_llseek
c0140044 8889     0.0995716   vfs_write
c012dcb4 8417     0.0942844   add_to_page_cache

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2002-08-14  8:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34   ` David S. Miller
2002-08-13 17:53   ` Andrew Morton
2002-08-13 17:52     ` Christoph Hellwig
2002-08-13 18:09       ` Andrew Morton
2002-08-14  8:36 ` William Lee Irwin III

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.