All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 2/21] reduced locking in buffer.c
@ 2002-08-11  7:38 Andrew Morton
  2002-08-13 17:34 ` Linus Torvalds
  2002-08-14  8:36 ` William Lee Irwin III
  0 siblings, 2 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-11  7:38 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: lkml



Resend.  Replace the buffer lru spinlock protection with
local_irq_disable and a cross-CPU call to invalidate them.



 buffer.c |   75 ++++++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 48 insertions(+), 27 deletions(-)

--- 2.5.31/fs/buffer.c~buffer-lru-lock	Sat Aug 10 23:27:26 2002
+++ 2.5.31-akpm/fs/buffer.c	Sat Aug 10 23:27:26 2002
@@ -1277,15 +1277,32 @@ __bread_slow(struct block_device *bdev, 
  *
  * This is a transparent caching front-end to sb_bread(), sb_getblk() and
  * sb_find_get_block().
+ *
+ * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
+ * a local interrupt disable for that.
  */
 
-#define BH_LRU_SIZE	7
+#define BH_LRU_SIZE	8
 
 static struct bh_lru {
-	spinlock_t lock;
 	struct buffer_head *bhs[BH_LRU_SIZE];
 } ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
 
+#ifdef CONFIG_SMP
+#define bh_lru_lock()	local_irq_disable()
+#define bh_lru_unlock()	local_irq_enable()
+#else
+#define bh_lru_lock()	preempt_disable()
+#define bh_lru_unlock()	preempt_enable()
+#endif
+
+static inline void check_irqs_on(void)
+{
+#ifdef irqs_disabled
+	BUG_ON(irqs_disabled());
+#endif
+}
+
 /*
  * The LRU management algorithm is dopey-but-simple.  Sorry.
  */
@@ -1297,8 +1314,9 @@ static void bh_lru_install(struct buffer
 	if (bh == NULL)
 		return;
 
-	lru = &bh_lrus[get_cpu()];
-	spin_lock(&lru->lock);
+	check_irqs_on();
+	bh_lru_lock();
+	lru = &bh_lrus[smp_processor_id()];
 	if (lru->bhs[0] != bh) {
 		struct buffer_head *bhs[BH_LRU_SIZE];
 		int in;
@@ -1324,8 +1342,7 @@ static void bh_lru_install(struct buffer
 			bhs[out++] = NULL;
 		memcpy(lru->bhs, bhs, sizeof(bhs));
 	}
-	spin_unlock(&lru->lock);
-	put_cpu();
+	bh_lru_unlock();
 
 	if (evictee) {
 		touch_buffer(evictee);
@@ -1340,8 +1357,9 @@ lookup_bh(struct block_device *bdev, sec
 	struct bh_lru *lru;
 	int i;
 
-	lru = &bh_lrus[get_cpu()];
-	spin_lock(&lru->lock);
+	check_irqs_on();
+	bh_lru_lock();
+	lru = &bh_lrus[smp_processor_id()];
 	for (i = 0; i < BH_LRU_SIZE; i++) {
 		struct buffer_head *bh = lru->bhs[i];
 
@@ -1359,8 +1377,7 @@ lookup_bh(struct block_device *bdev, sec
 			break;
 		}
 	}
-	spin_unlock(&lru->lock);
-	put_cpu();
+	bh_lru_unlock();
 	return ret;
 }
 
@@ -1407,26 +1424,33 @@ __bread(struct block_device *bdev, secto
 EXPORT_SYMBOL(__bread);
 
 /*
- * This is called rarely - at unmount.
+ * invalidate_bh_lrus() is called rarely - at unmount.  Because it is only for
+ * unmount it only needs to ensure that all buffers from the target device are
+ * invalidated on return and it doesn't need to worry about new buffers from
+ * that device being added - the unmount code has to prevent that.
  */
-static void invalidate_bh_lrus(void)
+static void invalidate_bh_lru(void *arg)
 {
-	int cpu_idx;
+	const int cpu = get_cpu();
+	int i;
 
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
-		spin_lock(&bh_lrus[cpu_idx].lock);
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
-		int i;
-
-		for (i = 0; i < BH_LRU_SIZE; i++) {
-			brelse(bh_lrus[cpu_idx].bhs[i]);
-			bh_lrus[cpu_idx].bhs[i] = NULL;
-		}
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		brelse(bh_lrus[cpu].bhs[i]);
+		bh_lrus[cpu].bhs[i] = NULL;
 	}
-	for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
-		spin_unlock(&bh_lrus[cpu_idx].lock);
+	put_cpu();
+}
+	
+static void invalidate_bh_lrus(void)
+{
+	preempt_disable();
+	invalidate_bh_lru(NULL);
+	smp_call_function(invalidate_bh_lru, NULL, 1, 1);
+	preempt_enable();
 }
 
+
+
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
@@ -2554,9 +2578,6 @@ void __init buffer_init(void)
 {
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++)
-		spin_lock_init(&bh_lrus[i].lock);
-
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
 			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);

.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-13 17:34   ` David S. Miller
  2002-08-13 17:53   ` Andrew Morton
  1 sibling, 0 replies; 7+ messages in thread
From: David S. Miller @ 2002-08-13 17:34 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, linux-kernel

   From: Linus Torvalds <torvalds@transmeta.com>
   Date: Tue, 13 Aug 2002 10:34:37 -0700 (PDT)

   On Sun, 11 Aug 2002, Andrew Morton wrote:
   > Resend.  Replace the buffer lru spinlock protection with
   > local_irq_disable and a cross-CPU call to invalidate them.
   
   This almost certainly breaks on sparc, where CPU cross-calls are 
   non-maskable, so local_irq_disable doesn't do anything for them.
   
   Talk to Davem about this - there may be some workaround.

I told him it's OK in 2.5.x as I've discovered a way to implement
sparc32 (once we have it working again) such that local_irq_disable
blocks cross calls.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
@ 2002-08-13 17:34 ` Linus Torvalds
  2002-08-13 17:34   ` David S. Miller
  2002-08-13 17:53   ` Andrew Morton
  2002-08-14  8:36 ` William Lee Irwin III
  1 sibling, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2002-08-13 17:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: lkml, David S. Miller


On Sun, 11 Aug 2002, Andrew Morton wrote:
> 
> Resend.  Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.

This almost certainly breaks on sparc, where CPU cross-calls are 
non-maskable, so local_irq_disable doesn't do anything for them.

Talk to Davem about this - there may be some workaround.

		Linus


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:53   ` Andrew Morton
@ 2002-08-13 17:52     ` Christoph Hellwig
  2002-08-13 18:09       ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2002-08-13 17:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml, David S. Miller

On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> I have discussed it with David - he said it's OK in 2.5, but
> not in 2.4, and he has eyeballed the diff.
> 
> However there's another thing to think about:
> 
> 	local_irq_disable();
> 	atomic_inc();
> 
> If the architecture implements atomic_inc with spinlocks, this will
> schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> 
> I can fix that with a preempt_disable() in there, but ick.

Is there a reason you can't just use brlocks?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:34 ` Linus Torvalds
  2002-08-13 17:34   ` David S. Miller
@ 2002-08-13 17:53   ` Andrew Morton
  2002-08-13 17:52     ` Christoph Hellwig
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 17:53 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: lkml, David S. Miller

Linus Torvalds wrote:
> 
> On Sun, 11 Aug 2002, Andrew Morton wrote:
> >
> > Resend.  Replace the buffer lru spinlock protection with
> > local_irq_disable and a cross-CPU call to invalidate them.
> 
> This almost certainly breaks on sparc, where CPU cross-calls are
> non-maskable, so local_irq_disable doesn't do anything for them.
> 
> Talk to Davem about this - there may be some workaround.

I have discussed it with David - he said it's OK in 2.5, but
not in 2.4, and he has eyeballed the diff.

However there's another thing to think about:

	local_irq_disable();
	atomic_inc();

If the architecture implements atomic_inc with spinlocks, this will
schedule with interrupts off with CONFIG_PREEMPT=y, I expect.

I can fix that with a preempt_disable() in there, but ick.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-13 17:52     ` Christoph Hellwig
@ 2002-08-13 18:09       ` Andrew Morton
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Morton @ 2002-08-13 18:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Linus Torvalds, lkml, David S. Miller

Christoph Hellwig wrote:
> 
> On Tue, Aug 13, 2002 at 10:53:59AM -0700, Andrew Morton wrote:
> > I have discussed it with David - he said it's OK in 2.5, but
> > not in 2.4, and he has eyeballed the diff.
> >
> > However there's another thing to think about:
> >
> >       local_irq_disable();
> >       atomic_inc();
> >
> > If the architecture implements atomic_inc with spinlocks, this will
> > schedule with interrupts off with CONFIG_PREEMPT=y, I expect.
> >
> > I can fix that with a preempt_disable() in there, but ick.
> 
> Is there a reason you can't just use brlocks?

I didn't use brlocks in the initial code because I wanted the lock
in the same cacheline as the data it's locking.

And this code removes the locking altogether.

I suspect the lock traffic is in the noise compared with all the
get_bh, brelse, set_bit and clear_bit operations but it's a start.
We don't have a tool to measure those other things ;)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/21] reduced locking in buffer.c
  2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
  2002-08-13 17:34 ` Linus Torvalds
@ 2002-08-14  8:36 ` William Lee Irwin III
  1 sibling, 0 replies; 7+ messages in thread
From: William Lee Irwin III @ 2002-08-14  8:36 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

On Sun, Aug 11, 2002 at 12:38:27AM -0700, Andrew Morton wrote:
> Resend.  Replace the buffer lru spinlock protection with
> local_irq_disable and a cross-CPU call to invalidate them.

dbench 256 on a 16x/16G numaq:

Throughput 50.4801 MB/sec (NB=63.1001 MB/sec  504.801 MBit/sec)  256 procs


c013bf74 6612685  74.0731     .text.lock.highmem
c013b7d0 802362   8.98779     kunmap_high
c013b5dc 593514   6.64834     kmap_high
c012f260 107218   1.20102     generic_file_write
c012e53c 85566    0.958481    file_read_actor
c0114820 82810    0.92761     scheduler_tick
c0105394 68424    0.766463    default_idle
c0143c3c 42473    0.475768    block_prepare_write
c01113b8 36678    0.410855    smp_apic_timer_interrupt
c013564c 33728    0.37781     rmqueue
c013bcbc 32868    0.368176    blk_queue_bounce
c0142e38 23095    0.258702    create_empty_buffers
c01143d8 22315    0.249965    load_balance
c012dec0 20193    0.226195    unlock_page
c014325c 19229    0.215397    __block_prepare_write
c013b558 16795    0.188132    flush_all_zero_pkmaps
c0135d28 15512    0.17376     page_cache_release
c013429c 10994    0.123151    lru_cache_add
c0135b10 10675    0.119578    __alloc_pages
c013fa50 9353     0.104769    generic_file_llseek
c0140044 8889     0.0995716   vfs_write
c012dcb4 8417     0.0942844   add_to_page_cache

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2002-08-14  8:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-08-11  7:38 [patch 2/21] reduced locking in buffer.c Andrew Morton
2002-08-13 17:34 ` Linus Torvalds
2002-08-13 17:34   ` David S. Miller
2002-08-13 17:53   ` Andrew Morton
2002-08-13 17:52     ` Christoph Hellwig
2002-08-13 18:09       ` Andrew Morton
2002-08-14  8:36 ` William Lee Irwin III

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.