From: Juergen Doelle <jdoelle@de.ibm.com>
To: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Linus Torvalds <torvalds@transmeta.com>, linux-kernel@vger.kernel.org
Subject: Pls apply this spinlock patch to the kernel
Date: Mon, 29 Oct 2001 17:22:25 +0100 [thread overview]
Message-ID: <3BDD8241.8002B946@de.ibm.com> (raw)
Alan,
I have created a patch that improves spinlock behavior on IA32 SMP
systems for the file system workload generated by dbench
(ftp://samba.org/pub/tridge/dbench). The work load is a mix of create,
delete, write, and read operations executed from a scalable number of
clients. It is mainly handled in the buffer cache.
The patch introduces a new spinlock type, which is cacheline aligned
and occupies the full cacheline. The five hottest spinlocks for this
work load are set to that type (kmap_lock, lru_list_lock,
pagecache_lock, kernel_flag, pagemap_lru_lock). The change is made in
the common code, except for kernel_flag.
Because each lock now has its own cacheline, the communication
overhead for L1 cache synchronization is at a minimum for these
spinlocks. This results in a significant improvement on an IA32 system
(PIII Xeon 700 MHz) for 4 and 8 CPUs.
The improvement by the patch on 2.4.12-ac5 is:
#CPU peak throughput average from 8,10,12,..20 clients
U 0.0% 0.1%
1 -0.1% 0.0%
2 -0.8% -1.2%
4 3.7% 4.4%
8 16.5% 23.2%
Note:
o Differences of 0.1% can be considered as noise. UP should behave
unchanged.
o The throughput for 8 CPUs on the base kernel immediately declines
after the maximum at 6 clients.
With the patch the 8-way does not really scale, but it is no longer
worse than a 4-way for that kind of workload.
I have already posted detailed measurement results and the patch to
the lse-tech mailing list and LKML. The current state of the patch
includes feedback from Stephen Tweedie, Daniel Phillips, Andrea Arcangeli
and Andrew Morton.
Because the patch modifies common code, I also verified it on linux/390.
On that architecture it does not change the results. It seems that the
overhead for cache synchronization is already minimal, because the CPUs
are sharing one L2 cache. But this shows that the patch runs on other
architectures, and in the worst case it does no harm, even on
a system with a 256 byte cacheline.
I think this change is worth merging into the kernel, because it improves
scaling for servers with 4 or more CPUs and the degradation for 2 CPU
systems is really small.
Can you apply it to your kernel?
Thanks!
Juergen
______________________________________________________________
Juergen Doelle
IBM Linux Technology Center - kernel performance
jdoelle@de.ibm.com
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
--- linux/include/linux/spinlock.h.orig Tue Sep 25 09:05:09 2001
+++ linux/include/linux/spinlock.h Tue Sep 25 11:42:51 2001
@@ -133,4 +133,20 @@
extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
#endif
+#ifdef CONFIG_SMP
+#include <linux/cache.h>
+
+typedef union {
+ spinlock_t lock;
+ char fill_up[(SMP_CACHE_BYTES)];
+} __attribute__ ((aligned(SMP_CACHE_BYTES))) spinlock_cacheline_t;
+
+#else /* SMP */
+
+typedef struct {
+ spinlock_t lock;
+} spinlock_cacheline_t;
+
+
+#endif
#endif /* __LINUX_SPINLOCK_H */
--- linux/include/linux/swap.h.orig Tue Sep 25 09:59:15 2001
+++ linux/include/linux/swap.h Tue Sep 25 11:42:51 2001
@@ -86,7 +86,10 @@
extern atomic_t nr_async_pages;
extern atomic_t page_cache_size;
extern atomic_t buffermem_pages;
-extern spinlock_t pagecache_lock;
+
+extern spinlock_cacheline_t pagecache_lock_cacheline;
+#define pagecache_lock (pagecache_lock_cacheline.lock)
+
extern void __remove_inode_page(struct page *);
/* Incomplete types for prototype declarations: */
@@ -159,7 +162,8 @@
extern unsigned long swap_cache_find_success;
#endif
-extern spinlock_t pagemap_lru_lock;
+extern spinlock_cacheline_t pagemap_lru_lock_cacheline;
+#define pagemap_lru_lock pagemap_lru_lock_cacheline.lock
extern void FASTCALL(mark_page_accessed(struct page *));
--- linux/include/asm-i386/smplock.h.orig Tue Sep 25 11:29:51 2001
+++ linux/include/asm-i386/smplock.h Tue Sep 25 11:42:52 2001
@@ -8,7 +8,8 @@
#include <linux/sched.h>
#include <asm/current.h>
-extern spinlock_t kernel_flag;
+extern spinlock_cacheline_t kernel_flag_cacheline;
+#define kernel_flag kernel_flag_cacheline.lock
#define kernel_locked() spin_is_locked(&kernel_flag)
--- linux/arch/i386/kernel/i386_ksyms.c.orig Tue Sep 25 09:14:57 2001
+++ linux/arch/i386/kernel/i386_ksyms.c Tue Sep 25 10:13:06 2001
@@ -120,7 +120,7 @@
#ifdef CONFIG_SMP
EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(kernel_flag);
+EXPORT_SYMBOL(kernel_flag_cacheline);
EXPORT_SYMBOL(smp_num_cpus);
EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL_NOVERS(__write_lock_failed);
--- linux/arch/i386/kernel/smp.c.orig Tue Sep 25 09:15:16 2001
+++ linux/arch/i386/kernel/smp.c Tue Sep 25 10:13:06 2001
@@ -101,7 +101,7 @@
*/
/* The 'big kernel lock' */
-spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED};
struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { &init_mm, 0 }};
--- linux/fs/buffer.c.orig Tue Sep 25 09:15:47 2001
+++ linux/fs/buffer.c Wed Sep 26 12:17:29 2001
@@ -81,7 +81,10 @@
static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED;
static struct buffer_head *lru_list[NR_LIST];
-static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED;
+
+static spinlock_cacheline_t lru_list_lock_cacheline = {SPIN_LOCK_UNLOCKED};
+#define lru_list_lock lru_list_lock_cacheline.lock
+
static int nr_buffers_type[NR_LIST];
static unsigned long size_buffers_type[NR_LIST];
--- linux/mm/filemap.c.orig Tue Sep 25 09:16:06 2001
+++ linux/mm/filemap.c Tue Sep 25 10:13:06 2001
@@ -46,12 +46,13 @@
unsigned int page_hash_bits;
struct page **page_hash_table;
-spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+spinlock_cacheline_t pagecache_lock_cacheline = {SPIN_LOCK_UNLOCKED};
+
/*
* NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
* the pagemap_lru_lock held.
*/
-spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
--- linux/mm/highmem.c.orig Tue Sep 25 09:29:49 2001
+++ linux/mm/highmem.c Tue Sep 25 10:21:14 2001
@@ -32,7 +32,8 @@
*/
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
-static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED};
+#define kmap_lock kmap_lock_cacheline.lock
pte_t * pkmap_page_table;
next reply other threads:[~2001-10-29 16:57 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2001-10-29 16:22 Juergen Doelle [this message]
2001-10-29 17:13 ` Pls apply this spinlock patch to the kernel Linus Torvalds
2001-10-29 17:22 ` Alan Cox
2001-10-29 17:32 ` Linus Torvalds
2001-11-03 19:55 ` Richard Henderson
2001-11-03 20:20 ` Linus Torvalds
2001-11-03 21:01 ` Richard Henderson
2001-11-04 0:43 ` Linus Torvalds
2001-10-30 13:14 ` Anton Blanchard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3BDD8241.8002B946@de.ibm.com \
--to=jdoelle@de.ibm.com \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox