public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Nick Piggin <nickpiggin@yahoo.com.au>,
	Ulrich Drepper <drepper@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>,
	Ravikiran G Thirumalai <kiran@scalex86.org>,
	"Shai Fultheim (Shai@scalex86.org)" <shai@scalex86.org>,
	pravin b shelar <pravin.shelar@calsoftinc.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 2/3] FUTEX : introduce private hashtables
Date: Thu, 15 Mar 2007 20:16:25 +0100	[thread overview]
Message-ID: <200703152016.25703.dada1@cosmosbay.com> (raw)
In-Reply-To: <200608090843.52893.dada1@cosmosbay.com>

[-- Attachment #1: Type: text/plain, Size: 1002 bytes --]

[PATCH 2/3] FUTEX : introduce private hashtables

This patch introduces a separate hashtable per process to store _PRIVATE 
futexes.
This hashtable is dynamically allocated on the first _PRIVATE futex syscall.
If memory cannot be allocated, the process will use the global hashtable.

Using a separate hashtable has the advantage of lowering the contention on the
global hashtable. NUMA systems should benefit from this separation because the
allocation should respect the mm policy of the process.

The code uses kmalloc() or vmalloc() depending on the size of the table, which
in turn depends on the size of a spinlock. For a normal setup, the size of the
private hashtable should be 768 bytes on 32-bit arches and 1536 bytes on 64-bit
arches.

The private hashtable is freed when the process exits.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
 include/linux/futex.h |    4 +
 include/linux/sched.h |    7 ++
 kernel/fork.c         |    1 
 kernel/futex.c        |  112 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 120 insertions(+), 4 deletions(-)

[-- Attachment #2: futex_p2.patch --]
[-- Type: text/plain, Size: 5570 bytes --]

--- linux-2.6.21-rc3/kernel/futex.c	2007-03-15 18:30:15.000000000 +0100
+++ linux-2.6.21-rc3-ed/kernel/futex.c	2007-03-15 18:54:47.000000000 +0100
@@ -51,11 +51,11 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
+#include <linux/vmalloc.h>
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
 
-#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 /*
  * Futexes are matched on equal values of this key.
@@ -147,11 +147,96 @@ struct futex_hash_bucket {
        struct list_head       chain;
 };
 
-static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
+
+#if CONFIG_BASE_SMALL
+# define FUTEX_HASH_SLOTS	16
+# define FUTEX_NOPRIVHASH	/* no private hashtable, only one global */
+#else
+# define FUTEX_HASH_SLOTS	256
+# define FUTEX_PRIVHASH_SLOTS	64
+#endif
+
+#define FUTEX_PRIVHASH_SIZE \
+	(FUTEX_PRIVHASH_SLOTS * sizeof(struct futex_hash_bucket))
+/*
+ * futex_queues[] : global hash table
+ *
+ * PTHREAD_PROCESS_SHARED futexes are hashed into this table
+ *
+ * PTHREAD_PROCESS_PRIVATE futexes may be hashed into this table too if the 
+ * owner process failed to allocate its private hashtable (or CONFIG_BASE_SMALL)
+ *
+ */
+static struct futex_hash_bucket futex_queues[FUTEX_HASH_SLOTS];
 
 /* Futex-fs vfsmount entry: */
 static struct vfsmount *futex_mnt;
 
+
+/*
+ * private futexes are hashed into a process private table.
+ * As this table is dynamically allocated, it might be in fact
+ * the global table in case of memory stress.
+ * A pointer to this table is kept in mm_struct.
+ */
+#ifndef FUTEX_NOPRIVHASH
+static void mm_priv_queues_alloc(struct mm_struct *mm)
+{
+	unsigned int ui;
+	struct futex_hash_bucket *queues;
+
+	/*
+	 * FUTEX_PRIVHASH_SIZE is a constant, compiler should choose
+	 * either vmalloc()/kmalloc()  :)
+	 */
+	if (FUTEX_PRIVHASH_SIZE > PAGE_SIZE)
+		queues = vmalloc(FUTEX_PRIVHASH_SIZE);
+	else
+		queues = kmalloc(FUTEX_PRIVHASH_SIZE, GFP_KERNEL);
+
+	if (queues) {
+		for (ui = 0; ui < FUTEX_PRIVHASH_SLOTS; ui++) {
+			spin_lock_init(&queues[ui].lock);
+			INIT_LIST_HEAD(&queues[ui].chain);
+		}
+		spin_lock(&mm->page_table_lock);
+		/*
+		 * check if another thread installed a table before me
+		 */
+		if (mm->mm_priv_futex_queues) {
+			if (FUTEX_PRIVHASH_SIZE > PAGE_SIZE)
+				vfree(queues);
+			else
+				kfree(queues);
+		}
+		else
+			mm->mm_priv_futex_queues = queues;
+	}
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (!mm->mm_priv_futex_queues)
+			mm->mm_priv_futex_queues = futex_queues;
+	}
+	spin_unlock(&mm->page_table_lock);
+}
+#endif
+
+/*
+ * Called from __mmdrop()/mm_free_futex() to eventually free private futexes
+ * hash table attached to mm
+ */
+void __mm_free_futex(struct mm_struct *mm)
+{
+#ifndef FUTEX_NOPRIVHASH
+	if (mm->mm_priv_futex_queues != futex_queues) {
+		if (FUTEX_PRIVHASH_SIZE > PAGE_SIZE)
+			vfree(mm->mm_priv_futex_queues);
+		else
+			kfree(mm->mm_priv_futex_queues);
+	}
+#endif
+}
+
 /*
  * We hash on the keys returned from get_futex_key (see below).
  */
@@ -159,8 +244,27 @@ static struct futex_hash_bucket *hash_fu
 {
 	u32 hash = jhash2((u32*)&key->both.word,
 			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
-			  key->both.offset);
-	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
+			  key->both.offset) % FUTEX_HASH_SLOTS;
+
+#ifdef FUTEX_NOPRIVHASH
+	return &futex_queues[hash];
+#else
+	struct mm_struct *mm;
+	/*
+	 * PROCESS_SHARED futexes are hashed into futex_queues[]
+	 */
+	if (key->both.offset & (OFF_INODE|OFF_MMSHARED))
+		return &futex_queues[hash];
+
+	if (FUTEX_PRIVHASH_SLOTS < FUTEX_HASH_SLOTS)
+		hash %= FUTEX_PRIVHASH_SLOTS;
+
+	mm = current->mm;
+	if (unlikely(!mm->mm_priv_futex_queues))
+		mm_priv_queues_alloc(mm);
+
+	return &mm->mm_priv_futex_queues[hash];
+#endif
 }
 
 /*
--- linux-2.6.21-rc3/include/linux/futex.h	2007-03-15 18:08:37.000000000 +0100
+++ linux-2.6.21-rc3-ed/include/linux/futex.h	2007-03-15 18:31:40.000000000 +0100
@@ -115,6 +115,7 @@ handle_futex_death(u32 __user *uaddr, st
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
 extern void exit_pi_state_list(struct task_struct *curr);
+extern void __mm_free_futex(struct mm_struct *mm);
 #else
 static inline void exit_robust_list(struct task_struct *curr)
 {
@@ -122,6 +123,9 @@ static inline void exit_robust_list(stru
 static inline void exit_pi_state_list(struct task_struct *curr)
 {
 }
+static inline void __mm_free_futex(struct mm_struct *mm)
+{
+}
 #endif
 #endif /* __KERNEL__ */
 
--- linux-2.6.21-rc3/include/linux/sched.h	2007-03-15 18:32:08.000000000 +0100
+++ linux-2.6.21-rc3-ed/include/linux/sched.h	2007-03-15 18:31:40.000000000 +0100
@@ -373,6 +373,8 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+	/* private futexes */
+	struct futex_hash_bucket *mm_priv_futex_queues;
 };
 
 struct sighand_struct {
@@ -1374,6 +1376,11 @@ static inline int sas_ss_flags(unsigned 
  * Routines for handling mm_structs
  */
 extern struct mm_struct * mm_alloc(void);
+static inline void mm_free_futex(struct mm_struct * mm)
+{
+	if (mm->mm_priv_futex_queues)
+		__mm_free_futex(mm);
+}
 
 /* mmdrop drops the mm and the page tables */
 extern void FASTCALL(__mmdrop(struct mm_struct *));
--- linux-2.6.21-rc3/kernel/fork.c	2007-03-15 18:32:08.000000000 +0100
+++ linux-2.6.21-rc3-ed/kernel/fork.c	2007-03-15 18:31:40.000000000 +0100
@@ -374,6 +374,7 @@ void fastcall __mmdrop(struct mm_struct 
 	BUG_ON(mm == &init_mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
+	mm_free_futex(mm);
 	free_mm(mm);
 }
 

  parent reply	other threads:[~2007-03-15 19:16 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-08  7:07 [RFC] NUMA futex hashing Ravikiran G Thirumalai
2006-08-08  9:14 ` Eric Dumazet
2006-08-08 20:31   ` Ravikiran G Thirumalai
2006-08-08  9:37 ` Jes Sorensen
2006-08-08  9:58   ` Andi Kleen
2006-08-08 10:07     ` Jes Sorensen
2006-08-08  9:57 ` Andi Kleen
2006-08-08 10:10   ` Eric Dumazet
2006-08-08 10:36     ` Andi Kleen
2006-08-08 12:29       ` Eric Dumazet
2006-08-08 12:47         ` Andi Kleen
2006-08-08 12:57           ` Eric Dumazet
2006-08-08 14:39             ` Ulrich Drepper
2006-08-08 15:11               ` Nick Piggin
2006-08-08 15:36                 ` Ulrich Drepper
2006-08-08 16:22                   ` Nick Piggin
2006-08-08 16:26                     ` Nick Piggin
2006-08-08 16:49                     ` Ulrich Drepper
2006-08-08 16:08                 ` Eric Dumazet
2006-08-08 16:34                   ` Nick Piggin
2006-08-08 16:49                     ` Eric Dumazet
2006-08-08 16:59                       ` Eric Dumazet
2006-08-09  1:56                       ` Nick Piggin
2006-08-08 16:58                   ` Ulrich Drepper
2006-08-08 17:08                     ` Eric Dumazet
2006-08-09  1:58                     ` Nick Piggin
2006-08-09  6:26                       ` Eric Dumazet
2006-08-09  6:43                         ` Eric Dumazet
2007-03-15 19:10                           ` [PATCH 0/3] FUTEX : new PRIVATE futexes, SMP and NUMA improvements Eric Dumazet
2007-03-15 20:15                             ` Nick Piggin
2007-03-16  8:05                             ` Peter Zijlstra
2007-03-16  9:30                               ` Eric Dumazet
2007-03-16 10:10                                 ` Peter Zijlstra
2007-03-16 10:30                                   ` Eric Dumazet
2007-03-16 10:36                                     ` Peter Zijlstra
2007-04-04  7:16                             ` Ulrich Drepper
2007-04-05 17:49                               ` [PATCH] FUTEX : new PRIVATE futexes Eric Dumazet
2007-04-05 20:43                                 ` Ulrich Drepper
2007-04-06  1:19                                 ` Nick Piggin
2007-04-06  5:53                                   ` Eric Dumazet
2007-04-06 11:50                                     ` Nick Piggin
2007-04-06  6:05                                   ` Hugh Dickins
2007-04-06 17:41                                     ` Jan Engelhardt
2007-04-06 12:26                                 ` Shared futexes (was [PATCH] FUTEX : new PRIVATE futexes) Peter Zijlstra
2007-04-06 13:02                                   ` Hugh Dickins
2007-04-06 13:15                                     ` Peter Zijlstra
2007-04-06 13:15                                     ` Nick Piggin
2007-04-06 13:22                                       ` Peter Zijlstra
2007-04-06 13:40                                         ` Nick Piggin
2007-04-06 12:31                                 ` [PATCH] FUTEX : new PRIVATE futexes Peter Zijlstra
2007-04-07  8:43                                 ` [PATCH, take4] " Eric Dumazet
2007-04-07  9:30                                   ` Nick Piggin
2007-04-07 10:00                                     ` Eric Dumazet
2007-04-11  7:22                                       ` Nick Piggin
2007-04-11  8:14                                         ` Eric Dumazet
2007-04-11  9:23                                           ` Nick Piggin
2007-04-11  9:30                                             ` Pierre Peiffer
2007-04-11  9:39                                               ` Nick Piggin
2007-04-11  9:40                                                 ` Nick Piggin
2007-04-11  9:35                                             ` Eric Dumazet
2007-04-12  1:57                                               ` Nick Piggin
2007-04-07 11:18                                   ` Jakub Jelinek
2007-04-07 11:54                                     ` Eric Dumazet
2007-04-07 16:40                                       ` Ulrich Drepper
2007-04-07 22:15                                   ` Andrew Morton
2007-04-10  9:21                                     ` Eric Dumazet
2007-04-11  9:19                                   ` [PATCH, take5] " Eric Dumazet
2007-04-11 12:23                                     ` Rusty Russell
2007-04-26 12:55                                     ` [PATCH, take6] " Eric Dumazet
2007-04-26 13:35                                       ` Pierre Peiffer
2007-03-15 19:13                           ` [PATCH 1/3] FUTEX : introduce PROCESS_PRIVATE semantic Eric Dumazet
2007-03-15 19:16                           ` Eric Dumazet [this message]
2007-03-15 20:25                             ` [PATCH 2/3] FUTEX : introduce private hashtables Nick Piggin
2007-03-15 21:09                               ` Ulrich Drepper
2007-03-15 21:29                                 ` Nick Piggin
2007-03-15 22:59                               ` William Lee Irwin III
2007-03-15 19:20                           ` [PATCH 3/3] FUTEX : NUMA friendly global hashtable Eric Dumazet
2006-08-09  0:13     ` [RFC] NUMA futex hashing Ravikiran G Thirumalai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200703152016.25703.dada1@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=ak@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=drepper@gmail.com \
    --cc=kiran@scalex86.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nickpiggin@yahoo.com.au \
    --cc=pravin.shelar@calsoftinc.com \
    --cc=shai@scalex86.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox