From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
Paul Turner <pjt@google.com>,
Suresh Siddha <suresh.b.siddha@intel.com>,
Mike Galbraith <efault@gmx.de>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
Dan Smith <danms@us.ibm.com>,
Bharata B Rao <bharata.rao@gmail.com>,
Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Rik van Riel <riel@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 23/26] sched, numa: Introduce sys_numa_{t,m}bind()
Date: Fri, 16 Mar 2012 15:40:51 +0100 [thread overview]
Message-ID: <20120316144241.612966692@chello.nl> (raw)
In-Reply-To: 20120316144028.036474157@chello.nl
[-- Attachment #1: numa-foo-syscall.patch --]
[-- Type: text/plain, Size: 20026 bytes --]
Now that we have a NUMA process scheduler, provide a syscall interface
for finer granularity NUMA balancing. In particular this allows
setting up NUMA groups of threads and vmas within a process.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/syscalls/syscall_32.tbl | 2
arch/x86/syscalls/syscall_64.tbl | 2
include/asm-generic/unistd.h | 6
include/linux/mempolicy.h | 35 ++
include/linux/sched.h | 2
include/linux/syscalls.h | 3
kernel/exit.c | 1
kernel/sched/numa.c | 582 ++++++++++++++++++++++++++++++++++++++-
kernel/sys_ni.c | 4
mm/mempolicy.c | 8
10 files changed, 639 insertions(+), 6 deletions(-)
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,5 @@
346 i386 setns sys_setns
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 numa_mbind sys_numa_mbind compat_sys_numa_mbind
+350 i386 numa_tbind sys_numa_tbind compat_sys_numa_tbind
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
+312 64 numa_mbind sys_numa_mbind
+313 64 numa_tbind sys_numa_tbind
#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -691,9 +691,13 @@ __SC_COMP(__NR_process_vm_readv, sys_pro
#define __NR_process_vm_writev 271
__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
compat_sys_process_vm_writev)
+#define __NR_numa_mbind 272
+__SC_COMP(__NR_numa_mbind, sys_numa_mbind, compat_sys_numa_mbind)
+#define __NR_numa_tbind 273
+__SC_COMP(__NR_numa_tbind, sys_numa_tbind, compat_sys_numa_tbind)
#undef __NR_syscalls
-#define __NR_syscalls 272
+#define __NR_syscalls 274
/*
* All syscalls below here should go away really,
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -78,6 +78,8 @@ enum mpol_rebind_step {
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/migrate.h>
+#include <linux/list.h>
+#include <linux/sched.h>
struct mm_struct;
@@ -109,6 +111,10 @@ struct mempolicy {
atomic_t refcnt;
unsigned short mode; /* See MPOL_* above */
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
+ struct numa_group *numa_group;
+ struct list_head ng_entry;
+ struct vm_area_struct *vma;
+ struct rcu_head rcu;
union {
short preferred_node; /* preferred */
nodemask_t nodes; /* interleave/bind */
@@ -396,6 +402,35 @@ static inline int mpol_to_str(char *buff
}
#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_NUMA
+
+extern void __numa_task_exit(struct task_struct *);
+extern void numa_vma_link(struct vm_area_struct *, struct vm_area_struct *);
+extern void numa_vma_unlink(struct vm_area_struct *);
+extern void __numa_add_vma_counter(struct vm_area_struct *, int, long);
+
+static inline
+void numa_add_vma_counter(struct vm_area_struct *vma, int member, long value)
+{
+ if (vma->vm_policy && vma->vm_policy->numa_group)
+ __numa_add_vma_counter(vma, member, value);
+}
+
+static inline void numa_task_exit(struct task_struct *p)
+{
+ if (p->numa_group)
+ __numa_task_exit(p);
+}
+
+#else /* CONFIG_NUMA */
+
+static inline void numa_task_exit(struct task_struct *p) { }
+static inline void numa_vma_link(struct vm_area_struct *new, struct vm_area_struct *old) { }
+static inline void numa_vma_unlink(struct vm_area_struct *vma) { }
+
+#endif /* CONFIG_NUMA */
+
#endif /* __KERNEL__ */
#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1548,6 +1548,8 @@ struct task_struct {
short il_next;
short pref_node_fork;
int node;
+ struct numa_group *numa_group;
+ struct list_head ng_entry;
#endif
struct rcu_head rcu;
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -856,5 +856,8 @@ asmlinkage long sys_process_vm_writev(pi
const struct iovec __user *rvec,
unsigned long riovcnt,
unsigned long flags);
+asmlinkage long sys_numa_mbind(unsigned long addr, unsigned long len,
+ int ng_id, unsigned long flags);
+asmlinkage long sys_numa_tbind(int tid, int ng_id, unsigned long flags);
#endif
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1010,6 +1010,7 @@ void do_exit(long code)
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
task_unlock(tsk);
+ numa_task_exit(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
--- a/kernel/sched/numa.c
+++ b/kernel/sched/numa.c
@@ -14,6 +14,7 @@
#include <linux/mempolicy.h>
#include <linux/kthread.h>
+#include <linux/compat.h>
#include "sched.h"
@@ -302,17 +303,20 @@ static void enqueue_ne(struct numa_entit
spin_unlock(&nq->lock);
}
-static void dequeue_ne(struct numa_entity *ne)
+static int dequeue_ne(struct numa_entity *ne)
{
struct node_queue *nq;
+ int node = ne->node; // XXX serialization
- if (ne->node == -1) // XXX serialization
- return;
+ if (node == -1) // XXX serialization
+ return node;
nq = lock_ne_nq(ne);
ne->node = -1;
__dequeue_ne(nq, ne);
spin_unlock(&nq->lock);
+
+ return node;
}
static void init_ne(struct numa_entity *ne, const struct numa_ops *nops)
@@ -400,6 +404,8 @@ static int find_idlest_node(int this_nod
void select_task_node(struct task_struct *p, struct mm_struct *mm, int sd_flags)
{
+ int node;
+
if (!sched_feat(NUMA_SELECT)) {
p->node = -1;
return;
@@ -424,7 +430,11 @@ void select_task_node(struct task_struct
}
}
- enqueue_ne(&mm->numa, find_idlest_node(p->node));
+ node = find_idlest_node(p->node);
+ if (node == -1)
+ node = numa_node_id();
+
+ enqueue_ne(&mm->numa, node);
}
__init void init_sched_numa(void)
@@ -804,3 +814,567 @@ static __init int numa_init(void)
return 0;
}
early_initcall(numa_init);
+
+
+/*
+ * numa_group bits
+ */
+
+#include <linux/idr.h>
+#include <linux/srcu.h>
+#include <linux/syscalls.h>
+
+struct numa_group {
+ spinlock_t lock;
+ int id;
+
+ struct mm_rss_stat rss;
+
+ struct list_head tasks;
+ struct list_head vmas;
+
+ const struct cred *cred;
+ atomic_t ref;
+
+ struct numa_entity numa_entity;
+
+ struct rcu_head rcu;
+};
+
+static struct srcu_struct ng_srcu;
+
+static DEFINE_MUTEX(numa_group_idr_lock);
+static DEFINE_IDR(numa_group_idr);
+
+static inline struct numa_group *ne_ng(struct numa_entity *ne)
+{
+ return container_of(ne, struct numa_group, numa_entity);
+}
+
+static inline bool ng_tryget(struct numa_group *ng)
+{
+ return atomic_inc_not_zero(&ng->ref);
+}
+
+static inline void ng_get(struct numa_group *ng)
+{
+ atomic_inc(&ng->ref);
+}
+
+static void __ng_put_rcu(struct rcu_head *rcu)
+{
+ struct numa_group *ng = container_of(rcu, struct numa_group, rcu);
+
+ put_cred(ng->cred);
+ kfree(ng);
+}
+
+static void __ng_put(struct numa_group *ng)
+{
+ mutex_lock(&numa_group_idr_lock);
+ idr_remove(&numa_group_idr, ng->id);
+ mutex_unlock(&numa_group_idr_lock);
+
+ WARN_ON(!list_empty(&ng->tasks));
+ WARN_ON(!list_empty(&ng->vmas));
+
+ dequeue_ne(&ng->numa_entity);
+
+ call_rcu(&ng->rcu, __ng_put_rcu);
+}
+
+static inline void ng_put(struct numa_group *ng)
+{
+ if (atomic_dec_and_test(&ng->ref))
+ __ng_put(ng);
+}
+
+/*
+ * numa_ops
+ */
+
+static unsigned long numa_group_mem_load(struct numa_entity *ne)
+{
+ struct numa_group *ng = ne_ng(ne);
+
+ return atomic_long_read(&ng->rss.count[MM_ANONPAGES]);
+}
+
+static unsigned long numa_group_cpu_load(struct numa_entity *ne)
+{
+ struct numa_group *ng = ne_ng(ne);
+ unsigned long load = 0;
+ struct task_struct *p;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, &ng->tasks, ng_entry)
+ load += p->numa_contrib;
+ rcu_read_unlock();
+
+ return load;
+}
+
+static void numa_group_mem_migrate(struct numa_entity *ne, int node)
+{
+ struct numa_group *ng = ne_ng(ne);
+ struct vm_area_struct *vma;
+ struct mempolicy *mpol;
+ struct mm_struct *mm;
+ int idx;
+
+ /*
+ * Horrid code this..
+ *
+ * The main problem is that ng->lock nests inside mmap_sem [
+ * numa_vma_{,un}link() gets called under mmap_sem ]. But here we need
+ * to iterate that list and acquire mmap_sem for each entry.
+ *
+ * We get here without serialization. We abuse numa_vma_unlink() to add
+ * an SRCU delayed reference count to the mpols. This allows us to do
+ * lockless iteration of the list.
+ *
+ * Once we have an mpol we need to acquire mmap_sem, this too isn't
+ * straight fwd, take ng->lock to pin mpol->vma due to its
+ * serialization against numa_vma_unlink(). While that vma pointer is
+ * stable the vma->vm_mm pointer must be good too, so acquire an extra
+ * reference to the mm.
+ *
+ * This reference keeps mm stable so we can drop ng->lock and acquire
+ * mmap_sem. After which mpol->vma is stable again since the memory map
+ * is stable. So verify ->vma is still good (numa_vma_unlink clears it)
+ * and the mm is still the same (paranoia, can't see how that could
+ * happen).
+ */
+
+ idx = srcu_read_lock(&ng_srcu);
+ list_for_each_entry_rcu(mpol, &ng->vmas, ng_entry) {
+ nodemask_t mask = nodemask_of_node(node);
+
+ spin_lock(&ng->lock); /* pin mpol->vma */
+ vma = mpol->vma;
+ if (!vma) {
+ spin_unlock(&ng->lock);
+ continue;
+ }
+ mm = vma->vm_mm;
+ atomic_inc(&mm->mm_users); /* pin mm */
+ spin_unlock(&ng->lock);
+
+ down_read(&mm->mmap_sem);
+ vma = mpol->vma;
+ if (!vma)
+ goto unlock_next;
+
+ mpol_rebind_policy(mpol, &mask, MPOL_REBIND_ONCE);
+ lazy_migrate_vma(vma, node);
+unlock_next:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
+ srcu_read_unlock(&ng_srcu, idx);
+}
+
+static void numa_group_cpu_migrate(struct numa_entity *ne, int node)
+{
+ struct numa_group *ng = ne_ng(ne);
+ struct task_struct *p;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, &ng->tasks, ng_entry)
+ sched_setnode(p, node);
+ rcu_read_unlock();
+}
+
+static bool numa_group_tryget(struct numa_entity *ne)
+{
+ /*
+ * See process_tryget(), similar but against ng_put().
+ */
+ return ng_tryget(ne_ng(ne));
+}
+
+static void numa_group_put(struct numa_entity *ne)
+{
+ ng_put(ne_ng(ne));
+}
+
+static const struct numa_ops numa_group_ops = {
+ .mem_load = numa_group_mem_load,
+ .cpu_load = numa_group_cpu_load,
+
+ .mem_migrate = numa_group_mem_migrate,
+ .cpu_migrate = numa_group_cpu_migrate,
+
+ .tryget = numa_group_tryget,
+ .put = numa_group_put,
+};
+
+void __numa_task_exit(struct task_struct *p)
+{
+ struct numa_group *ng = p->numa_group;
+
+ spin_lock(&ng->lock);
+ list_del_rcu(&p->ng_entry);
+ spin_unlock(&ng->lock);
+
+ p->numa_group = NULL; // XXX serialization ?!
+
+ ng_put(ng);
+}
+
+/*
+ * memory (vma) accounting/tracking
+ *
+ * We assume a 1:1 relation between vmas and mpols and keep a list of mpols in
+ * the numa_group, and a vma backlink in the mpol.
+ */
+
+void numa_vma_link(struct vm_area_struct *new, struct vm_area_struct *old)
+{
+ struct numa_group *ng = NULL;
+
+ if (old && old->vm_policy)
+ ng = old->vm_policy->numa_group;
+
+ if (!ng && new->vm_policy)
+ ng = new->vm_policy->numa_group;
+
+ if (!ng)
+ return;
+
+ ng_get(ng);
+ new->vm_policy->numa_group = ng;
+ new->vm_policy->vma = new;
+
+ spin_lock(&ng->lock);
+ list_add_rcu(&new->vm_policy->ng_entry, &ng->vmas);
+ spin_unlock(&ng->lock);
+}
+
+void __numa_add_vma_counter(struct vm_area_struct *vma, int member, long value)
+{
+ /*
+ * Since the caller passes the vma argument, the caller is responsible
+ * for making sure the vma is stable, hence the ->vm_policy->numa_group
+ * dereference is safe. (caller usually has vma->vm_mm->mmap_sem for
+ * reading).
+ */
+ atomic_long_add(value, &vma->vm_policy->numa_group->rss.count[member]);
+}
+
+static void __mpol_put_rcu(struct rcu_head *rcu)
+{
+ struct mempolicy *mpol = container_of(rcu, struct mempolicy, rcu);
+ mpol_put(mpol);
+}
+
+void numa_vma_unlink(struct vm_area_struct *vma)
+{
+ struct mempolicy *mpol;
+ struct numa_group *ng;
+
+ if (!vma)
+ return;
+
+ mpol = vma->vm_policy;
+ if (!mpol)
+ return;
+
+ ng = mpol->numa_group;
+ if (!ng)
+ return;
+
+ spin_lock(&ng->lock);
+ list_del_rcu(&mpol->ng_entry);
+ /*
+ * Ridiculous, see numa_group_mem_migrate.
+ */
+ mpol->vma = NULL;
+ mpol_get(mpol);
+ call_srcu(&ng_srcu, &mpol->rcu, __mpol_put_rcu);
+ spin_unlock(&ng->lock);
+
+ ng_put(ng);
+}
+
+/*
+ * syscall bits
+ */
+
+#define MS_ID_GET -2
+#define MS_ID_NEW -1
+
+static struct numa_group *ng_create(struct task_struct *p)
+{
+	struct numa_group *ng;
+	int node, err;
+
+	ng = kzalloc(sizeof(*ng), GFP_KERNEL);
+	if (!ng)
+		goto fail;
+
+	err = idr_pre_get(&numa_group_idr, GFP_KERNEL);
+	if (!err)
+		goto fail_alloc;
+
+	mutex_lock(&numa_group_idr_lock);
+	err = idr_get_new(&numa_group_idr, ng, &ng->id);
+	mutex_unlock(&numa_group_idr_lock);
+	if (err)
+		goto fail_alloc;
+
+	spin_lock_init(&ng->lock);
+	atomic_set(&ng->ref, 1);	/* creation reference, owned by the caller */
+	ng->cred = get_task_cred(p);
+	INIT_LIST_HEAD(&ng->tasks);
+	INIT_LIST_HEAD(&ng->vmas);
+	init_ne(&ng->numa_entity, &numa_group_ops);
+
+	dequeue_ne(&p->mm->numa); // XXX
+	node = find_idlest_node(tsk_home_node(p));
+	if (node == -1)	/* same fallback as select_task_node() */
+		node = numa_node_id();
+	enqueue_ne(&ng->numa_entity, node);
+
+	return ng;
+
+fail_alloc:
+	kfree(ng);
+fail:
+	return ERR_PTR(-ENOMEM);	/* every failure is reported as -ENOMEM */
+}
+
+/*
+ * More or less equal to ptrace_may_access(); XXX
+ */
+static int ng_allowed(struct numa_group *ng, struct task_struct *p)
+{
+ const struct cred *cred = ng->cred, *tcred;
+
+ rcu_read_lock();
+ tcred = __task_cred(p);
+ if (cred->user->user_ns == tcred->user->user_ns &&
+ (cred->uid == tcred->euid &&
+ cred->uid == tcred->suid &&
+ cred->uid == tcred->uid &&
+ cred->gid == tcred->egid &&
+ cred->gid == tcred->sgid &&
+ cred->gid == tcred->gid))
+ goto ok;
+ if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+ goto ok;
+ rcu_read_unlock();
+ return -EPERM;
+
+ok:
+ rcu_read_unlock();
+ return 0;
+}
+
+static struct numa_group *ng_lookup(int ng_id, struct task_struct *p)
+{
+ struct numa_group *ng;
+
+ rcu_read_lock();
+again:
+ ng = idr_find(&numa_group_idr, ng_id);
+ if (!ng) {
+ rcu_read_unlock();
+ return ERR_PTR(-EINVAL);
+ }
+ if (ng_allowed(ng, p)) {
+ rcu_read_unlock();
+ return ERR_PTR(-EPERM);
+ }
+ if (!ng_tryget(ng))
+ goto again;
+ rcu_read_unlock();
+
+ return ng;
+}
+
+static int ng_task_assign(struct task_struct *p, int ng_id)
+{
+ struct numa_group *old_ng, *ng;
+
+ ng = ng_lookup(ng_id, p);
+ if (IS_ERR(ng))
+ return PTR_ERR(ng);
+
+ old_ng = p->numa_group; // XXX racy
+ if (old_ng) {
+ spin_lock(&old_ng->lock);
+ list_del_rcu(&p->ng_entry);
+ spin_unlock(&old_ng->lock);
+
+ /*
+ * We have to wait for the old ng_entry users to go away before
+ * we can re-use the link entry for the new list.
+ */
+ synchronize_rcu();
+ }
+
+ spin_lock(&ng->lock);
+ p->numa_group = ng;
+ list_add_rcu(&p->ng_entry, &ng->tasks);
+ spin_unlock(&ng->lock);
+
+ sched_setnode(p, ng->numa_entity.node);
+
+ if (old_ng)
+ ng_put(old_ng);
+
+ return ng_id;
+}
+
+static struct task_struct *find_get_task(pid_t tid)
+{
+ struct task_struct *p;
+
+ rcu_read_lock();
+ if (!tid)
+ p = current;
+ else
+ p = find_task_by_vpid(tid);
+ if (p)
+ get_task_struct(p);
+ rcu_read_unlock();
+
+ if (!p)
+ return ERR_PTR(-ESRCH);
+
+ return p;
+}
+
+/*
+ * Bind a thread to a numa group or query its binding or create a new group.
+ *
+ * sys_numa_tbind(tid, -1, 0); // create new group, return new ng_id
+ * sys_numa_tbind(tid, -2, 0); // returns existing ng_id
+ * sys_numa_tbind(tid, ng_id, 0); // set ng_id
+ *
+ * Returns:
+ * -ESRCH tid->task resolution failed
+ * -EINVAL task didn't have a ng_id, flags was wrong
+ * -EPERM tid isn't in our process
+ *
+ */
+SYSCALL_DEFINE3(numa_tbind, int, tid, int, ng_id, unsigned long, flags)
+{
+ struct task_struct *p = find_get_task(tid);
+ struct numa_group *ng = NULL;
+ int orig_ng_id = ng_id;
+
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ if (flags) {
+ ng_id = -EINVAL;
+ goto out;
+ }
+
+ switch (ng_id) {
+ case MS_ID_GET:
+ ng_id = -EINVAL;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng)
+ ng_id = ng->id;
+ rcu_read_unlock();
+ break;
+
+ case MS_ID_NEW:
+ ng = ng_create(p);
+ if (IS_ERR(ng)) {
+ ng_id = PTR_ERR(ng);
+ break;
+ }
+ ng_id = ng->id;
+ /* fall through */
+
+ default:
+ ng_id = ng_task_assign(p, ng_id);
+ if (ng && orig_ng_id < 0)
+ ng_put(ng);
+ break;
+ }
+
+out:
+ put_task_struct(p);
+ return ng_id;
+}
+
+/*
+ * Bind a memory region to a numa group.
+ *
+ * sys_numa_mbind(addr, len, ng_id, 0);
+ *
+ * Create a non-mergeable vma over [addr,addr+len) and assign a mpol binding it
+ * to the numa group identified by ng_id.  Returns the mpol_do_mbind() result,
+ * or the error from the flags/addr checks, ng_lookup() or mpol_new().
+ */
+SYSCALL_DEFINE4(numa_mbind, unsigned long, addr, unsigned long, len,
+		int, ng_id, unsigned long, flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct mempolicy *mpol;
+	struct numa_group *ng;
+	nodemask_t mask;
+	int node, err = 0;
+
+	if (flags)
+		return -EINVAL;
+
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+
+	ng = ng_lookup(ng_id, current);
+	if (IS_ERR(ng))
+		return PTR_ERR(ng);
+
+	mask = nodemask_of_node(ng->numa_entity.node);
+	mpol = mpol_new(MPOL_BIND, 0, &mask);
+	if (IS_ERR(mpol)) {	/* mpol_new() reports failure via ERR_PTR() */
+		ng_put(ng);
+		return PTR_ERR(mpol);
+	}
+	mpol->flags |= MPOL_MF_LAZY;
+	mpol->numa_group = ng;
+
+	node = dequeue_ne(&mm->numa); // XXX
+
+	down_write(&mm->mmap_sem);
+	err = mpol_do_mbind(addr, len, mpol, MPOL_BIND,
+			&mask, MPOL_MF_MOVE|MPOL_MF_LAZY);
+	up_write(&mm->mmap_sem);
+	mpol_put(mpol);	/* drop the mpol_new() reference */
+	ng_put(ng);	/* drop the ng_lookup() reference */
+
+	if (err && node != -1)	/* failed: requeue mm on its old node */
+		enqueue_ne(&mm->numa, node); // XXX
+
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+
+asmlinkage long compat_sys_numa_mbind(compat_ulong_t addr, compat_ulong_t len,
+ compat_int_t ng_id, compat_ulong_t flags)
+{
+ return sys_numa_mbind(addr, len, ng_id, flags);
+}
+
+asmlinkage long compat_sys_numa_tbind(compat_int_t tid, compat_int_t ng_id,
+ compat_ulong_t flags)
+{
+ return sys_numa_tbind(tid, ng_id, flags);
+}
+
+#endif /* CONFIG_COMPAT */
+
+static __init int numa_group_init(void)
+{
+ init_srcu_struct(&ng_srcu);
+ return 0;
+}
+early_initcall(numa_group_init);
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -103,6 +103,10 @@ cond_syscall(sys_set_mempolicy);
cond_syscall(compat_sys_mbind);
cond_syscall(compat_sys_get_mempolicy);
cond_syscall(compat_sys_set_mempolicy);
+cond_syscall(sys_numa_mbind);
+cond_syscall(compat_sys_numa_mbind);
+cond_syscall(sys_numa_tbind);
+cond_syscall(compat_sys_numa_tbind);
cond_syscall(sys_add_key);
cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -287,12 +287,13 @@ struct mempolicy *mpol_new(unsigned shor
}
} else if (nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
- policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+ policy = kmem_cache_alloc(policy_cache, GFP_KERNEL | __GFP_ZERO);
if (!policy)
return ERR_PTR(-ENOMEM);
atomic_set(&policy->refcnt, 1);
policy->mode = mode;
policy->flags = flags;
+ INIT_LIST_HEAD(&policy->ng_entry);
return policy;
}
@@ -607,6 +608,9 @@ static int policy_vma(struct vm_area_str
if (!err) {
mpol_get(new);
vma->vm_policy = new;
+ numa_vma_link(vma, NULL);
+ if (old)
+ numa_vma_unlink(old->vma);
mpol_put(old);
}
return err;
@@ -1994,11 +1998,13 @@ int vma_dup_policy(struct vm_area_struct
if (IS_ERR(mpol))
return PTR_ERR(mpol);
vma_set_policy(new, mpol);
+ numa_vma_link(new, old);
return 0;
}
void vma_put_policy(struct vm_area_struct *vma)
{
+ numa_vma_unlink(vma);
mpol_put(vma_policy(vma));
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
Paul Turner <pjt@google.com>,
Suresh Siddha <suresh.b.siddha@intel.com>,
Mike Galbraith <efault@gmx.de>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
Dan Smith <danms@us.ibm.com>,
Bharata B Rao <bharata.rao@gmail.com>,
Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Rik van Riel <riel@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 23/26] sched, numa: Introduce sys_numa_{t,m}bind()
Date: Fri, 16 Mar 2012 15:40:51 +0100 [thread overview]
Message-ID: <20120316144241.612966692@chello.nl> (raw)
In-Reply-To: 20120316144028.036474157@chello.nl
[-- Attachment #1: numa-foo-syscall.patch --]
[-- Type: text/plain, Size: 19723 bytes --]
Now that we have a NUMA process scheduler, provide a syscall interface
for finer granularity NUMA balancing. In particular this allows
setting up NUMA groups of threads and vmas within a process.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/syscalls/syscall_32.tbl | 2
arch/x86/syscalls/syscall_64.tbl | 2
include/asm-generic/unistd.h | 6
include/linux/mempolicy.h | 35 ++
include/linux/sched.h | 2
include/linux/syscalls.h | 3
kernel/exit.c | 1
kernel/sched/numa.c | 582 ++++++++++++++++++++++++++++++++++++++-
kernel/sys_ni.c | 4
mm/mempolicy.c | 8
10 files changed, 639 insertions(+), 6 deletions(-)
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,5 @@
346 i386 setns sys_setns
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 numa_mbind sys_numa_mbind compat_sys_numa_mbind
+350 i386 numa_tbind sys_numa_tbind compat_sys_numa_tbind
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
+312 64 numa_mbind sys_numa_mbind
+313 64 numa_tbind sys_numa_tbind
#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -691,9 +691,13 @@ __SC_COMP(__NR_process_vm_readv, sys_pro
#define __NR_process_vm_writev 271
__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
compat_sys_process_vm_writev)
+#define __NR_numa_mbind 272
+__SC_COMP(__NR_numa_mbind, sys_numa_mbind, compat_sys_numa_mbind)
+#define __NR_numa_tbind 273
+__SC_COMP(__NR_numa_tbind, sys_numa_tbind, compat_sys_numa_tbind)
#undef __NR_syscalls
-#define __NR_syscalls 272
+#define __NR_syscalls 274
/*
* All syscalls below here should go away really,
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -78,6 +78,8 @@ enum mpol_rebind_step {
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/migrate.h>
+#include <linux/list.h>
+#include <linux/sched.h>
struct mm_struct;
@@ -109,6 +111,10 @@ struct mempolicy {
atomic_t refcnt;
unsigned short mode; /* See MPOL_* above */
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
+ struct numa_group *numa_group;
+ struct list_head ng_entry;
+ struct vm_area_struct *vma;
+ struct rcu_head rcu;
union {
short preferred_node; /* preferred */
nodemask_t nodes; /* interleave/bind */
@@ -396,6 +402,35 @@ static inline int mpol_to_str(char *buff
}
#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_NUMA
+
+extern void __numa_task_exit(struct task_struct *);
+extern void numa_vma_link(struct vm_area_struct *, struct vm_area_struct *);
+extern void numa_vma_unlink(struct vm_area_struct *);
+extern void __numa_add_vma_counter(struct vm_area_struct *, int, long);
+
+static inline
+void numa_add_vma_counter(struct vm_area_struct *vma, int member, long value)
+{
+ if (vma->vm_policy && vma->vm_policy->numa_group)
+ __numa_add_vma_counter(vma, member, value);
+}
+
+static inline void numa_task_exit(struct task_struct *p)
+{
+ if (p->numa_group)
+ __numa_task_exit(p);
+}
+
+#else /* CONFIG_NUMA */
+
+static inline void numa_task_exit(struct task_struct *p) { }
+static inline void numa_vma_link(struct vm_area_struct *new, struct vm_area_struct *old) { }
+static inline void numa_vma_unlink(struct vm_area_struct *vma) { }
+
+#endif /* CONFIG_NUMA */
+
#endif /* __KERNEL__ */
#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1548,6 +1548,8 @@ struct task_struct {
short il_next;
short pref_node_fork;
int node;
+ struct numa_group *numa_group;
+ struct list_head ng_entry;
#endif
struct rcu_head rcu;
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -856,5 +856,8 @@ asmlinkage long sys_process_vm_writev(pi
const struct iovec __user *rvec,
unsigned long riovcnt,
unsigned long flags);
+asmlinkage long sys_numa_mbind(unsigned long addr, unsigned long len,
+ int ng_id, unsigned long flags);
+asmlinkage long sys_numa_tbind(int tid, int ng_id, unsigned long flags);
#endif
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1010,6 +1010,7 @@ void do_exit(long code)
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
task_unlock(tsk);
+ numa_task_exit(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
--- a/kernel/sched/numa.c
+++ b/kernel/sched/numa.c
@@ -14,6 +14,7 @@
#include <linux/mempolicy.h>
#include <linux/kthread.h>
+#include <linux/compat.h>
#include "sched.h"
@@ -302,17 +303,20 @@ static void enqueue_ne(struct numa_entit
spin_unlock(&nq->lock);
}
-static void dequeue_ne(struct numa_entity *ne)
+static int dequeue_ne(struct numa_entity *ne)
{
struct node_queue *nq;
+ int node = ne->node; // XXX serialization
- if (ne->node == -1) // XXX serialization
- return;
+ if (node == -1) // XXX serialization
+ return node;
nq = lock_ne_nq(ne);
ne->node = -1;
__dequeue_ne(nq, ne);
spin_unlock(&nq->lock);
+
+ return node;
}
static void init_ne(struct numa_entity *ne, const struct numa_ops *nops)
@@ -400,6 +404,8 @@ static int find_idlest_node(int this_nod
void select_task_node(struct task_struct *p, struct mm_struct *mm, int sd_flags)
{
+ int node;
+
if (!sched_feat(NUMA_SELECT)) {
p->node = -1;
return;
@@ -424,7 +430,11 @@ void select_task_node(struct task_struct
}
}
- enqueue_ne(&mm->numa, find_idlest_node(p->node));
+ node = find_idlest_node(p->node);
+ if (node == -1)
+ node = numa_node_id();
+
+ enqueue_ne(&mm->numa, node);
}
__init void init_sched_numa(void)
@@ -804,3 +814,567 @@ static __init int numa_init(void)
return 0;
}
early_initcall(numa_init);
+
+
+/*
+ * numa_group bits
+ */
+
+#include <linux/idr.h>
+#include <linux/srcu.h>
+#include <linux/syscalls.h>
+
+/*
+ * A numa_group ties a set of tasks and a set of vmas (through their
+ * mempolicies) to one embedded numa_entity.
+ */
+struct numa_group {
+ spinlock_t lock; /* held around ->tasks / ->vmas list updates; also pins mpol->vma */
+ int id; /* handle allocated from numa_group_idr */
+
+ struct mm_rss_stat rss; /* counters; MM_ANONPAGES is reported as the memory load */
+
+ struct list_head tasks; /* member tasks, linked through task_struct::ng_entry */
+ struct list_head vmas; /* member mempolicies, linked through mempolicy::ng_entry */
+
+ const struct cred *cred; /* creds of the task handed to ng_create(), checked by ng_allowed() */
+ atomic_t ref; /* reference count; __ng_put() runs when it drops to zero */
+
+ struct numa_entity numa_entity;
+
+ struct rcu_head rcu; /* used by the call_rcu() in __ng_put() */
+};
+
+/* SRCU domain shared by numa_group_mem_migrate() and numa_vma_unlink() */
+static struct srcu_struct ng_srcu;
+
+/* numa_group_idr_lock is held around idr_get_new() / idr_remove() on numa_group_idr */
+static DEFINE_MUTEX(numa_group_idr_lock);
+static DEFINE_IDR(numa_group_idr);
+
+/* Map an embedded numa_entity back to the numa_group that contains it. */
+static inline struct numa_group *ne_ng(struct numa_entity *ne)
+{
+	struct numa_group *group;
+
+	group = container_of(ne, struct numa_group, numa_entity);
+	return group;
+}
+
+/*
+ * Take a reference unless the count has already dropped to zero.
+ * Returns true when the reference was obtained.
+ */
+static inline bool ng_tryget(struct numa_group *ng)
+{
+	bool got = atomic_inc_not_zero(&ng->ref);
+
+	return got;
+}
+
+/*
+ * Take an additional reference on @ng. Unconditional increment, so the
+ * caller must already know the count is non-zero.
+ */
+static inline void ng_get(struct numa_group *ng)
+{
+ atomic_inc(&ng->ref);
+}
+
+/*
+ * RCU callback queued by __ng_put(): drop the creator's credentials and
+ * free the group.
+ */
+static void __ng_put_rcu(struct rcu_head *rcu)
+{
+ struct numa_group *ng = container_of(rcu, struct numa_group, rcu);
+
+ put_cred(ng->cred);
+ kfree(ng);
+}
+
+/*
+ * Tear down a group whose reference count reached zero: remove its id from
+ * the idr, dequeue its numa_entity and free it through call_rcu().
+ */
+static void __ng_put(struct numa_group *ng)
+{
+ mutex_lock(&numa_group_idr_lock);
+ idr_remove(&numa_group_idr, ng->id);
+ mutex_unlock(&numa_group_idr_lock);
+
+ /* each linked task and vma holds a reference, so both lists should be empty */
+ WARN_ON(!list_empty(&ng->tasks));
+ WARN_ON(!list_empty(&ng->vmas));
+
+ dequeue_ne(&ng->numa_entity);
+
+ /* lockless readers may still hold a pointer to ng, defer the free */
+ call_rcu(&ng->rcu, __ng_put_rcu);
+}
+
+/* Drop one reference; the final put tears the group down via __ng_put(). */
+static inline void ng_put(struct numa_group *ng)
+{
+	if (!atomic_dec_and_test(&ng->ref))
+		return;
+
+	__ng_put(ng);
+}
+
+/*
+ * numa_ops
+ */
+
+/* numa_ops::mem_load - report the group's MM_ANONPAGES counter. */
+static unsigned long numa_group_mem_load(struct numa_entity *ne)
+{
+	return atomic_long_read(&ne_ng(ne)->rss.count[MM_ANONPAGES]);
+}
+
+/*
+ * numa_ops::cpu_load - sum ->numa_contrib over every task on the group's
+ * task list, walking the list under rcu_read_lock().
+ */
+static unsigned long numa_group_cpu_load(struct numa_entity *ne)
+{
+	struct numa_group *group = ne_ng(ne);
+	struct task_struct *tsk;
+	unsigned long sum = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tsk, &group->tasks, ng_entry) {
+		sum += tsk->numa_contrib;
+	}
+	rcu_read_unlock();
+
+	return sum;
+}
+
+/*
+ * numa_ops::mem_migrate - rebind every mempolicy on the group's ->vmas list
+ * to @node and lazily migrate the vma behind it.
+ */
+static void numa_group_mem_migrate(struct numa_entity *ne, int node)
+{
+	struct numa_group *ng = ne_ng(ne);
+	struct vm_area_struct *vma;
+	struct mempolicy *mpol;
+	struct mm_struct *mm;
+	int idx;
+
+	/*
+	 * Horrid code this..
+	 *
+	 * The main problem is that ng->lock nests inside mmap_sem [
+	 * numa_vma_{,un}link() gets called under mmap_sem ]. But here we need
+	 * to iterate that list and acquire mmap_sem for each entry.
+	 *
+	 * We get here without serialization. We abuse numa_vma_unlink() to add
+	 * an SRCU delayed reference count to the mpols. This allows us to do
+	 * lockless iteration of the list.
+	 *
+	 * Once we have an mpol we need to acquire mmap_sem, this too isn't
+	 * straight fwd, take ng->lock to pin mpol->vma due to its
+	 * serialization against numa_vma_unlink(). While that vma pointer is
+	 * stable the vma->vm_mm pointer must be good too, so acquire an extra
+	 * reference to the mm.
+	 *
+	 * This reference keeps mm stable so we can drop ng->lock and acquire
+	 * mmap_sem. After which mpol->vma is stable again since the memory map
+	 * is stable. So verify ->vma is still good (numa_vma_unlink clears it)
+	 * and the mm is still the same (paranoia, can't see how that could
+	 * happen).
+	 */
+
+	idx = srcu_read_lock(&ng_srcu);
+	list_for_each_entry_rcu(mpol, &ng->vmas, ng_entry) {
+		nodemask_t mask = nodemask_of_node(node);
+
+		spin_lock(&ng->lock); /* pin mpol->vma */
+		vma = mpol->vma;
+		if (!vma) {
+			spin_unlock(&ng->lock);
+			continue;
+		}
+		mm = vma->vm_mm;
+		atomic_inc(&mm->mm_users); /* pin mm */
+		spin_unlock(&ng->lock);
+
+		down_read(&mm->mmap_sem);
+		vma = mpol->vma;
+		/*
+		 * Re-validate under mmap_sem: the vma may have been unlinked,
+		 * and it must still belong to the mm whose mmap_sem we hold,
+		 * otherwise that lock does not protect it.
+		 */
+		if (!vma || vma->vm_mm != mm)
+			goto unlock_next;
+
+		mpol_rebind_policy(mpol, &mask, MPOL_REBIND_ONCE);
+		lazy_migrate_vma(vma, node);
+unlock_next:
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+	srcu_read_unlock(&ng_srcu, idx);
+}
+
+/*
+ * numa_ops::cpu_migrate - call sched_setnode() with @node for every task on
+ * the group's task list, walking the list under rcu_read_lock().
+ */
+static void numa_group_cpu_migrate(struct numa_entity *ne, int node)
+{
+	struct numa_group *group = ne_ng(ne);
+	struct task_struct *tsk;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tsk, &group->tasks, ng_entry) {
+		sched_setnode(tsk, node);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * numa_ops::tryget - try to take a reference on the group behind @ne.
+ *
+ * See process_tryget(), similar but against ng_put().
+ */
+static bool numa_group_tryget(struct numa_entity *ne)
+{
+	struct numa_group *group = ne_ng(ne);
+
+	return ng_tryget(group);
+}
+
+/* numa_ops::put - drop a reference on the group behind @ne. */
+static void numa_group_put(struct numa_entity *ne)
+{
+	struct numa_group *group = ne_ng(ne);
+
+	ng_put(group);
+}
+
+/* numa_ops for group entities; handed to init_ne() by ng_create(). */
+static const struct numa_ops numa_group_ops = {
+ .mem_load = numa_group_mem_load,
+ .cpu_load = numa_group_cpu_load,
+
+ .mem_migrate = numa_group_mem_migrate,
+ .cpu_migrate = numa_group_cpu_migrate,
+
+ .tryget = numa_group_tryget,
+ .put = numa_group_put,
+};
+
+/*
+ * Remove @p from its numa_group and drop the reference the task held on it.
+ * Dereferences p->numa_group unconditionally, so the caller must only
+ * invoke this for a task that is in a group.
+ */
+void __numa_task_exit(struct task_struct *p)
+{
+ struct numa_group *ng = p->numa_group;
+
+ /* ng->lock serializes updates of the ->tasks list */
+ spin_lock(&ng->lock);
+ list_del_rcu(&p->ng_entry);
+ spin_unlock(&ng->lock);
+
+ p->numa_group = NULL; // XXX serialization ?!
+
+ ng_put(ng);
+}
+
+/*
+ * memory (vma) accounting/tracking
+ *
+ * We assume a 1:1 relation between vmas and mpols and keep a list of mpols in
+ * the numa_group, and a vma backlink in the mpol.
+ */
+
+/*
+ * Link @new's mempolicy into a numa_group.
+ *
+ * The group is taken from @old's policy when it has one, otherwise from
+ * @new's own policy; when neither names a group there is nothing to do.
+ * On success the group gains a reference, and @new's policy is added to the
+ * group's ->vmas list with its ->vma backlink pointing at @new.
+ */
+void numa_vma_link(struct vm_area_struct *new, struct vm_area_struct *old)
+{
+	struct mempolicy *mpol = new->vm_policy;
+	struct numa_group *ng = NULL;
+
+	/*
+	 * Without a policy on @new there is nothing to hang the link on;
+	 * bail out instead of dereferencing NULL when only @old has a group.
+	 */
+	if (!mpol)
+		return;
+
+	if (old && old->vm_policy)
+		ng = old->vm_policy->numa_group;
+
+	if (!ng)
+		ng = mpol->numa_group;
+
+	if (!ng)
+		return;
+
+	ng_get(ng);
+	mpol->numa_group = ng;
+	mpol->vma = new;
+
+	spin_lock(&ng->lock);
+	list_add_rcu(&mpol->ng_entry, &ng->vmas);
+	spin_unlock(&ng->lock);
+}
+
+/*
+ * Add @value to counter @member of the numa_group that @vma's policy
+ * belongs to.
+ */
+void __numa_add_vma_counter(struct vm_area_struct *vma, int member, long value)
+{
+	struct numa_group *group;
+
+	/*
+	 * The vma is supplied by the caller, so keeping it stable is the
+	 * caller's job (usually by holding vma->vm_mm->mmap_sem for reading);
+	 * that is what makes the ->vm_policy->numa_group dereference safe.
+	 */
+	group = vma->vm_policy->numa_group;
+	atomic_long_add(value, &group->rss.count[member]);
+}
+
+/*
+ * SRCU callback queued by numa_vma_unlink(): drop the extra mempolicy
+ * reference taken there.
+ */
+static void __mpol_put_rcu(struct rcu_head *rcu)
+{
+	mpol_put(container_of(rcu, struct mempolicy, rcu));
+}
+
+/*
+ * Undo numa_vma_link(): take @vma's mempolicy off its group's ->vmas list,
+ * clear the vma backlink and drop the group reference. A no-op when @vma is
+ * NULL, has no policy, or the policy is not in a group.
+ */
+void numa_vma_unlink(struct vm_area_struct *vma)
+{
+ struct mempolicy *mpol;
+ struct numa_group *ng;
+
+ if (!vma)
+ return;
+
+ mpol = vma->vm_policy;
+ if (!mpol)
+ return;
+
+ ng = mpol->numa_group;
+ if (!ng)
+ return;
+
+ spin_lock(&ng->lock);
+ list_del_rcu(&mpol->ng_entry);
+ /*
+ * Ridiculous, see numa_group_mem_migrate.
+ *
+ * The extra reference taken here is only dropped from the SRCU
+ * callback, which keeps the mpol around for walkers that are
+ * still inside an ng_srcu read-side section.
+ */
+ mpol->vma = NULL;
+ mpol_get(mpol);
+ call_srcu(&ng_srcu, &mpol->rcu, __mpol_put_rcu);
+ spin_unlock(&ng->lock);
+
+ ng_put(ng);
+}
+
+/*
+ * syscall bits
+ */
+
+/* Special ng_id values accepted by sys_numa_tbind() */
+#define MS_ID_GET -2 /* query the task's current group id */
+#define MS_ID_NEW -1 /* create a new group and bind the task to it */
+
+/*
+ * Allocate a new numa_group on behalf of task @p.
+ *
+ * The group starts with one reference (owned by the caller), records @p's
+ * credentials for later ng_allowed() checks, takes @p's mm off the process
+ * level node queues and is itself enqueued on a node.
+ *
+ * Returns the new group, ERR_PTR(-EINVAL) when @p has no mm (e.g. a kernel
+ * thread), or ERR_PTR(-ENOMEM) when allocating the group or its id fails.
+ */
+static struct numa_group *ng_create(struct task_struct *p)
+{
+	struct numa_group *ng;
+	struct mm_struct *mm;
+	int node, err;
+
+	/*
+	 * @p is an arbitrary task looked up by tid; pin its mm instead of
+	 * blindly dereferencing p->mm, which is NULL for kernel threads.
+	 */
+	mm = get_task_mm(p);
+	if (!mm)
+		return ERR_PTR(-EINVAL);
+
+	ng = kzalloc(sizeof(*ng), GFP_KERNEL);
+	if (!ng)
+		goto fail;
+
+	/*
+	 * Fully initialise the group before it becomes visible through the
+	 * idr; ng_lookup() dereferences ->cred and ->ref as soon as
+	 * idr_find() can return the pointer.
+	 */
+	spin_lock_init(&ng->lock);
+	atomic_set(&ng->ref, 1);
+	ng->cred = get_task_cred(p);
+	INIT_LIST_HEAD(&ng->tasks);
+	INIT_LIST_HEAD(&ng->vmas);
+	init_ne(&ng->numa_entity, &numa_group_ops);
+
+	err = idr_pre_get(&numa_group_idr, GFP_KERNEL);
+	if (!err)
+		goto fail_alloc;
+
+	mutex_lock(&numa_group_idr_lock);
+	err = idr_get_new(&numa_group_idr, ng, &ng->id);
+	mutex_unlock(&numa_group_idr_lock);
+
+	if (err)
+		goto fail_alloc;
+
+	dequeue_ne(&mm->numa); // XXX
+	mmput(mm);
+
+	/* same fallback as select_task_node() when no idlest node is found */
+	node = find_idlest_node(tsk_home_node(p));
+	if (node == -1)
+		node = numa_node_id();
+	enqueue_ne(&ng->numa_entity, node);
+
+	return ng;
+
+fail_alloc:
+	put_cred(ng->cred);
+	kfree(ng);
+fail:
+	mmput(mm);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * More or less equal to ptrace_may_access(); XXX
+ */
+/*
+ * Decide whether task @p may use group @ng.
+ *
+ * Access is granted when the group's recorded credentials share @p's user
+ * namespace and their uid/gid equal all of @p's real, effective and saved
+ * ids, or failing that when ns_capable() grants CAP_SYS_PTRACE in @p's user
+ * namespace. Returns 0 when allowed, -EPERM otherwise.
+ */
+static int ng_allowed(struct numa_group *ng, struct task_struct *p)
+{
+	const struct cred *cred = ng->cred, *tcred;
+	int ret = -EPERM;
+
+	rcu_read_lock();
+	tcred = __task_cred(p);
+	if (cred->user->user_ns == tcred->user->user_ns &&
+	    cred->uid == tcred->euid &&
+	    cred->uid == tcred->suid &&
+	    cred->uid == tcred->uid &&
+	    cred->gid == tcred->egid &&
+	    cred->gid == tcred->sgid &&
+	    cred->gid == tcred->gid)
+		ret = 0;
+	else if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+		ret = 0;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/*
+ * Resolve @ng_id to a group and take a reference on it for task @p.
+ *
+ * Returns the referenced group, ERR_PTR(-EINVAL) when the id is not in the
+ * idr, or ERR_PTR(-EPERM) when ng_allowed() refuses @p. When the reference
+ * cannot be taken because the count already hit zero, the lookup is retried.
+ */
+static struct numa_group *ng_lookup(int ng_id, struct task_struct *p)
+{
+	struct numa_group *found;
+
+	rcu_read_lock();
+	for (;;) {
+		found = idr_find(&numa_group_idr, ng_id);
+		if (!found) {
+			found = ERR_PTR(-EINVAL);
+			break;
+		}
+		if (ng_allowed(found, p)) {
+			found = ERR_PTR(-EPERM);
+			break;
+		}
+		if (ng_tryget(found))
+			break;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/*
+ * Move task @p into the group identified by @ng_id.
+ *
+ * The reference obtained by ng_lookup() is kept as the task's membership
+ * reference; the reference held on the previous group, if any, is dropped.
+ * Returns @ng_id on success or the negative error from ng_lookup().
+ */
+static int ng_task_assign(struct task_struct *p, int ng_id)
+{
+ struct numa_group *old_ng, *ng;
+
+ ng = ng_lookup(ng_id, p);
+ if (IS_ERR(ng))
+ return PTR_ERR(ng);
+
+ old_ng = p->numa_group; // XXX racy
+ if (old_ng) {
+ spin_lock(&old_ng->lock);
+ list_del_rcu(&p->ng_entry);
+ spin_unlock(&old_ng->lock);
+
+ /*
+ * We have to wait for the old ng_entry users to go away before
+ * we can re-use the link entry for the new list.
+ */
+ synchronize_rcu();
+ }
+
+ spin_lock(&ng->lock);
+ p->numa_group = ng;
+ list_add_rcu(&p->ng_entry, &ng->tasks);
+ spin_unlock(&ng->lock);
+
+ /* place the task on the node the group currently sits on */
+ sched_setnode(p, ng->numa_entity.node);
+
+ if (old_ng)
+ ng_put(old_ng);
+
+ return ng_id;
+}
+
+/*
+ * Resolve @tid to a task and take a task_struct reference on it; a @tid of
+ * zero selects the calling task. Returns ERR_PTR(-ESRCH) when no task is
+ * found.
+ */
+static struct task_struct *find_get_task(pid_t tid)
+{
+	struct task_struct *tsk;
+
+	rcu_read_lock();
+	tsk = tid ? find_task_by_vpid(tid) : current;
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+
+	return tsk ? tsk : ERR_PTR(-ESRCH);
+}
+
+/*
+ * Bind a thread to a numa group or query its binding or create a new group.
+ *
+ * sys_numa_tbind(tid, -1, 0); // create new group, return new ng_id
+ * sys_numa_tbind(tid, -2, 0); // returns existing ng_id
+ * sys_numa_tbind(tid, ng_id, 0); // set ng_id
+ *
+ * A tid of 0 refers to the calling thread.
+ *
+ * Returns the (new) ng_id on success, or:
+ * -ESRCH tid->task resolution failed
+ * -EINVAL task didn't have a ng_id, flags was wrong, or ng_id does not
+ * name an existing group
+ * -EPERM ng_allowed() refused the task access to the group
+ * or the error returned by ng_create() when creating a group fails
+ *
+ */
+SYSCALL_DEFINE3(numa_tbind, int, tid, int, ng_id, unsigned long, flags)
+{
+ struct task_struct *p = find_get_task(tid);
+ struct numa_group *ng = NULL;
+ int orig_ng_id = ng_id;
+
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ if (flags) {
+ ng_id = -EINVAL;
+ goto out;
+ }
+
+ switch (ng_id) {
+ case MS_ID_GET:
+ ng_id = -EINVAL;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng)
+ ng_id = ng->id;
+ rcu_read_unlock();
+ break;
+
+ case MS_ID_NEW:
+ ng = ng_create(p);
+ if (IS_ERR(ng)) {
+ ng_id = PTR_ERR(ng);
+ break;
+ }
+ ng_id = ng->id;
+ /* fall through */
+
+ default:
+ ng_id = ng_task_assign(p, ng_id);
+ /*
+ * Only set when we came through MS_ID_NEW: drop the creation
+ * reference from ng_create(); ng_task_assign() took its own.
+ */
+ if (ng && orig_ng_id < 0)
+ ng_put(ng);
+ break;
+ }
+
+out:
+ put_task_struct(p);
+ return ng_id;
+}
+
+/*
+ * Bind a memory region to a numa group.
+ *
+ * sys_numa_mbind(addr, len, ng_id, 0);
+ *
+ * create a non-mergeable vma over [addr,addr+len) and assign a mpol binding it
+ * to the numa group identified by ng_id.
+ *
+ * Returns 0 on success, or:
+ * -EINVAL flags was non-zero, addr was not page aligned, or ng_id does
+ * not name an existing group
+ * -EPERM ng_allowed() refused the caller access to the group
+ * or the error reported by mpol_new() / mpol_do_mbind()
+ *
+ */
+SYSCALL_DEFINE4(numa_mbind, unsigned long, addr, unsigned long, len,
+		int, ng_id, unsigned long, flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct mempolicy *mpol;
+	struct numa_group *ng;
+	nodemask_t mask;
+	int node, err = 0;
+
+	if (flags)
+		return -EINVAL;
+
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+
+	ng = ng_lookup(ng_id, current);
+	if (IS_ERR(ng))
+		return PTR_ERR(ng);
+
+	mask = nodemask_of_node(ng->numa_entity.node);
+	mpol = mpol_new(MPOL_BIND, 0, &mask);
+	/* mpol_new() reports failure as ERR_PTR(), a NULL test would miss it */
+	if (IS_ERR(mpol)) {
+		ng_put(ng);
+		return PTR_ERR(mpol);
+	}
+	mpol->flags |= MPOL_MF_LAZY;
+	mpol->numa_group = ng;
+
+	/* remember the node so the mm can be re-queued if the bind fails */
+	node = dequeue_ne(&mm->numa); // XXX
+
+	down_write(&mm->mmap_sem);
+	err = mpol_do_mbind(addr, len, mpol, MPOL_BIND,
+			&mask, MPOL_MF_MOVE|MPOL_MF_LAZY);
+	up_write(&mm->mmap_sem);
+	mpol_put(mpol);
+	ng_put(ng);
+
+	if (err && node != -1)
+		enqueue_ne(&mm->numa, node); // XXX
+
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+
+/* Compat entry point: forwards its arguments unchanged to sys_numa_mbind(). */
+asmlinkage long compat_sys_numa_mbind(compat_ulong_t addr, compat_ulong_t len,
+ compat_int_t ng_id, compat_ulong_t flags)
+{
+ return sys_numa_mbind(addr, len, ng_id, flags);
+}
+
+/* Compat entry point: forwards its arguments unchanged to sys_numa_tbind(). */
+asmlinkage long compat_sys_numa_tbind(compat_int_t tid, compat_int_t ng_id,
+ compat_ulong_t flags)
+{
+ return sys_numa_tbind(tid, ng_id, flags);
+}
+
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Initialise the SRCU domain used by the numa_group mpol list walk.
+ * Returns the result of init_srcu_struct() so a failed setup is reported
+ * to the initcall machinery instead of being silently ignored.
+ */
+static __init int numa_group_init(void)
+{
+	return init_srcu_struct(&ng_srcu);
+}
+early_initcall(numa_group_init);
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -103,6 +103,10 @@ cond_syscall(sys_set_mempolicy);
cond_syscall(compat_sys_mbind);
cond_syscall(compat_sys_get_mempolicy);
cond_syscall(compat_sys_set_mempolicy);
+cond_syscall(sys_numa_mbind);
+cond_syscall(compat_sys_numa_mbind);
+cond_syscall(sys_numa_tbind);
+cond_syscall(compat_sys_numa_tbind);
cond_syscall(sys_add_key);
cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -287,12 +287,13 @@ struct mempolicy *mpol_new(unsigned shor
}
} else if (nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
- policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+ policy = kmem_cache_alloc(policy_cache, GFP_KERNEL | __GFP_ZERO);
if (!policy)
return ERR_PTR(-ENOMEM);
atomic_set(&policy->refcnt, 1);
policy->mode = mode;
policy->flags = flags;
+ INIT_LIST_HEAD(&policy->ng_entry);
return policy;
}
@@ -607,6 +608,9 @@ static int policy_vma(struct vm_area_str
if (!err) {
mpol_get(new);
vma->vm_policy = new;
+ numa_vma_link(vma, NULL);
+ if (old)
+ numa_vma_unlink(old->vma);
mpol_put(old);
}
return err;
@@ -1994,11 +1998,13 @@ int vma_dup_policy(struct vm_area_struct
if (IS_ERR(mpol))
return PTR_ERR(mpol);
vma_set_policy(new, mpol);
+ numa_vma_link(new, old);
return 0;
}
void vma_put_policy(struct vm_area_struct *vma)
{
+ numa_vma_unlink(vma);
mpol_put(vma_policy(vma));
}
next prev parent reply other threads:[~2012-03-16 14:53 UTC|newest]
Thread overview: 304+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-16 14:40 [RFC][PATCH 00/26] sched/numa Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 01/26] mm, mpol: Re-implement check_*_range() using walk_page_range() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 02/26] mm, mpol: Remove NUMA_INTERLEAVE_HIT Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-07-06 10:32 ` Johannes Weiner
2012-07-06 10:32 ` Johannes Weiner
2012-07-06 13:46 ` [tip:sched/core] mm: Fix vmstat names-values off-by-one tip-bot for Johannes Weiner
2012-07-06 14:48 ` [RFC][PATCH 02/26] mm, mpol: Remove NUMA_INTERLEAVE_HIT Minchan Kim
2012-07-06 14:48 ` Minchan Kim
2012-07-06 15:02 ` Peter Zijlstra
2012-07-06 15:02 ` Peter Zijlstra
2012-07-06 14:54 ` Kyungmin Park
2012-07-06 14:54 ` Kyungmin Park
2012-07-06 15:00 ` Peter Zijlstra
2012-07-06 15:00 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 03/26] mm, mpol: add MPOL_MF_LAZY Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-23 11:50 ` Mel Gorman
2012-03-23 11:50 ` Mel Gorman
2012-07-06 16:38 ` Rik van Riel
2012-07-06 16:38 ` Rik van Riel
2012-07-06 20:04 ` Lee Schermerhorn
2012-07-06 20:04 ` Lee Schermerhorn
2012-07-06 20:27 ` Rik van Riel
2012-07-06 20:27 ` Rik van Riel
2012-07-09 11:48 ` Peter Zijlstra
2012-07-09 11:48 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 04/26] mm, mpol: add MPOL_MF_NOOP Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-07-06 18:40 ` Rik van Riel
2012-07-06 18:40 ` Rik van Riel
2012-03-16 14:40 ` [RFC][PATCH 05/26] mm, mpol: Check for misplaced page Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 06/26] mm: Migrate " Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-04-03 17:32 ` Dan Smith
2012-04-03 17:32 ` Dan Smith
2012-03-16 14:40 ` [RFC][PATCH 07/26] mm: Handle misplaced anon pages Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 08/26] mm, mpol: Simplify do_mbind() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 09/26] sched, mm: Introduce tsk_home_node() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 10/26] mm, mpol: Make mempolicy home-node aware Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 18:34 ` Christoph Lameter
2012-03-16 18:34 ` Christoph Lameter
2012-03-16 21:12 ` Peter Zijlstra
2012-03-16 21:12 ` Peter Zijlstra
2012-03-19 13:53 ` Christoph Lameter
2012-03-19 13:53 ` Christoph Lameter
2012-03-19 14:05 ` Peter Zijlstra
2012-03-19 14:05 ` Peter Zijlstra
2012-03-19 15:16 ` Christoph Lameter
2012-03-19 15:16 ` Christoph Lameter
2012-03-19 15:23 ` Peter Zijlstra
2012-03-19 15:23 ` Peter Zijlstra
2012-03-19 15:31 ` Christoph Lameter
2012-03-19 15:31 ` Christoph Lameter
2012-03-19 17:09 ` Peter Zijlstra
2012-03-19 17:09 ` Peter Zijlstra
2012-03-19 17:28 ` Peter Zijlstra
2012-03-19 17:28 ` Peter Zijlstra
2012-03-19 19:06 ` Christoph Lameter
2012-03-19 19:06 ` Christoph Lameter
2012-03-19 20:28 ` Lee Schermerhorn
2012-03-19 20:28 ` Lee Schermerhorn
2012-03-19 21:21 ` Peter Zijlstra
2012-03-19 21:21 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 11/26] mm, mpol: Lazy migrate a process/vma Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 12/26] sched, mm: sched_{fork,exec} node assignment Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-06-15 18:16 ` Tony Luck
2012-06-15 18:16 ` Tony Luck
2012-06-20 19:12 ` [PATCH] sched: Fix build problems when CONFIG_NUMA=y and CONFIG_SMP=n Luck, Tony
2012-06-20 19:12 ` Luck, Tony
2012-03-16 14:40 ` [RFC][PATCH 13/26] sched: Implement home-node awareness Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 14/26] sched, numa: Numa balancer Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-07-07 18:26 ` Rik van Riel
2012-07-07 18:26 ` Rik van Riel
2012-07-09 12:05 ` Peter Zijlstra
2012-07-09 12:05 ` Peter Zijlstra
2012-07-09 12:23 ` Peter Zijlstra
2012-07-09 12:23 ` Peter Zijlstra
2012-07-09 12:40 ` Peter Zijlstra
2012-07-09 12:40 ` Peter Zijlstra
2012-07-09 14:50 ` Rik van Riel
2012-07-09 14:50 ` Rik van Riel
2012-07-08 18:35 ` Rik van Riel
2012-07-08 18:35 ` Rik van Riel
2012-07-09 12:25 ` Peter Zijlstra
2012-07-09 12:25 ` Peter Zijlstra
2012-07-09 14:54 ` Rik van Riel
2012-07-09 14:54 ` Rik van Riel
2012-07-12 22:02 ` Rik van Riel
2012-07-12 22:02 ` Rik van Riel
2012-07-13 14:45 ` Don Morris
2012-07-13 14:45 ` Don Morris
2012-07-14 16:20 ` Rik van Riel
2012-07-14 16:20 ` Rik van Riel
2012-03-16 14:40 ` [RFC][PATCH 15/26] sched, numa: Implement hotplug hooks Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-19 12:16 ` Srivatsa S. Bhat
2012-03-19 12:16 ` Srivatsa S. Bhat
2012-03-19 12:19 ` Peter Zijlstra
2012-03-19 12:19 ` Peter Zijlstra
2012-03-19 12:27 ` Srivatsa S. Bhat
2012-03-19 12:27 ` Srivatsa S. Bhat
2012-03-16 14:40 ` [RFC][PATCH 16/26] sched, numa: Abstract the numa_entity Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 17/26] srcu: revert1 Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 18/26] srcu: revert2 Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 19/26] srcu: Implement call_srcu() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 20/26] mm, mpol: Introduce vma_dup_policy() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 21/26] mm, mpol: Introduce vma_put_policy() Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 22/26] mm, mpol: Split and explose some mempolicy functions Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra [this message]
2012-03-16 14:40 ` [RFC][PATCH 23/26] sched, numa: Introduce sys_numa_{t,m}bind() Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 24/26] mm, mpol: Implement numa_group RSS accounting Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 25/26] sched, numa: Only migrate long-running entities Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-07-08 18:34 ` Rik van Riel
2012-07-08 18:34 ` Rik van Riel
2012-07-09 12:26 ` Peter Zijlstra
2012-07-09 12:26 ` Peter Zijlstra
2012-07-09 14:53 ` Rik van Riel
2012-07-09 14:53 ` Rik van Riel
2012-07-09 14:55 ` Peter Zijlstra
2012-07-09 14:55 ` Peter Zijlstra
2012-03-16 14:40 ` [RFC][PATCH 26/26] sched, numa: A few debug bits Peter Zijlstra
2012-03-16 14:40 ` Peter Zijlstra
2012-03-16 18:25 ` [RFC] AutoNUMA alpha6 Andrea Arcangeli
2012-03-16 18:25 ` Andrea Arcangeli
2012-03-19 18:47 ` Peter Zijlstra
2012-03-19 18:47 ` Peter Zijlstra
2012-03-19 19:02 ` Andrea Arcangeli
2012-03-19 19:02 ` Andrea Arcangeli
2012-03-20 23:41 ` Dan Smith
2012-03-20 23:41 ` Dan Smith
2012-03-21 1:00 ` Andrea Arcangeli
2012-03-21 1:00 ` Andrea Arcangeli
2012-03-21 2:12 ` Andrea Arcangeli
2012-03-21 2:12 ` Andrea Arcangeli
2012-03-21 4:01 ` Dan Smith
2012-03-21 4:01 ` Dan Smith
2012-03-21 12:49 ` Andrea Arcangeli
2012-03-21 12:49 ` Andrea Arcangeli
2012-03-21 22:05 ` Dan Smith
2012-03-21 22:05 ` Dan Smith
2012-03-21 22:52 ` Andrea Arcangeli
2012-03-21 22:52 ` Andrea Arcangeli
2012-03-21 23:13 ` Dan Smith
2012-03-21 23:13 ` Dan Smith
2012-03-21 23:41 ` Andrea Arcangeli
2012-03-21 23:41 ` Andrea Arcangeli
2012-03-22 0:17 ` Andrea Arcangeli
2012-03-22 0:17 ` Andrea Arcangeli
2012-03-22 13:58 ` Dan Smith
2012-03-22 13:58 ` Dan Smith
2012-03-22 14:27 ` Andrea Arcangeli
2012-03-22 18:49 ` Andrea Arcangeli
2012-03-22 18:49 ` Andrea Arcangeli
2012-03-22 18:56 ` Dan Smith
2012-03-22 18:56 ` Dan Smith
2012-03-22 19:11 ` Andrea Arcangeli
2012-03-22 19:11 ` Andrea Arcangeli
2012-03-23 14:15 ` Andrew Theurer
2012-03-23 14:15 ` Andrew Theurer
2012-03-23 16:01 ` Andrea Arcangeli
2012-03-23 16:01 ` Andrea Arcangeli
2012-03-25 13:30 ` Andrea Arcangeli
2012-03-25 13:30 ` Andrea Arcangeli
2012-03-21 7:12 ` Ingo Molnar
2012-03-21 7:12 ` Ingo Molnar
2012-03-21 12:08 ` Andrea Arcangeli
2012-03-21 12:08 ` Andrea Arcangeli
2012-03-21 7:53 ` Ingo Molnar
2012-03-21 7:53 ` Ingo Molnar
2012-03-21 12:17 ` Andrea Arcangeli
2012-03-21 12:17 ` Andrea Arcangeli
2012-03-19 9:57 ` [RFC][PATCH 00/26] sched/numa Avi Kivity
2012-03-19 9:57 ` Avi Kivity
2012-03-19 11:12 ` Peter Zijlstra
2012-03-19 11:12 ` Peter Zijlstra
2012-03-19 11:30 ` Peter Zijlstra
2012-03-19 11:30 ` Peter Zijlstra
2012-03-19 11:39 ` Peter Zijlstra
2012-03-19 11:39 ` Peter Zijlstra
2012-03-19 11:42 ` Avi Kivity
2012-03-19 11:42 ` Avi Kivity
2012-03-19 11:59 ` Peter Zijlstra
2012-03-19 11:59 ` Peter Zijlstra
2012-03-19 12:07 ` Avi Kivity
2012-03-19 12:07 ` Avi Kivity
2012-03-19 12:09 ` Peter Zijlstra
2012-03-19 12:09 ` Peter Zijlstra
2012-03-19 12:16 ` Avi Kivity
2012-03-19 12:16 ` Avi Kivity
2012-03-19 20:03 ` Peter Zijlstra
2012-03-19 20:03 ` Peter Zijlstra
2012-03-20 10:18 ` Avi Kivity
2012-03-20 10:18 ` Avi Kivity
2012-03-20 10:48 ` Peter Zijlstra
2012-03-20 10:48 ` Peter Zijlstra
2012-03-20 10:52 ` Avi Kivity
2012-03-20 10:52 ` Avi Kivity
2012-03-20 11:07 ` Peter Zijlstra
2012-03-20 11:07 ` Peter Zijlstra
2012-03-20 11:48 ` Avi Kivity
2012-03-20 11:48 ` Avi Kivity
2012-03-19 12:20 ` Peter Zijlstra
2012-03-19 12:20 ` Peter Zijlstra
2012-03-19 12:24 ` Avi Kivity
2012-03-19 12:24 ` Avi Kivity
2012-03-19 15:44 ` Avi Kivity
2012-03-19 15:44 ` Avi Kivity
2012-03-19 13:40 ` Andrea Arcangeli
2012-03-19 13:40 ` Andrea Arcangeli
2012-03-19 20:06 ` Peter Zijlstra
2012-03-19 20:06 ` Peter Zijlstra
2012-03-19 13:04 ` Andrea Arcangeli
2012-03-19 13:04 ` Andrea Arcangeli
2012-03-19 13:26 ` Peter Zijlstra
2012-03-19 13:26 ` Peter Zijlstra
2012-03-19 13:57 ` Andrea Arcangeli
2012-03-19 13:57 ` Andrea Arcangeli
2012-03-19 14:06 ` Avi Kivity
2012-03-19 14:06 ` Avi Kivity
2012-03-19 14:30 ` Andrea Arcangeli
2012-03-19 14:30 ` Andrea Arcangeli
2012-03-19 18:42 ` Peter Zijlstra
2012-03-19 18:42 ` Peter Zijlstra
2012-03-20 22:18 ` Rik van Riel
2012-03-20 22:18 ` Rik van Riel
2012-03-21 16:50 ` Andrea Arcangeli
2012-03-21 16:50 ` Andrea Arcangeli
2012-04-02 16:34 ` Pekka Enberg
2012-04-02 16:34 ` Pekka Enberg
2012-04-02 16:55 ` Rik van Riel
2012-04-02 16:55 ` Rik van Riel
2012-04-02 16:54 ` Pekka Enberg
2012-04-02 16:54 ` Pekka Enberg
2012-04-02 17:12 ` Pekka Enberg
2012-04-02 17:12 ` Pekka Enberg
2012-04-02 17:23 ` Pekka Enberg
2012-04-02 17:23 ` Pekka Enberg
2012-03-19 14:07 ` Peter Zijlstra
2012-03-19 14:07 ` Peter Zijlstra
2012-03-19 14:34 ` Andrea Arcangeli
2012-03-19 14:34 ` Andrea Arcangeli
2012-03-19 18:41 ` Peter Zijlstra
2012-03-19 18:41 ` Peter Zijlstra
2012-03-19 19:13 ` Peter Zijlstra
2012-03-19 19:13 ` Peter Zijlstra
2012-03-19 14:07 ` Andrea Arcangeli
2012-03-19 14:07 ` Andrea Arcangeli
2012-03-19 19:05 ` Peter Zijlstra
2012-03-19 19:05 ` Peter Zijlstra
2012-03-19 13:26 ` Peter Zijlstra
2012-03-19 13:26 ` Peter Zijlstra
2012-03-19 14:16 ` Andrea Arcangeli
2012-03-19 14:16 ` Andrea Arcangeli
2012-03-19 13:29 ` Peter Zijlstra
2012-03-19 13:29 ` Peter Zijlstra
2012-03-19 14:19 ` Andrea Arcangeli
2012-03-19 14:19 ` Andrea Arcangeli
2012-03-19 13:39 ` Peter Zijlstra
2012-03-19 13:39 ` Peter Zijlstra
2012-03-19 14:20 ` Andrea Arcangeli
2012-03-19 14:20 ` Andrea Arcangeli
2012-03-19 20:17 ` Christoph Lameter
2012-03-19 20:17 ` Christoph Lameter
2012-03-19 20:28 ` Ingo Molnar
2012-03-19 20:28 ` Ingo Molnar
2012-03-19 20:43 ` Christoph Lameter
2012-03-19 20:43 ` Christoph Lameter
2012-03-19 21:34 ` Ingo Molnar
2012-03-19 21:34 ` Ingo Molnar
2012-03-20 0:05 ` Linus Torvalds
2012-03-20 0:05 ` Linus Torvalds
2012-03-20 7:31 ` Ingo Molnar
2012-03-20 7:31 ` Ingo Molnar
2012-03-21 22:53 ` Nish Aravamudan
2012-03-21 22:53 ` Nish Aravamudan
2012-03-22 9:45 ` Peter Zijlstra
2012-03-22 9:45 ` Peter Zijlstra
2012-03-22 10:34 ` Ingo Molnar
2012-03-22 10:34 ` Ingo Molnar
2012-03-24 1:41 ` Nish Aravamudan
2012-03-24 1:41 ` Nish Aravamudan
2012-03-26 11:42 ` Peter Zijlstra
2012-03-26 11:42 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120316144241.612966692@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=Lee.Schermerhorn@hp.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=bharata.rao@gmail.com \
--cc=danms@us.ibm.com \
--cc=efault@gmx.de \
--cc=hannes@cmpxchg.org \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=paulmck@linux.vnet.ibm.com \
--cc=pjt@google.com \
--cc=riel@redhat.com \
--cc=suresh.b.siddha@intel.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.