* [patch] NUMA API for 2.5.12 (1/4)
@ 2002-05-02 22:49 Matthew Dobson
0 siblings, 0 replies; 2+ messages in thread
From: Matthew Dobson @ 2002-05-02 22:49 UTC (permalink / raw)
To: linux-kernel, torvalds; +Cc: mjbligh, lse-tech, rml, efocht
[-- Attachment #1: Type: text/plain, Size: 899 bytes --]
Ok all,
I'm going to go ahead and assume (hope) that the lack of response to the last
posting was because the patch was really large. We'll try this again with 4
smaller patches and see what happens.
This patch implements the NUMA API specified at:
http://lse.sourceforge.net/numa/numa_api.html for the 2.5.12 version of the
kernel. The API implements such features as binding processes to CPUs
(similar to Robert Love's recent patch), binding to memory blocks, setting
launch policies for processes, and rudimentary topology features. The patch
is currently used via a prctl() interface with a possible /proc interface in
the future. I also have a syscall version if that is preferred.
Again, this is a slightly cleaned-up, and (more) bite-sized version of the
previous patch sent out...
Enjoy, and please send me any comments on the patch, or the API it implements!
-Matt
colpatch@us.ibm.com
[-- Attachment #2: numa_api-arch_indep-setup-2.5.12.patch --]
[-- Type: text/plain, Size: 6546 bytes --]
diff -Nur linux-2.5.8-vanilla/include/linux/init_task.h linux-2.5.8-api/include/linux/init_task.h
--- linux-2.5.8-vanilla/include/linux/init_task.h Mon Apr 22 17:20:20 2002
+++ linux-2.5.8-api/include/linux/init_task.h Fri Apr 26 15:22:52 2002
@@ -59,6 +59,10 @@
children: LIST_HEAD_INIT(tsk.children), \
sibling: LIST_HEAD_INIT(tsk.sibling), \
thread_group: LIST_HEAD_INIT(tsk.thread_group), \
+ numa_restrict: NEW_NUMA_SET, \
+ numa_binding: NEW_NUMA_SET, \
+ numa_launch_policy: NEW_NUMA_SET, \
+ numa_api_lock: RW_LOCK_UNLOCKED, \
wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
real_timer: { \
function: it_real_fn \
diff -Nur linux-2.5.8-vanilla/include/linux/mmzone.h linux-2.5.8-api/include/linux/mmzone.h
--- linux-2.5.8-vanilla/include/linux/mmzone.h Mon Apr 22 17:13:25 2002
+++ linux-2.5.8-api/include/linux/mmzone.h Fri Apr 26 17:15:28 2002
@@ -136,6 +136,7 @@
unsigned long node_start_mapnr;
unsigned long node_size;
int node_id;
+ int memblk_id; /* A unique ID for each memory block (physical contiguous chunk of memory) */
struct pglist_data *node_next;
} pg_data_t;
@@ -163,14 +164,15 @@
#define NODE_MEM_MAP(nid) mem_map
#define MAX_NR_NODES 1
-#else /* !CONFIG_DISCONTIGMEM */
+#endif /* !CONFIG_DISCONTIGMEM */
-#include <asm/mmzone.h>
+#if defined (CONFIG_DISCONTIGMEM) || defined (CONFIG_NUMA_API)
+#include <asm/mmzone.h>
/* page->zone is currently 8 bits ... */
#define MAX_NR_NODES (255 / MAX_NR_ZONES)
-#endif /* !CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_DISCONTIGMEM || CONFIG_NUMA_API */
#define MAP_ALIGN(x) ((((x) % sizeof(mem_map_t)) == 0) ? (x) : ((x) + \
sizeof(mem_map_t) - ((x) % sizeof(mem_map_t))))
diff -Nur linux-2.5.8-vanilla/include/linux/numa.h linux-2.5.8-api/include/linux/numa.h
--- linux-2.5.8-vanilla/include/linux/numa.h Wed Dec 31 16:00:00 1969
+++ linux-2.5.8-api/include/linux/numa.h Mon Apr 29 11:03:20 2002
@@ -0,0 +1,76 @@
+/*
+ * linux/include/linux/numa.h
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _LINUX_NUMA_H_
+#define _LINUX_NUMA_H_
+
+#include <linux/types.h>
+
+#ifdef CONFIG_NUMA
+#define NR_MEMBLKS 32 /* Max number of Memory Blocks */
+#else
+#define NR_MEMBLKS 1
+#endif
+
+typedef unsigned long numa_bitmap_t; /* bitmap type used for the cpu and memblk lists below */
+#define NUMA_BITMAP_NONE (~((numa_bitmap_t) 0)) /* all bits set: the "no restriction" value tested by null_restrict() */
+
+#define CPU_BIND_STRICT 0 /* the only CPU binding behavior defined in this header */
+
+#define MPOL_FIRST 1 /* UNUSED FOR NOW */
+#define MPOL_STRIPE 2 /* UNUSED FOR NOW */
+#define MPOL_RR 4 /* UNUSED FOR NOW */
+#define MPOL_STRICT 8 /* Memory MUST be allocated according to binding */
+#define MPOL_LOOSE 16 /* Memory must try to be allocated according to binding first,
+ and can fall back to restriction if necessary */
+
+
+typedef struct numa_list { /* one bitmap plus the behavior value that goes with it */
+ numa_bitmap_t list; /* bitmap; numa_set_init() sets it to NUMA_BITMAP_NONE (all 1's) */
+ int behavior; /* CPU_BIND_* for a cpu list, MPOL_* for a memblk list (see numa_set_init) */
+} numa_list_t;
+
+typedef struct numa_set { /* pairs a cpu list with a memory-block list */
+ numa_list_t cpus; /* cpu bitmap and its CPU_BIND_* behavior */
+ numa_list_t memblks; /* memory-block bitmap and its MPOL_* behavior */
+} numa_set_t;
+
+
+/*
+ * Initializes the numa_set_t pointed to by 'x' to its default state:
+ * both bitmaps are set to NUMA_BITMAP_NONE (all 1's, i.e. all
+ * cpus/memblks allowed) and the behaviors to CPU_BIND_STRICT and
+ * MPOL_STRICT respectively.
+ * 'x' is evaluated four times, so it must not have side effects.
+ */
+#define numa_set_init(x) do { (x)->cpus.list = NUMA_BITMAP_NONE;\
+ (x)->memblks.list = NUMA_BITMAP_NONE;\
+ (x)->cpus.behavior = CPU_BIND_STRICT;\
+ (x)->memblks.behavior = MPOL_STRICT; } while(0)
+
+/*
+ * Static (aggregate) initializer for a numa_set_t. Produces the same
+ * value as numa_set_init(): both bitmaps all 1's (all cpus/memblks
+ * allowed), behaviors CPU_BIND_STRICT and MPOL_STRICT. The inner braces
+ * follow the field order { list, behavior } for cpus, then memblks.
+ */
+#define NEW_NUMA_SET { {NUMA_BITMAP_NONE, CPU_BIND_STRICT}, \
+ {NUMA_BITMAP_NONE, MPOL_STRICT} }
+
+/*
+ * Tests whether the numa_set_t pointed to by 'x' represents an empty
+ * restriction (ie: all 1's. All cpus/memblks allowed.)
+ * Only the two bitmaps are compared; the behavior fields are not examined.
+ * 'x' is evaluated twice.
+ */
+#define null_restrict(x) (((x)->cpus.list == NUMA_BITMAP_NONE) && \
+ ((x)->memblks.list == NUMA_BITMAP_NONE))
+
+#endif /* _LINUX_NUMA_H_ */
diff -Nur linux-2.5.8-vanilla/include/linux/sched.h linux-2.5.8-api/include/linux/sched.h
--- linux-2.5.8-vanilla/include/linux/sched.h Mon Apr 22 17:13:27 2002
+++ linux-2.5.8-api/include/linux/sched.h Fri Apr 26 15:14:15 2002
@@ -28,6 +28,7 @@
#include <linux/securebits.h>
#include <linux/fs_struct.h>
#include <linux/compiler.h>
+#include <linux/numa.h>
struct exec_domain;
@@ -286,6 +287,12 @@
struct task_struct *pidhash_next;
struct task_struct **pidhash_pprev;
+ /* additional NUMA stuff */
+ numa_set_t numa_restrict;
+ numa_set_t numa_binding;
+ numa_set_t numa_launch_policy;
+ rwlock_t numa_api_lock; /* protects the preceding 3 structs */
+
wait_queue_head_t wait_chldexit; /* for wait4() */
struct completion *vfork_done; /* for vfork() */
diff -Nur linux-2.5.8-vanilla/include/linux/smp.h linux-2.5.8-api/include/linux/smp.h
--- linux-2.5.8-vanilla/include/linux/smp.h Mon Apr 22 17:13:25 2002
+++ linux-2.5.8-api/include/linux/smp.h Fri Apr 26 15:14:15 2002
@@ -90,6 +90,7 @@
#define cpu_number_map(cpu) 0
#define smp_call_function(func,info,retry,wait) ({ 0; })
#define cpu_online_map 1
+#define memblk_online_map 1
static inline void smp_send_reschedule(int cpu) { }
static inline void smp_send_reschedule_all(void) { }
#define __per_cpu_data
diff -Nur linux-2.5.8-vanilla/kernel/sched.c linux-2.5.8-api/kernel/sched.c
--- linux-2.5.8-vanilla/kernel/sched.c Mon Apr 22 13:17:43 2002
+++ linux-2.5.8-api/kernel/sched.c Mon Apr 22 15:35:16 2002
@@ -357,7 +357,7 @@
runqueue_t *rq;
preempt_disable();
- rq = this_rq();
+ rq = task_rq(p);
spin_lock_irq(&rq->lock);
p->state = TASK_RUNNING;
@@ -371,7 +371,6 @@
p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
p->prio = effective_prio(p);
}
- p->thread_info->cpu = smp_processor_id();
activate_task(p, rq);
spin_unlock_irq(&rq->lock);
@@ -1662,8 +1661,7 @@
migration_req_t req;
runqueue_t *rq;
- new_mask &= cpu_online_map;
- if (!new_mask)
+ if (!(new_mask & cpu_online_map))
BUG();
preempt_disable();
^ permalink raw reply [flat|nested] 2+ messages in thread
* [patch] NUMA API for 2.5.12 (1/4)
@ 2002-05-02 22:50 Matthew Dobson
0 siblings, 0 replies; 2+ messages in thread
From: Matthew Dobson @ 2002-05-02 22:50 UTC (permalink / raw)
To: linux-kernel, torvalds; +Cc: mjbligh, lse-tech, rml, efocht
[-- Attachment #1: Type: text/plain, Size: 899 bytes --]
Ok all,
I'm going to go ahead and assume (hope) that the lack of response to the last
posting was because the patch was really large. We'll try this again with 4
smaller patches and see what happens.
This patch implements the NUMA API specified at:
http://lse.sourceforge.net/numa/numa_api.html for the 2.5.12 version of the
kernel. The API implements such features as binding processes to CPUs
(similar to Robert Love's recent patch), binding to memory blocks, setting
launch policies for processes, and rudimentary topology features. The patch
is currently used via a prctl() interface with a possible /proc interface in
the future. I also have a syscall version if that is preferred.
Again, this is a slightly cleaned-up, and (more) bite-sized version of the
previous patch sent out...
Enjoy, and please send me any comments on the patch, or the API it implements!
-Matt
colpatch@us.ibm.com
[-- Attachment #2: numa_api-arch_dep-2.5.12.patch --]
[-- Type: text/plain, Size: 5346 bytes --]
diff -Nur linux-2.5.12-vanilla/arch/i386/Config.help linux-2.5.12-api/arch/i386/Config.help
--- linux-2.5.12-vanilla/arch/i386/Config.help Sun Apr 28 20:11:34 2002
+++ linux-2.5.12-api/arch/i386/Config.help Wed May 1 17:21:13 2002
@@ -48,6 +48,13 @@
You will need a new lynxer.elf file to flash your firmware with - send
email to Martin.Bligh@us.ibm.com
+CONFIG_NUMA_API
+ This option is used to turn on support for the NUMA API, which allows
+ the binding of processes to specific processors/nodes/memory blocks.
+ This option is also used for some of the NUMA Topology features.
+ Please email Matthew Dobson <colpatch@us.ibm.com> with questions
+ and/or concerns.
+
CONFIG_X86_UP_IOAPIC
An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
SMP-capable replacement for PC-style interrupt controllers. Most
diff -Nur linux-2.5.12-vanilla/arch/i386/config.in linux-2.5.12-api/arch/i386/config.in
--- linux-2.5.12-vanilla/arch/i386/config.in Sun Apr 28 20:12:15 2002
+++ linux-2.5.12-api/arch/i386/config.in Wed May 1 17:21:13 2002
@@ -198,6 +198,7 @@
fi
else
bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
+ bool 'Non-Uniform Memory Access API support' CONFIG_NUMA_API
fi
if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
diff -Nur linux-2.5.12-vanilla/include/asm-i386/core_ibmnumaq.h linux-2.5.12-api/include/asm-i386/core_ibmnumaq.h
--- linux-2.5.12-vanilla/include/asm-i386/core_ibmnumaq.h Wed Dec 31 16:00:00 1969
+++ linux-2.5.12-api/include/asm-i386/core_ibmnumaq.h Wed May 1 17:24:25 2002
@@ -0,0 +1,60 @@
+/*
+ * linux/include/asm-i386/core_ibmnumaq.h
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _ASM_CORE_IBMNUMAQ_H_
+#define _ASM_CORE_IBMNUMAQ_H_
+
+/*
+ * These functions need to be defined for every architecture.
+ * The five defined in this file are necessary for the NUMA API to function.
+ * A sixth, numa_node_id(), is built on _cpu_to_node() in <asm/mmzone.h>
+ * and is needed by several pieces of NUMA code.
+ */
+
+/*
+ * Returns the number of the node containing CPU 'cpu'.
+ * The node number is the CPU's logical APIC ID with the low four bits
+ * shifted out.
+ */
+#define _cpu_to_node(cpu) (cpu_to_logical_apicid(cpu) >> 4)
+
+/*
+ * Returns the number of the node containing MemBlk 'memblk'.
+ * Identity mapping: the memblk number is used as the node number.
+ */
+#define _memblk_to_node(memblk) (memblk)
+
+/* Returns the number of the node containing Node 'nid'. This architecture is flat,
+ so it is a pretty simple function: the argument is returned unchanged. */
+#define _node_to_node(nid) (nid)
+
+/*
+ * Returns the number of the first CPU on Node 'node'.
+ *
+ * The node number forms the upper part of a logical APIC ID (node << 4).
+ * The low four bits are probed one bit at a time (1, 2, 4, 8), and the
+ * first ID that logical_apicid_to_cpu() maps to a non-negative CPU number
+ * is returned. If none of the four IDs yields one, 0 is returned, which a
+ * caller cannot tell apart from a genuine CPU 0.
+ * NOTE(review): presumably a negative result from logical_apicid_to_cpu()
+ * means "no such CPU" -- confirm against its definition.
+ */
+static inline int _node_to_cpu(int node)
+{
+ int i, cpu, logical_apicid = node << 4; /* node sits above the low four bits */
+
+ for(i = 1; i < 16; i <<= 1) /* i takes the values 1, 2, 4, 8 */
+ if ((cpu = logical_apicid_to_cpu(logical_apicid | i)) >= 0)
+ return cpu;
+
+ return 0; /* fallback when no CPU was found for this node */
+}
+
+/*
+ * Returns the number of the first MemBlk on Node 'node'.
+ * Identity mapping, the inverse of _memblk_to_node() above.
+ */
+#define _node_to_memblk(node) (node)
+
+#endif /* _ASM_CORE_IBMNUMAQ_H_ */
diff -Nur linux-2.5.12-vanilla/include/asm-i386/mmzone.h linux-2.5.12-api/include/asm-i386/mmzone.h
--- linux-2.5.12-vanilla/include/asm-i386/mmzone.h Wed Dec 31 16:00:00 1969
+++ linux-2.5.12-api/include/asm-i386/mmzone.h Wed May 1 17:24:25 2002
@@ -0,0 +1,44 @@
+/*
+ * linux/include/asm-i386/mmzone.h
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _ASM_MMZONE_H_
+#define _ASM_MMZONE_H_
+
+#include <asm/smpboot.h>
+
+#ifdef CONFIG_IBMNUMAQ
+#include <asm/core_ibmnumaq.h>
+#else /* !CONFIG_IBMNUMAQ: every cpu, memblk and node lookup below yields 0 */
+#define _cpu_to_node(cpu) (0)
+#define _memblk_to_node(memblk) (0)
+#define _node_to_node(nid) (0)
+#define _node_to_cpu(node) (0)
+#define _node_to_memblk(node) (0)
+#endif /* CONFIG_IBMNUMAQ */
+
+/*
+ * Returns the number of the current Node, i.e. the node that
+ * _cpu_to_node() reports for smp_processor_id().
+ */
+#define numa_node_id() (_cpu_to_node(smp_processor_id()))
+
+#endif /* _ASM_MMZONE_H_ */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2002-05-02 22:54 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-05-02 22:49 [patch] NUMA API for 2.5.12 (1/4) Matthew Dobson
-- strict thread matches above, loose matches on Subject: below --
2002-05-02 22:50 Matthew Dobson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox