public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Matthew Dobson <colpatch@us.ibm.com>
To: linux-kernel@vger.kernel.org, torvalds@transmeta.com
Cc: mjbligh@us.ibm.com, lse-tech@lists.sourceforge.net,
	rml@tech9.net, efocht@ess.nec.de
Subject: [patch] NUMA API for 2.5.12 (1/4)
Date: Thu, 02 May 2002 15:49:39 -0700	[thread overview]
Message-ID: <3CD1C283.394FF906@us.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 899 bytes --]

Ok all,
	I'm going to go ahead and assume (hope) that the lack of response to the last
posting was because the patch was really large.  We'll try this again with 4
smaller patches and see what happens.

	This patch implements the NUMA API specified at:
http://lse.sourceforge.net/numa/numa_api.html for the 2.5.12 version of the
kernel.  The API implements such features as binding processes to CPUs
(similar to Robert Love's recent patch), binding to memory blocks, setting
launch policies for processes, and rudimentary topology features.  The patch
is currently used via a prctl() interface with a possible /proc interface in
the future.  I also have a syscall version if that is preferred.

	Again, this is a slightly cleaned-up, and (more) bite-sized version of the
previous patch sent out...

Enjoy, and please send me any comments on the patch, or the API it implements!

-Matt
colpatch@us.ibm.com

[-- Attachment #2: numa_api-arch_indep-setup-2.5.12.patch --]
[-- Type: text/plain, Size: 6546 bytes --]

diff -Nur linux-2.5.8-vanilla/include/linux/init_task.h linux-2.5.8-api/include/linux/init_task.h
--- linux-2.5.8-vanilla/include/linux/init_task.h	Mon Apr 22 17:20:20 2002
+++ linux-2.5.8-api/include/linux/init_task.h	Fri Apr 26 15:22:52 2002
@@ -59,6 +59,10 @@
     children:		LIST_HEAD_INIT(tsk.children),			\
     sibling:		LIST_HEAD_INIT(tsk.sibling),			\
     thread_group:	LIST_HEAD_INIT(tsk.thread_group),		\
+    numa_restrict:	NEW_NUMA_SET,					\
+    numa_binding:	NEW_NUMA_SET,					\
+    numa_launch_policy:	NEW_NUMA_SET,					\
+    numa_api_lock:	RW_LOCK_UNLOCKED,				\
     wait_chldexit:	__WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
     real_timer:		{						\
 	function:		it_real_fn				\
diff -Nur linux-2.5.8-vanilla/include/linux/mmzone.h linux-2.5.8-api/include/linux/mmzone.h
--- linux-2.5.8-vanilla/include/linux/mmzone.h	Mon Apr 22 17:13:25 2002
+++ linux-2.5.8-api/include/linux/mmzone.h	Fri Apr 26 17:15:28 2002
@@ -136,6 +136,7 @@
 	unsigned long node_start_mapnr;
 	unsigned long node_size;
 	int node_id;
+	int memblk_id; /* A unique ID for each memory block (physically contiguous chunk of memory) */
 	struct pglist_data *node_next;
 } pg_data_t;
 
@@ -163,14 +164,15 @@
 #define NODE_MEM_MAP(nid)	mem_map
 #define MAX_NR_NODES		1
 
-#else /* !CONFIG_DISCONTIGMEM */
+#endif /* !CONFIG_DISCONTIGMEM */
 
-#include <asm/mmzone.h>
+#if defined (CONFIG_DISCONTIGMEM) || defined (CONFIG_NUMA_API)
 
+#include <asm/mmzone.h>
 /* page->zone is currently 8 bits ... */
 #define MAX_NR_NODES		(255 / MAX_NR_ZONES)
 
-#endif /* !CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_DISCONTIGMEM || CONFIG_NUMA_API */
 
 #define MAP_ALIGN(x)	((((x) % sizeof(mem_map_t)) == 0) ? (x) : ((x) + \
 		sizeof(mem_map_t) - ((x) % sizeof(mem_map_t))))
diff -Nur linux-2.5.8-vanilla/include/linux/numa.h linux-2.5.8-api/include/linux/numa.h
--- linux-2.5.8-vanilla/include/linux/numa.h	Wed Dec 31 16:00:00 1969
+++ linux-2.5.8-api/include/linux/numa.h	Mon Apr 29 11:03:20 2002
@@ -0,0 +1,76 @@
+/*
+ * linux/include/linux/numa.h
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#ifndef _LINUX_NUMA_H_
+#define _LINUX_NUMA_H_
+
+#include <linux/types.h>
+
+#ifdef CONFIG_NUMA
+#define NR_MEMBLKS	32 /* Max number of Memory Blocks */
+#else
+#define NR_MEMBLKS	1
+#endif
+
+typedef unsigned long		numa_bitmap_t;
+#define NUMA_BITMAP_NONE	(~((numa_bitmap_t) 0))
+
+#define CPU_BIND_STRICT	0
+
+#define MPOL_FIRST	1   /* UNUSED FOR NOW */
+#define MPOL_STRIPE	2   /* UNUSED FOR NOW */
+#define MPOL_RR		4   /* UNUSED FOR NOW */
+#define MPOL_STRICT	8   /* Memory MUST be allocated according to binding */
+#define MPOL_LOOSE	16  /* Memory must try to be allocated according to binding first, 
+			       and can fall back to restriction if necessary */
+
+
+typedef struct numa_list {
+	numa_bitmap_t list;
+	int behavior;
+} numa_list_t;
+
+typedef struct numa_set {
+	numa_list_t cpus;
+	numa_list_t memblks;
+} numa_set_t;
+
+
+/* Initializes a numa_set_t to the unrestricted state: all-ones bitmaps (all cpus/memblks allowed), strict behaviors. */
+#define numa_set_init(x) do { (x)->cpus.list = NUMA_BITMAP_NONE;\
+				(x)->memblks.list = NUMA_BITMAP_NONE;\
+				(x)->cpus.behavior = CPU_BIND_STRICT;\
+				(x)->memblks.behavior = MPOL_STRICT; } while(0)
+
+/* Static initializer for an unrestricted numa_set_t: all-ones bitmaps (all cpus/memblks allowed), strict behaviors. */
+#define NEW_NUMA_SET { {NUMA_BITMAP_NONE, CPU_BIND_STRICT}, \
+		       {NUMA_BITMAP_NONE, MPOL_STRICT} }
+
+/* Tests whether a numa_set_t represents an empty restriction (i.e. all 1's: all cpus/memblks allowed). */
+#define null_restrict(x) (((x)->cpus.list == NUMA_BITMAP_NONE) && \
+				((x)->memblks.list == NUMA_BITMAP_NONE))
+
+#endif /* _LINUX_NUMA_H_ */
diff -Nur linux-2.5.8-vanilla/include/linux/sched.h linux-2.5.8-api/include/linux/sched.h
--- linux-2.5.8-vanilla/include/linux/sched.h	Mon Apr 22 17:13:27 2002
+++ linux-2.5.8-api/include/linux/sched.h	Fri Apr 26 15:14:15 2002
@@ -28,6 +28,7 @@
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
+#include <linux/numa.h>
 
 struct exec_domain;
 
@@ -286,6 +287,12 @@
 	struct task_struct *pidhash_next;
 	struct task_struct **pidhash_pprev;
 
+	/* additional NUMA stuff */
+	numa_set_t numa_restrict;
+	numa_set_t numa_binding;
+	numa_set_t numa_launch_policy;
+	rwlock_t  numa_api_lock;	/* protects the preceding 3 structs */
+	
 	wait_queue_head_t wait_chldexit;	/* for wait4() */
 	struct completion *vfork_done;		/* for vfork() */
 
diff -Nur linux-2.5.8-vanilla/include/linux/smp.h linux-2.5.8-api/include/linux/smp.h
--- linux-2.5.8-vanilla/include/linux/smp.h	Mon Apr 22 17:13:25 2002
+++ linux-2.5.8-api/include/linux/smp.h	Fri Apr 26 15:14:15 2002
@@ -90,6 +90,7 @@
 #define cpu_number_map(cpu)			0
 #define smp_call_function(func,info,retry,wait)	({ 0; })
 #define cpu_online_map				1
+#define memblk_online_map			1
 static inline void smp_send_reschedule(int cpu) { }
 static inline void smp_send_reschedule_all(void) { }
 #define __per_cpu_data
diff -Nur linux-2.5.8-vanilla/kernel/sched.c linux-2.5.8-api/kernel/sched.c
--- linux-2.5.8-vanilla/kernel/sched.c	Mon Apr 22 13:17:43 2002
+++ linux-2.5.8-api/kernel/sched.c	Mon Apr 22 15:35:16 2002
@@ -357,7 +357,7 @@
 	runqueue_t *rq;
 
 	preempt_disable();
-	rq = this_rq();
+	rq = task_rq(p);
 	spin_lock_irq(&rq->lock);
 
 	p->state = TASK_RUNNING;
@@ -371,7 +371,6 @@
 		p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
 		p->prio = effective_prio(p);
 	}
-	p->thread_info->cpu = smp_processor_id();
 	activate_task(p, rq);
 
 	spin_unlock_irq(&rq->lock);
@@ -1662,8 +1661,7 @@
 	migration_req_t req;
 	runqueue_t *rq;
 
-	new_mask &= cpu_online_map;
-	if (!new_mask)
+	if (!(new_mask & cpu_online_map))
 		BUG();
 
 	preempt_disable();

             reply	other threads:[~2002-05-02 22:52 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-05-02 22:49 Matthew Dobson [this message]
  -- strict thread matches above, loose matches on Subject: below --
2002-05-02 22:50 [patch] NUMA API for 2.5.12 (1/4) Matthew Dobson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3CD1C283.394FF906@us.ibm.com \
    --to=colpatch@us.ibm.com \
    --cc=efocht@ess.nec.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    --cc=mjbligh@us.ibm.com \
    --cc=rml@tech9.net \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox