public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <ak@suse.de>
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org, akpm@osdl.org
Subject: [PATCH] NUMA API for Linux 1/ Core NUMA API code
Date: Tue, 6 Apr 2004 15:34:31 +0200	[thread overview]
Message-ID: <20040406153431.197de19e.ak@suse.de> (raw)
In-Reply-To: <20040406153322.5d6e986e.ak@suse.de>

This is the core NUMA API code. It includes NUMA policy-aware
wrappers for get_free_pages and alloc_page_vma(). On non-NUMA kernels
these are defined away.

The system calls mbind (see http://www.firstfloor.org/~andi/mbind.html),
get_mempolicy (http://www.firstfloor.org/~andi/get_mempolicy.html) and
set_mempolicy (http://www.firstfloor.org/~andi/set_mempolicy.html) are
implemented here.

Adds a vm_policy field to the VMA and a mempolicy field to the process.
The process also has a field (il_next) for interleaving. VMA interleaving
uses the offset into the VMA, but that's not possible for process allocations.

diff -u linux-2.6.5-numa/include/linux/gfp.h-o linux-2.6.5-numa/include/linux/gfp.h
--- linux-2.6.5-numa/include/linux/gfp.h-o	2004-03-21 21:11:55.000000000 +0100
+++ linux-2.6.5-numa/include/linux/gfp.h	2004-04-06 13:36:12.000000000 +0200
@@ -4,6 +4,8 @@
 #include <linux/mmzone.h>
 #include <linux/stddef.h>
 #include <linux/linkage.h>
+#include <linux/config.h>
+
 /*
  * GFP bitmasks..
  */
@@ -72,10 +74,29 @@
 	return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
 }
 
+extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+struct vm_area_struct;
+
+#ifdef CONFIG_NUMA
+static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+	if (unlikely(order >= MAX_ORDER))
+		return NULL;
+
+	return alloc_pages_current(gfp_mask, order);
+}
+extern struct page *__alloc_page_vma(unsigned gfp_mask, struct vm_area_struct *vma, 
+				   unsigned long off);
+
+extern struct page *alloc_page_vma(unsigned gfp_mask, struct vm_area_struct *vma, 
+				   unsigned long addr);
+#else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_page(gfp_mask) \
-		alloc_pages_node(numa_node_id(), gfp_mask, 0)
+#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define __alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#endif
+#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
 extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
 extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
diff -u linux-2.6.5-numa/include/linux/mm.h-o linux-2.6.5-numa/include/linux/mm.h
--- linux-2.6.5-numa/include/linux/mm.h-o	2004-04-06 13:12:23.000000000 +0200
+++ linux-2.6.5-numa/include/linux/mm.h	2004-04-06 13:36:12.000000000 +0200
@@ -12,6 +12,7 @@
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/fs.h>
+#include <linux/mempolicy.h>
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -47,6 +48,9 @@
  *
  * This structure is exactly 64 bytes on ia32.  Please think very, very hard
  * before adding anything to it.
+ * [Now 4 bytes more on 32bit NUMA machines. Sorry. -AK.
+ * But if you want to recover the 4 bytes just remove vm_next. It is redundant
+ * with vm_rb. Will be a lot of editing work though. vm_rb.color is redundant too.] 
  */
 struct vm_area_struct {
 	struct mm_struct * vm_mm;	/* The address space we belong to. */
@@ -77,6 +81,10 @@
 					   units, *not* PAGE_CACHE_SIZE */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
+#endif
 };
 
 /*
@@ -148,6 +156,8 @@
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
+	struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr);
 };
 
 /* forward declaration; pte_chain is meant to be internal to rmap.c */
@@ -435,6 +445,8 @@
 
 struct page *shmem_nopage(struct vm_area_struct * vma,
 			unsigned long address, int *type);
+int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
+struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr);
 struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
 void shmem_lock(struct file * file, int lock);
 int shmem_zero_setup(struct vm_area_struct *);
@@ -633,6 +645,11 @@
 	return vma;
 }
 
+static inline unsigned long vma_pages(struct vm_area_struct *vma)
+{
+	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+}
+
 extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
 
 extern unsigned int nr_used_zone_pages(void);
diff -u linux-2.6.5-numa/include/linux/sched.h-o linux-2.6.5-numa/include/linux/sched.h
--- linux-2.6.5-numa/include/linux/sched.h-o	2004-04-06 13:12:23.000000000 +0200
+++ linux-2.6.5-numa/include/linux/sched.h	2004-04-06 13:36:12.000000000 +0200
@@ -29,6 +29,7 @@
 #include <linux/completion.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
+#include <linux/mempolicy.h>
 
 struct exec_domain;
 
@@ -493,6 +494,9 @@
 
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
+
+  	struct mempolicy *mempolicy;
+  	short il_next;		/* could be shared with used_math */
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
diff -u linux-2.6.5-numa/kernel/sys.c-o linux-2.6.5-numa/kernel/sys.c
--- linux-2.6.5-numa/kernel/sys.c-o	1970-01-01 01:12:51.000000000 +0100
+++ linux-2.6.5-numa/kernel/sys.c	2004-04-06 13:36:12.000000000 +0200
@@ -260,6 +260,9 @@
 cond_syscall(sys_shmget)
 cond_syscall(sys_shmdt)
 cond_syscall(sys_shmctl)
+cond_syscall(sys_mbind)
+cond_syscall(sys_get_mempolicy)
+cond_syscall(sys_set_mempolicy)
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read)
diff -u linux-2.6.5-numa/mm/Makefile-o linux-2.6.5-numa/mm/Makefile
--- linux-2.6.5-numa/mm/Makefile-o	2004-03-21 21:12:13.000000000 +0100
+++ linux-2.6.5-numa/mm/Makefile	2004-04-06 13:36:12.000000000 +0200
@@ -12,3 +12,4 @@
 			   slab.o swap.o truncate.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_NUMA) 	+= policy.o

  reply	other threads:[~2004-04-06 13:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-04-06 13:33 NUMA API for Linux Andi Kleen
2004-04-06 13:34 ` Andi Kleen [this message]
2004-04-06 13:35 ` NUMA API for Linux 2/ Add x86-64 support Andi Kleen
2004-04-06 13:35 ` [PATCH] NUMA API for Linux 3/ Add i386 support Andi Kleen
2004-04-06 23:23   ` Andrew Morton
2004-04-06 13:36 ` [PATCH] NUMA API for Linux 4/ Add IA64 support Andi Kleen
2004-04-06 13:37 ` [PATCH] NUMA API for Linux 5/ Add VMA hooks for policy Andi Kleen
2004-05-05 16:05   ` Paul Jackson
2004-05-05 16:39     ` Andi Kleen
2004-05-05 16:47       ` Paul Jackson
2004-05-06  6:00         ` Andi Kleen
2004-04-06 13:37 ` [PATCH] NUMA API for Linux 6/ Add shared memory support Andi Kleen
2004-04-06 13:38 ` [PATCH] NUMA API for Linux 7/ Add statistics Andi Kleen
2004-04-06 13:39 ` [PATCH] NUMA API for Linux 8/ Add policy support to anonymous memory Andi Kleen
2004-04-06 13:40 ` [PATCH] NUMA API for Linux 9/ Add simple lazy i386/x86-64 hugetlbfs policy support Andi Kleen
2004-04-06 13:40 ` [PATCH] NUMA API for Linux 10/ Bitmap bugfix Andi Kleen
2004-04-06 23:35 ` NUMA API for Linux Paul Jackson
2004-04-08 20:12 ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040406153431.197de19e.ak@suse.de \
    --to=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox