All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20120619180646.GN4633@redhat.com>

diff --git a/a/1.txt b/N1/1.txt
index 56d0c49..254f472 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -49,3 +49,654 @@ details with Johannes during the MM summit but it's some work to
 implement it).
 
 ===
+>From 17e1cbc02c1b41037248d9952179ff293a287d58 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Tue, 19 Jun 2012 18:55:25 +0200
+Subject: [PATCH] autonuma: shrink the per-page page_autonuma struct size
+
+>From 32 to 12 bytes, so the AutoNUMA memory footprint is reduced to
+0.29% of RAM.
+
+This however will fail to migrate pages above a 16 Terabyte offset
+from the start of each node (migration failure isn't fatal, simply
+those pages will not follow the CPU, a warning will be printed in the
+log just once in that case).
+
+AutoNUMA will also fail to build if there are more than (2**15)-1
+nodes supported by the MAX_NUMNODES at build time (it would be easy to
+relax it to (2**16)-1 nodes without increasing the memory footprint,
+but it's not even worth it, so let's keep the negative space reserved
+for now).
+
+This means the max RAM configuration fully supported by AutoNUMA
+becomes AUTONUMA_LIST_MAX_PFN_OFFSET multiplied by 32767 nodes
+multiplied by the PAGE_SIZE (assume 4096 here, but for some archs it's
+bigger).
+
+4096*32767*(0xffffffff-3)>>(10*5) = 511 PetaBytes.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+---
+ include/linux/autonuma_list.h  |   94 ++++++++++++++++++++++
+ include/linux/autonuma_types.h |   48 +++++++-----
+ include/linux/mmzone.h         |    3 +-
+ include/linux/page_autonuma.h  |    2 +-
+ mm/Makefile                    |    2 +-
+ mm/autonuma.c                  |   75 +++++++++++++-----
+ mm/autonuma_list.c             |  167 ++++++++++++++++++++++++++++++++++++++++
+ mm/page_autonuma.c             |   15 ++--
+ 8 files changed, 355 insertions(+), 51 deletions(-)
+ create mode 100644 include/linux/autonuma_list.h
+ create mode 100644 mm/autonuma_list.c
+
+diff --git a/include/linux/autonuma_list.h b/include/linux/autonuma_list.h
+new file mode 100644
+index 0000000..0f338e9
+--- /dev/null
++++ b/include/linux/autonuma_list.h
+@@ -0,0 +1,94 @@
++#ifndef __AUTONUMA_LIST_H
++#define __AUTONUMA_LIST_H
++
++#include <linux/types.h>
++#include <linux/kernel.h>
++
++typedef uint32_t autonuma_list_entry;
++#define AUTONUMA_LIST_MAX_PFN_OFFSET	(AUTONUMA_LIST_HEAD-3)
++#define AUTONUMA_LIST_POISON1		(AUTONUMA_LIST_HEAD-2)
++#define AUTONUMA_LIST_POISON2		(AUTONUMA_LIST_HEAD-1)
++#define AUTONUMA_LIST_HEAD		((uint32_t)UINT_MAX)
++
++struct autonuma_list_head {
++	autonuma_list_entry anl_next_pfn;
++	autonuma_list_entry anl_prev_pfn;
++};
++
++static inline void AUTONUMA_INIT_LIST_HEAD(struct autonuma_list_head *anl)
++{
++	anl->anl_next_pfn = AUTONUMA_LIST_HEAD;
++	anl->anl_prev_pfn = AUTONUMA_LIST_HEAD;
++}
++
++/* abstraction conversion methods */
++extern struct page *autonuma_list_entry_to_page(int nid,
++					autonuma_list_entry pfn_offset);
++extern autonuma_list_entry autonuma_page_to_list_entry(int page_nid,
++						       struct page *page);
++extern struct autonuma_list_head *__autonuma_list_head(int page_nid,
++					struct autonuma_list_head *head,
++					autonuma_list_entry pfn_offset);
++
++extern bool __autonuma_list_add(int page_nid,
++				struct page *page,
++				struct autonuma_list_head *head,
++				autonuma_list_entry prev,
++				autonuma_list_entry next);
++
++/*
++ * autonuma_list_add - add a new entry
++ *
++ * Insert a new entry after the specified head.
++ */
++static inline bool autonuma_list_add(int page_nid,
++				     struct page *page,
++				     autonuma_list_entry entry,
++				     struct autonuma_list_head *head)
++{
++	struct autonuma_list_head *entry_head;
++	entry_head = __autonuma_list_head(page_nid, head, entry);
++	return __autonuma_list_add(page_nid, page, head,
++				   entry, entry_head->anl_next_pfn);
++}
++
++/*
++ * autonuma_list_add_tail - add a new entry
++ *
++ * Insert a new entry before the specified head.
++ * This is useful for implementing queues.
++ */
++static inline bool autonuma_list_add_tail(int page_nid,
++					  struct page *page,
++					  autonuma_list_entry entry,
++					  struct autonuma_list_head *head)
++{
++	struct autonuma_list_head *entry_head;
++	entry_head = __autonuma_list_head(page_nid, head, entry);
++	return __autonuma_list_add(page_nid, page, head,
++				   entry_head->anl_prev_pfn, entry);
++}
++
++/*
++ * autonuma_list_del - deletes entry from list.
++ * @entry: the element to delete from the list.
++ */
++extern void autonuma_list_del(int page_nid,
++			      struct autonuma_list_head *entry,
++			      struct autonuma_list_head *head);
++
++extern bool autonuma_list_empty(const struct autonuma_list_head *head);
++
++#if 0 /* not needed so far */
++/*
++ * autonuma_list_is_singular - tests whether a list has just one entry.
++ * @head: the list to test.
++ */
++static inline int autonuma_list_is_singular(const struct autonuma_list_head *head)
++{
++	return !autonuma_list_empty(head) &&
++		(head->anl_next_pfn == head->anl_prev_pfn);
++}
++#endif
++
++#endif /* __AUTONUMA_LIST_H */
+diff --git a/include/linux/autonuma_types.h b/include/linux/autonuma_types.h
+index 6662990..1abde9c5 100644
+--- a/include/linux/autonuma_types.h
++++ b/include/linux/autonuma_types.h
+@@ -4,6 +4,7 @@
+ #ifdef CONFIG_AUTONUMA
+ 
+ #include <linux/numa.h>
++#include <linux/autonuma_list.h>
+ 
+ /*
+  * Per-mm (process) structure dynamically allocated only if autonuma
+@@ -45,15 +46,36 @@ struct task_autonuma {
+ /*
+  * Per page (or per-pageblock) structure dynamically allocated only if
+  * autonuma is not impossible.
++ *
++ * This structure takes 12 bytes per page for all architectures. There
++ * are two constraints to make this work:
++ *
++ * 1) the build will abort if MAX_NUMNODES is too big according to
++ *    the #error check below
++ *
++ * 2) AutoNUMA will not succeed to insert into the migration queue any
++ *    page whose pfn offset value (offset with respect to the first
++ *    pfn of the node) is bigger than AUTONUMA_LIST_MAX_PFN_OFFSET
++ *    (NOTE: AUTONUMA_LIST_MAX_PFN_OFFSET is still a valid pfn offset
++ *    value). This means with huge node sizes and small PAGE_SIZE,
++ *    some pages may not be allowed to be migrated.
+  */
+ struct page_autonuma {
+ 	/*
+ 	 * To modify autonuma_last_nid lockless the architecture,
+ 	 * needs SMP atomic granularity < sizeof(long), not all archs
+-	 * have that, notably some alpha. Archs without that requires
++	 * have that, notably some ancient alpha (but none of those
++	 * should run in NUMA systems). Archs without that require
+ 	 * autonuma_last_nid to be a long.
+ 	 */
+-#if BITS_PER_LONG > 32
++#if MAX_NUMNODES > 32767
++	/*
++	 * Verify at build time that int16_t for autonuma_migrate_nid
++	 * and autonuma_last_nid won't risk to overflow, max allowed
++	 * nid value is (2**15)-1.
++	 */
++#error "too many nodes"
++#endif
+ 	/*
+ 	 * autonuma_migrate_nid is -1 if the page_autonuma structure
+ 	 * is not linked into any
+@@ -63,7 +85,7 @@ struct page_autonuma {
+ 	 * page_nid is the nid that the page (referenced by the
+ 	 * page_autonuma structure) belongs to.
+ 	 */
+-	int autonuma_migrate_nid;
++	int16_t autonuma_migrate_nid;
+ 	/*
+ 	 * autonuma_last_nid records which is the NUMA nid that tried
+ 	 * to access this page at the last NUMA hinting page fault.
+@@ -72,28 +94,14 @@ struct page_autonuma {
+ 	 * it will make different threads trashing on the same pages,
+ 	 * converge on the same NUMA node (if possible).
+ 	 */
+-	int autonuma_last_nid;
+-#else
+-#if MAX_NUMNODES >= 32768
+-#error "too many nodes"
+-#endif
+-	short autonuma_migrate_nid;
+-	short autonuma_last_nid;
+-#endif
++	int16_t autonuma_last_nid;
++
+ 	/*
+ 	 * This is the list node that links the page (referenced by
+ 	 * the page_autonuma structure) in the
+ 	 * &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid] lru.
+ 	 */
+-	struct list_head autonuma_migrate_node;
+-
+-	/*
+-	 * To find the page starting from the autonuma_migrate_node we
+-	 * need a backlink.
+-	 *
+-	 * FIXME: drop it;
+-	 */
+-	struct page *page;
++	struct autonuma_list_head autonuma_migrate_node;
+ };
+ 
+ extern int alloc_task_autonuma(struct task_struct *tsk,
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index ed5b0c0..acefdfa 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -17,6 +17,7 @@
+ #include <linux/pageblock-flags.h>
+ #include <generated/bounds.h>
+ #include <linux/atomic.h>
++#include <linux/autonuma_list.h>
+ #include <asm/page.h>
+ 
+ /* Free memory management - zoned buddy allocator.  */
+@@ -710,7 +711,7 @@ typedef struct pglist_data {
+ 	 * <linux/page_autonuma.h> and the below field must remain the
+ 	 * last one of this structure.
+ 	 */
+-	struct list_head autonuma_migrate_head[0];
++	struct autonuma_list_head autonuma_migrate_head[0];
+ #endif
+ } pg_data_t;
+ 
+diff --git a/include/linux/page_autonuma.h b/include/linux/page_autonuma.h
+index bc7a629..e78beda 100644
+--- a/include/linux/page_autonuma.h
++++ b/include/linux/page_autonuma.h
+@@ -53,7 +53,7 @@ extern void __init sparse_early_page_autonuma_alloc_node(struct page_autonuma **
+ /* inline won't work here */
+ #define autonuma_pglist_data_size() (sizeof(struct pglist_data) +	\
+ 				     (autonuma_impossible() ? 0 :	\
+-				      sizeof(struct list_head) * \
++				      sizeof(struct autonuma_list_head) * \
+ 				      num_possible_nodes()))
+ 
+ #endif /* _LINUX_PAGE_AUTONUMA_H */
+diff --git a/mm/Makefile b/mm/Makefile
+index a4d8354..4aa90d4 100644
+--- a/mm/Makefile
++++ b/mm/Makefile
+@@ -33,7 +33,7 @@ obj-$(CONFIG_FRONTSWAP)	+= frontswap.o
+ obj-$(CONFIG_HAS_DMA)	+= dmapool.o
+ obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
+ obj-$(CONFIG_NUMA) 	+= mempolicy.o
+-obj-$(CONFIG_AUTONUMA) 	+= autonuma.o page_autonuma.o
++obj-$(CONFIG_AUTONUMA) 	+= autonuma.o page_autonuma.o autonuma_list.o
+ obj-$(CONFIG_SPARSEMEM)	+= sparse.o
+ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
+ obj-$(CONFIG_SLOB) += slob.o
+diff --git a/mm/autonuma.c b/mm/autonuma.c
+index 9834f5d..8aed9af 100644
+--- a/mm/autonuma.c
++++ b/mm/autonuma.c
+@@ -89,12 +89,21 @@ void autonuma_migrate_split_huge_page(struct page *page,
+ 	VM_BUG_ON(nid < -1);
+ 	VM_BUG_ON(page_tail_autonuma->autonuma_migrate_nid != -1);
+ 	if (nid >= 0) {
+-		VM_BUG_ON(page_to_nid(page) != page_to_nid(page_tail));
++		int page_nid = page_to_nid(page);
++		struct autonuma_list_head *head;
++		autonuma_list_entry entry;
++		entry = autonuma_page_to_list_entry(page_nid, page);
++		head = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];
++		VM_BUG_ON(page_nid != page_to_nid(page_tail));
++		VM_BUG_ON(page_nid == nid);
+ 
+ 		compound_lock(page_tail);
+ 		autonuma_migrate_lock(nid);
+-		list_add_tail(&page_tail_autonuma->autonuma_migrate_node,
+-			      &page_autonuma->autonuma_migrate_node);
++		if (!autonuma_list_add_tail(page_nid,
++					    page_tail,
++					    entry,
++					    head))
++			BUG();
+ 		autonuma_migrate_unlock(nid);
+ 
+ 		page_tail_autonuma->autonuma_migrate_nid = nid;
+@@ -119,8 +128,15 @@ void __autonuma_migrate_page_remove(struct page *page,
+ 	VM_BUG_ON(nid < -1);
+ 	if (nid >= 0) {
+ 		int numpages = hpage_nr_pages(page);
++		int page_nid = page_to_nid(page);
++		struct autonuma_list_head *head;
++		VM_BUG_ON(nid == page_nid);
++		head = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];
++
+ 		autonuma_migrate_lock(nid);
+-		list_del(&page_autonuma->autonuma_migrate_node);
++		autonuma_list_del(page_nid,
++				  &page_autonuma->autonuma_migrate_node,
++				  head);
+ 		NODE_DATA(nid)->autonuma_nr_migrate_pages -= numpages;
+ 		autonuma_migrate_unlock(nid);
+ 
+@@ -139,6 +155,8 @@ static void __autonuma_migrate_page_add(struct page *page,
+ 	int numpages;
+ 	unsigned long nr_migrate_pages;
+ 	wait_queue_head_t *wait_queue;
++	struct autonuma_list_head *head;
++	bool added;
+ 
+ 	VM_BUG_ON(dst_nid >= MAX_NUMNODES);
+ 	VM_BUG_ON(dst_nid < -1);
+@@ -155,25 +173,33 @@ static void __autonuma_migrate_page_add(struct page *page,
+ 	VM_BUG_ON(nid >= MAX_NUMNODES);
+ 	VM_BUG_ON(nid < -1);
+ 	if (nid >= 0) {
++		VM_BUG_ON(nid == page_nid);
++		head = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];
++
+ 		autonuma_migrate_lock(nid);
+-		list_del(&page_autonuma->autonuma_migrate_node);
++		autonuma_list_del(page_nid,
++				  &page_autonuma->autonuma_migrate_node,
++				  head);
+ 		NODE_DATA(nid)->autonuma_nr_migrate_pages -= numpages;
+ 		autonuma_migrate_unlock(nid);
+ 	}
+ 
++	head = &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid];
++
+ 	autonuma_migrate_lock(dst_nid);
+-	list_add(&page_autonuma->autonuma_migrate_node,
+-		 &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid]);
+-	NODE_DATA(dst_nid)->autonuma_nr_migrate_pages += numpages;
++	added = autonuma_list_add(page_nid, page, AUTONUMA_LIST_HEAD, head);
++	if (added)
++		NODE_DATA(dst_nid)->autonuma_nr_migrate_pages += numpages;
+ 	nr_migrate_pages = NODE_DATA(dst_nid)->autonuma_nr_migrate_pages;
+ 
+ 	autonuma_migrate_unlock(dst_nid);
+ 
+-	page_autonuma->autonuma_migrate_nid = dst_nid;
++	if (added)
++		page_autonuma->autonuma_migrate_nid = dst_nid;
+ 
+ 	compound_unlock_irqrestore(page, flags);
+ 
+-	if (!autonuma_migrate_defer()) {
++	if (added && !autonuma_migrate_defer()) {
+ 		wait_queue = &NODE_DATA(dst_nid)->autonuma_knuma_migrated_wait;
+ 		if (nr_migrate_pages >= pages_to_migrate &&
+ 		    nr_migrate_pages - numpages < pages_to_migrate &&
+@@ -813,7 +839,7 @@ static int isolate_migratepages(struct list_head *migratepages,
+ 				struct pglist_data *pgdat)
+ {
+ 	int nr = 0, nid;
+-	struct list_head *heads = pgdat->autonuma_migrate_head;
++	struct autonuma_list_head *heads = pgdat->autonuma_migrate_head;
+ 
+ 	/* FIXME: THP balancing, restart from last nid */
+ 	for_each_online_node(nid) {
+@@ -825,10 +851,10 @@ static int isolate_migratepages(struct list_head *migratepages,
+ 		cond_resched();
+ 		VM_BUG_ON(numa_node_id() != pgdat->node_id);
+ 		if (nid == pgdat->node_id) {
+-			VM_BUG_ON(!list_empty(&heads[nid]));
++			VM_BUG_ON(!autonuma_list_empty(&heads[nid]));
+ 			continue;
+ 		}
+-		if (list_empty(&heads[nid]))
++		if (autonuma_list_empty(&heads[nid]))
+ 			continue;
+ 		/* some page wants to go to this pgdat */
+ 		/*
+@@ -840,22 +866,29 @@ static int isolate_migratepages(struct list_head *migratepages,
+ 		 * irqs.
+ 		 */
+ 		autonuma_migrate_lock_irq(pgdat->node_id);
+-		if (list_empty(&heads[nid])) {
++		if (autonuma_list_empty(&heads[nid])) {
+ 			autonuma_migrate_unlock_irq(pgdat->node_id);
+ 			continue;
+ 		}
+-		page_autonuma = list_entry(heads[nid].prev,
+-					   struct page_autonuma,
+-					   autonuma_migrate_node);
+-		page = page_autonuma->page;
++		page = autonuma_list_entry_to_page(nid,
++						   heads[nid].anl_prev_pfn);
++		page_autonuma = lookup_page_autonuma(page);
+ 		if (unlikely(!get_page_unless_zero(page))) {
++			int page_nid = page_to_nid(page);
++			struct autonuma_list_head *entry_head;
++			VM_BUG_ON(nid == page_nid);
++
+ 			/*
+ 			 * Is getting freed and will remove self from the
+ 			 * autonuma list shortly, skip it for now.
+ 			 */
+-			list_del(&page_autonuma->autonuma_migrate_node);
+-			list_add(&page_autonuma->autonuma_migrate_node,
+-				 &heads[nid]);
++			entry_head = &page_autonuma->autonuma_migrate_node;
++			autonuma_list_del(page_nid, entry_head,
++					  &heads[nid]);
++			if (!autonuma_list_add(page_nid, page,
++					       AUTONUMA_LIST_HEAD,
++					       &heads[nid]))
++				BUG();
+ 			autonuma_migrate_unlock_irq(pgdat->node_id);
+ 			autonuma_printk("autonuma migrate page is free\n");
+ 			continue;
+diff --git a/mm/autonuma_list.c b/mm/autonuma_list.c
+new file mode 100644
+index 0000000..2c840f7
+--- /dev/null
++++ b/mm/autonuma_list.c
+@@ -0,0 +1,167 @@
++/*
++ * Copyright 2006, Red Hat, Inc., Dave Jones
++ * Copyright 2012, Red Hat, Inc.
++ * Released under the General Public License (GPL).
++ *
++ * This file contains the linked list implementations for
++ * autonuma migration lists.
++ */
++
++#include <linux/mm.h>
++#include <linux/autonuma.h>
++
++/*
++ * Insert a new entry between two known consecutive entries.
++ *
++ * This is only for internal list manipulation where we know
++ * the prev/next entries already!
++ *
++ * return true if succeeded, or false if the (page_nid, pfn_offset)
++ * pair couldn't represent the pfn and the list_add didn't succeed.
++ */
++bool __autonuma_list_add(int page_nid,
++			 struct page *page,
++			 struct autonuma_list_head *head,
++			 autonuma_list_entry prev,
++			 autonuma_list_entry next)
++{
++	autonuma_list_entry new;
++
++	VM_BUG_ON(page_nid != page_to_nid(page));
++	new = autonuma_page_to_list_entry(page_nid, page);
++	if (new > AUTONUMA_LIST_MAX_PFN_OFFSET)
++		return false;
++
++	WARN(new == prev || new == next,
++	     "autonuma_list_add double add: new=%u, prev=%u, next=%u.\n",
++	     new, prev, next);
++
++	__autonuma_list_head(page_nid, head, next)->anl_prev_pfn = new;
++	__autonuma_list_head(page_nid, head, new)->anl_next_pfn = next;
++	__autonuma_list_head(page_nid, head, new)->anl_prev_pfn = prev;
++	__autonuma_list_head(page_nid, head, prev)->anl_next_pfn = new;
++	return true;
++}
++
++static inline void __autonuma_list_del_entry(int page_nid,
++					     struct autonuma_list_head *entry,
++					     struct autonuma_list_head *head)
++{
++	autonuma_list_entry prev, next;
++
++	prev = entry->anl_prev_pfn;
++	next = entry->anl_next_pfn;
++
++	if (WARN(next == AUTONUMA_LIST_POISON1,
++		 "autonuma_list_del corruption, "
++		 "%p->anl_next_pfn is AUTONUMA_LIST_POISON1 (%u)\n",
++		entry, AUTONUMA_LIST_POISON1) ||
++	    WARN(prev == AUTONUMA_LIST_POISON2,
++		"autonuma_list_del corruption, "
++		 "%p->anl_prev_pfn is AUTONUMA_LIST_POISON2 (%u)\n",
++		entry, AUTONUMA_LIST_POISON2))
++		return;
++
++	__autonuma_list_head(page_nid, head, next)->anl_prev_pfn = prev;
++	__autonuma_list_head(page_nid, head, prev)->anl_next_pfn = next;
++}
++
++/*
++ * autonuma_list_del - deletes entry from list.
++ *
++ * Note: autonuma_list_empty on entry does not return true after this,
++ * the entry is in an undefined state.
++ */
++void autonuma_list_del(int page_nid, struct autonuma_list_head *entry,
++		       struct autonuma_list_head *head)
++{
++	__autonuma_list_del_entry(page_nid, entry, head);
++	entry->anl_next_pfn = AUTONUMA_LIST_POISON1;
++	entry->anl_prev_pfn = AUTONUMA_LIST_POISON2;
++}
++
++/*
++ * autonuma_list_empty - tests whether a list is empty
++ * @head: the list to test.
++ */
++bool autonuma_list_empty(const struct autonuma_list_head *head)
++{
++	bool ret = false;
++	if (head->anl_next_pfn == AUTONUMA_LIST_HEAD) {
++		ret = true;
++		BUG_ON(head->anl_prev_pfn != AUTONUMA_LIST_HEAD);
++	}
++	return ret;
++}
++
++/* abstraction conversion methods */
++
++static inline struct page *__autonuma_list_entry_to_page(int page_nid,
++							 autonuma_list_entry pfn_offset)
++{
++	struct pglist_data *pgdat = NODE_DATA(page_nid);
++	unsigned long pfn = pgdat->node_start_pfn + pfn_offset;
++	return pfn_to_page(pfn);
++}
++
++struct page *autonuma_list_entry_to_page(int page_nid,
++					 autonuma_list_entry pfn_offset)
++{
++	VM_BUG_ON(page_nid < 0);
++	BUG_ON(pfn_offset == AUTONUMA_LIST_POISON1);
++	BUG_ON(pfn_offset == AUTONUMA_LIST_POISON2);
++	BUG_ON(pfn_offset == AUTONUMA_LIST_HEAD);
++	return __autonuma_list_entry_to_page(page_nid, pfn_offset);
++}
++
++/*
++ * returns a value above AUTONUMA_LIST_MAX_PFN_OFFSET if the pfn is
++ * located at too big an offset from the start of the node and cannot be
++ * represented by the (page_nid, pfn_offset) pair.
++ */
++autonuma_list_entry autonuma_page_to_list_entry(int page_nid,
++						struct page *page)
++{
++	unsigned long pfn = page_to_pfn(page);
++	struct pglist_data *pgdat = NODE_DATA(page_nid);
++	VM_BUG_ON(page_nid != page_to_nid(page));
++	BUG_ON(pfn < pgdat->node_start_pfn);
++	pfn -= pgdat->node_start_pfn;
++	if (pfn > AUTONUMA_LIST_MAX_PFN_OFFSET) {
++		WARN_ONCE(1, "autonuma_page_to_list_entry: "
++			  "pfn_offset  %lu, pgdat %p, "
++			  "pgdat->node_start_pfn %lu\n",
++			  pfn, pgdat, pgdat->node_start_pfn);
++		/*
++		 * Any value bigger than AUTONUMA_LIST_MAX_PFN_OFFSET
++		 * will work as an error retval, but better pick one
++		 * that will cause noise if computed wrong by the
++		 * caller.
++		 */
++		return AUTONUMA_LIST_POISON1;
++	}
++	return pfn; /* convert to uint32_t without losing information */
++}
++
++static inline struct autonuma_list_head *____autonuma_list_head(int page_nid,
++					autonuma_list_entry pfn_offset)
++{
++	struct pglist_data *pgdat = NODE_DATA(page_nid);
++	unsigned long pfn = pgdat->node_start_pfn + pfn_offset;
++	struct page *page = pfn_to_page(pfn);
++	struct page_autonuma *page_autonuma = lookup_page_autonuma(page);
++	return &page_autonuma->autonuma_migrate_node;
++}
++
++struct autonuma_list_head *__autonuma_list_head(int page_nid,
++					struct autonuma_list_head *head,
++					autonuma_list_entry pfn_offset)
++{
++	VM_BUG_ON(page_nid < 0);
++	BUG_ON(pfn_offset == AUTONUMA_LIST_POISON1);
++	BUG_ON(pfn_offset == AUTONUMA_LIST_POISON2);
++	if (pfn_offset != AUTONUMA_LIST_HEAD)
++		return ____autonuma_list_head(page_nid, pfn_offset);
++	else
++		return head;
++}
+diff --git a/mm/page_autonuma.c b/mm/page_autonuma.c
+index f929d81..151f25c 100644
+--- a/mm/page_autonuma.c
++++ b/mm/page_autonuma.c
+@@ -12,7 +12,6 @@ void __meminit page_autonuma_map_init(struct page *page,
+ 	for (end = page + nr_pages; page < end; page++, page_autonuma++) {
+ 		page_autonuma->autonuma_last_nid = -1;
+ 		page_autonuma->autonuma_migrate_nid = -1;
+-		page_autonuma->page = page;
+ 	}
+ }
+ 
+@@ -20,12 +19,18 @@ static void __meminit __pgdat_autonuma_init(struct pglist_data *pgdat)
+ {
+ 	int node_iter;
+ 
++	/* verify the per-page page_autonuma 12 byte fixed cost */
++	BUILD_BUG_ON((unsigned long) &((struct page_autonuma *)0)[1] != 12);
++
+ 	spin_lock_init(&pgdat->autonuma_lock);
+ 	init_waitqueue_head(&pgdat->autonuma_knuma_migrated_wait);
+ 	pgdat->autonuma_nr_migrate_pages = 0;
+ 	if (!autonuma_impossible())
+-		for_each_node(node_iter)
+-			INIT_LIST_HEAD(&pgdat->autonuma_migrate_head[node_iter]);
++		for_each_node(node_iter) {
++			struct autonuma_list_head *head;
++			head = &pgdat->autonuma_migrate_head[node_iter];
++			AUTONUMA_INIT_LIST_HEAD(head);
++		}
+ }
+ 
+ #if !defined(CONFIG_SPARSEMEM)
+@@ -112,10 +117,6 @@ struct page_autonuma *lookup_page_autonuma(struct page *page)
+ 	unsigned long pfn = page_to_pfn(page);
+ 	struct mem_section *section = __pfn_to_section(pfn);
+ 
+-	/* if it's not a power of two we may be wasting memory */
+-	BUILD_BUG_ON(SECTION_PAGE_AUTONUMA_SIZE &
+-		     (SECTION_PAGE_AUTONUMA_SIZE-1));
+-
+ #ifdef CONFIG_DEBUG_VM
+ 	/*
+ 	 * The sanity checks the page allocator does upon freeing a
diff --git a/a/content_digest b/N1/content_digest
index a2cf1f0..f4efb37 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -84,6 +84,657 @@
  "details with Johannes during the MM summit but it's some work to\n"
  "implement it).\n"
  "\n"
- ===
+ "===\n"
+ ">From 17e1cbc02c1b41037248d9952179ff293a287d58 Mon Sep 17 00:00:00 2001\n"
+ "From: Andrea Arcangeli <aarcange@redhat.com>\n"
+ "Date: Tue, 19 Jun 2012 18:55:25 +0200\n"
+ "Subject: [PATCH] autonuma: shrink the per-page page_autonuma struct size\n"
+ "\n"
+ ">From 32 to 12 bytes, so the AutoNUMA memory footprint is reduced to\n"
+ "0.29% of RAM.\n"
+ "\n"
+ "This however will fail to migrate pages above a 16 Terabyte offset\n"
+ "from the start of each node (migration failure isn't fatal, simply\n"
+ "those pages will not follow the CPU, a warning will be printed in the\n"
+ "log just once in that case).\n"
+ "\n"
+ "AutoNUMA will also fail to build if there are more than (2**15)-1\n"
+ "nodes supported by the MAX_NUMNODES at build time (it would be easy to\n"
+ "relax it to (2**16)-1 nodes without increasing the memory footprint,\n"
+ "but it's not even worth it, so let's keep the negative space reserved\n"
+ "for now).\n"
+ "\n"
+ "This means the max RAM configuration fully supported by AutoNUMA\n"
+ "becomes AUTONUMA_LIST_MAX_PFN_OFFSET multiplied by 32767 nodes\n"
+ "multiplied by the PAGE_SIZE (assume 4096 here, but for some archs it's\n"
+ "bigger).\n"
+ "\n"
+ "4096*32767*(0xffffffff-3)>>(10*5) = 511 PetaBytes.\n"
+ "\n"
+ "Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>\n"
+ "---\n"
+ " include/linux/autonuma_list.h  |   94 ++++++++++++++++++++++\n"
+ " include/linux/autonuma_types.h |   48 +++++++-----\n"
+ " include/linux/mmzone.h         |    3 +-\n"
+ " include/linux/page_autonuma.h  |    2 +-\n"
+ " mm/Makefile                    |    2 +-\n"
+ " mm/autonuma.c                  |   75 +++++++++++++-----\n"
+ " mm/autonuma_list.c             |  167 ++++++++++++++++++++++++++++++++++++++++\n"
+ " mm/page_autonuma.c             |   15 ++--\n"
+ " 8 files changed, 355 insertions(+), 51 deletions(-)\n"
+ " create mode 100644 include/linux/autonuma_list.h\n"
+ " create mode 100644 mm/autonuma_list.c\n"
+ "\n"
+ "diff --git a/include/linux/autonuma_list.h b/include/linux/autonuma_list.h\n"
+ "new file mode 100644\n"
+ "index 0000000..0f338e9\n"
+ "--- /dev/null\n"
+ "+++ b/include/linux/autonuma_list.h\n"
+ "@@ -0,0 +1,94 @@\n"
+ "+#ifndef __AUTONUMA_LIST_H\n"
+ "+#define __AUTONUMA_LIST_H\n"
+ "+\n"
+ "+#include <linux/types.h>\n"
+ "+#include <linux/kernel.h>\n"
+ "+\n"
+ "+typedef uint32_t autonuma_list_entry;\n"
+ "+#define AUTONUMA_LIST_MAX_PFN_OFFSET\t(AUTONUMA_LIST_HEAD-3)\n"
+ "+#define AUTONUMA_LIST_POISON1\t\t(AUTONUMA_LIST_HEAD-2)\n"
+ "+#define AUTONUMA_LIST_POISON2\t\t(AUTONUMA_LIST_HEAD-1)\n"
+ "+#define AUTONUMA_LIST_HEAD\t\t((uint32_t)UINT_MAX)\n"
+ "+\n"
+ "+struct autonuma_list_head {\n"
+ "+\tautonuma_list_entry anl_next_pfn;\n"
+ "+\tautonuma_list_entry anl_prev_pfn;\n"
+ "+};\n"
+ "+\n"
+ "+static inline void AUTONUMA_INIT_LIST_HEAD(struct autonuma_list_head *anl)\n"
+ "+{\n"
+ "+\tanl->anl_next_pfn = AUTONUMA_LIST_HEAD;\n"
+ "+\tanl->anl_prev_pfn = AUTONUMA_LIST_HEAD;\n"
+ "+}\n"
+ "+\n"
+ "+/* abstraction conversion methods */\n"
+ "+extern struct page *autonuma_list_entry_to_page(int nid,\n"
+ "+\t\t\t\t\tautonuma_list_entry pfn_offset);\n"
+ "+extern autonuma_list_entry autonuma_page_to_list_entry(int page_nid,\n"
+ "+\t\t\t\t\t\t       struct page *page);\n"
+ "+extern struct autonuma_list_head *__autonuma_list_head(int page_nid,\n"
+ "+\t\t\t\t\tstruct autonuma_list_head *head,\n"
+ "+\t\t\t\t\tautonuma_list_entry pfn_offset);\n"
+ "+\n"
+ "+extern bool __autonuma_list_add(int page_nid,\n"
+ "+\t\t\t\tstruct page *page,\n"
+ "+\t\t\t\tstruct autonuma_list_head *head,\n"
+ "+\t\t\t\tautonuma_list_entry prev,\n"
+ "+\t\t\t\tautonuma_list_entry next);\n"
+ "+\n"
+ "+/*\n"
+ "+ * autonuma_list_add - add a new entry\n"
+ "+ *\n"
+ "+ * Insert a new entry after the specified head.\n"
+ "+ */\n"
+ "+static inline bool autonuma_list_add(int page_nid,\n"
+ "+\t\t\t\t     struct page *page,\n"
+ "+\t\t\t\t     autonuma_list_entry entry,\n"
+ "+\t\t\t\t     struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\tstruct autonuma_list_head *entry_head;\n"
+ "+\tentry_head = __autonuma_list_head(page_nid, head, entry);\n"
+ "+\treturn __autonuma_list_add(page_nid, page, head,\n"
+ "+\t\t\t\t   entry, entry_head->anl_next_pfn);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * autonuma_list_add_tail - add a new entry\n"
+ "+ *\n"
+ "+ * Insert a new entry before the specified head.\n"
+ "+ * This is useful for implementing queues.\n"
+ "+ */\n"
+ "+static inline bool autonuma_list_add_tail(int page_nid,\n"
+ "+\t\t\t\t\t  struct page *page,\n"
+ "+\t\t\t\t\t  autonuma_list_entry entry,\n"
+ "+\t\t\t\t\t  struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\tstruct autonuma_list_head *entry_head;\n"
+ "+\tentry_head = __autonuma_list_head(page_nid, head, entry);\n"
+ "+\treturn __autonuma_list_add(page_nid, page, head,\n"
+ "+\t\t\t\t   entry_head->anl_prev_pfn, entry);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * autonuma_list_del - deletes entry from list.\n"
+ "+ * @entry: the element to delete from the list.\n"
+ "+ */\n"
+ "+extern void autonuma_list_del(int page_nid,\n"
+ "+\t\t\t      struct autonuma_list_head *entry,\n"
+ "+\t\t\t      struct autonuma_list_head *head);\n"
+ "+\n"
+ "+extern bool autonuma_list_empty(const struct autonuma_list_head *head);\n"
+ "+\n"
+ "+#if 0 /* not needed so far */\n"
+ "+/*\n"
+ "+ * autonuma_list_is_singular - tests whether a list has just one entry.\n"
+ "+ * @head: the list to test.\n"
+ "+ */\n"
+ "+static inline int autonuma_list_is_singular(const struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\treturn !autonuma_list_empty(head) &&\n"
+ "+\t\t(head->anl_next_pfn == head->anl_prev_pfn);\n"
+ "+}\n"
+ "+#endif\n"
+ "+\n"
+ "+#endif /* __AUTONUMA_LIST_H */\n"
+ "diff --git a/include/linux/autonuma_types.h b/include/linux/autonuma_types.h\n"
+ "index 6662990..1abde9c5 100644\n"
+ "--- a/include/linux/autonuma_types.h\n"
+ "+++ b/include/linux/autonuma_types.h\n"
+ "@@ -4,6 +4,7 @@\n"
+ " #ifdef CONFIG_AUTONUMA\n"
+ " \n"
+ " #include <linux/numa.h>\n"
+ "+#include <linux/autonuma_list.h>\n"
+ " \n"
+ " /*\n"
+ "  * Per-mm (process) structure dynamically allocated only if autonuma\n"
+ "@@ -45,15 +46,36 @@ struct task_autonuma {\n"
+ " /*\n"
+ "  * Per page (or per-pageblock) structure dynamically allocated only if\n"
+ "  * autonuma is not impossible.\n"
+ "+ *\n"
+ "+ * This structure takes 12 bytes per page for all architectures. There\n"
+ "+ * are two constraints to make this work:\n"
+ "+ *\n"
+ "+ * 1) the build will abort if MAX_NUMNODES is too big according to\n"
+ "+ *    the #error check below\n"
+ "+ *\n"
+ "+ * 2) AutoNUMA will not succeed to insert into the migration queue any\n"
+ "+ *    page whose pfn offset value (offset with respect to the first\n"
+ "+ *    pfn of the node) is bigger than AUTONUMA_LIST_MAX_PFN_OFFSET\n"
+ "+ *    (NOTE: AUTONUMA_LIST_MAX_PFN_OFFSET is still a valid pfn offset\n"
+ "+ *    value). This means with huge node sizes and small PAGE_SIZE,\n"
+ "+ *    some pages may not be allowed to be migrated.\n"
+ "  */\n"
+ " struct page_autonuma {\n"
+ " \t/*\n"
+ " \t * To modify autonuma_last_nid lockless the architecture,\n"
+ " \t * needs SMP atomic granularity < sizeof(long), not all archs\n"
+ "-\t * have that, notably some alpha. Archs without that requires\n"
+ "+\t * have that, notably some ancient alpha (but none of those\n"
+ "+\t * should run in NUMA systems). Archs without that require\n"
+ " \t * autonuma_last_nid to be a long.\n"
+ " \t */\n"
+ "-#if BITS_PER_LONG > 32\n"
+ "+#if MAX_NUMNODES > 32767\n"
+ "+\t/*\n"
+ "+\t * Verify at build time that int16_t for autonuma_migrate_nid\n"
+ "+\t * and autonuma_last_nid won't risk to overflow, max allowed\n"
+ "+\t * nid value is (2**15)-1.\n"
+ "+\t */\n"
+ "+#error \"too many nodes\"\n"
+ "+#endif\n"
+ " \t/*\n"
+ " \t * autonuma_migrate_nid is -1 if the page_autonuma structure\n"
+ " \t * is not linked into any\n"
+ "@@ -63,7 +85,7 @@ struct page_autonuma {\n"
+ " \t * page_nid is the nid that the page (referenced by the\n"
+ " \t * page_autonuma structure) belongs to.\n"
+ " \t */\n"
+ "-\tint autonuma_migrate_nid;\n"
+ "+\tint16_t autonuma_migrate_nid;\n"
+ " \t/*\n"
+ " \t * autonuma_last_nid records which is the NUMA nid that tried\n"
+ " \t * to access this page at the last NUMA hinting page fault.\n"
+ "@@ -72,28 +94,14 @@ struct page_autonuma {\n"
+ " \t * it will make different threads trashing on the same pages,\n"
+ " \t * converge on the same NUMA node (if possible).\n"
+ " \t */\n"
+ "-\tint autonuma_last_nid;\n"
+ "-#else\n"
+ "-#if MAX_NUMNODES >= 32768\n"
+ "-#error \"too many nodes\"\n"
+ "-#endif\n"
+ "-\tshort autonuma_migrate_nid;\n"
+ "-\tshort autonuma_last_nid;\n"
+ "-#endif\n"
+ "+\tint16_t autonuma_last_nid;\n"
+ "+\n"
+ " \t/*\n"
+ " \t * This is the list node that links the page (referenced by\n"
+ " \t * the page_autonuma structure) in the\n"
+ " \t * &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid] lru.\n"
+ " \t */\n"
+ "-\tstruct list_head autonuma_migrate_node;\n"
+ "-\n"
+ "-\t/*\n"
+ "-\t * To find the page starting from the autonuma_migrate_node we\n"
+ "-\t * need a backlink.\n"
+ "-\t *\n"
+ "-\t * FIXME: drop it;\n"
+ "-\t */\n"
+ "-\tstruct page *page;\n"
+ "+\tstruct autonuma_list_head autonuma_migrate_node;\n"
+ " };\n"
+ " \n"
+ " extern int alloc_task_autonuma(struct task_struct *tsk,\n"
+ "diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h\n"
+ "index ed5b0c0..acefdfa 100644\n"
+ "--- a/include/linux/mmzone.h\n"
+ "+++ b/include/linux/mmzone.h\n"
+ "@@ -17,6 +17,7 @@\n"
+ " #include <linux/pageblock-flags.h>\n"
+ " #include <generated/bounds.h>\n"
+ " #include <linux/atomic.h>\n"
+ "+#include <linux/autonuma_list.h>\n"
+ " #include <asm/page.h>\n"
+ " \n"
+ " /* Free memory management - zoned buddy allocator.  */\n"
+ "@@ -710,7 +711,7 @@ typedef struct pglist_data {\n"
+ " \t * <linux/page_autonuma.h> and the below field must remain the\n"
+ " \t * last one of this structure.\n"
+ " \t */\n"
+ "-\tstruct list_head autonuma_migrate_head[0];\n"
+ "+\tstruct autonuma_list_head autonuma_migrate_head[0];\n"
+ " #endif\n"
+ " } pg_data_t;\n"
+ " \n"
+ "diff --git a/include/linux/page_autonuma.h b/include/linux/page_autonuma.h\n"
+ "index bc7a629..e78beda 100644\n"
+ "--- a/include/linux/page_autonuma.h\n"
+ "+++ b/include/linux/page_autonuma.h\n"
+ "@@ -53,7 +53,7 @@ extern void __init sparse_early_page_autonuma_alloc_node(struct page_autonuma **\n"
+ " /* inline won't work here */\n"
+ " #define autonuma_pglist_data_size() (sizeof(struct pglist_data) +\t\\\n"
+ " \t\t\t\t     (autonuma_impossible() ? 0 :\t\\\n"
+ "-\t\t\t\t      sizeof(struct list_head) * \\\n"
+ "+\t\t\t\t      sizeof(struct autonuma_list_head) * \\\n"
+ " \t\t\t\t      num_possible_nodes()))\n"
+ " \n"
+ " #endif /* _LINUX_PAGE_AUTONUMA_H */\n"
+ "diff --git a/mm/Makefile b/mm/Makefile\n"
+ "index a4d8354..4aa90d4 100644\n"
+ "--- a/mm/Makefile\n"
+ "+++ b/mm/Makefile\n"
+ "@@ -33,7 +33,7 @@ obj-$(CONFIG_FRONTSWAP)\t+= frontswap.o\n"
+ " obj-$(CONFIG_HAS_DMA)\t+= dmapool.o\n"
+ " obj-$(CONFIG_HUGETLBFS)\t+= hugetlb.o\n"
+ " obj-$(CONFIG_NUMA) \t+= mempolicy.o\n"
+ "-obj-$(CONFIG_AUTONUMA) \t+= autonuma.o page_autonuma.o\n"
+ "+obj-$(CONFIG_AUTONUMA) \t+= autonuma.o page_autonuma.o autonuma_list.o\n"
+ " obj-$(CONFIG_SPARSEMEM)\t+= sparse.o\n"
+ " obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o\n"
+ " obj-$(CONFIG_SLOB) += slob.o\n"
+ "diff --git a/mm/autonuma.c b/mm/autonuma.c\n"
+ "index 9834f5d..8aed9af 100644\n"
+ "--- a/mm/autonuma.c\n"
+ "+++ b/mm/autonuma.c\n"
+ "@@ -89,12 +89,21 @@ void autonuma_migrate_split_huge_page(struct page *page,\n"
+ " \tVM_BUG_ON(nid < -1);\n"
+ " \tVM_BUG_ON(page_tail_autonuma->autonuma_migrate_nid != -1);\n"
+ " \tif (nid >= 0) {\n"
+ "-\t\tVM_BUG_ON(page_to_nid(page) != page_to_nid(page_tail));\n"
+ "+\t\tint page_nid = page_to_nid(page);\n"
+ "+\t\tstruct autonuma_list_head *head;\n"
+ "+\t\tautonuma_list_entry entry;\n"
+ "+\t\tentry = autonuma_page_to_list_entry(page_nid, page);\n"
+ "+\t\thead = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];\n"
+ "+\t\tVM_BUG_ON(page_nid != page_to_nid(page_tail));\n"
+ "+\t\tVM_BUG_ON(page_nid == nid);\n"
+ " \n"
+ " \t\tcompound_lock(page_tail);\n"
+ " \t\tautonuma_migrate_lock(nid);\n"
+ "-\t\tlist_add_tail(&page_tail_autonuma->autonuma_migrate_node,\n"
+ "-\t\t\t      &page_autonuma->autonuma_migrate_node);\n"
+ "+\t\tif (!autonuma_list_add_tail(page_nid,\n"
+ "+\t\t\t\t\t    page_tail,\n"
+ "+\t\t\t\t\t    entry,\n"
+ "+\t\t\t\t\t    head))\n"
+ "+\t\t\tBUG();\n"
+ " \t\tautonuma_migrate_unlock(nid);\n"
+ " \n"
+ " \t\tpage_tail_autonuma->autonuma_migrate_nid = nid;\n"
+ "@@ -119,8 +128,15 @@ void __autonuma_migrate_page_remove(struct page *page,\n"
+ " \tVM_BUG_ON(nid < -1);\n"
+ " \tif (nid >= 0) {\n"
+ " \t\tint numpages = hpage_nr_pages(page);\n"
+ "+\t\tint page_nid = page_to_nid(page);\n"
+ "+\t\tstruct autonuma_list_head *head;\n"
+ "+\t\tVM_BUG_ON(nid == page_nid);\n"
+ "+\t\thead = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];\n"
+ "+\n"
+ " \t\tautonuma_migrate_lock(nid);\n"
+ "-\t\tlist_del(&page_autonuma->autonuma_migrate_node);\n"
+ "+\t\tautonuma_list_del(page_nid,\n"
+ "+\t\t\t\t  &page_autonuma->autonuma_migrate_node,\n"
+ "+\t\t\t\t  head);\n"
+ " \t\tNODE_DATA(nid)->autonuma_nr_migrate_pages -= numpages;\n"
+ " \t\tautonuma_migrate_unlock(nid);\n"
+ " \n"
+ "@@ -139,6 +155,8 @@ static void __autonuma_migrate_page_add(struct page *page,\n"
+ " \tint numpages;\n"
+ " \tunsigned long nr_migrate_pages;\n"
+ " \twait_queue_head_t *wait_queue;\n"
+ "+\tstruct autonuma_list_head *head;\n"
+ "+\tbool added;\n"
+ " \n"
+ " \tVM_BUG_ON(dst_nid >= MAX_NUMNODES);\n"
+ " \tVM_BUG_ON(dst_nid < -1);\n"
+ "@@ -155,25 +173,33 @@ static void __autonuma_migrate_page_add(struct page *page,\n"
+ " \tVM_BUG_ON(nid >= MAX_NUMNODES);\n"
+ " \tVM_BUG_ON(nid < -1);\n"
+ " \tif (nid >= 0) {\n"
+ "+\t\tVM_BUG_ON(nid == page_nid);\n"
+ "+\t\thead = &NODE_DATA(nid)->autonuma_migrate_head[page_nid];\n"
+ "+\n"
+ " \t\tautonuma_migrate_lock(nid);\n"
+ "-\t\tlist_del(&page_autonuma->autonuma_migrate_node);\n"
+ "+\t\tautonuma_list_del(page_nid,\n"
+ "+\t\t\t\t  &page_autonuma->autonuma_migrate_node,\n"
+ "+\t\t\t\t  head);\n"
+ " \t\tNODE_DATA(nid)->autonuma_nr_migrate_pages -= numpages;\n"
+ " \t\tautonuma_migrate_unlock(nid);\n"
+ " \t}\n"
+ " \n"
+ "+\thead = &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid];\n"
+ "+\n"
+ " \tautonuma_migrate_lock(dst_nid);\n"
+ "-\tlist_add(&page_autonuma->autonuma_migrate_node,\n"
+ "-\t\t &NODE_DATA(dst_nid)->autonuma_migrate_head[page_nid]);\n"
+ "-\tNODE_DATA(dst_nid)->autonuma_nr_migrate_pages += numpages;\n"
+ "+\tadded = autonuma_list_add(page_nid, page, AUTONUMA_LIST_HEAD, head);\n"
+ "+\tif (added)\n"
+ "+\t\tNODE_DATA(dst_nid)->autonuma_nr_migrate_pages += numpages;\n"
+ " \tnr_migrate_pages = NODE_DATA(dst_nid)->autonuma_nr_migrate_pages;\n"
+ " \n"
+ " \tautonuma_migrate_unlock(dst_nid);\n"
+ " \n"
+ "-\tpage_autonuma->autonuma_migrate_nid = dst_nid;\n"
+ "+\tif (added)\n"
+ "+\t\tpage_autonuma->autonuma_migrate_nid = dst_nid;\n"
+ " \n"
+ " \tcompound_unlock_irqrestore(page, flags);\n"
+ " \n"
+ "-\tif (!autonuma_migrate_defer()) {\n"
+ "+\tif (added && !autonuma_migrate_defer()) {\n"
+ " \t\twait_queue = &NODE_DATA(dst_nid)->autonuma_knuma_migrated_wait;\n"
+ " \t\tif (nr_migrate_pages >= pages_to_migrate &&\n"
+ " \t\t    nr_migrate_pages - numpages < pages_to_migrate &&\n"
+ "@@ -813,7 +839,7 @@ static int isolate_migratepages(struct list_head *migratepages,\n"
+ " \t\t\t\tstruct pglist_data *pgdat)\n"
+ " {\n"
+ " \tint nr = 0, nid;\n"
+ "-\tstruct list_head *heads = pgdat->autonuma_migrate_head;\n"
+ "+\tstruct autonuma_list_head *heads = pgdat->autonuma_migrate_head;\n"
+ " \n"
+ " \t/* FIXME: THP balancing, restart from last nid */\n"
+ " \tfor_each_online_node(nid) {\n"
+ "@@ -825,10 +851,10 @@ static int isolate_migratepages(struct list_head *migratepages,\n"
+ " \t\tcond_resched();\n"
+ " \t\tVM_BUG_ON(numa_node_id() != pgdat->node_id);\n"
+ " \t\tif (nid == pgdat->node_id) {\n"
+ "-\t\t\tVM_BUG_ON(!list_empty(&heads[nid]));\n"
+ "+\t\t\tVM_BUG_ON(!autonuma_list_empty(&heads[nid]));\n"
+ " \t\t\tcontinue;\n"
+ " \t\t}\n"
+ "-\t\tif (list_empty(&heads[nid]))\n"
+ "+\t\tif (autonuma_list_empty(&heads[nid]))\n"
+ " \t\t\tcontinue;\n"
+ " \t\t/* some page wants to go to this pgdat */\n"
+ " \t\t/*\n"
+ "@@ -840,22 +866,29 @@ static int isolate_migratepages(struct list_head *migratepages,\n"
+ " \t\t * irqs.\n"
+ " \t\t */\n"
+ " \t\tautonuma_migrate_lock_irq(pgdat->node_id);\n"
+ "-\t\tif (list_empty(&heads[nid])) {\n"
+ "+\t\tif (autonuma_list_empty(&heads[nid])) {\n"
+ " \t\t\tautonuma_migrate_unlock_irq(pgdat->node_id);\n"
+ " \t\t\tcontinue;\n"
+ " \t\t}\n"
+ "-\t\tpage_autonuma = list_entry(heads[nid].prev,\n"
+ "-\t\t\t\t\t   struct page_autonuma,\n"
+ "-\t\t\t\t\t   autonuma_migrate_node);\n"
+ "-\t\tpage = page_autonuma->page;\n"
+ "+\t\tpage = autonuma_list_entry_to_page(nid,\n"
+ "+\t\t\t\t\t\t   heads[nid].anl_prev_pfn);\n"
+ "+\t\tpage_autonuma = lookup_page_autonuma(page);\n"
+ " \t\tif (unlikely(!get_page_unless_zero(page))) {\n"
+ "+\t\t\tint page_nid = page_to_nid(page);\n"
+ "+\t\t\tstruct autonuma_list_head *entry_head;\n"
+ "+\t\t\tVM_BUG_ON(nid == page_nid);\n"
+ "+\n"
+ " \t\t\t/*\n"
+ " \t\t\t * Is getting freed and will remove self from the\n"
+ " \t\t\t * autonuma list shortly, skip it for now.\n"
+ " \t\t\t */\n"
+ "-\t\t\tlist_del(&page_autonuma->autonuma_migrate_node);\n"
+ "-\t\t\tlist_add(&page_autonuma->autonuma_migrate_node,\n"
+ "-\t\t\t\t &heads[nid]);\n"
+ "+\t\t\tentry_head = &page_autonuma->autonuma_migrate_node;\n"
+ "+\t\t\tautonuma_list_del(page_nid, entry_head,\n"
+ "+\t\t\t\t\t  &heads[nid]);\n"
+ "+\t\t\tif (!autonuma_list_add(page_nid, page,\n"
+ "+\t\t\t\t\t       AUTONUMA_LIST_HEAD,\n"
+ "+\t\t\t\t\t       &heads[nid]))\n"
+ "+\t\t\t\tBUG();\n"
+ " \t\t\tautonuma_migrate_unlock_irq(pgdat->node_id);\n"
+ " \t\t\tautonuma_printk(\"autonuma migrate page is free\\n\");\n"
+ " \t\t\tcontinue;\n"
+ "diff --git a/mm/autonuma_list.c b/mm/autonuma_list.c\n"
+ "new file mode 100644\n"
+ "index 0000000..2c840f7\n"
+ "--- /dev/null\n"
+ "+++ b/mm/autonuma_list.c\n"
+ "@@ -0,0 +1,167 @@\n"
+ "+/*\n"
+ "+ * Copyright 2006, Red Hat, Inc., Dave Jones\n"
+ "+ * Copyright 2012, Red Hat, Inc.\n"
+ "+ * Released under the General Public License (GPL).\n"
+ "+ *\n"
+ "+ * This file contains the linked list implementations for\n"
+ "+ * autonuma migration lists.\n"
+ "+ */\n"
+ "+\n"
+ "+#include <linux/mm.h>\n"
+ "+#include <linux/autonuma.h>\n"
+ "+\n"
+ "+/*\n"
+ "+ * Insert a new entry between two known consecutive entries.\n"
+ "+ *\n"
+ "+ * This is only for internal list manipulation where we know\n"
+ "+ * the prev/next entries already!\n"
+ "+ *\n"
+ "+ * return true if succeeded, or false if the (page_nid, pfn_offset)\n"
+ "+ * pair couldn't represent the pfn and the list_add didn't succeed.\n"
+ "+ */\n"
+ "+bool __autonuma_list_add(int page_nid,\n"
+ "+\t\t\t struct page *page,\n"
+ "+\t\t\t struct autonuma_list_head *head,\n"
+ "+\t\t\t autonuma_list_entry prev,\n"
+ "+\t\t\t autonuma_list_entry next)\n"
+ "+{\n"
+ "+\tautonuma_list_entry new;\n"
+ "+\n"
+ "+\tVM_BUG_ON(page_nid != page_to_nid(page));\n"
+ "+\tnew = autonuma_page_to_list_entry(page_nid, page);\n"
+ "+\tif (new > AUTONUMA_LIST_MAX_PFN_OFFSET)\n"
+ "+\t\treturn false;\n"
+ "+\n"
+ "+\tWARN(new == prev || new == next,\n"
+ "+\t     \"autonuma_list_add double add: new=%u, prev=%u, next=%u.\\n\",\n"
+ "+\t     new, prev, next);\n"
+ "+\n"
+ "+\t__autonuma_list_head(page_nid, head, next)->anl_prev_pfn = new;\n"
+ "+\t__autonuma_list_head(page_nid, head, new)->anl_next_pfn = next;\n"
+ "+\t__autonuma_list_head(page_nid, head, new)->anl_prev_pfn = prev;\n"
+ "+\t__autonuma_list_head(page_nid, head, prev)->anl_next_pfn = new;\n"
+ "+\treturn true;\n"
+ "+}\n"
+ "+\n"
+ "+static inline void __autonuma_list_del_entry(int page_nid,\n"
+ "+\t\t\t\t\t     struct autonuma_list_head *entry,\n"
+ "+\t\t\t\t\t     struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\tautonuma_list_entry prev, next;\n"
+ "+\n"
+ "+\tprev = entry->anl_prev_pfn;\n"
+ "+\tnext = entry->anl_next_pfn;\n"
+ "+\n"
+ "+\tif (WARN(next == AUTONUMA_LIST_POISON1,\n"
+ "+\t\t \"autonuma_list_del corruption, \"\n"
+ "+\t\t \"%p->anl_next_pfn is AUTONUMA_LIST_POISON1 (%u)\\n\",\n"
+ "+\t\tentry, AUTONUMA_LIST_POISON1) ||\n"
+ "+\t    WARN(prev == AUTONUMA_LIST_POISON2,\n"
+ "+\t\t\"autonuma_list_del corruption, \"\n"
+ "+\t\t \"%p->anl_prev_pfn is AUTONUMA_LIST_POISON2 (%u)\\n\",\n"
+ "+\t\tentry, AUTONUMA_LIST_POISON2))\n"
+ "+\t\treturn;\n"
+ "+\n"
+ "+\t__autonuma_list_head(page_nid, head, next)->anl_prev_pfn = prev;\n"
+ "+\t__autonuma_list_head(page_nid, head, prev)->anl_next_pfn = next;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * autonuma_list_del - deletes entry from list.\n"
+ "+ *\n"
+ "+ * Note: autonuma_list_empty on entry does not return true after this,\n"
+ "+ * the entry is in an undefined state.\n"
+ "+ */\n"
+ "+void autonuma_list_del(int page_nid, struct autonuma_list_head *entry,\n"
+ "+\t\t       struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\t__autonuma_list_del_entry(page_nid, entry, head);\n"
+ "+\tentry->anl_next_pfn = AUTONUMA_LIST_POISON1;\n"
+ "+\tentry->anl_prev_pfn = AUTONUMA_LIST_POISON2;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * autonuma_list_empty - tests whether a list is empty\n"
+ "+ * @head: the list to test.\n"
+ "+ */\n"
+ "+bool autonuma_list_empty(const struct autonuma_list_head *head)\n"
+ "+{\n"
+ "+\tbool ret = false;\n"
+ "+\tif (head->anl_next_pfn == AUTONUMA_LIST_HEAD) {\n"
+ "+\t\tret = true;\n"
+ "+\t\tBUG_ON(head->anl_prev_pfn != AUTONUMA_LIST_HEAD);\n"
+ "+\t}\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+/* abstraction conversion methods */\n"
+ "+\n"
+ "+static inline struct page *__autonuma_list_entry_to_page(int page_nid,\n"
+ "+\t\t\t\t\t\t\t autonuma_list_entry pfn_offset)\n"
+ "+{\n"
+ "+\tstruct pglist_data *pgdat = NODE_DATA(page_nid);\n"
+ "+\tunsigned long pfn = pgdat->node_start_pfn + pfn_offset;\n"
+ "+\treturn pfn_to_page(pfn);\n"
+ "+}\n"
+ "+\n"
+ "+struct page *autonuma_list_entry_to_page(int page_nid,\n"
+ "+\t\t\t\t\t autonuma_list_entry pfn_offset)\n"
+ "+{\n"
+ "+\tVM_BUG_ON(page_nid < 0);\n"
+ "+\tBUG_ON(pfn_offset == AUTONUMA_LIST_POISON1);\n"
+ "+\tBUG_ON(pfn_offset == AUTONUMA_LIST_POISON2);\n"
+ "+\tBUG_ON(pfn_offset == AUTONUMA_LIST_HEAD);\n"
+ "+\treturn __autonuma_list_entry_to_page(page_nid, pfn_offset);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * returns a value above AUTONUMA_LIST_MAX_PFN_OFFSET if the pfn is\n"
+ "+ * located at too big an offset from the start of the node and cannot be\n"
+ "+ * represented by the (page_nid, pfn_offset) pair.\n"
+ "+ */\n"
+ "+autonuma_list_entry autonuma_page_to_list_entry(int page_nid,\n"
+ "+\t\t\t\t\t\tstruct page *page)\n"
+ "+{\n"
+ "+\tunsigned long pfn = page_to_pfn(page);\n"
+ "+\tstruct pglist_data *pgdat = NODE_DATA(page_nid);\n"
+ "+\tVM_BUG_ON(page_nid != page_to_nid(page));\n"
+ "+\tBUG_ON(pfn < pgdat->node_start_pfn);\n"
+ "+\tpfn -= pgdat->node_start_pfn;\n"
+ "+\tif (pfn > AUTONUMA_LIST_MAX_PFN_OFFSET) {\n"
+ "+\t\tWARN_ONCE(1, \"autonuma_page_to_list_entry: \"\n"
+ "+\t\t\t  \"pfn_offset  %lu, pgdat %p, \"\n"
+ "+\t\t\t  \"pgdat->node_start_pfn %lu\\n\",\n"
+ "+\t\t\t  pfn, pgdat, pgdat->node_start_pfn);\n"
+ "+\t\t/*\n"
+ "+\t\t * Any value bigger than AUTONUMA_LIST_MAX_PFN_OFFSET\n"
+ "+\t\t * will work as an error retval, but better pick one\n"
+ "+\t\t * that will cause noise if computed wrong by the\n"
+ "+\t\t * caller.\n"
+ "+\t\t */\n"
+ "+\t\treturn AUTONUMA_LIST_POISON1;\n"
+ "+\t}\n"
+ "+\treturn pfn; /* convert to autonuma_list_entry without losing information */\n"
+ "+}\n"
+ "+\n"
+ "+static inline struct autonuma_list_head *____autonuma_list_head(int page_nid,\n"
+ "+\t\t\t\t\tautonuma_list_entry pfn_offset)\n"
+ "+{\n"
+ "+\tstruct pglist_data *pgdat = NODE_DATA(page_nid);\n"
+ "+\tunsigned long pfn = pgdat->node_start_pfn + pfn_offset;\n"
+ "+\tstruct page *page = pfn_to_page(pfn);\n"
+ "+\tstruct page_autonuma *page_autonuma = lookup_page_autonuma(page);\n"
+ "+\treturn &page_autonuma->autonuma_migrate_node;\n"
+ "+}\n"
+ "+\n"
+ "+struct autonuma_list_head *__autonuma_list_head(int page_nid,\n"
+ "+\t\t\t\t\tstruct autonuma_list_head *head,\n"
+ "+\t\t\t\t\tautonuma_list_entry pfn_offset)\n"
+ "+{\n"
+ "+\tVM_BUG_ON(page_nid < 0);\n"
+ "+\tBUG_ON(pfn_offset == AUTONUMA_LIST_POISON1);\n"
+ "+\tBUG_ON(pfn_offset == AUTONUMA_LIST_POISON2);\n"
+ "+\tif (pfn_offset != AUTONUMA_LIST_HEAD)\n"
+ "+\t\treturn ____autonuma_list_head(page_nid, pfn_offset);\n"
+ "+\telse\n"
+ "+\t\treturn head;\n"
+ "+}\n"
+ "diff --git a/mm/page_autonuma.c b/mm/page_autonuma.c\n"
+ "index f929d81..151f25c 100644\n"
+ "--- a/mm/page_autonuma.c\n"
+ "+++ b/mm/page_autonuma.c\n"
+ "@@ -12,7 +12,6 @@ void __meminit page_autonuma_map_init(struct page *page,\n"
+ " \tfor (end = page + nr_pages; page < end; page++, page_autonuma++) {\n"
+ " \t\tpage_autonuma->autonuma_last_nid = -1;\n"
+ " \t\tpage_autonuma->autonuma_migrate_nid = -1;\n"
+ "-\t\tpage_autonuma->page = page;\n"
+ " \t}\n"
+ " }\n"
+ " \n"
+ "@@ -20,12 +19,18 @@ static void __meminit __pgdat_autonuma_init(struct pglist_data *pgdat)\n"
+ " {\n"
+ " \tint node_iter;\n"
+ " \n"
+ "+\t/* verify the per-page page_autonuma 12 byte fixed cost */\n"
+ "+\tBUILD_BUG_ON((unsigned long) &((struct page_autonuma *)0)[1] != 12);\n"
+ "+\n"
+ " \tspin_lock_init(&pgdat->autonuma_lock);\n"
+ " \tinit_waitqueue_head(&pgdat->autonuma_knuma_migrated_wait);\n"
+ " \tpgdat->autonuma_nr_migrate_pages = 0;\n"
+ " \tif (!autonuma_impossible())\n"
+ "-\t\tfor_each_node(node_iter)\n"
+ "-\t\t\tINIT_LIST_HEAD(&pgdat->autonuma_migrate_head[node_iter]);\n"
+ "+\t\tfor_each_node(node_iter) {\n"
+ "+\t\t\tstruct autonuma_list_head *head;\n"
+ "+\t\t\thead = &pgdat->autonuma_migrate_head[node_iter];\n"
+ "+\t\t\tAUTONUMA_INIT_LIST_HEAD(head);\n"
+ "+\t\t}\n"
+ " }\n"
+ " \n"
+ " #if !defined(CONFIG_SPARSEMEM)\n"
+ "@@ -112,10 +117,6 @@ struct page_autonuma *lookup_page_autonuma(struct page *page)\n"
+ " \tunsigned long pfn = page_to_pfn(page);\n"
+ " \tstruct mem_section *section = __pfn_to_section(pfn);\n"
+ " \n"
+ "-\t/* if it's not a power of two we may be wasting memory */\n"
+ "-\tBUILD_BUG_ON(SECTION_PAGE_AUTONUMA_SIZE &\n"
+ "-\t\t     (SECTION_PAGE_AUTONUMA_SIZE-1));\n"
+ "-\n"
+ " #ifdef CONFIG_DEBUG_VM\n"
+ " \t/*\n"
+ " \t * The sanity checks the page allocator does upon freeing a"
 
-7f220ba1c43a6d37c22388909daf9dfa12d352f7d03ea336f750748b5e4ce48f
+c36af17d97bc4e141b8c77c815b3a66b504017e70b78c685b71b38654cdff5b3

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.