From: Mel Gorman <mgorman@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Nathan Zimmer <nzimmer@sgi.com>,
Dave Hansen <dave.hansen@intel.com>,
Waiman Long <waiman.long@hp.com>,
Scott Norton <scott.norton@hp.com>,
Daniel J Blueman <daniel@numascale.com>,
Linux-MM <linux-mm@kvack.org>,
LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 07/13] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set
Date: Tue, 28 Apr 2015 15:37:04 +0100 [thread overview]
Message-ID: <1430231830-7702-8-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1430231830-7702-1-git-send-email-mgorman@suse.de>
This patch initialises all low memory struct pages and 2G of the highest zone
on each node during memory initialisation if CONFIG_DEFERRED_STRUCT_PAGE_INIT
is set. That config option cannot be set yet but will be available in a later
patch. Parallel initialisation of struct page depends on some features
from memory hotplug and it is necessary to alter section annotations.
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
drivers/base/node.c | 6 +++-
include/linux/mmzone.h | 8 ++++++
mm/Kconfig | 18 ++++++++++++
mm/internal.h | 14 +++++++++
mm/page_alloc.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++--
5 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 36fabe43cd44..97ab2c4dd39e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -361,12 +361,16 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
#define page_initialized(page) (page->lru.next)
-static int get_nid_for_pfn(unsigned long pfn)
+static int __init_refok get_nid_for_pfn(unsigned long pfn)
{
struct page *page;
if (!pfn_valid_within(pfn))
return -1;
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ if (system_state == SYSTEM_BOOTING)
+ return early_pfn_to_nid(pfn);
+#endif
page = pfn_to_page(pfn);
if (!page_initialized(page))
return -1;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e3d8a2bd8d78..4882c53b70b5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -762,6 +762,14 @@ typedef struct pglist_data {
/* Number of pages migrated during the rate limiting time interval */
unsigned long numabalancing_migrate_nr_pages;
#endif
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+ /*
+ * If memory initialisation on large machines is deferred then this
+ * is the first PFN that needs to be initialised.
+ */
+ unsigned long first_deferred_pfn;
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
diff --git a/mm/Kconfig b/mm/Kconfig
index a03131b6ba8e..3e40cb64e226 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -629,3 +629,21 @@ config MAX_STACK_SIZE_MB
changed to a smaller value in which case that is used.
A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+ bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+ bool "Defer initialisation of struct pages to kswapd"
+ default n
+ depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+ depends on MEMORY_HOTPLUG
+ help
+ Ordinarily all struct pages are initialised during early boot in a
+ single thread. On very large machines this can take a considerable
+ amount of time. If this option is set, large machines will bring up
+ a subset of memmap at boot and then initialise the rest in parallel
+ when kswapd starts. This has a potential performance impact on
+ processes running early in the lifetime of the system until kswapd
+ finishes the initialisation.
diff --git a/mm/internal.h b/mm/internal.h
index 76b605139c7a..24314b671db1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -385,6 +385,20 @@ static inline void mminit_verify_zonelist(void)
}
#endif /* CONFIG_DEBUG_MEMORY_INIT */
+/*
+ * Deferred struct page initialisation requires some early init functions that
+ * are removed before kswapd is up and running. The feature depends on memory
+ * hotplug so put the data and code required by deferred initialisation into
+ * the __meminit section where they are preserved.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define __defermem_init __meminit
+#define __defer_init __meminit
+#else
+#define __defermem_init
+#define __defer_init __init
+#endif
+
/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
#if defined(CONFIG_SPARSEMEM)
extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bb99c7e66da5..8ec493a24b9c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -235,6 +235,64 @@ EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly;
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+ pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+ int nid = early_pfn_to_nid(pfn);
+
+ if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+ return true;
+
+ return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+ unsigned long pfn, unsigned long zone_end,
+ unsigned long *nr_initialised)
+{
+ /* Always populate low zones for address-constrained allocations */
+ if (zone_end < pgdat_end_pfn(pgdat))
+ return true;
+
+ /* Initialise at least 2G of the highest zone */
+ (*nr_initialised)++;
+ if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+ (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+ pgdat->first_deferred_pfn = pfn;
+ return false;
+ }
+
+ return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+ return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+ unsigned long pfn, unsigned long zone_end,
+ unsigned long *nr_initialised)
+{
+ return true;
+}
+#endif
+
+
void set_pageblock_migratetype(struct page *page, int migratetype)
{
if (unlikely(page_group_by_mobility_disabled &&
@@ -892,8 +950,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_irq_restore(flags);
}
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
- unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+ unsigned long pfn, unsigned int order)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
@@ -952,6 +1010,14 @@ static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
}
#endif
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+ unsigned int order)
+{
+ if (early_page_uninitialised(pfn))
+ return;
+ return __free_pages_boot_core(page, pfn, order);
+}
+
#ifdef CONFIG_CMA
/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
@@ -4224,14 +4290,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn, enum memmap_context context)
{
+ pg_data_t *pgdat = NODE_DATA(nid);
unsigned long end_pfn = start_pfn + size;
unsigned long pfn;
struct zone *z;
+ unsigned long nr_initialised = 0;
if (highest_memmap_pfn < end_pfn - 1)
highest_memmap_pfn = end_pfn - 1;
- z = &NODE_DATA(nid)->node_zones[zone];
+ z = &pgdat->node_zones[zone];
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
* There can be holes in boot-time mem_map[]s
@@ -4243,6 +4311,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
continue;
if (!early_pfn_in_nid(pfn, nid))
continue;
+ if (!update_defer_init(pgdat, pfn, end_pfn,
+ &nr_initialised))
+ break;
}
__init_single_pfn(pfn, zone, nid);
}
@@ -5044,6 +5115,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
/* pg_data_t should be reset to zero when it's allocated */
WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
+ reset_deferred_meminit(pgdat);
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
--
2.3.5
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2015-04-28 14:37 UTC|newest]
Thread overview: 87+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-28 14:36 [PATCH 0/13] Parallel struct page initialisation v4 Mel Gorman
2015-04-28 14:36 ` [PATCH 01/13] memblock: Introduce a for_each_reserved_mem_region iterator Mel Gorman
2015-04-28 14:36 ` [PATCH 02/13] mm: meminit: Move page initialization into a separate function Mel Gorman
2015-04-28 14:37 ` [PATCH 03/13] mm: meminit: Only set page reserved in the memblock region Mel Gorman
2015-05-22 20:31 ` Tony Luck
2015-05-26 10:22 ` Mel Gorman
2015-04-28 14:37 ` [PATCH 04/13] mm: page_alloc: Pass PFN to __free_pages_bootmem Mel Gorman
2015-05-01 9:20 ` [PATCH] mm: page_alloc: pass PFN to __free_pages_bootmem -fix Mel Gorman
2015-04-28 14:37 ` [PATCH 05/13] mm: meminit: Make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid Mel Gorman
2015-04-28 14:37 ` [PATCH 06/13] mm: meminit: Inline some helper functions Mel Gorman
2015-04-30 21:53 ` Andrew Morton
2015-04-30 21:55 ` Andrew Morton
2015-05-04 8:33 ` Michal Hocko
2015-05-04 8:38 ` Michal Hocko
2015-04-28 14:37 ` Mel Gorman [this message]
2015-04-29 21:19 ` [PATCH 07/13] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set Andrew Morton
2015-04-30 8:45 ` Mel Gorman
2015-05-01 9:21 ` [PATCH] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set -fix Mel Gorman
2015-07-14 15:54 ` 4.2-rc2: hitting "file-max limit 8192 reached" Dave Hansen
2015-07-14 16:15 ` Andrew Morton
2015-07-15 10:45 ` Mel Gorman
2015-04-28 14:37 ` [PATCH 08/13] mm: meminit: Initialise remaining struct pages in parallel with kswapd Mel Gorman
2015-04-28 14:37 ` [PATCH 09/13] mm: meminit: Minimise number of pfn->page lookups during initialisation Mel Gorman
2015-04-28 14:37 ` [PATCH 10/13] x86: mm: Enable deferred struct page initialisation on x86-64 Mel Gorman
2015-04-28 14:37 ` [PATCH 11/13] mm: meminit: Free pages in large chunks where possible Mel Gorman
2015-04-28 14:37 ` [PATCH 12/13] mm: meminit: Reduce number of times pageblocks are set during struct page init Mel Gorman
2015-05-01 9:23 ` [PATCH] mm: meminit: Reduce number of times pageblocks are set during struct page init -fix Mel Gorman
2015-04-28 14:37 ` [PATCH 13/13] mm: meminit: Remove mminit_verify_page_links Mel Gorman
2015-04-28 16:06 ` [PATCH 0/13] Parallel struct page initialisation v4 Pekka Enberg
2015-04-28 18:38 ` nzimmer
2015-04-30 16:10 ` Daniel J Blueman
2015-04-30 17:12 ` nzimmer
2015-04-30 17:28 ` Mel Gorman
2015-05-02 11:52 ` Elliott, Robert (Server Storage)
2015-04-29 1:16 ` Waiman Long
2015-05-01 22:02 ` Waiman Long
2015-05-02 0:09 ` Waiman Long
2015-05-02 8:52 ` Daniel J Blueman
2015-05-02 16:05 ` Daniel J Blueman
2015-05-04 21:30 ` Andrew Morton
2015-05-05 3:32 ` Waiman Long
2015-05-05 10:45 ` Mel Gorman
2015-05-05 13:55 ` Waiman Long
2015-05-05 14:31 ` Mel Gorman
2015-05-05 15:01 ` Waiman Long
2015-05-06 3:39 ` Waiman Long
2015-05-06 0:55 ` Waiman Long
2015-05-05 20:02 ` Andrew Morton
2015-05-05 22:13 ` Mel Gorman
2015-05-05 22:25 ` Andrew Morton
2015-05-06 7:12 ` Mel Gorman
2015-05-06 10:22 ` Mel Gorman
2015-05-06 12:05 ` Mel Gorman
2015-05-06 17:58 ` Waiman Long
2015-05-07 2:37 ` Waiman Long
2015-05-07 7:21 ` Mel Gorman
2015-05-06 1:21 ` Waiman Long
2015-05-06 2:01 ` Andrew Morton
2015-05-07 7:25 ` [PATCH] mm: meminit: Finish initialisation of struct pages before basic setup Mel Gorman
2015-05-07 22:09 ` Andrew Morton
2015-05-07 22:52 ` Mel Gorman
2015-05-07 23:02 ` Andrew Morton
2015-05-13 15:53 ` nzimmer
2015-05-13 16:31 ` Mel Gorman
2015-05-14 10:03 ` Daniel J Blueman
2015-05-14 15:47 ` nzimmer
2015-05-19 18:31 ` nzimmer
2015-05-19 19:06 ` Mel Gorman
2015-05-22 6:30 ` Daniel J Blueman
2015-05-22 9:33 ` Mel Gorman
2015-05-22 17:14 ` Waiman Long
2015-05-22 21:43 ` Davidlohr Bueso
2015-05-23 3:49 ` Daniel J Blueman
2015-06-24 22:50 ` Nathan Zimmer
2015-06-25 20:48 ` Mel Gorman
2015-06-25 20:57 ` Mel Gorman
2015-06-25 21:37 ` Nathan Zimmer
2015-06-25 21:34 ` Nathan Zimmer
2015-06-26 10:16 ` Mel Gorman
2015-07-06 17:45 ` Daniel J Blueman
2015-07-09 17:49 ` Nathan Zimmer
-- strict thread matches above, loose matches on Subject: below --
2015-04-23 10:33 [PATCH 0/13] Parallel struct page initialisation v3 Mel Gorman
2015-04-23 10:33 ` [PATCH 07/13] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set Mel Gorman
2015-04-23 15:56 ` Mel Gorman
2015-04-27 22:43 ` Andrew Morton
2015-04-28 9:53 ` Mel Gorman
2015-04-28 13:48 ` Andrew Morton
2015-04-28 14:56 ` Mel Gorman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1430231830-7702-8-git-send-email-mgorman@suse.de \
--to=mgorman@suse.de \
--cc=akpm@linux-foundation.org \
--cc=daniel@numascale.com \
--cc=dave.hansen@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nzimmer@sgi.com \
--cc=scott.norton@hp.com \
--cc=waiman.long@hp.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).