From: Mel Gorman <mgorman@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Nathan Zimmer <nzimmer@sgi.com>,
Dave Hansen <dave.hansen@intel.com>,
Waiman Long <waiman.long@hp.com>,
Scott Norton <scott.norton@hp.com>,
Daniel J Blueman <daniel@numascale.com>,
Linux-MM <linux-mm@kvack.org>,
LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 08/13] mm: meminit: Initialise remaining struct pages in parallel with kswapd
Date: Tue, 28 Apr 2015 15:37:05 +0100 [thread overview]
Message-ID: <1430231830-7702-9-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1430231830-7702-1-git-send-email-mgorman@suse.de>
Only a subset of struct pages are initialised at the moment. When this patch
is applied kswapd initialise the remaining struct pages in parallel. This
should boot faster by spreading the work to multiple CPUs and initialising
data that is local to the CPU. The user-visible effect on large machines
is that free memory will appear to rapidly increase early in the lifetime
of the system until kswapd reports that all memory is initialised in the
kernel log. Once initialised there should be no other user-visibile effects.
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
mm/internal.h | 6 +++
mm/mm_init.c | 1 +
mm/page_alloc.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
mm/vmscan.c | 6 ++-
4 files changed, 130 insertions(+), 6 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 24314b671db1..bed751a7ac42 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -394,9 +394,15 @@ static inline void mminit_verify_zonelist(void)
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
#define __defermem_init __meminit
#define __defer_init __meminit
+
+void deferred_init_memmap(int nid);
#else
#define __defermem_init
#define __defer_init __init
+
+static inline void deferred_init_memmap(int nid)
+{
+}
#endif
/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5f420f7fafa1..28fbf87b20aa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/memory.h>
#include <linux/notifier.h>
+#include <linux/sched.h>
#include "internal.h"
#ifdef CONFIG_DEBUG_MEMORY_INIT
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8ec493a24b9c..96f2c2dc8ca6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -252,6 +252,14 @@ static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
return false;
}
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+ if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+ return true;
+
+ return false;
+}
+
/*
* Returns false when the remaining initialisation should be deferred until
* later in the boot cycle when it can be parallelised.
@@ -284,6 +292,11 @@ static inline bool early_page_uninitialised(unsigned long pfn)
return false;
}
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+ return false;
+}
+
static inline bool update_defer_init(pg_data_t *pgdat,
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
@@ -880,20 +893,51 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
}
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+ pg_data_t *pgdat;
+ int nid, zid;
+
+ if (!early_page_uninitialised(pfn))
+ return;
+
+ nid = early_pfn_to_nid(pfn);
+ pgdat = NODE_DATA(nid);
+
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &pgdat->node_zones[zid];
+
+ if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+ break;
+ }
+ __init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
/*
* Initialised pages do not have PageReserved set. This function is
* called for each range allocated by the bootmem allocator and
* marks the pages PageReserved. The remaining valid pages are later
* sent to the buddy page allocator.
*/
-void reserve_bootmem_region(unsigned long start, unsigned long end)
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long end_pfn = PFN_UP(end);
- for (; start_pfn < end_pfn; start_pfn++)
- if (pfn_valid(start_pfn))
- SetPageReserved(pfn_to_page(start_pfn));
+ for (; start_pfn < end_pfn; start_pfn++) {
+ if (pfn_valid(start_pfn)) {
+ struct page *page = pfn_to_page(start_pfn);
+
+ init_reserved_page(start_pfn);
+ SetPageReserved(page);
+ }
+ }
}
static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -1018,6 +1062,74 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
return __free_pages_boot_core(page, pfn, order);
}
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/* Initialise remaining memory on a node */
+void __defermem_init deferred_init_memmap(int nid)
+{
+ struct mminit_pfnnid_cache nid_init_state = { };
+ unsigned long start = jiffies;
+ unsigned long nr_pages = 0;
+ unsigned long walk_start, walk_end;
+ int i, zid;
+ struct zone *zone;
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+
+ if (first_init_pfn == ULONG_MAX)
+ return;
+
+ /* Sanity check boundaries */
+ BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+ BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+ pgdat->first_deferred_pfn = ULONG_MAX;
+
+ /* Only the highest zone is deferred so find it */
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ zone = pgdat->node_zones + zid;
+ if (first_init_pfn < zone_end_pfn(zone))
+ break;
+ }
+
+ for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+ unsigned long pfn, end_pfn;
+
+ end_pfn = min(walk_end, zone_end_pfn(zone));
+ pfn = first_init_pfn;
+ if (pfn < walk_start)
+ pfn = walk_start;
+ if (pfn < zone->zone_start_pfn)
+ pfn = zone->zone_start_pfn;
+
+ for (; pfn < end_pfn; pfn++) {
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ continue;
+
+ if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state))
+ continue;
+
+ if (page->flags) {
+ VM_BUG_ON(page_zone(page) != zone);
+ continue;
+ }
+
+ __init_single_page(page, pfn, zid, nid);
+ __free_pages_boot_core(page, pfn, 0);
+ nr_pages++;
+ cond_resched();
+ }
+ first_init_pfn = max(end_pfn, first_init_pfn);
+ }
+
+ /* Sanity check that the next zone really is unpopulated */
+ WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+ pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+ jiffies_to_msecs(jiffies - start));
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
#ifdef CONFIG_CMA
/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
@@ -4228,6 +4340,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
zone->nr_migrate_reserve_block = reserve;
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+ if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+ return;
+
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5e8eadd71bac..c4895d26d036 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3348,7 +3348,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
* If there are applications that are active memory-allocators
* (most normal use), this basically shouldn't matter.
*/
-static int kswapd(void *p)
+static int __defermem_init kswapd(void *p)
{
unsigned long order, new_order;
unsigned balanced_order;
@@ -3383,6 +3383,8 @@ static int kswapd(void *p)
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
+ deferred_init_memmap(pgdat->node_id);
+
order = new_order = 0;
balanced_order = 0;
classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
@@ -3538,7 +3540,7 @@ static int cpu_callback(struct notifier_block *nfb, unsigned long action,
* This kswapd start function will be called by init and node-hot-add.
* On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
*/
-int kswapd_run(int nid)
+int __defermem_init kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
int ret = 0;
--
2.3.5
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2015-04-28 14:37 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-28 14:36 [PATCH 0/13] Parallel struct page initialisation v4 Mel Gorman
2015-04-28 14:36 ` [PATCH 01/13] memblock: Introduce a for_each_reserved_mem_region iterator Mel Gorman
2015-04-28 14:36 ` [PATCH 02/13] mm: meminit: Move page initialization into a separate function Mel Gorman
2015-04-28 14:37 ` [PATCH 03/13] mm: meminit: Only set page reserved in the memblock region Mel Gorman
2015-05-22 20:31 ` Tony Luck
2015-05-26 10:22 ` Mel Gorman
2015-04-28 14:37 ` [PATCH 04/13] mm: page_alloc: Pass PFN to __free_pages_bootmem Mel Gorman
2015-05-01 9:20 ` [PATCH] mm: page_alloc: pass PFN to __free_pages_bootmem -fix Mel Gorman
2015-04-28 14:37 ` [PATCH 05/13] mm: meminit: Make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid Mel Gorman
2015-04-28 14:37 ` [PATCH 06/13] mm: meminit: Inline some helper functions Mel Gorman
2015-04-30 21:53 ` Andrew Morton
2015-04-30 21:55 ` Andrew Morton
2015-05-04 8:33 ` Michal Hocko
2015-05-04 8:38 ` Michal Hocko
2015-04-28 14:37 ` [PATCH 07/13] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set Mel Gorman
2015-04-29 21:19 ` Andrew Morton
2015-04-30 8:45 ` Mel Gorman
2015-05-01 9:21 ` [PATCH] mm: meminit: Initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set -fix Mel Gorman
2015-07-14 15:54 ` 4.2-rc2: hitting "file-max limit 8192 reached" Dave Hansen
2015-07-14 16:15 ` Andrew Morton
2015-07-15 10:45 ` Mel Gorman
2015-04-28 14:37 ` Mel Gorman [this message]
2015-04-28 14:37 ` [PATCH 09/13] mm: meminit: Minimise number of pfn->page lookups during initialisation Mel Gorman
2015-04-28 14:37 ` [PATCH 10/13] x86: mm: Enable deferred struct page initialisation on x86-64 Mel Gorman
2015-04-28 14:37 ` [PATCH 11/13] mm: meminit: Free pages in large chunks where possible Mel Gorman
2015-04-28 14:37 ` [PATCH 12/13] mm: meminit: Reduce number of times pageblocks are set during struct page init Mel Gorman
2015-05-01 9:23 ` [PATCH] mm: meminit: Reduce number of times pageblocks are set during struct page init -fix Mel Gorman
2015-04-28 14:37 ` [PATCH 13/13] mm: meminit: Remove mminit_verify_page_links Mel Gorman
2015-04-28 16:06 ` [PATCH 0/13] Parallel struct page initialisation v4 Pekka Enberg
2015-04-28 18:38 ` nzimmer
2015-04-30 16:10 ` Daniel J Blueman
2015-04-30 17:12 ` nzimmer
2015-04-30 17:28 ` Mel Gorman
2015-05-02 11:52 ` Elliott, Robert (Server Storage)
2015-04-29 1:16 ` Waiman Long
2015-05-01 22:02 ` Waiman Long
2015-05-02 0:09 ` Waiman Long
2015-05-02 8:52 ` Daniel J Blueman
2015-05-02 16:05 ` Daniel J Blueman
2015-05-04 21:30 ` Andrew Morton
2015-05-05 3:32 ` Waiman Long
2015-05-05 10:45 ` Mel Gorman
2015-05-05 13:55 ` Waiman Long
2015-05-05 14:31 ` Mel Gorman
2015-05-05 15:01 ` Waiman Long
2015-05-06 3:39 ` Waiman Long
2015-05-06 0:55 ` Waiman Long
2015-05-05 20:02 ` Andrew Morton
2015-05-05 22:13 ` Mel Gorman
2015-05-05 22:25 ` Andrew Morton
2015-05-06 7:12 ` Mel Gorman
2015-05-06 10:22 ` Mel Gorman
2015-05-06 12:05 ` Mel Gorman
2015-05-06 17:58 ` Waiman Long
2015-05-07 2:37 ` Waiman Long
2015-05-07 7:21 ` Mel Gorman
2015-05-06 1:21 ` Waiman Long
2015-05-06 2:01 ` Andrew Morton
2015-05-07 7:25 ` [PATCH] mm: meminit: Finish initialisation of struct pages before basic setup Mel Gorman
2015-05-07 22:09 ` Andrew Morton
2015-05-07 22:52 ` Mel Gorman
2015-05-07 23:02 ` Andrew Morton
2015-05-13 15:53 ` nzimmer
2015-05-13 16:31 ` Mel Gorman
2015-05-14 10:03 ` Daniel J Blueman
2015-05-14 15:47 ` nzimmer
2015-05-19 18:31 ` nzimmer
2015-05-19 19:06 ` Mel Gorman
2015-05-22 6:30 ` Daniel J Blueman
2015-05-22 9:33 ` Mel Gorman
2015-05-22 17:14 ` Waiman Long
2015-05-22 21:43 ` Davidlohr Bueso
2015-05-23 3:49 ` Daniel J Blueman
2015-06-24 22:50 ` Nathan Zimmer
2015-06-25 20:48 ` Mel Gorman
2015-06-25 20:57 ` Mel Gorman
2015-06-25 21:37 ` Nathan Zimmer
2015-06-25 21:34 ` Nathan Zimmer
2015-06-26 10:16 ` Mel Gorman
2015-07-06 17:45 ` Daniel J Blueman
2015-07-09 17:49 ` Nathan Zimmer
-- strict thread matches above, loose matches on Subject: below --
2015-04-23 10:33 [PATCH 0/13] Parallel struct page initialisation v3 Mel Gorman
2015-04-23 10:33 ` [PATCH 08/13] mm: meminit: Initialise remaining struct pages in parallel with kswapd Mel Gorman
2015-04-27 22:43 ` Andrew Morton
2015-04-22 17:07 [RFC PATCH 0/14] Parallel struct page initialisation v5r4 Mel Gorman
2015-04-22 17:07 ` [PATCH 08/13] mm: meminit: Initialise remaining struct pages in parallel with kswapd Mel Gorman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1430231830-7702-9-git-send-email-mgorman@suse.de \
--to=mgorman@suse.de \
--cc=akpm@linux-foundation.org \
--cc=daniel@numascale.com \
--cc=dave.hansen@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nzimmer@sgi.com \
--cc=scott.norton@hp.com \
--cc=waiman.long@hp.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).