linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: mhocko@suse.com, vbabka@suse.cz, mgorman@suse.de,
	minchan@kernel.org, aneesh.kumar@linux.vnet.ibm.com,
	bsingharora@gmail.com, srikar@linux.vnet.ibm.com,
	haren@linux.vnet.ibm.com, jglisse@redhat.com,
	dave.hansen@intel.com, dan.j.williams@intel.com
Subject: [RFC V2 02/12] mm: Isolate HugeTLB allocations away from CDM nodes
Date: Mon, 30 Jan 2017 09:05:43 +0530	[thread overview]
Message-ID: <20170130033602.12275-3-khandual@linux.vnet.ibm.com> (raw)
In-Reply-To: <20170130033602.12275-1-khandual@linux.vnet.ibm.com>

HugeTLB allocation, release and accounting currently span all the nodes
in the N_MEMORY node mask. Coherent device memory (CDM) nodes should not
be part of these allocations. So use the ram_nodemask() call to fetch the
system-RAM-only nodes on the platform, and use that mask for HugeTLB
allocation instead of the N_MEMORY node mask. This isolates coherent
device memory nodes from HugeTLB allocations.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 mm/hugetlb.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c7025c1..698af91 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1790,6 +1790,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 					unsigned long unused_resv_pages)
 {
 	unsigned long nr_pages;
+	nodemask_t ram_nodes = ram_nodemask();
 
 	/* Cannot return gigantic pages currently */
 	if (hstate_is_gigantic(h))
@@ -1816,7 +1817,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	while (nr_pages--) {
 		h->resv_huge_pages--;
 		unused_resv_pages--;
-		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
+		if (!free_pool_huge_page(h, &ram_nodes, 1))
 			goto out;
 		cond_resched_lock(&hugetlb_lock);
 	}
@@ -2107,8 +2108,9 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 {
 	struct huge_bootmem_page *m;
 	int nr_nodes, node;
+	nodemask_t ram_nodes = ram_nodemask();
 
-	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
+	for_each_node_mask_to_alloc(h, nr_nodes, node, &ram_nodes) {
 		void *addr;
 
 		addr = memblock_virt_alloc_try_nid_nopanic(
@@ -2177,13 +2179,14 @@ static void __init gather_bootmem_prealloc(void)
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
 	unsigned long i;
+	nodemask_t ram_nodes = ram_nodemask();
+
 
 	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (hstate_is_gigantic(h)) {
 			if (!alloc_bootmem_huge_page(h))
 				break;
-		} else if (!alloc_fresh_huge_page(h,
-					 &node_states[N_MEMORY]))
+		} else if (!alloc_fresh_huge_page(h, &ram_nodes))
 			break;
 	}
 	h->max_huge_pages = i;
@@ -2420,6 +2423,8 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
 					   unsigned long count, size_t len)
 {
 	int err;
+	nodemask_t ram_nodes = ram_nodemask();
+
 	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
 	if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
@@ -2434,7 +2439,7 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
 		if (!(obey_mempolicy &&
 				init_nodemask_of_mempolicy(nodes_allowed))) {
 			NODEMASK_FREE(nodes_allowed);
-			nodes_allowed = &node_states[N_MEMORY];
+			nodes_allowed = &ram_nodes;
 		}
 	} else if (nodes_allowed) {
 		/*
@@ -2444,11 +2449,11 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
 		count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
 		init_nodemask_of_node(nodes_allowed, nid);
 	} else
-		nodes_allowed = &node_states[N_MEMORY];
+		nodes_allowed = &ram_nodes;
 
 	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
-	if (nodes_allowed != &node_states[N_MEMORY])
+	if (nodes_allowed != &ram_nodes)
 		NODEMASK_FREE(nodes_allowed);
 
 	return len;
@@ -2745,9 +2750,10 @@ static void hugetlb_register_node(struct node *node)
  */
 static void __init hugetlb_register_all_nodes(void)
 {
+	nodemask_t nodes = ram_nodemask();
 	int nid;
 
-	for_each_node_state(nid, N_MEMORY) {
+	for_each_node_mask(nid, nodes) {
 		struct node *node = node_devices[nid];
 		if (node->dev.id == nid)
 			hugetlb_register_node(node);
@@ -3019,11 +3025,12 @@ void hugetlb_show_meminfo(void)
 {
 	struct hstate *h;
 	int nid;
+	nodemask_t ram_nodes = ram_nodemask();
 
 	if (!hugepages_supported())
 		return;
 
-	for_each_node_state(nid, N_MEMORY)
+	for_each_node_mask(nid, ram_nodes)
 		for_each_hstate(h)
 			pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
 				nid,
-- 
2.9.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-01-30  3:37 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-30  3:35 [RFC V2 00/12] Define coherent device memory node Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 01/12] mm: Define coherent device memory (CDM) node Anshuman Khandual
2017-01-30  3:35 ` Anshuman Khandual [this message]
2017-01-30 17:19   ` [RFC V2 02/12] mm: Isolate HugeTLB allocations away from CDM nodes Dave Hansen
2017-01-31  1:03     ` Anshuman Khandual
2017-01-31  1:37       ` Dave Hansen
2017-02-01 13:59         ` Anshuman Khandual
2017-02-01 19:01           ` Dave Hansen
2017-01-30  3:35 ` [RFC V2 03/12] mm: Change generic FALLBACK zonelist creation process Anshuman Khandual
2017-01-30 17:34   ` Dave Hansen
2017-01-31  1:36     ` Anshuman Khandual
2017-01-31  1:57       ` Dave Hansen
2017-01-31  7:25         ` John Hubbard
2017-01-31 18:04           ` Dave Hansen
2017-01-31 19:14             ` David Nellans
2017-02-01  6:56             ` Anshuman Khandual
2017-02-01  6:46           ` Anshuman Khandual
2017-02-01  6:40         ` Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 04/12] mm: Change mbind(MPOL_BIND) implementation for CDM nodes Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 05/12] cpuset: Add cpuset_inc() inside cpuset_init() Anshuman Khandual
2017-01-30 17:36   ` Dave Hansen
2017-01-30 20:30   ` Mel Gorman
2017-01-31 14:22     ` [RFC] cpuset: Enable changing of top_cpuset's mems_allowed nodemask Anshuman Khandual
2017-01-31 16:00       ` Mel Gorman
2017-02-01  7:31         ` Anshuman Khandual
2017-02-01  8:53           ` Michal Hocko
2017-02-01  9:18           ` Mel Gorman
2017-01-31 14:36     ` [RFC V2 05/12] cpuset: Add cpuset_inc() inside cpuset_init() Vlastimil Babka
2017-01-31 15:30       ` Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 06/12] mm: Exclude CDM nodes from task->mems_allowed and root cpuset Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 07/12] mm: Ignore cpuset enforcement when allocation flag has __GFP_THISNODE Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 08/12] mm: Add new VMA flag VM_CDM Anshuman Khandual
2017-01-30 18:52   ` Jerome Glisse
2017-01-31  4:22     ` Anshuman Khandual
2017-01-31  6:05       ` Jerome Glisse
2017-01-30  3:35 ` [RFC V2 09/12] mm: Exclude CDM marked VMAs from auto NUMA Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 10/12] mm: Ignore madvise(MADV_MERGEABLE) request for VM_CDM marked VMAs Anshuman Khandual
2017-01-30  3:35 ` [RFC V2 11/12] mm: Tag VMA with VM_CDM flag during page fault Anshuman Khandual
2017-01-30 17:51   ` Dave Hansen
2017-01-31  5:10     ` Anshuman Khandual
2017-01-31 17:54       ` Dave Hansen
2017-01-30  3:35 ` [RFC V2 12/12] mm: Tag VMA with VM_CDM flag explicitly during mbind(MPOL_BIND) Anshuman Khandual
2017-01-30 17:54   ` Dave Hansen
2017-01-31  4:36     ` Anshuman Khandual
2017-02-07 18:07       ` Dave Hansen
2017-02-08 14:13         ` Anshuman Khandual
2017-02-08 15:04         ` Jerome Glisse
2017-01-30  3:35 ` [DEBUG 13/21] powerpc/mm: Identify coherent device memory nodes during platform init Anshuman Khandual
2017-01-30  3:35 ` [DEBUG 14/21] powerpc/mm: Create numa nodes for hotplug memory Anshuman Khandual
2017-01-30  3:35 ` [DEBUG 15/21] powerpc/mm: Enable CONFIG_MOVABLE_NODE for PPC64 platform Anshuman Khandual
2017-01-30  3:35 ` [DEBUG 16/21] mm: Enable CONFIG_MOVABLE_NODE on powerpc Anshuman Khandual
2017-01-30  3:35 ` [DEBUG 17/21] mm: Export definition of 'zone_names' array through mmzone.h Anshuman Khandual
2017-01-30  3:35 ` [DEBUG 18/21] mm: Add debugfs interface to dump each node's zonelist information Anshuman Khandual
2017-01-30  3:36 ` [DEBUG 19/21] mm: Add migrate_virtual_range migration interface Anshuman Khandual
2017-01-30  3:36 ` [DEBUG 20/21] drivers: Add two drivers for coherent device memory tests Anshuman Khandual
2017-01-30  3:36 ` [DEBUG 21/21] selftests/powerpc: Add a script to perform random VMA migrations Anshuman Khandual
2017-01-31  5:48 ` [RFC V2 00/12] Define coherent device memory node Anshuman Khandual
2017-01-31  6:15   ` Jerome Glisse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170130033602.12275-3-khandual@linux.vnet.ibm.com \
    --to=khandual@linux.vnet.ibm.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=bsingharora@gmail.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=haren@linux.vnet.ibm.com \
    --cc=jglisse@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).