From: Mel Gorman <mgorman@suse.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
Ingo Molnar <mingo@kernel.org>,
Andrea Arcangeli <aarcange@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Linux-MM <linux-mm@kvack.org>,
LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 46/63] mm: numa: Do not group on RO pages
Date: Fri, 27 Sep 2013 14:27:31 +0100 [thread overview]
Message-ID: <1380288468-5551-47-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1380288468-5551-1-git-send-email-mgorman@suse.de>
From: Peter Zijlstra <peterz@infradead.org>
And here's a little something to make sure that the whole world does not
end up in a single group.
While we don't migrate shared executable pages, we do scan/fault on
them, and since everybody links to libc, everybody would otherwise end
up in the same group.
[riel@redhat.com: mapcount 1]
Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
include/linux/sched.h | 7 +++++--
kernel/sched/fair.c | 5 +++--
mm/huge_memory.c | 15 +++++++++++++--
mm/memory.c | 30 ++++++++++++++++++++++++++----
4 files changed, 47 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4fad1f17..15888f5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1434,13 +1434,16 @@ struct task_struct {
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+#define TNF_MIGRATED 0x01
+#define TNF_NO_GROUP 0x02
+
#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
#else
static inline void task_numa_fault(int last_node, int node, int pages,
- bool migrated)
+ int flags)
{
}
static inline pid_t task_numa_group_id(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index abdbb7c..896cf87 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
/*
* Got a PROT_NONE fault for a page on @node.
*/
-void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, int flags)
{
struct task_struct *p = current;
+ bool migrated = flags & TNF_MIGRATED;
int priv;
if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
priv = 1;
} else {
priv = cpupid_match_pid(p, last_cpupid);
- if (!priv)
+ if (!priv && !(flags & TNF_NO_GROUP))
task_numa_group(p, last_cpupid);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 048d4b2..aaf46dc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1297,6 +1297,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
+ int flags = 0;
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1311,6 +1312,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
/*
+ * Avoid grouping on DSO/COW pages in particular and RO pages
+ * in general. RO pages shouldn't hurt as much anyway since
+ * they can be in shared cache state.
+ */
+ if (!pmd_write(pmd))
+ flags |= TNF_NO_GROUP;
+
+ /*
* Acquire the page lock to serialise THP migrations but avoid dropping
* page_table_lock if at all possible
*/
@@ -1355,8 +1364,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
spin_unlock(&mm->page_table_lock);
migrated = migrate_misplaced_transhuge_page(mm, vma,
pmdp, pmd, addr, page, target_nid);
- if (migrated)
+ if (migrated) {
+ flags |= TNF_MIGRATED;
page_nid = target_nid;
+ }
goto out;
clear_pmdnuma:
@@ -1374,7 +1385,7 @@ out:
page_unlock_anon_vma_read(anon_vma);
if (page_nid != -1)
- task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
+ task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
return 0;
}
diff --git a/mm/memory.c b/mm/memory.c
index f779403..1aa4187 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3558,6 +3558,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int last_cpupid;
int target_nid;
bool migrated = false;
+ int flags = 0;
/*
* The "pte" at this point cannot be used safely without
@@ -3586,6 +3587,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
BUG_ON(is_zero_pfn(page_to_pfn(page)));
+ /*
+ * Avoid grouping on DSO/COW pages in particular and RO pages
+ * in general. RO pages shouldn't hurt as much anyway since
+ * they can be in shared cache state.
+ */
+ if (!pte_write(pte))
+ flags |= TNF_NO_GROUP;
+
last_cpupid = page_cpupid_last(page);
page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3597,12 +3606,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Migrate to the requested node */
migrated = migrate_misplaced_page(page, vma, target_nid);
- if (migrated)
+ if (migrated) {
page_nid = target_nid;
+ flags |= TNF_MIGRATED;
+ }
out:
if (page_nid != -1)
- task_numa_fault(last_cpupid, page_nid, 1, migrated);
+ task_numa_fault(last_cpupid, page_nid, 1, flags);
return 0;
}
@@ -3643,6 +3654,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
int page_nid = -1;
int target_nid;
bool migrated = false;
+ int flags = 0;
if (!pte_present(pteval))
continue;
@@ -3662,20 +3674,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(!page))
continue;
+ /*
+ * Avoid grouping on DSO/COW pages in particular and RO pages
+ * in general. RO pages shouldn't hurt as much anyway since
+ * they can be in shared cache state.
+ */
+ if (!pte_write(pteval))
+ flags |= TNF_NO_GROUP;
+
last_cpupid = page_cpupid_last(page);
page_nid = page_to_nid(page);
target_nid = numa_migrate_prep(page, vma, addr, page_nid);
pte_unmap_unlock(pte, ptl);
if (target_nid != -1) {
migrated = migrate_misplaced_page(page, vma, target_nid);
- if (migrated)
+ if (migrated) {
page_nid = target_nid;
+ flags |= TNF_MIGRATED;
+ }
} else {
put_page(page);
}
if (page_nid != -1)
- task_numa_fault(last_cpupid, page_nid, 1, migrated);
+ task_numa_fault(last_cpupid, page_nid, 1, flags);
pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}
--
1.8.1.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-09-27 13:28 UTC|newest]
Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-27 13:26 [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V8 Mel Gorman
2013-09-27 13:26 ` [PATCH 01/63] sched: monolithic code dump of what is being pushed upstream Mel Gorman
2013-09-27 13:26 ` [PATCH 02/63] mm: numa: Document automatic NUMA balancing sysctls Mel Gorman
2013-09-27 13:26 ` [PATCH 03/63] sched, numa: Comment fixlets Mel Gorman
2013-09-27 13:26 ` [PATCH 04/63] mm: numa: Do not account for a hinting fault if we raced Mel Gorman
2013-09-27 13:26 ` [PATCH 05/63] mm: Wait for THP migrations to complete during NUMA hinting faults Mel Gorman
2013-09-27 13:26 ` [PATCH 06/63] mm: Prevent parallel splits during THP migration Mel Gorman
2013-09-27 13:26 ` [PATCH 07/63] mm: Account for a THP NUMA hinting update as one PTE update Mel Gorman
2013-09-27 13:26 ` [PATCH 08/63] mm: Do not flush TLB during protection change if !pte_present && !migration_entry Mel Gorman
2013-09-27 13:26 ` [PATCH 09/63] mm: Only flush TLBs if a transhuge PMD is modified for NUMA pte scanning Mel Gorman
2013-09-27 13:26 ` [PATCH 10/63] mm: numa: Sanitize task_numa_fault() callsites Mel Gorman
2013-09-27 13:26 ` [PATCH 11/63] mm: Close races between THP migration and PMD numa clearing Mel Gorman
2013-09-30 8:52 ` Mel Gorman
2013-09-30 14:10 ` Rik van Riel
2013-09-30 15:18 ` Mel Gorman
2013-09-27 13:26 ` [PATCH 12/63] mm: numa: Do not migrate or account for hinting faults on the zero page Mel Gorman
2013-09-27 13:26 ` [PATCH 13/63] sched: numa: Mitigate chance that same task always updates PTEs Mel Gorman
2013-09-27 13:26 ` [PATCH 14/63] sched: numa: Continue PTE scanning even if migrate rate limited Mel Gorman
2013-09-27 13:27 ` [PATCH 15/63] Revert "mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node" Mel Gorman
2013-09-27 13:27 ` [PATCH 16/63] sched: numa: Initialise numa_next_scan properly Mel Gorman
2013-09-27 13:27 ` [PATCH 17/63] sched: Set the scan rate proportional to the memory usage of the task being scanned Mel Gorman
2013-09-27 13:27 ` [PATCH 18/63] sched: numa: Slow scan rate if no NUMA hinting faults are being recorded Mel Gorman
2013-09-27 13:27 ` [PATCH 19/63] sched: Track NUMA hinting faults on per-node basis Mel Gorman
2013-09-27 13:27 ` [PATCH 20/63] sched: Select a preferred node with the most numa hinting faults Mel Gorman
2013-09-27 13:27 ` [PATCH 21/63] sched: Update NUMA hinting faults once per scan Mel Gorman
2013-09-27 13:27 ` [PATCH 22/63] sched: Favour moving tasks towards the preferred node Mel Gorman
2013-09-27 13:27 ` [PATCH 23/63] sched: Resist moving tasks towards nodes with fewer hinting faults Mel Gorman
2013-09-27 13:27 ` [PATCH 24/63] sched: Reschedule task on preferred NUMA node once selected Mel Gorman
2013-09-27 13:27 ` [PATCH 25/63] sched: Add infrastructure for split shared/private accounting of NUMA hinting faults Mel Gorman
2013-09-27 13:27 ` [PATCH 26/63] sched: Check current->mm before allocating NUMA faults Mel Gorman
2013-09-27 13:27 ` [PATCH 27/63] mm: numa: Scan pages with elevated page_mapcount Mel Gorman
2013-09-27 13:27 ` [PATCH 28/63] sched: Remove check that skips small VMAs Mel Gorman
2013-09-27 13:27 ` [PATCH 29/63] sched: Set preferred NUMA node based on number of private faults Mel Gorman
2013-09-29 2:05 ` Figo.zhang
2013-09-27 13:27 ` [PATCH 30/63] sched: Do not migrate memory immediately after switching node Mel Gorman
2013-09-27 13:27 ` [PATCH 31/63] mm: numa: only unmap migrate-on-fault VMAs Mel Gorman
2013-09-27 13:27 ` [PATCH 32/63] sched: Avoid overloading CPUs on a preferred NUMA node Mel Gorman
2013-09-27 13:27 ` [PATCH 33/63] sched: Retry migration of tasks to CPU on a preferred node Mel Gorman
2013-09-27 13:27 ` [PATCH 34/63] sched: numa: increment numa_migrate_seq when task runs in correct location Mel Gorman
2013-09-27 13:27 ` [PATCH 35/63] sched: numa: Do not trap hinting faults for shared libraries Mel Gorman
2013-09-27 13:27 ` [PATCH 36/63] mm: numa: Only trap pmd hinting faults if we would otherwise trap PTE faults Mel Gorman
2013-09-27 13:27 ` [PATCH 37/63] stop_machine: Introduce stop_two_cpus() Mel Gorman
2013-09-27 13:27 ` [PATCH 38/63] sched: Introduce migrate_swap() Mel Gorman
2013-09-27 13:27 ` [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates Mel Gorman
2013-09-27 13:27 ` [PATCH 40/63] sched: numa: Favor placing a task on the preferred node Mel Gorman
2013-09-27 13:27 ` [PATCH 41/63] sched: numa: fix placement of workloads spread across multiple nodes Mel Gorman
2013-09-27 13:27 ` [PATCH 42/63] mm: numa: Change page last {nid,pid} into {cpu,pid} Mel Gorman
2013-09-27 13:27 ` [PATCH 43/63] sched: numa: Use {cpu, pid} to create task groups for shared faults Mel Gorman
2013-09-27 13:27 ` [PATCH 44/63] sched: numa: Report a NUMA task group ID Mel Gorman
2013-09-27 13:27 ` [PATCH 45/63] mm: numa: copy cpupid on page migration Mel Gorman
2013-09-27 13:27 ` Mel Gorman [this message]
2013-09-27 13:27 ` [PATCH 47/63] mm: numa: Do not batch handle PMD pages Mel Gorman
2013-09-27 13:27 ` [PATCH 48/63] sched: numa: stay on the same node if CLONE_VM Mel Gorman
2013-09-27 13:27 ` [PATCH 49/63] sched: numa: use group fault statistics in numa placement Mel Gorman
2013-09-27 13:27 ` [PATCH 50/63] sched: numa: call task_numa_free from do_execve Mel Gorman
2013-09-27 13:27 ` [PATCH 51/63] sched: numa: Prevent parallel updates to group stats during placement Mel Gorman
2013-09-27 13:27 ` [PATCH 52/63] sched: numa: add debugging Mel Gorman
2013-09-27 13:27 ` [PATCH 53/63] sched: numa: Decide whether to favour task or group weights based on swap candidate relationships Mel Gorman
2013-09-27 13:27 ` [PATCH 54/63] sched: numa: fix task or group comparison Mel Gorman
2013-09-27 13:27 ` [PATCH 55/63] sched: numa: Avoid migrating tasks that are placed on their preferred node Mel Gorman
2013-09-27 13:27 ` [PATCH 56/63] sched: numa: be more careful about joining numa groups Mel Gorman
2013-09-27 13:27 ` [PATCH 57/63] sched: numa: Take false sharing into account when adapting scan rate Mel Gorman
2013-09-27 13:27 ` [PATCH 58/63] sched: numa: adjust scan rate in task_numa_placement Mel Gorman
2013-09-27 13:27 ` [PATCH 59/63] sched: numa: Remove the numa_balancing_scan_period_reset sysctl Mel Gorman
2013-09-27 13:27 ` [PATCH 60/63] mm: numa: revert temporarily disabling of NUMA migration Mel Gorman
2013-09-27 13:27 ` [PATCH 61/63] sched: numa: skip some page migrations after a shared fault Mel Gorman
2013-09-27 13:27 ` [PATCH 62/63] sched: numa: use unsigned longs for numa group fault stats Mel Gorman
2013-09-27 13:27 ` [PATCH 63/63] sched: numa: periodically retry task_numa_migrate Mel Gorman
-- strict thread matches above, loose matches on Subject: below --
2013-10-07 10:28 [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V9 Mel Gorman
2013-10-07 10:29 ` [PATCH 46/63] mm: numa: Do not group on RO pages Mel Gorman
2013-10-07 19:10 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1380288468-5551-47-git-send-email-mgorman@suse.de \
--to=mgorman@suse.de \
--cc=a.p.zijlstra@chello.nl \
--cc=aarcange@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@kernel.org \
--cc=riel@redhat.com \
--cc=srikar@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).