From: "Huang, Ying" <ying.huang@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Tim Chen <tim.c.chen@intel.com>, Andi Kleen <ak@linux.intel.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Huang Ying <ying.huang@intel.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Michal Hocko <mhocko@suse.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Shaohua Li <shli@kernel.org>, Hugh Dickins <hughd@google.com>,
Minchan Kim <minchan@kernel.org>, Rik van Riel <riel@redhat.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>,
Zi Yan <zi.yan@cs.rutgers.edu>
Subject: [PATCH -mm 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD
Date: Tue, 17 Apr 2018 10:02:15 +0800 [thread overview]
Message-ID: <20180417020230.26412-7-ying.huang@intel.com> (raw)
In-Reply-To: <20180417020230.26412-1-ying.huang@intel.com>
From: Huang Ying <ying.huang@intel.com>
A huge PMD need to be split when zap a part of the PMD mapping etc.
If the PMD mapping is a swap mapping, we need to split it too. This
patch implemented the support for this. This is similar as splitting
the PMD page mapping, except we need to decrease the PMD swap mapping
count for the huge swap cluster too. If the PMD swap mapping count
becomes 0, the huge swap cluster will be split.
Notice: is_huge_zero_pmd() and pmd_page() doesn't work well with swap
PMD, so pmd_present() check is called before them.
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
---
include/linux/swap.h | 6 +++++
mm/huge_memory.c | 54 ++++++++++++++++++++++++++++++++++++++++----
mm/swapfile.c | 28 +++++++++++++++++++++++
3 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 89f34ebfd318..b5762b526719 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page(struct page *page)
#ifdef CONFIG_THP_SWAP
extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster_map(swp_entry_t entry);
#else
static inline int split_swap_cluster(swp_entry_t entry)
{
return 0;
}
+
+static inline int split_swap_cluster_map(swp_entry_t entry)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_MEMCG
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a3a1815f8e11..7342ad88de5d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1605,6 +1605,47 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
return 0;
}
+#ifdef CONFIG_THP_SWAP
+static void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pgtable_t pgtable;
+ pmd_t _pmd;
+ swp_entry_t entry;
+ int i, soft_dirty;
+
+ entry = pmd_to_swp_entry(*pmd);
+ soft_dirty = pmd_soft_dirty(*pmd);
+
+ split_swap_cluster_map(entry);
+
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pmd_populate(mm, &_pmd, pgtable);
+
+ for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) {
+ pte_t *pte, ptent;
+
+ pte = pte_offset_map(&_pmd, haddr);
+ VM_BUG_ON(!pte_none(*pte));
+ ptent = swp_entry_to_pte(entry);
+ if (soft_dirty)
+ ptent = pte_swp_mksoft_dirty(ptent);
+ set_pte_at(mm, haddr, pte, ptent);
+ pte_unmap(pte);
+ }
+ smp_wmb(); /* make pte visible before pmd */
+ pmd_populate(mm, pmd, pgtable);
+}
+#else
+static inline void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd)
+{
+}
+#endif
+
/*
* Return true if we do MADV_FREE successfully on entire pmd page.
* Otherwise, return false.
@@ -2071,7 +2112,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
- VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+ VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd)
&& !pmd_devmap(*pmd));
count_vm_event(THP_SPLIT_PMD);
@@ -2093,7 +2134,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
put_page(page);
add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
return;
- } else if (is_huge_zero_pmd(*pmd)) {
+ } else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2137,6 +2178,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
page = pfn_to_page(swp_offset(entry));
} else
#endif
+ if (thp_swap_supported() && is_swap_pmd(old_pmd))
+ return __split_huge_swap_pmd(vma, haddr, pmd);
+ else
page = pmd_page(old_pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
@@ -2228,14 +2272,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
* pmd against. Otherwise we can end up replacing wrong page.
*/
VM_BUG_ON(freeze && !page);
- if (page && page != pmd_page(*pmd))
- goto out;
+ if (page && (!pmd_present(*pmd) || page != pmd_page(*pmd)))
+ goto out;
if (pmd_trans_huge(*pmd)) {
page = pmd_page(*pmd);
if (PageMlocked(page))
clear_page_mlock(page);
- } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
+ } else if (!(pmd_devmap(*pmd) || is_swap_pmd(*pmd)))
goto out;
__split_huge_pmd_locked(vma, pmd, haddr, freeze);
out:
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c25c7759df6b..2be757548c97 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -4041,6 +4041,34 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
}
}
+#ifdef CONFIG_THP_SWAP
+/* The corresponding page table shouldn't be changed under us */
+int split_swap_cluster_map(swp_entry_t entry)
+{
+ struct swap_info_struct *si;
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+
+ VM_BUG_ON(!is_cluster_offset(offset));
+ si = _swap_info_get(entry);
+ if (!si)
+ return -EBUSY;
+ ci = lock_cluster(si, offset);
+ /* The swap cluster has been split by someone else */
+ if (!cluster_is_huge(ci))
+ goto out;
+ cluster_set_count(ci, cluster_count(ci) - 1);
+ VM_BUG_ON(cluster_count(ci) < SWAPFILE_CLUSTER);
+ if (cluster_count(ci) == SWAPFILE_CLUSTER &&
+ !(si->swap_map[offset] & SWAP_HAS_CACHE))
+ cluster_clear_huge(ci);
+
+out:
+ unlock_cluster(ci);
+ return 0;
+}
+#endif
+
static int __init swapfile_init(void)
{
int nid;
--
2.17.0
next prev parent reply other threads:[~2018-04-17 2:02 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-17 2:02 [PATCH 00/21] mm, THP, swap: Swapout/swapin THP as a whole Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 01/21] mm, THP, swap: Enable PMD swap operations for CONFIG_THP_SWAP Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 02/21] mm, THP, swap: Make CONFIG_THP_SWAP depends on CONFIG_SWAP Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 03/21] mm, THP, swap: Support PMD swap mapping in swap_duplicate() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 04/21] mm, THP, swap: Support PMD swap mapping in swapcache_free_cluster() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 05/21] mm, THP, swap: Support PMD swap mapping in free_swap_and_cache()/swap_free() Huang, Ying
2018-04-17 2:02 ` Huang, Ying [this message]
2018-04-17 21:12 ` [PATCH -mm 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD Randy Dunlap
2018-04-18 0:38 ` Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 07/21] mm, THP, swap: Support PMD swap mapping in split_swap_cluster() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 08/21] mm, THP, swap: Support to read a huge swap cluster for swapin a THP Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 09/21] mm, THP, swap: Swapin a THP as a whole Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 10/21] mm, THP, swap: Support to count THP swapin and its fallback Huang, Ying
2018-04-17 21:18 ` Randy Dunlap
2018-04-18 0:39 ` Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 11/21] mm, THP, swap: Add sysfs interface to configure THP swapin Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 12/21] mm, THP, swap: Support PMD swap mapping in swapoff Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 13/21] mm, THP, swap: Support PMD swap mapping in madvise_free() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 14/21] mm, cgroup, THP, swap: Support to move swap account for PMD swap mapping Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 15/21] mm, THP, swap: Support to copy PMD swap mapping when fork() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 16/21] mm, THP, swap: Free PMD swap mapping when zap_huge_pmd() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 17/21] mm, THP, swap: Support PMD swap mapping for MADV_WILLNEED Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 18/21] mm, THP, swap: Support PMD swap mapping in mincore() Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 19/21] mm, THP, swap: Support PMD swap mapping in common path Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 20/21] mm, THP, swap: create PMD swap mapping when unmap the THP Huang, Ying
2018-04-17 2:02 ` [PATCH -mm 21/21] mm, THP: Avoid to split THP when reclaim MADV_FREE THP Huang, Ying
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180417020230.26412-7-ying.huang@intel.com \
--to=ying.huang@intel.com \
--cc=aarcange@redhat.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@linux.intel.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=minchan@kernel.org \
--cc=n-horiguchi@ah.jp.nec.com \
--cc=riel@redhat.com \
--cc=shli@kernel.org \
--cc=tim.c.chen@intel.com \
--cc=zi.yan@cs.rutgers.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).