From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Michael Kerrisk <mtk.manpages@gmail.com>,
linux-api@vger.kernel.org, Hugh Dickins <hughd@google.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Rik van Riel <riel@redhat.com>,
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
Mel Gorman <mgorman@suse.de>, Jason Evans <je@fb.com>,
zhangyanfei@cn.fujitsu.com,
"Kirill A. Shutemov" <kirill@shutemov.name>,
Minchan Kim <minchan@kernel.org>,
Andrea Arcangeli <aarcange@redhat.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCH v17 7/7] mm: Don't split THP page when syscall is called
Date: Mon, 20 Oct 2014 19:12:04 +0900 [thread overview]
Message-ID: <1413799924-17946-8-git-send-email-minchan@kernel.org> (raw)
In-Reply-To: <1413799924-17946-1-git-send-email-minchan@kernel.org>
We don't need to split a THP page when the MADV_FREE syscall is
called. The split can be deferred until the VM decides to really free
the page, so we can avoid unnecessary THP splits.
Cc: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
include/linux/huge_mm.h | 4 ++++
mm/huge_memory.c | 35 +++++++++++++++++++++++++++++++++++
mm/madvise.c | 21 ++++++++++++++++++++-
mm/rmap.c | 8 ++++++--
mm/vmscan.c | 28 ++++++++++++++++++----------
5 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad9051bab267..07f736b18ffc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,9 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
+extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr);
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
@@ -56,6 +59,7 @@ extern pmd_t *page_check_address_pmd(struct page *page,
unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl);
+extern int pmd_freeable(pmd_t pmd);
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de984159cf0b..5be0a5f3ea3a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1384,6 +1384,36 @@ out:
return 0;
}
+int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr)
+
+{
+ spinlock_t *ptl;
+ struct mm_struct *mm = tlb->mm;
+ int ret = 1;
+
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ struct page *page;
+ pmd_t orig_pmd;
+
+ orig_pmd = pmdp_get_and_clear(mm, addr, pmd);
+
+ /* No hugepage in swapcache */
+ page = pmd_page(orig_pmd);
+ VM_BUG_ON_PAGE(PageSwapCache(page), page);
+
+ orig_pmd = pmd_mkold(orig_pmd);
+ orig_pmd = pmd_mkclean(orig_pmd);
+
+ set_pmd_at(mm, addr, pmd, orig_pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ spin_unlock(ptl);
+ ret = 0;
+ }
+
+ return ret;
+}
+
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
@@ -1620,6 +1650,11 @@ unlock:
return NULL;
}
+int pmd_freeable(pmd_t pmd)
+{
+ return !pmd_dirty(pmd);
+}
+
static int __split_huge_page_splitting(struct page *page,
struct vm_area_struct *vma,
unsigned long address)
diff --git a/mm/madvise.c b/mm/madvise.c
index a21584235bb6..84badee5f46d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -271,8 +271,26 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
pte_t *pte, ptent;
struct page *page;
+ unsigned long next;
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_trans_huge(*pmd)) {
+ if (next - addr != HPAGE_PMD_SIZE) {
+#ifdef CONFIG_DEBUG_VM
+ if (!rwsem_is_locked(&mm->mmap_sem)) {
+ pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
+ __func__, addr, end,
+ vma->vm_start,
+ vma->vm_end);
+ BUG();
+ }
+#endif
+ split_huge_page_pmd(vma, addr, pmd);
+ } else if (!madvise_free_huge_pmd(tlb, vma, pmd, addr))
+ goto next;
+ /* fall through */
+ }
- split_huge_page_pmd(vma, addr, pmd);
if (pmd_trans_unstable(pmd))
return 0;
@@ -316,6 +334,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
}
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
+next:
cond_resched();
return 0;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 93149c82a5a4..3a7081d884b9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -704,9 +704,13 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
referenced++;
/*
- * In this implmentation, MADV_FREE doesn't support THP free
+ * Use pmd_freeable instead of raw pmd_dirty because on some
+ * architectures pmd_dirty is not defined unless
+ * CONFIG_TRANSPARENT_HUGEPAGE is enabled.
*/
- dirty++;
+ if (!pmd_freeable(*pmd))
+ dirty++;
+
spin_unlock(ptl);
} else {
pte_t *pte;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f67765ebb77..29ae6382275a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -976,17 +976,25 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
- if (PageAnon(page) && !PageSwapCache(page) && !freeable) {
- if (!(sc->gfp_mask & __GFP_IO))
- goto keep_locked;
- if (!add_to_swap(page, page_list))
- goto activate_locked;
- may_enter_fs = 1;
-
- /* Adding to swap updated mapping */
- mapping = page_mapping(page);
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ if (!freeable) {
+ if (!(sc->gfp_mask & __GFP_IO))
+ goto keep_locked;
+ if (!add_to_swap(page, page_list))
+ goto activate_locked;
+ may_enter_fs = 1;
+ /* Adding to swap updated mapping */
+ mapping = page_mapping(page);
+ } else {
+ if (likely(!PageTransHuge(page)))
+ goto unmap;
+ /* try_to_unmap isn't aware of THP page */
+ if (unlikely(split_huge_page_to_list(page,
+ page_list)))
+ goto keep_locked;
+ }
}
-
+unmap:
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
--
2.0.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2014-10-20 10:12 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-20 10:11 [PATCH v17 0/7] MADV_FREE support Minchan Kim
2014-10-20 10:11 ` [PATCH v17 1/7] mm: support madvise(MADV_FREE) Minchan Kim
2014-11-27 14:47 ` Michal Hocko
[not found] ` <20141127144725.GB19157-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2014-11-30 23:56 ` Minchan Kim
2014-12-02 10:01 ` Michal Hocko
[not found] ` <20141202100125.GD27014-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2014-12-03 0:00 ` Minchan Kim
2014-12-03 10:13 ` Michal Hocko
[not found] ` <20141203101329.GB23236-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2014-12-05 7:08 ` Minchan Kim
2014-12-05 8:32 ` Michal Hocko
2015-02-03 16:39 ` Michael Kerrisk (man-pages)
2015-02-03 23:47 ` Minchan Kim
2015-02-06 0:33 ` Shaohua Li
2015-02-06 5:51 ` Minchan Kim
2015-02-06 18:29 ` Shaohua Li
2015-02-09 7:15 ` Minchan Kim
2015-02-10 22:38 ` Shaohua Li
2015-02-11 0:56 ` Minchan Kim
2015-02-12 0:14 ` Shaohua Li
2015-02-16 4:36 ` Minchan Kim
[not found] ` <20150206003311.GA2347-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2015-02-06 12:58 ` Michal Hocko
2015-02-06 18:32 ` Shaohua Li
2015-02-06 18:40 ` Rik van Riel
[not found] ` <54D0F9BC.4060306-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2015-02-04 12:52 ` Michal Hocko
2014-10-20 10:11 ` [PATCH v17 2/7] x86: add pmd_[dirty|mkclean] for THP Minchan Kim
[not found] ` <1413799924-17946-1-git-send-email-minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2014-10-20 10:12 ` [PATCH v17 3/7] sparc: " Minchan Kim
2014-11-13 22:58 ` [PATCH v17 0/7] MADV_FREE support Minchan Kim
2014-11-14 1:52 ` Andrew Morton
2014-10-20 10:12 ` [PATCH v17 4/7] powerpc: add pmd_[dirty|mkclean] for THP Minchan Kim
2014-10-20 10:12 ` [PATCH v17 5/7] arm: add pmd_mkclean " Minchan Kim
2014-10-20 10:12 ` [PATCH v17 6/7] arm64: add pmd_[dirty|mkclean] " Minchan Kim
2014-10-20 10:12 ` Minchan Kim [this message]
2014-11-27 15:49 ` [PATCH v17 7/7] mm: Don't split THP page when syscall is called Michal Hocko
2014-12-01 0:11 ` Minchan Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1413799924-17946-8-git-send-email-minchan@kernel.org \
--to=minchan@kernel.org \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=je@fb.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=kirill@shutemov.name \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mtk.manpages@gmail.com \
--cc=riel@redhat.com \
--cc=zhangyanfei@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).