linux-mm.kvack.org archive mirror
From: Steve Capper <steve.capper@linaro.org>
To: linux-mm@kvack.org, x86@kernel.org, linux-arch@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Cc: Michal Hocko <mhocko@suse.cz>, Ken Chen <kenchen@google.com>,
	Mel Gorman <mgorman@suse.de>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	patches@linaro.org, Steve Capper <steve.capper@linaro.org>
Subject: [RFC PATCH v2 01/11] mm: hugetlb: Copy huge_pmd_share from x86 to mm.
Date: Wed,  8 May 2013 10:52:33 +0100
Message-ID: <1368006763-30774-2-git-send-email-steve.capper@linaro.org>
In-Reply-To: <1368006763-30774-1-git-send-email-steve.capper@linaro.org>

Under x86, multiple puds can be made to reference the same bank of
huge pmds provided that they represent a full PUD_SIZE of shared
huge memory that is aligned to a PUD_SIZE boundary.
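
For illustration only (not part of this patch), a mapping that can
qualify for pmd sharing might be created from userspace roughly as
below; the hugetlbfs path is hypothetical and the 1GB figure is
PUD_SIZE on x86_64 (error handling omitted):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical file on a hugetlbfs mount. */
	int fd = open("/mnt/huge/buf", O_CREAT | O_RDWR, 0644);
	size_t len = 1UL << 30;	/* one PUD_SIZE worth of huge pages */
	void *buf;

	ftruncate(fd, len);

	/*
	 * MAP_SHARED is required; sharing can only happen when the
	 * resulting vma covers a full PUD_SIZE range aligned to a
	 * PUD_SIZE boundary. A second process mapping the same range
	 * may then reuse this process' huge pmd page on fault rather
	 * than allocating its own.
	 */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	return buf == MAP_FAILED;
}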

The code to share pmds does not require any architecture-specific
knowledge other than the fact that pmds can be indexed, and can
therefore be beneficial to other architectures.

This patch copies the huge pmd sharing (and unsharing) logic from
x86/ to mm/ and introduces a new config option to activate it:
CONFIG_ARCH_WANT_HUGE_PMD_SHARE
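
As a rough sketch only (loosely following the existing x86 code, and
not part of this patch), an architecture selecting the new option
would typically call huge_pmd_share() from its huge_pte_alloc() while
the pud is still empty; the 1GB-page special case shown here is
architecture specific and purely illustrative:

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
		      unsigned long sz)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud = pud_alloc(mm, pgd, addr);
	pte_t *pte = NULL;

	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		/* A 1GB page is mapped by the pud entry itself. */
		pte = (pte_t *)pud;
	} else if (pud_none(*pud)) {
		/* Try to reuse another mapping's pmd page first. */
		pte = huge_pmd_share(mm, addr, pud);
	} else {
		pte = (pte_t *)pmd_alloc(mm, pud, addr);
	}

	return pte;
}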

Signed-off-by: Steve Capper <steve.capper@linaro.org>
---
 include/linux/hugetlb.h |   4 ++
 mm/hugetlb.c            | 122 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 16e4e9a..795c32d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -68,6 +68,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 void copy_huge_page(struct page *dst, struct page *src);
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+#endif
+
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ca9a7c6..41179b0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3142,6 +3142,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */
-- 
1.8.1.4

Thread overview: 26+ messages
2013-05-08  9:52 [RFC PATCH v2 00/11] HugeTLB and THP support for ARM64 Steve Capper
2013-05-08  9:52 ` Steve Capper [this message]
2013-05-08  9:52 ` [RFC PATCH v2 02/11] x86: mm: Remove x86 version of huge_pmd_share Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 03/11] mm: hugetlb: Copy general hugetlb code from x86 to mm Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 04/11] x86: mm: Remove general hugetlb code from x86 Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 05/11] mm: thp: Correct the HPAGE_PMD_ORDER check Steve Capper
2013-05-08 12:44   ` Kirill A. Shutemov
2013-05-08 12:03     ` Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 06/11] ARM64: mm: Restore memblock limit when map_mem finished Steve Capper
2013-05-16 13:52   ` Catalin Marinas
2013-05-08  9:52 ` [RFC PATCH v2 07/11] ARM64: mm: Make PAGE_NONE pages read only and no-execute Steve Capper
2013-05-08 16:43   ` Will Deacon
2013-05-09  8:27     ` Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 08/11] ARM64: mm: Swap PTE_FILE and PTE_PROT_NONE bits Steve Capper
2013-05-08 16:17   ` Christopher Covington
2013-05-09  8:15     ` Steve Capper
2013-05-08 16:40   ` Will Deacon
2013-05-09  8:22     ` Steve Capper
2013-05-16 14:58   ` Catalin Marinas
2013-05-08  9:52 ` [RFC PATCH v2 09/11] ARM64: mm: HugeTLB support Steve Capper
2013-05-16 14:32   ` Catalin Marinas
2013-05-17  8:41     ` Steve Capper
2013-05-08  9:52 ` [RFC PATCH v2 10/11] ARM64: mm: Raise MAX_ORDER for 64KB pages and THP Steve Capper
2013-05-16 14:59   ` Catalin Marinas
2013-05-08  9:52 ` [RFC PATCH v2 11/11] ARM64: mm: THP support Steve Capper
2013-05-16 15:01   ` Catalin Marinas
