linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mike Kravetz <mike.kravetz@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-api@vger.kernel.org
Cc: Marek Szyprowski <m.szyprowski@samsung.com>,
	Michal Nazarewicz <mina86@mina86.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Guy Shattah <sguy@mellanox.com>, Christoph Lameter <cl@linux.com>,
	Anshuman Khandual <khandual@linux.vnet.ibm.com>,
	Laura Abbott <labbott@redhat.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Mike Kravetz <mike.kravetz@oracle.com>
Subject: [RFC PATCH 3/3] mm/map_contig: Add mmap(MAP_CONTIG) support
Date: Wed, 11 Oct 2017 18:46:11 -0700	[thread overview]
Message-ID: <20171012014611.18725-4-mike.kravetz@oracle.com> (raw)
In-Reply-To: <20171012014611.18725-1-mike.kravetz@oracle.com>

Add new MAP_CONTIG flag to mmap system call.  Check for flag in normal
mmap flag processing.  If present, pre-allocate a contiguous set of
pages to back the mapping.  These pages will be used at fault time, and
the MAP_CONTIG flag implies populating the mapping at mmap time.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 include/uapi/asm-generic/mman.h |  1 +
 mm/mmap.c                       | 94 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
index 7162cd4cca73..e8046b4c4ac4 100644
--- a/include/uapi/asm-generic/mman.h
+++ b/include/uapi/asm-generic/mman.h
@@ -12,6 +12,7 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_CONTIG	0x80000		/* back with contiguous pages */
 
 /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 680506faceae..aee7917ee073 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -167,6 +167,16 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 {
 	struct vm_area_struct *next = vma->vm_next;
 
+	if (vma->vm_flags & VM_CONTIG) {
+		/*
+		 * Do any necessary clean up when freeing a vma backed
+		 * by a contiguous allocation.
+		 *
+		 * Not very useful in its present form.
+		 */
+		VM_BUG_ON(!vma->vm_private_data);
+		vma->vm_private_data = NULL;
+	}
 	might_sleep();
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
@@ -1378,6 +1388,18 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
+	/*
+	 * MAP_CONTIG has some restrictions,
+	 * and also implies additional mmap and vma flags.
+	 */
+	if (flags & MAP_CONTIG) {
+		if (!(flags & MAP_ANONYMOUS))
+			return -EINVAL;
+
+		flags |= MAP_POPULATE | MAP_LOCKED;
+		vm_flags |= (VM_CONTIG | VM_LOCKED | VM_DONTEXPAND);
+	}
+
 	if (flags & MAP_LOCKED)
 		if (!can_do_mlock())
 			return -EPERM;
@@ -1547,6 +1569,71 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
 /*
+ * Attempt to allocate a contiguous range of pages to back the
+ * specified vma.  vm_private_data is used as a 'pointer' to the
+ * allocated pages.  Larger requests and more fragmented memory
+ * make the allocation more likely to fail.  So, caller must deal
+ * with this situation.
+ */
+static long __alloc_vma_contig_range(struct vm_area_struct *vma)
+{
+	gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO;
+	unsigned long order;
+
+	VM_BUG_ON_VMA(vma->vm_private_data != NULL, vma);
+	order = get_order(vma->vm_end - vma->vm_start);
+
+	/*
+	 * FIXME - Incomplete implementation.  For now, just handle
+	 * allocations < MAX_ORDER in size.  However, this should really
+	 * handle arbitrary size allocations.
+	 */
+	if (order >= MAX_ORDER)
+		return -ENOMEM;
+
+	vma->vm_private_data = alloc_pages_vma(gfp, order, vma, vma->vm_start,
+						numa_node_id(), false);
+	if (!vma->vm_private_data)
+		return -ENOMEM;
+
+	/*
+	 * split large allocation so it can be treated as individual
+	 * pages when populating the mapping and at unmap time.
+	 */
+	if (order) {
+		unsigned long vma_pages = (vma->vm_end - vma->vm_start) /
+								PAGE_SIZE;
+		unsigned long order_pages = 1 << order;
+		unsigned long i;
+		struct page *page = vma->vm_private_data;
+
+		split_page((struct page *)vma->vm_private_data, order);
+
+		/*
+		 * 'order' rounds up size of vma to next power of 2.  We
+		 * will not need/use the extra pages so free them now.
+		 */
+		for (i = vma_pages; i < order_pages; i++)
+			put_page(page + i);
+	}
+
+	return 0;
+}
+
+static void __free_vma_contig_range(struct vm_area_struct *vma)
+{
+	struct page *page = vma->vm_private_data;
+	unsigned long n_pages = (vma->vm_end - vma->vm_start) / PAGE_SIZE;
+	unsigned long i;
+
+	if (!page)
+		return;
+
+	for (i = 0; i < n_pages; i++)
+		put_page(page + i);
+}
+
+/*
  * Some shared mappigns will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
  * to the private version (using protection_map[] without the
@@ -1669,6 +1756,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	vma->vm_pgoff = pgoff;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
+	if (vm_flags & VM_CONTIG) {
+		error = __alloc_vma_contig_range(vma);
+		if (error)
+			goto free_vma;
+	}
+
 	if (file) {
 		if (vm_flags & VM_DENYWRITE) {
 			error = deny_write_access(file);
@@ -1758,6 +1851,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
+	__free_vma_contig_range(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
 	if (charged)
-- 
2.13.6

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-10-12  1:46 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-03 23:56 [RFC] mmap(MAP_CONTIG) Mike Kravetz
2017-10-04 11:54 ` Michal Nazarewicz
2017-10-04 17:08   ` Mike Kravetz
2017-10-04 21:29     ` Laura Abbott
2017-10-04 13:49 ` Anshuman Khandual
2017-10-04 16:05   ` Christopher Lameter
2017-10-04 17:38     ` Mike Kravetz
2017-10-04 17:35   ` Mike Kravetz
2017-10-05  7:06 ` Vlastimil Babka
2017-10-05  8:58   ` Guy Shattah
2017-10-05 12:36     ` Guy Shattah
2017-10-05 14:30   ` Christopher Lameter
2017-10-12  1:46 ` [RFC PATCH 0/3] Add mmap(MAP_CONTIG) support Mike Kravetz
2017-10-12  1:46   ` [RFC PATCH 1/3] mm/map_contig: Add VM_CONTIG flag to vma struct Mike Kravetz
2017-10-12  1:46   ` [RFC PATCH 2/3] mm/map_contig: Use pre-allocated pages for VM_CONTIG mappings Mike Kravetz
2017-10-12 11:04     ` Anshuman Khandual
2017-10-12  1:46   ` Mike Kravetz [this message]
2017-10-12 11:22     ` [RFC PATCH 3/3] mm/map_contig: Add mmap(MAP_CONTIG) support Anshuman Khandual
2017-10-13 15:14       ` Christopher Lameter
2017-10-12 14:37     ` Michal Hocko
2017-10-12 17:19       ` Mike Kravetz
2017-10-13  8:40         ` Michal Hocko
2017-10-13 15:20           ` Christopher Lameter
2017-10-13 15:28             ` Michal Hocko
2017-10-13 15:42               ` Christopher Lameter
2017-10-13 15:47                 ` Michal Hocko
     [not found]                   ` <20171013154747.2jv7rtfqyyagiodn-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2017-10-13 15:56                     ` Christopher Lameter
2017-10-13 16:17                       ` Michal Hocko
2017-10-15  7:50                         ` Guy Shattah
2017-10-16  8:24                           ` Michal Hocko
     [not found]                             ` <20171016082456.no6ux63uy2rmj4fe-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2017-10-16  9:11                               ` Guy Shattah
2017-10-16 12:32                                 ` Michal Hocko
     [not found]                                   ` <20171016123248.csntl6luxgafst6q-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2017-10-16 16:00                                     ` Christopher Lameter
2017-10-16 17:42                                       ` Michal Hocko
     [not found]                                         ` <20171016174229.pz3o4uhzz3qbrp6n-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2017-10-16 17:56                                           ` Christopher Lameter
2017-10-16 18:17                                             ` Michal Hocko
2017-10-23 15:25                                           ` David Nellans
2017-10-17 10:50                                   ` Guy Shattah
     [not found]                                     ` <AM6PR0502MB378375AF8B569DBCCFE20D7DBD4C0-md96bDB8+JV1k1TWM4Wt8cDSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-10-17 10:59                                       ` Michal Hocko
2017-10-17 13:22                                       ` Michal Nazarewicz
2017-10-17 14:20                                         ` Guy Shattah
2017-10-17 17:44                                           ` Vlastimil Babka
2017-10-17 18:23                                           ` Mike Kravetz
2017-10-17 19:56                                             ` Vlastimil Babka
     [not found]                           ` <752b49eb-55c6-5a34-ab41-6e91dd93ea70-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-10-16 10:33                             ` Michal Nazarewicz
     [not found]                               ` <xa1t60bfxtzw.fsf-deATy8a+UHjQT0dZR+AlfA@public.gmane.org>
2017-10-16 11:09                                 ` Guy Shattah
2017-10-16 17:43                           ` Mike Kravetz
     [not found]                             ` <aff6b405-6a06-f84d-c9b1-c6fb166dff81-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-10-16 18:07                               ` Michal Hocko
2017-10-16 20:32                                 ` Mike Kravetz
2017-10-16 20:58                                   ` Michal Hocko
2017-10-16 21:03                                   ` Laura Abbott
2017-10-16 21:18                                     ` Mike Kravetz
     [not found]                                   ` <e8cf6227-003d-8a82-8b4d-07176b43810c-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-10-17  6:59                                     ` Vlastimil Babka
2017-10-15  6:58                   ` Pavel Machek
2017-10-16  8:18                     ` Michal Hocko
2017-10-16  9:54                       ` Pavel Machek
2017-10-16 12:18                         ` Michal Hocko
     [not found]                           ` <20171016121808.m4sq3g5nxeyxoymc-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
2017-10-16 16:02                             ` Christopher Lameter
2017-10-16 17:33                               ` Michal Hocko
2017-10-16 17:53                                 ` Christopher Lameter
2017-10-15  8:07     ` Guy Shattah
2017-10-12 10:36   ` [RFC PATCH 0/3] " Anshuman Khandual
2017-10-12 14:25     ` Anshuman Khandual
2017-10-23 22:10 ` [RFC] mmap(MAP_CONTIG) Dave Hansen
2017-10-24 22:49   ` Mike Kravetz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171012014611.18725-4-mike.kravetz@oracle.com \
    --to=mike.kravetz@oracle.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=cl@linux.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=khandual@linux.vnet.ibm.com \
    --cc=labbott@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=m.szyprowski@samsung.com \
    --cc=mina86@mina86.com \
    --cc=sguy@mellanox.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).