From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:51887) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ziep1-00084H-O6 for qemu-devel@nongnu.org; Sun, 04 Oct 2015 04:39:09 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Zieoy-0002wp-Fe for qemu-devel@nongnu.org; Sun, 04 Oct 2015 04:39:07 -0400 Received: from mx1.redhat.com ([209.132.183.28]:44389) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Zieoy-0002wi-7D for qemu-devel@nongnu.org; Sun, 04 Oct 2015 04:39:04 -0400 Date: Sun, 4 Oct 2015 11:38:59 +0300 From: "Michael S. Tsirkin" Message-ID: <20151004110510-mutt-send-email-mst@redhat.com> References: <1443704197-14329-1-git-send-email-mst@redhat.com> <560DD44D.9050309@twiddle.net> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <560DD44D.9050309@twiddle.net> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: Paolo Bonzini , =?iso-8859-1?Q?Marc-Andr=E9?= Lureau , qemu-devel@nongnu.org On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote: > On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote: > >Anonymous and file-backed RAM allocation are now almost exactly the sa= me. > > > >Reduce code duplication by moving RAM mmap code out of oslib-posix.c a= nd > >exec.c. > > > >Reported-by: Marc-Andr=E9 Lureau > >Signed-off-by: Michael S. Tsirkin > >Reviewed-by: Paolo Bonzini > >Acked-by: Paolo Bonzini > >--- > > > >Changes from v1: add shared flag to get MAP_SHARED mappings > >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM. > > > > include/qemu/mmap-alloc.h | 10 +++++++ > > exec.c | 47 +++++++------------------------ > > util/mmap-alloc.c | 71 +++++++++++++++++++++++++++++++++++++= ++++++++++ > > util/oslib-posix.c | 28 +++---------------- > > util/Makefile.objs | 2 +- > > 5 files changed, 96 insertions(+), 62 deletions(-) > > create mode 100644 include/qemu/mmap-alloc.h > > create mode 100644 util/mmap-alloc.c > > > >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h > >new file mode 100644 > >index 0000000..56388e6 > >--- /dev/null > >+++ b/include/qemu/mmap-alloc.h > >@@ -0,0 +1,10 @@ > >+#ifndef QEMU_MMAP_ALLOC > >+#define QEMU_MMAP_ALLOC > >+ > >+#include "qemu-common.h" > >+ > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); > >+ > >+void qemu_ram_munmap(void *ptr, size_t size); > >+ > >+#endif > >diff --git a/exec.c b/exec.c > >index 7d90a52..4505dc7 100644 > >--- a/exec.c > >+++ b/exec.c > >@@ -55,6 +55,9 @@ > > #include "exec/ram_addr.h" > > > > #include "qemu/range.h" > >+#ifndef _WIN32 > >+#include "qemu/mmap-alloc.h" > >+#endif > > > > //#define DEBUG_SUBPAGE > > > >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; > > */ > > #define RAM_RESIZEABLE (1 << 2) > > > >-/* An extra page is mapped on top of this RAM. > >+/* RAM is backed by an mmapped file. > > */ > >-#define RAM_EXTRA (1 << 3) > >+#define RAM_FILE (1 << 3) > > #endif > > > > struct CPUTailQ cpus =3D QTAILQ_HEAD_INITIALIZER(cpus); > >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, > > char *filename; > > char *sanitized_name; > > char *c; > >- void *ptr; > >- void *area =3D NULL; > >+ void *area; > > int fd; > > uint64_t hpagesize; > >- uint64_t total; > > Error *local_err =3D NULL; > >- size_t offset; > > > > hpagesize =3D gethugepagesize(path, &local_err); > > if (local_err) { > >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, > > g_free(filename); > > > > memory =3D ROUND_UP(memory, hpagesize); > >- total =3D memory + hpagesize; > > > > /* > > * ftruncate is not supported by hugetlbfs in older > >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, > > perror("ftruncate"); > > } > > > >- ptr =3D mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, > >- -1, 0); > >- if (ptr =3D=3D MAP_FAILED) { > >- error_setg_errno(errp, errno, > >- "unable to allocate memory range for hugepag= es"); > >- close(fd); > >- goto error; > >- } > >- > >- offset =3D QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)= ptr; > >- > >- area =3D mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, > >- (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE= ) | > >- MAP_FIXED, > >- fd, 0); > >+ area =3D qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_= SHARED); > > if (area =3D=3D MAP_FAILED) { > > error_setg_errno(errp, errno, > > "unable to map backing store for hugepages"= ); > >- munmap(ptr, total); > > close(fd); > > goto error; > > } > > > >- if (offset > 0) { > >- munmap(ptr, offset); > >- } > >- ptr +=3D offset; > >- total -=3D offset; > >- > >- if (total > memory + getpagesize()) { > >- munmap(ptr + memory + getpagesize(), > >- total - memory - getpagesize()); > >- } > >- > > if (mem_prealloc) { > > os_mem_prealloc(fd, area, memory); > > } > >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t s= ize, MemoryRegion *mr, > > new_block->used_length =3D size; > > new_block->max_length =3D size; > > new_block->flags =3D share ? RAM_SHARED : 0; > >- new_block->flags |=3D RAM_EXTRA; > >+ new_block->flags |=3D RAM_FILE; > > new_block->host =3D file_ram_alloc(new_block, size, > > mem_path, errp); > > if (!new_block->host) { > >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) > > xen_invalidate_map_cache_entry(block->host); > > #ifndef _WIN32 > > } else if (block->fd >=3D 0) { > >- if (block->flags & RAM_EXTRA) { > >- munmap(block->host, block->max_length + getpagesize()); > >+ if (block->flags & RAM_FILE) { > >+ qemu_ram_munmap(block->host, block->max_length); > > } else { > > munmap(block->host, block->max_length); > > } > >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > >new file mode 100644 > >index 0000000..e82cc94 > >--- /dev/null > >+++ b/util/mmap-alloc.c > >@@ -0,0 +1,71 @@ > >+/* > >+ * Support for RAM backed by mmaped host memory. > >+ * > >+ * Copyright (c) 2015 Red Hat, Inc. > >+ * > >+ * Authors: > >+ * Michael S. Tsirkin > >+ * > >+ * This work is licensed under the terms of the GNU GPL, version 2 or > >+ * later. See the COPYING file in the top-level directory. > >+ */ > >+#include > >+#include > >+#include > >+#include > >+ > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) >=20 > Do we need generic "align", or would a bool hugepage suffice? Or I sup= pose > this is also used for non-anonymous hugepages via /dev/hugepage where w= e > have multiple huge page sizes? Right. Further, passing in alignment makes it possible to keep page size probing in exec.c which is the only user. > Should we call this function something else, so that we can use it to > allocate hugepage aligned storage for code_gen_buffer and/or the tbs ar= ray? I don't much care what do we call it - with two callers it would be easy to rename down the road. The reason I put in "RAM" is because it allocates a guard page which only seems to make sense for RAM memory that can be mapped. But if you have a better name in mind, please let me know. > >+{ > >+ /* > >+ * Note: this always allocates at least one extra page of virtual= address > >+ * space, even if size is already aligned. > >+ */ > >+ size_t total =3D size + align; > >+ void *ptr =3D mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVA= TE, -1, 0); > >+ size_t offset =3D QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr= _t)ptr; > >+ void *ptr1; > >+ > >+ if (ptr =3D=3D MAP_FAILED) { > >+ return NULL; > >+ } > >+ > >+ /* Make sure align is a power of 2 */ > >+ assert(!(align & (align - 1))); > >+ /* Always align to host page size */ > >+ assert(align >=3D getpagesize()); >=20 > Is qemu_real_host_page_size initialized yet? > If not, should we move the initialization forward? I think it is. Though qemu_real_host_page_size seems to only be needed for portability, and this file is posix-specific. Anyway, we are moving code from util/oslib-posix.c and exec.c that currently both call getpagesize in this function, it seems better to be consistent. We can refactor afterwards, correct? > >+ > >+ ptr1 =3D mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > >+ MAP_FIXED | > >+ (fd =3D=3D -1 ? MAP_ANONYMOUS : 0) | > >+ (shared ? MAP_SHARED : MAP_PRIVATE), > >+ fd, 0); >=20 > If fd =3D=3D -1, we really only need mprotect on the region, correct? Yes. But that's already like this in util/oslib-posix.c, this is just re-factoring without logic changes. Any reason to prefer mprotect? That would be more LOC. > >+ if (ptr1 =3D=3D MAP_FAILED) { > >+ munmap(ptr, total); > >+ return NULL; > >+ } > >+ > >+ ptr +=3D offset; > >+ total -=3D offset; > >+ > >+ if (offset > 0) { > >+ munmap(ptr - offset, offset); > >+ } > >+ > >+ /* > >+ * Leave a single PROT_NONE page allocated after the RAM block, t= o serve as > >+ * a guard page guarding against potential buffer overflows. > >+ */ > >+ if (total > size + getpagesize()) { > >+ munmap(ptr + size + getpagesize(), total - size - getpagesize= ()); > >+ } > >+ > >+ return ptr; > >+}