From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:58171) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZkHxr-0002WF-Ey for qemu-devel@nongnu.org; Thu, 08 Oct 2015 16:39:01 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZkHxm-0008Au-CY for qemu-devel@nongnu.org; Thu, 08 Oct 2015 16:38:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:58546) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZkHxm-0008Af-37 for qemu-devel@nongnu.org; Thu, 08 Oct 2015 16:38:54 -0400 Date: Thu, 8 Oct 2015 23:38:50 +0300 From: "Michael S. Tsirkin" Message-ID: <20151008233810-mutt-send-email-mst@redhat.com> References: <1443704197-14329-1-git-send-email-mst@redhat.com> <560DD44D.9050309@twiddle.net> <20151004110510-mutt-send-email-mst@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <20151004110510-mutt-send-email-mst@redhat.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: Paolo Bonzini , =?iso-8859-1?Q?Marc-Andr=E9?= Lureau , qemu-devel@nongnu.org Any objections to merging as is, and renaming later as appropriate? On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote: > On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote: > > On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote: > > >Anonymous and file-backed RAM allocation are now almost exactly the = same. > > > > > >Reduce code duplication by moving RAM mmap code out of oslib-posix.c= and > > >exec.c. > > > > > >Reported-by: Marc-Andr=E9 Lureau > > >Signed-off-by: Michael S. Tsirkin > > >Reviewed-by: Paolo Bonzini > > >Acked-by: Paolo Bonzini > > >--- > > > > > >Changes from v1: add shared flag to get MAP_SHARED mappings > > >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM. > > > > > > include/qemu/mmap-alloc.h | 10 +++++++ > > > exec.c | 47 +++++++------------------------ > > > util/mmap-alloc.c | 71 +++++++++++++++++++++++++++++++++++= ++++++++++++ > > > util/oslib-posix.c | 28 +++---------------- > > > util/Makefile.objs | 2 +- > > > 5 files changed, 96 insertions(+), 62 deletions(-) > > > create mode 100644 include/qemu/mmap-alloc.h > > > create mode 100644 util/mmap-alloc.c > > > > > >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h > > >new file mode 100644 > > >index 0000000..56388e6 > > >--- /dev/null > > >+++ b/include/qemu/mmap-alloc.h > > >@@ -0,0 +1,10 @@ > > >+#ifndef QEMU_MMAP_ALLOC > > >+#define QEMU_MMAP_ALLOC > > >+ > > >+#include "qemu-common.h" > > >+ > > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)= ; > > >+ > > >+void qemu_ram_munmap(void *ptr, size_t size); > > >+ > > >+#endif > > >diff --git a/exec.c b/exec.c > > >index 7d90a52..4505dc7 100644 > > >--- a/exec.c > > >+++ b/exec.c > > >@@ -55,6 +55,9 @@ > > > #include "exec/ram_addr.h" > > > > > > #include "qemu/range.h" > > >+#ifndef _WIN32 > > >+#include "qemu/mmap-alloc.h" > > >+#endif > > > > > > //#define DEBUG_SUBPAGE > > > > > >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; > > > */ > > > #define RAM_RESIZEABLE (1 << 2) > > > > > >-/* An extra page is mapped on top of this RAM. > > >+/* RAM is backed by an mmapped file. > > > */ > > >-#define RAM_EXTRA (1 << 3) > > >+#define RAM_FILE (1 << 3) > > > #endif > > > > > > struct CPUTailQ cpus =3D QTAILQ_HEAD_INITIALIZER(cpus); > > >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, > > > char *filename; > > > char *sanitized_name; > > > char *c; > > >- void *ptr; > > >- void *area =3D NULL; > > >+ void *area; > > > int fd; > > > uint64_t hpagesize; > > >- uint64_t total; > > > Error *local_err =3D NULL; > > >- size_t offset; > > > > > > hpagesize =3D gethugepagesize(path, &local_err); > > > if (local_err) { > > >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, > > > g_free(filename); > > > > > > memory =3D ROUND_UP(memory, hpagesize); > > >- total =3D memory + hpagesize; > > > > > > /* > > > * ftruncate is not supported by hugetlbfs in older > > >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, > > > perror("ftruncate"); > > > } > > > > > >- ptr =3D mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, > > >- -1, 0); > > >- if (ptr =3D=3D MAP_FAILED) { > > >- error_setg_errno(errp, errno, > > >- "unable to allocate memory range for hugep= ages"); > > >- close(fd); > > >- goto error; > > >- } > > >- > > >- offset =3D QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_= t)ptr; > > >- > > >- area =3D mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, > > >- (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVA= TE) | > > >- MAP_FIXED, > > >- fd, 0); > > >+ area =3D qemu_ram_mmap(fd, memory, hpagesize, block->flags & RA= M_SHARED); > > > if (area =3D=3D MAP_FAILED) { > > > error_setg_errno(errp, errno, > > > "unable to map backing store for hugepage= s"); > > >- munmap(ptr, total); > > > close(fd); > > > goto error; > > > } > > > > > >- if (offset > 0) { > > >- munmap(ptr, offset); > > >- } > > >- ptr +=3D offset; > > >- total -=3D offset; > > >- > > >- if (total > memory + getpagesize()) { > > >- munmap(ptr + memory + getpagesize(), > > >- total - memory - getpagesize()); > > >- } > > >- > > > if (mem_prealloc) { > > > os_mem_prealloc(fd, area, memory); > > > } > > >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t= size, MemoryRegion *mr, > > > new_block->used_length =3D size; > > > new_block->max_length =3D size; > > > new_block->flags =3D share ? RAM_SHARED : 0; > > >- new_block->flags |=3D RAM_EXTRA; > > >+ new_block->flags |=3D RAM_FILE; > > > new_block->host =3D file_ram_alloc(new_block, size, > > > mem_path, errp); > > > if (!new_block->host) { > > >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) > > > xen_invalidate_map_cache_entry(block->host); > > > #ifndef _WIN32 > > > } else if (block->fd >=3D 0) { > > >- if (block->flags & RAM_EXTRA) { > > >- munmap(block->host, block->max_length + getpagesize()); > > >+ if (block->flags & RAM_FILE) { > > >+ qemu_ram_munmap(block->host, block->max_length); > > > } else { > > > munmap(block->host, block->max_length); > > > } > > >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > > >new file mode 100644 > > >index 0000000..e82cc94 > > >--- /dev/null > > >+++ b/util/mmap-alloc.c > > >@@ -0,0 +1,71 @@ > > >+/* > > >+ * Support for RAM backed by mmaped host memory. > > >+ * > > >+ * Copyright (c) 2015 Red Hat, Inc. > > >+ * > > >+ * Authors: > > >+ * Michael S. Tsirkin > > >+ * > > >+ * This work is licensed under the terms of the GNU GPL, version 2 = or > > >+ * later. See the COPYING file in the top-level directory. > > >+ */ > > >+#include > > >+#include > > >+#include > > >+#include > > >+ > > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) > >=20 > > Do we need generic "align", or would a bool hugepage suffice? Or I s= uppose > > this is also used for non-anonymous hugepages via /dev/hugepage where= we > > have multiple huge page sizes? >=20 > Right. Further, passing in alignment makes it possible to > keep page size probing in exec.c which is the only user. >=20 >=20 > > Should we call this function something else, so that we can use it to > > allocate hugepage aligned storage for code_gen_buffer and/or the tbs = array? >=20 >=20 > I don't much care what do we call it - with two callers it would be eas= y > to rename down the road. > The reason I put in "RAM" is because it allocates a guard page > which only seems to make sense for RAM memory that can be mapped. >=20 > But if you have a better name in mind, please let me know. >=20 >=20 > > >+{ > > >+ /* > > >+ * Note: this always allocates at least one extra page of virtu= al address > > >+ * space, even if size is already aligned. > > >+ */ > > >+ size_t total =3D size + align; > > >+ void *ptr =3D mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRI= VATE, -1, 0); > > >+ size_t offset =3D QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintp= tr_t)ptr; > > >+ void *ptr1; > > >+ > > >+ if (ptr =3D=3D MAP_FAILED) { > > >+ return NULL; > > >+ } > > >+ > > >+ /* Make sure align is a power of 2 */ > > >+ assert(!(align & (align - 1))); > > >+ /* Always align to host page size */ > > >+ assert(align >=3D getpagesize()); > >=20 > > Is qemu_real_host_page_size initialized yet? > > If not, should we move the initialization forward? >=20 > I think it is. > Though qemu_real_host_page_size seems to only be needed > for portability, and this file is posix-specific. >=20 > Anyway, we are moving code from util/oslib-posix.c and exec.c that > currently both call getpagesize in this function, it seems better to be > consistent. >=20 > We can refactor afterwards, correct? >=20 >=20 > > >+ > > >+ ptr1 =3D mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > > >+ MAP_FIXED | > > >+ (fd =3D=3D -1 ? MAP_ANONYMOUS : 0) | > > >+ (shared ? MAP_SHARED : MAP_PRIVATE), > > >+ fd, 0); > >=20 > > If fd =3D=3D -1, we really only need mprotect on the region, correct? >=20 > Yes. But that's already like this in util/oslib-posix.c, > this is just re-factoring without logic changes. >=20 > Any reason to prefer mprotect? That would be more LOC. >=20 > > >+ if (ptr1 =3D=3D MAP_FAILED) { > > >+ munmap(ptr, total); > > >+ return NULL; > > >+ } > > >+ > > >+ ptr +=3D offset; > > >+ total -=3D offset; > > >+ > > >+ if (offset > 0) { > > >+ munmap(ptr - offset, offset); > > >+ } > > >+ > > >+ /* > > >+ * Leave a single PROT_NONE page allocated after the RAM block,= to serve as > > >+ * a guard page guarding against potential buffer overflows. > > >+ */ > > >+ if (total > size + getpagesize()) { > > >+ munmap(ptr + size + getpagesize(), total - size - getpagesi= ze()); > > >+ } > > >+ > > >+ return ptr; > > >+}