qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2]  exec: factor out duplicate mmap code
@ 2015-10-01 12:58 Michael S. Tsirkin
  2015-10-02  0:48 ` Richard Henderson
  0 siblings, 1 reply; 5+ messages in thread
From: Michael S. Tsirkin @ 2015-10-01 12:58 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?=

Anonymous and file-backed RAM allocation are now almost exactly the same.

Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
exec.c.

Reported-by: Marc-André Lureau <mlureau@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---

Changes from v1: add shared flag to get MAP_SHARED mappings
(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.

 include/qemu/mmap-alloc.h | 10 +++++++
 exec.c                    | 47 +++++++------------------------
 util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 util/oslib-posix.c        | 28 +++----------------
 util/Makefile.objs        |  2 +-
 5 files changed, 96 insertions(+), 62 deletions(-)
 create mode 100644 include/qemu/mmap-alloc.h
 create mode 100644 util/mmap-alloc.c

diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
new file mode 100644
index 0000000..56388e6
--- /dev/null
+++ b/include/qemu/mmap-alloc.h
@@ -0,0 +1,10 @@
+#ifndef QEMU_MMAP_ALLOC
+#define QEMU_MMAP_ALLOC
+
+#include "qemu-common.h"
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
+
+void qemu_ram_munmap(void *ptr, size_t size);
+
+#endif
diff --git a/exec.c b/exec.c
index 7d90a52..4505dc7 100644
--- a/exec.c
+++ b/exec.c
@@ -55,6 +55,9 @@
 #include "exec/ram_addr.h"
 
 #include "qemu/range.h"
+#ifndef _WIN32
+#include "qemu/mmap-alloc.h"
+#endif
 
 //#define DEBUG_SUBPAGE
 
@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
  */
 #define RAM_RESIZEABLE (1 << 2)
 
-/* An extra page is mapped on top of this RAM.
+/* RAM is backed by an mmapped file.
  */
-#define RAM_EXTRA (1 << 3)
+#define RAM_FILE (1 << 3)
 #endif
 
 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
     char *filename;
     char *sanitized_name;
     char *c;
-    void *ptr;
-    void *area = NULL;
+    void *area;
     int fd;
     uint64_t hpagesize;
-    uint64_t total;
     Error *local_err = NULL;
-    size_t offset;
 
     hpagesize = gethugepagesize(path, &local_err);
     if (local_err) {
@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
     g_free(filename);
 
     memory = ROUND_UP(memory, hpagesize);
-    total = memory + hpagesize;
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
-                -1, 0);
-    if (ptr == MAP_FAILED) {
-        error_setg_errno(errp, errno,
-                         "unable to allocate memory range for hugepages");
-        close(fd);
-        goto error;
-    }
-
-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
-
-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
-                MAP_FIXED,
-                fd, 0);
+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for hugepages");
-        munmap(ptr, total);
         close(fd);
         goto error;
     }
 
-    if (offset > 0) {
-        munmap(ptr, offset);
-    }
-    ptr += offset;
-    total -= offset;
-
-    if (total > memory + getpagesize()) {
-        munmap(ptr + memory + getpagesize(),
-               total - memory - getpagesize());
-    }
-
     if (mem_prealloc) {
         os_mem_prealloc(fd, area, memory);
     }
@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = share ? RAM_SHARED : 0;
-    new_block->flags |= RAM_EXTRA;
+    new_block->flags |= RAM_FILE;
     new_block->host = file_ram_alloc(new_block, size,
                                      mem_path, errp);
     if (!new_block->host) {
@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
         xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
     } else if (block->fd >= 0) {
-        if (block->flags & RAM_EXTRA) {
-            munmap(block->host, block->max_length + getpagesize());
+        if (block->flags & RAM_FILE) {
+            qemu_ram_munmap(block->host, block->max_length);
         } else {
             munmap(block->host, block->max_length);
         }
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
new file mode 100644
index 0000000..e82cc94
--- /dev/null
+++ b/util/mmap-alloc.c
@@ -0,0 +1,71 @@
+/* 
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    size_t total = size + align;
+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr1;
+
+    if (ptr == MAP_FAILED) {
+        return NULL;
+    }
+
+    /* Make sure align is a power of 2 */
+    assert(!(align & (align - 1)));
+    /* Always align to host page size */
+    assert(align >= getpagesize());
+
+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+                MAP_FIXED |
+                (fd == -1 ? MAP_ANONYMOUS : 0) |
+                (shared ? MAP_SHARED : MAP_PRIVATE),
+                fd, 0);
+    if (ptr1 == MAP_FAILED) {
+        munmap(ptr, total);
+        return NULL;
+    }
+
+    ptr += offset;
+    total -= offset;
+
+    if (offset > 0) {
+        munmap(ptr - offset, offset);
+    }
+
+    /*
+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+     * a guard page guarding against potential buffer overflows.
+     */
+    if (total > size + getpagesize()) {
+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
+    }
+
+    return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+    if (ptr) {
+        /* Unmap both the RAM block and the guard page */
+        munmap(ptr, size + getpagesize());
+    }
+}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index a0fcdc2..892d2d8 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -72,6 +72,8 @@ extern int daemon(int, int);
 #include <sys/sysctl.h>
 #endif
 
+#include <qemu/mmap-alloc.h>
+
 int qemu_get_thread_id(void)
 {
 #if defined(__linux__)
@@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size)
 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    size_t total = size + align;
-    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
-    void *ptr1;
+    void *ptr = qemu_ram_mmap(-1, size, align, false);
 
     if (ptr == MAP_FAILED) {
         return NULL;
@@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
         *alignment = align;
     }
 
-    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
-                MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-    if (ptr1 == MAP_FAILED) {
-        munmap(ptr, total);
-        return NULL;
-    }
-
-    ptr += offset;
-    total -= offset;
-
-    if (offset > 0) {
-        munmap(ptr - offset, offset);
-    }
-    if (total > size + getpagesize()) {
-        munmap(ptr + size + getpagesize(), total - size - getpagesize());
-    }
-
     trace_qemu_anon_ram_alloc(size, ptr);
     return ptr;
 }
@@ -171,9 +153,7 @@ void qemu_vfree(void *ptr)
 void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
-    if (ptr) {
-        munmap(ptr, size + getpagesize());
-    }
+    qemu_ram_munmap(ptr, size);
 }
 
 void qemu_set_block(int fd)
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 114d657..372e037 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,6 +1,6 @@
 util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
 util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
-util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o mmap-alloc.o
 util-obj-y += envlist.o path.o module.o
 util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
-- 
MST

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
  2015-10-01 12:58 [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code Michael S. Tsirkin
@ 2015-10-02  0:48 ` Richard Henderson
  2015-10-04  8:38   ` Michael S. Tsirkin
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Henderson @ 2015-10-02  0:48 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel; +Cc: Paolo Bonzini, Marc-André Lureau

On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
> Anonymous and file-backed RAM allocation are now almost exactly the same.
>
> Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> exec.c.
>
> Reported-by: Marc-André Lureau <mlureau@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>
> Changes from v1: add shared flag to get MAP_SHARED mappings
> (for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
>
>   include/qemu/mmap-alloc.h | 10 +++++++
>   exec.c                    | 47 +++++++------------------------
>   util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
>   util/oslib-posix.c        | 28 +++----------------
>   util/Makefile.objs        |  2 +-
>   5 files changed, 96 insertions(+), 62 deletions(-)
>   create mode 100644 include/qemu/mmap-alloc.h
>   create mode 100644 util/mmap-alloc.c
>
> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> new file mode 100644
> index 0000000..56388e6
> --- /dev/null
> +++ b/include/qemu/mmap-alloc.h
> @@ -0,0 +1,10 @@
> +#ifndef QEMU_MMAP_ALLOC
> +#define QEMU_MMAP_ALLOC
> +
> +#include "qemu-common.h"
> +
> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> +
> +void qemu_ram_munmap(void *ptr, size_t size);
> +
> +#endif
> diff --git a/exec.c b/exec.c
> index 7d90a52..4505dc7 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -55,6 +55,9 @@
>   #include "exec/ram_addr.h"
>
>   #include "qemu/range.h"
> +#ifndef _WIN32
> +#include "qemu/mmap-alloc.h"
> +#endif
>
>   //#define DEBUG_SUBPAGE
>
> @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
>    */
>   #define RAM_RESIZEABLE (1 << 2)
>
> -/* An extra page is mapped on top of this RAM.
> +/* RAM is backed by an mmapped file.
>    */
> -#define RAM_EXTRA (1 << 3)
> +#define RAM_FILE (1 << 3)
>   #endif
>
>   struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
>       char *filename;
>       char *sanitized_name;
>       char *c;
> -    void *ptr;
> -    void *area = NULL;
> +    void *area;
>       int fd;
>       uint64_t hpagesize;
> -    uint64_t total;
>       Error *local_err = NULL;
> -    size_t offset;
>
>       hpagesize = gethugepagesize(path, &local_err);
>       if (local_err) {
> @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
>       g_free(filename);
>
>       memory = ROUND_UP(memory, hpagesize);
> -    total = memory + hpagesize;
>
>       /*
>        * ftruncate is not supported by hugetlbfs in older
> @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
>           perror("ftruncate");
>       }
>
> -    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> -                -1, 0);
> -    if (ptr == MAP_FAILED) {
> -        error_setg_errno(errp, errno,
> -                         "unable to allocate memory range for hugepages");
> -        close(fd);
> -        goto error;
> -    }
> -
> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> -
> -    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> -                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> -                MAP_FIXED,
> -                fd, 0);
> +    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
>       if (area == MAP_FAILED) {
>           error_setg_errno(errp, errno,
>                            "unable to map backing store for hugepages");
> -        munmap(ptr, total);
>           close(fd);
>           goto error;
>       }
>
> -    if (offset > 0) {
> -        munmap(ptr, offset);
> -    }
> -    ptr += offset;
> -    total -= offset;
> -
> -    if (total > memory + getpagesize()) {
> -        munmap(ptr + memory + getpagesize(),
> -               total - memory - getpagesize());
> -    }
> -
>       if (mem_prealloc) {
>           os_mem_prealloc(fd, area, memory);
>       }
> @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>       new_block->used_length = size;
>       new_block->max_length = size;
>       new_block->flags = share ? RAM_SHARED : 0;
> -    new_block->flags |= RAM_EXTRA;
> +    new_block->flags |= RAM_FILE;
>       new_block->host = file_ram_alloc(new_block, size,
>                                        mem_path, errp);
>       if (!new_block->host) {
> @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
>           xen_invalidate_map_cache_entry(block->host);
>   #ifndef _WIN32
>       } else if (block->fd >= 0) {
> -        if (block->flags & RAM_EXTRA) {
> -            munmap(block->host, block->max_length + getpagesize());
> +        if (block->flags & RAM_FILE) {
> +            qemu_ram_munmap(block->host, block->max_length);
>           } else {
>               munmap(block->host, block->max_length);
>           }
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> new file mode 100644
> index 0000000..e82cc94
> --- /dev/null
> +++ b/util/mmap-alloc.c
> @@ -0,0 +1,71 @@
> +/*
> + * Support for RAM backed by mmaped host memory.
> + *
> + * Copyright (c) 2015 Red Hat, Inc.
> + *
> + * Authors:
> + *  Michael S. Tsirkin <mst@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +#include <qemu/mmap-alloc.h>
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#include <assert.h>
> +
> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)

Do we need generic "align", or would a bool hugepage suffice?  Or I suppose 
this is also used for non-anonymous hugepages via /dev/hugepage where we have 
multiple huge page sizes?

Should we call this function something else, so that we can use it to allocate 
hugepage aligned storage for code_gen_buffer and/or the tbs array?

> +{
> +    /*
> +     * Note: this always allocates at least one extra page of virtual address
> +     * space, even if size is already aligned.
> +     */
> +    size_t total = size + align;
> +    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> +    void *ptr1;
> +
> +    if (ptr == MAP_FAILED) {
> +        return NULL;
> +    }
> +
> +    /* Make sure align is a power of 2 */
> +    assert(!(align & (align - 1)));
> +    /* Always align to host page size */
> +    assert(align >= getpagesize());

Is qemu_real_host_page_size initialized yet?
If not, should we move the initialization forward?

> +
> +    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> +                MAP_FIXED |
> +                (fd == -1 ? MAP_ANONYMOUS : 0) |
> +                (shared ? MAP_SHARED : MAP_PRIVATE),
> +                fd, 0);

If fd == -1, we really only need mprotect on the region, correct?

> +    if (ptr1 == MAP_FAILED) {
> +        munmap(ptr, total);
> +        return NULL;
> +    }
> +
> +    ptr += offset;
> +    total -= offset;
> +
> +    if (offset > 0) {
> +        munmap(ptr - offset, offset);
> +    }
> +
> +    /*
> +     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
> +     * a guard page guarding against potential buffer overflows.
> +     */
> +    if (total > size + getpagesize()) {
> +        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> +    }
> +
> +    return ptr;
> +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
  2015-10-02  0:48 ` Richard Henderson
@ 2015-10-04  8:38   ` Michael S. Tsirkin
  2015-10-08 20:38     ` Michael S. Tsirkin
  0 siblings, 1 reply; 5+ messages in thread
From: Michael S. Tsirkin @ 2015-10-04  8:38 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paolo Bonzini, Marc-André Lureau, qemu-devel

On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
> On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
> >Anonymous and file-backed RAM allocation are now almost exactly the same.
> >
> >Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> >exec.c.
> >
> >Reported-by: Marc-André Lureau <mlureau@redhat.com>
> >Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> >Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> >---
> >
> >Changes from v1: add shared flag to get MAP_SHARED mappings
> >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
> >
> >  include/qemu/mmap-alloc.h | 10 +++++++
> >  exec.c                    | 47 +++++++------------------------
> >  util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
> >  util/oslib-posix.c        | 28 +++----------------
> >  util/Makefile.objs        |  2 +-
> >  5 files changed, 96 insertions(+), 62 deletions(-)
> >  create mode 100644 include/qemu/mmap-alloc.h
> >  create mode 100644 util/mmap-alloc.c
> >
> >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> >new file mode 100644
> >index 0000000..56388e6
> >--- /dev/null
> >+++ b/include/qemu/mmap-alloc.h
> >@@ -0,0 +1,10 @@
> >+#ifndef QEMU_MMAP_ALLOC
> >+#define QEMU_MMAP_ALLOC
> >+
> >+#include "qemu-common.h"
> >+
> >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> >+
> >+void qemu_ram_munmap(void *ptr, size_t size);
> >+
> >+#endif
> >diff --git a/exec.c b/exec.c
> >index 7d90a52..4505dc7 100644
> >--- a/exec.c
> >+++ b/exec.c
> >@@ -55,6 +55,9 @@
> >  #include "exec/ram_addr.h"
> >
> >  #include "qemu/range.h"
> >+#ifndef _WIN32
> >+#include "qemu/mmap-alloc.h"
> >+#endif
> >
> >  //#define DEBUG_SUBPAGE
> >
> >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
> >   */
> >  #define RAM_RESIZEABLE (1 << 2)
> >
> >-/* An extra page is mapped on top of this RAM.
> >+/* RAM is backed by an mmapped file.
> >   */
> >-#define RAM_EXTRA (1 << 3)
> >+#define RAM_FILE (1 << 3)
> >  #endif
> >
> >  struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
> >      char *filename;
> >      char *sanitized_name;
> >      char *c;
> >-    void *ptr;
> >-    void *area = NULL;
> >+    void *area;
> >      int fd;
> >      uint64_t hpagesize;
> >-    uint64_t total;
> >      Error *local_err = NULL;
> >-    size_t offset;
> >
> >      hpagesize = gethugepagesize(path, &local_err);
> >      if (local_err) {
> >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
> >      g_free(filename);
> >
> >      memory = ROUND_UP(memory, hpagesize);
> >-    total = memory + hpagesize;
> >
> >      /*
> >       * ftruncate is not supported by hugetlbfs in older
> >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
> >          perror("ftruncate");
> >      }
> >
> >-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> >-                -1, 0);
> >-    if (ptr == MAP_FAILED) {
> >-        error_setg_errno(errp, errno,
> >-                         "unable to allocate memory range for hugepages");
> >-        close(fd);
> >-        goto error;
> >-    }
> >-
> >-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> >-
> >-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> >-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> >-                MAP_FIXED,
> >-                fd, 0);
> >+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> >      if (area == MAP_FAILED) {
> >          error_setg_errno(errp, errno,
> >                           "unable to map backing store for hugepages");
> >-        munmap(ptr, total);
> >          close(fd);
> >          goto error;
> >      }
> >
> >-    if (offset > 0) {
> >-        munmap(ptr, offset);
> >-    }
> >-    ptr += offset;
> >-    total -= offset;
> >-
> >-    if (total > memory + getpagesize()) {
> >-        munmap(ptr + memory + getpagesize(),
> >-               total - memory - getpagesize());
> >-    }
> >-
> >      if (mem_prealloc) {
> >          os_mem_prealloc(fd, area, memory);
> >      }
> >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
> >      new_block->used_length = size;
> >      new_block->max_length = size;
> >      new_block->flags = share ? RAM_SHARED : 0;
> >-    new_block->flags |= RAM_EXTRA;
> >+    new_block->flags |= RAM_FILE;
> >      new_block->host = file_ram_alloc(new_block, size,
> >                                       mem_path, errp);
> >      if (!new_block->host) {
> >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
> >          xen_invalidate_map_cache_entry(block->host);
> >  #ifndef _WIN32
> >      } else if (block->fd >= 0) {
> >-        if (block->flags & RAM_EXTRA) {
> >-            munmap(block->host, block->max_length + getpagesize());
> >+        if (block->flags & RAM_FILE) {
> >+            qemu_ram_munmap(block->host, block->max_length);
> >          } else {
> >              munmap(block->host, block->max_length);
> >          }
> >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> >new file mode 100644
> >index 0000000..e82cc94
> >--- /dev/null
> >+++ b/util/mmap-alloc.c
> >@@ -0,0 +1,71 @@
> >+/*
> >+ * Support for RAM backed by mmaped host memory.
> >+ *
> >+ * Copyright (c) 2015 Red Hat, Inc.
> >+ *
> >+ * Authors:
> >+ *  Michael S. Tsirkin <mst@redhat.com>
> >+ *
> >+ * This work is licensed under the terms of the GNU GPL, version 2 or
> >+ * later.  See the COPYING file in the top-level directory.
> >+ */
> >+#include <qemu/mmap-alloc.h>
> >+#include <sys/types.h>
> >+#include <sys/mman.h>
> >+#include <assert.h>
> >+
> >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
> 
> Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
> this is also used for non-anonymous hugepages via /dev/hugepage where we
> have multiple huge page sizes?

Right. Further, passing in alignment makes it possible to
keep page size probing in exec.c which is the only user.


> Should we call this function something else, so that we can use it to
> allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?


I don't much care what do we call it - with two callers it would be easy
to rename down the road.
The reason I put in "RAM" is because it allocates a guard page
which only seems to make sense for RAM memory that can be mapped.

But if you have a better name in mind, please let me know.


> >+{
> >+    /*
> >+     * Note: this always allocates at least one extra page of virtual address
> >+     * space, even if size is already aligned.
> >+     */
> >+    size_t total = size + align;
> >+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> >+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> >+    void *ptr1;
> >+
> >+    if (ptr == MAP_FAILED) {
> >+        return NULL;
> >+    }
> >+
> >+    /* Make sure align is a power of 2 */
> >+    assert(!(align & (align - 1)));
> >+    /* Always align to host page size */
> >+    assert(align >= getpagesize());
> 
> Is qemu_real_host_page_size initialized yet?
> If not, should we move the initialization forward?

I think it is.
Though qemu_real_host_page_size seems to only be needed
for portability, and this file is posix-specific.

Anyway, we are moving code from util/oslib-posix.c and exec.c that
currently both call getpagesize in this function, it seems better to be
consistent.

We can refactor afterwards, correct?


> >+
> >+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> >+                MAP_FIXED |
> >+                (fd == -1 ? MAP_ANONYMOUS : 0) |
> >+                (shared ? MAP_SHARED : MAP_PRIVATE),
> >+                fd, 0);
> 
> If fd == -1, we really only need mprotect on the region, correct?

Yes. But that's already like this in util/oslib-posix.c,
this is just re-factoring without logic changes.

Any reason to prefer mprotect? That would be more LOC.

> >+    if (ptr1 == MAP_FAILED) {
> >+        munmap(ptr, total);
> >+        return NULL;
> >+    }
> >+
> >+    ptr += offset;
> >+    total -= offset;
> >+
> >+    if (offset > 0) {
> >+        munmap(ptr - offset, offset);
> >+    }
> >+
> >+    /*
> >+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
> >+     * a guard page guarding against potential buffer overflows.
> >+     */
> >+    if (total > size + getpagesize()) {
> >+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> >+    }
> >+
> >+    return ptr;
> >+}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
  2015-10-04  8:38   ` Michael S. Tsirkin
@ 2015-10-08 20:38     ` Michael S. Tsirkin
  2015-10-08 20:53       ` Richard Henderson
  0 siblings, 1 reply; 5+ messages in thread
From: Michael S. Tsirkin @ 2015-10-08 20:38 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paolo Bonzini, Marc-André Lureau, qemu-devel

Any objections to merging as is, and renaming later
as appropriate?

On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote:
> On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
> > On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
> > >Anonymous and file-backed RAM allocation are now almost exactly the same.
> > >
> > >Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> > >exec.c.
> > >
> > >Reported-by: Marc-André Lureau <mlureau@redhat.com>
> > >Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> > >Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> > >---
> > >
> > >Changes from v1: add shared flag to get MAP_SHARED mappings
> > >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
> > >
> > >  include/qemu/mmap-alloc.h | 10 +++++++
> > >  exec.c                    | 47 +++++++------------------------
> > >  util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
> > >  util/oslib-posix.c        | 28 +++----------------
> > >  util/Makefile.objs        |  2 +-
> > >  5 files changed, 96 insertions(+), 62 deletions(-)
> > >  create mode 100644 include/qemu/mmap-alloc.h
> > >  create mode 100644 util/mmap-alloc.c
> > >
> > >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> > >new file mode 100644
> > >index 0000000..56388e6
> > >--- /dev/null
> > >+++ b/include/qemu/mmap-alloc.h
> > >@@ -0,0 +1,10 @@
> > >+#ifndef QEMU_MMAP_ALLOC
> > >+#define QEMU_MMAP_ALLOC
> > >+
> > >+#include "qemu-common.h"
> > >+
> > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> > >+
> > >+void qemu_ram_munmap(void *ptr, size_t size);
> > >+
> > >+#endif
> > >diff --git a/exec.c b/exec.c
> > >index 7d90a52..4505dc7 100644
> > >--- a/exec.c
> > >+++ b/exec.c
> > >@@ -55,6 +55,9 @@
> > >  #include "exec/ram_addr.h"
> > >
> > >  #include "qemu/range.h"
> > >+#ifndef _WIN32
> > >+#include "qemu/mmap-alloc.h"
> > >+#endif
> > >
> > >  //#define DEBUG_SUBPAGE
> > >
> > >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
> > >   */
> > >  #define RAM_RESIZEABLE (1 << 2)
> > >
> > >-/* An extra page is mapped on top of this RAM.
> > >+/* RAM is backed by an mmapped file.
> > >   */
> > >-#define RAM_EXTRA (1 << 3)
> > >+#define RAM_FILE (1 << 3)
> > >  #endif
> > >
> > >  struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> > >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
> > >      char *filename;
> > >      char *sanitized_name;
> > >      char *c;
> > >-    void *ptr;
> > >-    void *area = NULL;
> > >+    void *area;
> > >      int fd;
> > >      uint64_t hpagesize;
> > >-    uint64_t total;
> > >      Error *local_err = NULL;
> > >-    size_t offset;
> > >
> > >      hpagesize = gethugepagesize(path, &local_err);
> > >      if (local_err) {
> > >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
> > >      g_free(filename);
> > >
> > >      memory = ROUND_UP(memory, hpagesize);
> > >-    total = memory + hpagesize;
> > >
> > >      /*
> > >       * ftruncate is not supported by hugetlbfs in older
> > >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > >          perror("ftruncate");
> > >      }
> > >
> > >-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> > >-                -1, 0);
> > >-    if (ptr == MAP_FAILED) {
> > >-        error_setg_errno(errp, errno,
> > >-                         "unable to allocate memory range for hugepages");
> > >-        close(fd);
> > >-        goto error;
> > >-    }
> > >-
> > >-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> > >-
> > >-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> > >-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> > >-                MAP_FIXED,
> > >-                fd, 0);
> > >+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> > >      if (area == MAP_FAILED) {
> > >          error_setg_errno(errp, errno,
> > >                           "unable to map backing store for hugepages");
> > >-        munmap(ptr, total);
> > >          close(fd);
> > >          goto error;
> > >      }
> > >
> > >-    if (offset > 0) {
> > >-        munmap(ptr, offset);
> > >-    }
> > >-    ptr += offset;
> > >-    total -= offset;
> > >-
> > >-    if (total > memory + getpagesize()) {
> > >-        munmap(ptr + memory + getpagesize(),
> > >-               total - memory - getpagesize());
> > >-    }
> > >-
> > >      if (mem_prealloc) {
> > >          os_mem_prealloc(fd, area, memory);
> > >      }
> > >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
> > >      new_block->used_length = size;
> > >      new_block->max_length = size;
> > >      new_block->flags = share ? RAM_SHARED : 0;
> > >-    new_block->flags |= RAM_EXTRA;
> > >+    new_block->flags |= RAM_FILE;
> > >      new_block->host = file_ram_alloc(new_block, size,
> > >                                       mem_path, errp);
> > >      if (!new_block->host) {
> > >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
> > >          xen_invalidate_map_cache_entry(block->host);
> > >  #ifndef _WIN32
> > >      } else if (block->fd >= 0) {
> > >-        if (block->flags & RAM_EXTRA) {
> > >-            munmap(block->host, block->max_length + getpagesize());
> > >+        if (block->flags & RAM_FILE) {
> > >+            qemu_ram_munmap(block->host, block->max_length);
> > >          } else {
> > >              munmap(block->host, block->max_length);
> > >          }
> > >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> > >new file mode 100644
> > >index 0000000..e82cc94
> > >--- /dev/null
> > >+++ b/util/mmap-alloc.c
> > >@@ -0,0 +1,71 @@
> > >+/*
> > >+ * Support for RAM backed by mmaped host memory.
> > >+ *
> > >+ * Copyright (c) 2015 Red Hat, Inc.
> > >+ *
> > >+ * Authors:
> > >+ *  Michael S. Tsirkin <mst@redhat.com>
> > >+ *
> > >+ * This work is licensed under the terms of the GNU GPL, version 2 or
> > >+ * later.  See the COPYING file in the top-level directory.
> > >+ */
> > >+#include <qemu/mmap-alloc.h>
> > >+#include <sys/types.h>
> > >+#include <sys/mman.h>
> > >+#include <assert.h>
> > >+
> > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
> > 
> > Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
> > this is also used for non-anonymous hugepages via /dev/hugepage where we
> > have multiple huge page sizes?
> 
> Right. Further, passing in alignment makes it possible to
> keep page size probing in exec.c which is the only user.
> 
> 
> > Should we call this function something else, so that we can use it to
> > allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?
> 
> 
> I don't much care what do we call it - with two callers it would be easy
> to rename down the road.
> The reason I put in "RAM" is because it allocates a guard page
> which only seems to make sense for RAM memory that can be mapped.
> 
> But if you have a better name in mind, please let me know.
> 
> 
> > >+{
> > >+    /*
> > >+     * Note: this always allocates at least one extra page of virtual address
> > >+     * space, even if size is already aligned.
> > >+     */
> > >+    size_t total = size + align;
> > >+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> > >+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> > >+    void *ptr1;
> > >+
> > >+    if (ptr == MAP_FAILED) {
> > >+        return NULL;
> > >+    }
> > >+
> > >+    /* Make sure align is a power of 2 */
> > >+    assert(!(align & (align - 1)));
> > >+    /* Always align to host page size */
> > >+    assert(align >= getpagesize());
> > 
> > Is qemu_real_host_page_size initialized yet?
> > If not, should we move the initialization forward?
> 
> I think it is.
> Though qemu_real_host_page_size seems to only be needed
> for portability, and this file is posix-specific.
> 
> Anyway, we are moving code from util/oslib-posix.c and exec.c that
> currently both call getpagesize in this function, it seems better to be
> consistent.
> 
> We can refactor afterwards, correct?
> 
> 
> > >+
> > >+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> > >+                MAP_FIXED |
> > >+                (fd == -1 ? MAP_ANONYMOUS : 0) |
> > >+                (shared ? MAP_SHARED : MAP_PRIVATE),
> > >+                fd, 0);
> > 
> > If fd == -1, we really only need mprotect on the region, correct?
> 
> Yes. But that's already like this in util/oslib-posix.c,
> this is just re-factoring without logic changes.
> 
> Any reason to prefer mprotect? That would be more LOC.
> 
> > >+    if (ptr1 == MAP_FAILED) {
> > >+        munmap(ptr, total);
> > >+        return NULL;
> > >+    }
> > >+
> > >+    ptr += offset;
> > >+    total -= offset;
> > >+
> > >+    if (offset > 0) {
> > >+        munmap(ptr - offset, offset);
> > >+    }
> > >+
> > >+    /*
> > >+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
> > >+     * a guard page guarding against potential buffer overflows.
> > >+     */
> > >+    if (total > size + getpagesize()) {
> > >+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> > >+    }
> > >+
> > >+    return ptr;
> > >+}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
  2015-10-08 20:38     ` Michael S. Tsirkin
@ 2015-10-08 20:53       ` Richard Henderson
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2015-10-08 20:53 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Paolo Bonzini, Marc-André Lureau, qemu-devel

On 10/09/2015 07:38 AM, Michael S. Tsirkin wrote:
> Any objections to merging as is, and renaming later
> as appropriate?

None.


r~
>
> On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote:
>> On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
>>> On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
>>>> Anonymous and file-backed RAM allocation are now almost exactly the same.
>>>>
>>>> Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
>>>> exec.c.
>>>>
>>>> Reported-by: Marc-André Lureau <mlureau@redhat.com>
>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> ---
>>>>
>>>> Changes from v1: add shared flag to get MAP_SHARED mappings
>>>> (for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
>>>>
>>>>   include/qemu/mmap-alloc.h | 10 +++++++
>>>>   exec.c                    | 47 +++++++------------------------
>>>>   util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
>>>>   util/oslib-posix.c        | 28 +++----------------
>>>>   util/Makefile.objs        |  2 +-
>>>>   5 files changed, 96 insertions(+), 62 deletions(-)
>>>>   create mode 100644 include/qemu/mmap-alloc.h
>>>>   create mode 100644 util/mmap-alloc.c
>>>>
>>>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
>>>> new file mode 100644
>>>> index 0000000..56388e6
>>>> --- /dev/null
>>>> +++ b/include/qemu/mmap-alloc.h
>>>> @@ -0,0 +1,10 @@
>>>> +#ifndef QEMU_MMAP_ALLOC
>>>> +#define QEMU_MMAP_ALLOC
>>>> +
>>>> +#include "qemu-common.h"
>>>> +
>>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
>>>> +
>>>> +void qemu_ram_munmap(void *ptr, size_t size);
>>>> +
>>>> +#endif
>>>> diff --git a/exec.c b/exec.c
>>>> index 7d90a52..4505dc7 100644
>>>> --- a/exec.c
>>>> +++ b/exec.c
>>>> @@ -55,6 +55,9 @@
>>>>   #include "exec/ram_addr.h"
>>>>
>>>>   #include "qemu/range.h"
>>>> +#ifndef _WIN32
>>>> +#include "qemu/mmap-alloc.h"
>>>> +#endif
>>>>
>>>>   //#define DEBUG_SUBPAGE
>>>>
>>>> @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
>>>>    */
>>>>   #define RAM_RESIZEABLE (1 << 2)
>>>>
>>>> -/* An extra page is mapped on top of this RAM.
>>>> +/* RAM is backed by an mmapped file.
>>>>    */
>>>> -#define RAM_EXTRA (1 << 3)
>>>> +#define RAM_FILE (1 << 3)
>>>>   #endif
>>>>
>>>>   struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
>>>> @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>       char *filename;
>>>>       char *sanitized_name;
>>>>       char *c;
>>>> -    void *ptr;
>>>> -    void *area = NULL;
>>>> +    void *area;
>>>>       int fd;
>>>>       uint64_t hpagesize;
>>>> -    uint64_t total;
>>>>       Error *local_err = NULL;
>>>> -    size_t offset;
>>>>
>>>>       hpagesize = gethugepagesize(path, &local_err);
>>>>       if (local_err) {
>>>> @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>       g_free(filename);
>>>>
>>>>       memory = ROUND_UP(memory, hpagesize);
>>>> -    total = memory + hpagesize;
>>>>
>>>>       /*
>>>>        * ftruncate is not supported by hugetlbfs in older
>>>> @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>           perror("ftruncate");
>>>>       }
>>>>
>>>> -    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
>>>> -                -1, 0);
>>>> -    if (ptr == MAP_FAILED) {
>>>> -        error_setg_errno(errp, errno,
>>>> -                         "unable to allocate memory range for hugepages");
>>>> -        close(fd);
>>>> -        goto error;
>>>> -    }
>>>> -
>>>> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
>>>> -
>>>> -    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
>>>> -                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
>>>> -                MAP_FIXED,
>>>> -                fd, 0);
>>>> +    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
>>>>       if (area == MAP_FAILED) {
>>>>           error_setg_errno(errp, errno,
>>>>                            "unable to map backing store for hugepages");
>>>> -        munmap(ptr, total);
>>>>           close(fd);
>>>>           goto error;
>>>>       }
>>>>
>>>> -    if (offset > 0) {
>>>> -        munmap(ptr, offset);
>>>> -    }
>>>> -    ptr += offset;
>>>> -    total -= offset;
>>>> -
>>>> -    if (total > memory + getpagesize()) {
>>>> -        munmap(ptr + memory + getpagesize(),
>>>> -               total - memory - getpagesize());
>>>> -    }
>>>> -
>>>>       if (mem_prealloc) {
>>>>           os_mem_prealloc(fd, area, memory);
>>>>       }
>>>> @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>>>>       new_block->used_length = size;
>>>>       new_block->max_length = size;
>>>>       new_block->flags = share ? RAM_SHARED : 0;
>>>> -    new_block->flags |= RAM_EXTRA;
>>>> +    new_block->flags |= RAM_FILE;
>>>>       new_block->host = file_ram_alloc(new_block, size,
>>>>                                        mem_path, errp);
>>>>       if (!new_block->host) {
>>>> @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
>>>>           xen_invalidate_map_cache_entry(block->host);
>>>>   #ifndef _WIN32
>>>>       } else if (block->fd >= 0) {
>>>> -        if (block->flags & RAM_EXTRA) {
>>>> -            munmap(block->host, block->max_length + getpagesize());
>>>> +        if (block->flags & RAM_FILE) {
>>>> +            qemu_ram_munmap(block->host, block->max_length);
>>>>           } else {
>>>>               munmap(block->host, block->max_length);
>>>>           }
>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>>> new file mode 100644
>>>> index 0000000..e82cc94
>>>> --- /dev/null
>>>> +++ b/util/mmap-alloc.c
>>>> @@ -0,0 +1,71 @@
>>>> +/*
>>>> + * Support for RAM backed by mmaped host memory.
>>>> + *
>>>> + * Copyright (c) 2015 Red Hat, Inc.
>>>> + *
>>>> + * Authors:
>>>> + *  Michael S. Tsirkin <mst@redhat.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> + * later.  See the COPYING file in the top-level directory.
>>>> + */
>>>> +#include <qemu/mmap-alloc.h>
>>>> +#include <sys/types.h>
>>>> +#include <sys/mman.h>
>>>> +#include <assert.h>
>>>> +
>>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
>>>
>>> Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
>>> this is also used for non-anonymous hugepages via /dev/hugepage where we
>>> have multiple huge page sizes?
>>
>> Right. Further, passing in alignment makes it possible to
>> keep page size probing in exec.c which is the only user.
>>
>>
>>> Should we call this function something else, so that we can use it to
>>> allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?
>>
>>
>> I don't much care what do we call it - with two callers it would be easy
>> to rename down the road.
>> The reason I put in "RAM" is because it allocates a guard page
>> which only seems to make sense for RAM memory that can be mapped.
>>
>> But if you have a better name in mind, please let me know.
>>
>>
>>>> +{
>>>> +    /*
>>>> +     * Note: this always allocates at least one extra page of virtual address
>>>> +     * space, even if size is already aligned.
>>>> +     */
>>>> +    size_t total = size + align;
>>>> +    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>>>> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
>>>> +    void *ptr1;
>>>> +
>>>> +    if (ptr == MAP_FAILED) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /* Make sure align is a power of 2 */
>>>> +    assert(!(align & (align - 1)));
>>>> +    /* Always align to host page size */
>>>> +    assert(align >= getpagesize());
>>>
>>> Is qemu_real_host_page_size initialized yet?
>>> If not, should we move the initialization forward?
>>
>> I think it is.
>> Though qemu_real_host_page_size seems to only be needed
>> for portability, and this file is posix-specific.
>>
>> Anyway, we are moving code from util/oslib-posix.c and exec.c that
>> currently both call getpagesize in this function, it seems better to be
>> consistent.
>>
>> We can refactor afterwards, correct?
>>
>>
>>>> +
>>>> +    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
>>>> +                MAP_FIXED |
>>>> +                (fd == -1 ? MAP_ANONYMOUS : 0) |
>>>> +                (shared ? MAP_SHARED : MAP_PRIVATE),
>>>> +                fd, 0);
>>>
>>> If fd == -1, we really only need mprotect on the region, correct?
>>
>> Yes. But that's already like this in util/oslib-posix.c,
>> this is just re-factoring without logic changes.
>>
>> Any reason to prefer mprotect? That would be more LOC.
>>
>>>> +    if (ptr1 == MAP_FAILED) {
>>>> +        munmap(ptr, total);
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    ptr += offset;
>>>> +    total -= offset;
>>>> +
>>>> +    if (offset > 0) {
>>>> +        munmap(ptr - offset, offset);
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
>>>> +     * a guard page guarding against potential buffer overflows.
>>>> +     */
>>>> +    if (total > size + getpagesize()) {
>>>> +        munmap(ptr + size + getpagesize(), total - size - getpagesize());
>>>> +    }
>>>> +
>>>> +    return ptr;
>>>> +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-10-08 21:13 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-01 12:58 [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code Michael S. Tsirkin
2015-10-02  0:48 ` Richard Henderson
2015-10-04  8:38   ` Michael S. Tsirkin
2015-10-08 20:38     ` Michael S. Tsirkin
2015-10-08 20:53       ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).