All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Richard Henderson <rth@twiddle.net>
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Marc-André Lureau" <mlureau@redhat.com>,
	qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
Date: Sun, 4 Oct 2015 11:38:59 +0300	[thread overview]
Message-ID: <20151004110510-mutt-send-email-mst@redhat.com> (raw)
In-Reply-To: <560DD44D.9050309@twiddle.net>

On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
> On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
> >Anonymous and file-backed RAM allocation are now almost exactly the same.
> >
> >Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> >exec.c.
> >
> >Reported-by: Marc-André Lureau <mlureau@redhat.com>
> >Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> >Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> >---
> >
> >Changes from v1: add shared flag to get MAP_SHARED mappings
> >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
> >
> >  include/qemu/mmap-alloc.h | 10 +++++++
> >  exec.c                    | 47 +++++++------------------------
> >  util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
> >  util/oslib-posix.c        | 28 +++----------------
> >  util/Makefile.objs        |  2 +-
> >  5 files changed, 96 insertions(+), 62 deletions(-)
> >  create mode 100644 include/qemu/mmap-alloc.h
> >  create mode 100644 util/mmap-alloc.c
> >
> >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> >new file mode 100644
> >index 0000000..56388e6
> >--- /dev/null
> >+++ b/include/qemu/mmap-alloc.h
> >@@ -0,0 +1,10 @@
> >+#ifndef QEMU_MMAP_ALLOC
> >+#define QEMU_MMAP_ALLOC
> >+
> >+#include "qemu-common.h"
> >+
> >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> >+
> >+void qemu_ram_munmap(void *ptr, size_t size);
> >+
> >+#endif
> >diff --git a/exec.c b/exec.c
> >index 7d90a52..4505dc7 100644
> >--- a/exec.c
> >+++ b/exec.c
> >@@ -55,6 +55,9 @@
> >  #include "exec/ram_addr.h"
> >
> >  #include "qemu/range.h"
> >+#ifndef _WIN32
> >+#include "qemu/mmap-alloc.h"
> >+#endif
> >
> >  //#define DEBUG_SUBPAGE
> >
> >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
> >   */
> >  #define RAM_RESIZEABLE (1 << 2)
> >
> >-/* An extra page is mapped on top of this RAM.
> >+/* RAM is backed by an mmapped file.
> >   */
> >-#define RAM_EXTRA (1 << 3)
> >+#define RAM_FILE (1 << 3)
> >  #endif
> >
> >  struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
> >      char *filename;
> >      char *sanitized_name;
> >      char *c;
> >-    void *ptr;
> >-    void *area = NULL;
> >+    void *area;
> >      int fd;
> >      uint64_t hpagesize;
> >-    uint64_t total;
> >      Error *local_err = NULL;
> >-    size_t offset;
> >
> >      hpagesize = gethugepagesize(path, &local_err);
> >      if (local_err) {
> >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
> >      g_free(filename);
> >
> >      memory = ROUND_UP(memory, hpagesize);
> >-    total = memory + hpagesize;
> >
> >      /*
> >       * ftruncate is not supported by hugetlbfs in older
> >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
> >          perror("ftruncate");
> >      }
> >
> >-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> >-                -1, 0);
> >-    if (ptr == MAP_FAILED) {
> >-        error_setg_errno(errp, errno,
> >-                         "unable to allocate memory range for hugepages");
> >-        close(fd);
> >-        goto error;
> >-    }
> >-
> >-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> >-
> >-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> >-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> >-                MAP_FIXED,
> >-                fd, 0);
> >+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> >      if (area == MAP_FAILED) {
> >          error_setg_errno(errp, errno,
> >                           "unable to map backing store for hugepages");
> >-        munmap(ptr, total);
> >          close(fd);
> >          goto error;
> >      }
> >
> >-    if (offset > 0) {
> >-        munmap(ptr, offset);
> >-    }
> >-    ptr += offset;
> >-    total -= offset;
> >-
> >-    if (total > memory + getpagesize()) {
> >-        munmap(ptr + memory + getpagesize(),
> >-               total - memory - getpagesize());
> >-    }
> >-
> >      if (mem_prealloc) {
> >          os_mem_prealloc(fd, area, memory);
> >      }
> >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
> >      new_block->used_length = size;
> >      new_block->max_length = size;
> >      new_block->flags = share ? RAM_SHARED : 0;
> >-    new_block->flags |= RAM_EXTRA;
> >+    new_block->flags |= RAM_FILE;
> >      new_block->host = file_ram_alloc(new_block, size,
> >                                       mem_path, errp);
> >      if (!new_block->host) {
> >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
> >          xen_invalidate_map_cache_entry(block->host);
> >  #ifndef _WIN32
> >      } else if (block->fd >= 0) {
> >-        if (block->flags & RAM_EXTRA) {
> >-            munmap(block->host, block->max_length + getpagesize());
> >+        if (block->flags & RAM_FILE) {
> >+            qemu_ram_munmap(block->host, block->max_length);
> >          } else {
> >              munmap(block->host, block->max_length);
> >          }
> >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> >new file mode 100644
> >index 0000000..e82cc94
> >--- /dev/null
> >+++ b/util/mmap-alloc.c
> >@@ -0,0 +1,71 @@
> >+/*
> >+ * Support for RAM backed by mmaped host memory.
> >+ *
> >+ * Copyright (c) 2015 Red Hat, Inc.
> >+ *
> >+ * Authors:
> >+ *  Michael S. Tsirkin <mst@redhat.com>
> >+ *
> >+ * This work is licensed under the terms of the GNU GPL, version 2 or
> >+ * later.  See the COPYING file in the top-level directory.
> >+ */
> >+#include <qemu/mmap-alloc.h>
> >+#include <sys/types.h>
> >+#include <sys/mman.h>
> >+#include <assert.h>
> >+
> >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
> 
> Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
> this is also used for non-anonymous hugepages via /dev/hugepage where we
> have multiple huge page sizes?

Right. Further, passing in alignment makes it possible to
keep page size probing in exec.c which is the only user.


> Should we call this function something else, so that we can use it to
> allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?


I don't much care what do we call it - with two callers it would be easy
to rename down the road.
The reason I put in "RAM" is because it allocates a guard page
which only seems to make sense for RAM memory that can be mapped.

But if you have a better name in mind, please let me know.


> >+{
> >+    /*
> >+     * Note: this always allocates at least one extra page of virtual address
> >+     * space, even if size is already aligned.
> >+     */
> >+    size_t total = size + align;
> >+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> >+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> >+    void *ptr1;
> >+
> >+    if (ptr == MAP_FAILED) {
> >+        return NULL;
> >+    }
> >+
> >+    /* Make sure align is a power of 2 */
> >+    assert(!(align & (align - 1)));
> >+    /* Always align to host page size */
> >+    assert(align >= getpagesize());
> 
> Is qemu_real_host_page_size initialized yet?
> If not, should we move the initialization forward?

I think it is.
Though qemu_real_host_page_size seems to only be needed
for portability, and this file is posix-specific.

Anyway, we are moving code from util/oslib-posix.c and exec.c that
currently both call getpagesize in this function, it seems better to be
consistent.

We can refactor afterwards, correct?


> >+
> >+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> >+                MAP_FIXED |
> >+                (fd == -1 ? MAP_ANONYMOUS : 0) |
> >+                (shared ? MAP_SHARED : MAP_PRIVATE),
> >+                fd, 0);
> 
> If fd == -1, we really only need mprotect on the region, correct?

Yes. But that's already like this in util/oslib-posix.c,
this is just re-factoring without logic changes.

Any reason to prefer mprotect? That would be more LOC.

> >+    if (ptr1 == MAP_FAILED) {
> >+        munmap(ptr, total);
> >+        return NULL;
> >+    }
> >+
> >+    ptr += offset;
> >+    total -= offset;
> >+
> >+    if (offset > 0) {
> >+        munmap(ptr - offset, offset);
> >+    }
> >+
> >+    /*
> >+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
> >+     * a guard page guarding against potential buffer overflows.
> >+     */
> >+    if (total > size + getpagesize()) {
> >+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> >+    }
> >+
> >+    return ptr;
> >+}

  reply	other threads:[~2015-10-04  8:39 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-01 12:58 [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code Michael S. Tsirkin
2015-10-02  0:48 ` Richard Henderson
2015-10-04  8:38   ` Michael S. Tsirkin [this message]
2015-10-08 20:38     ` Michael S. Tsirkin
2015-10-08 20:53       ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151004110510-mutt-send-email-mst@redhat.com \
    --to=mst@redhat.com \
    --cc=mlureau@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.