qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Marc-André Lureau" <mlureau@redhat.com>,
	qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
Date: Fri, 9 Oct 2015 07:53:19 +1100	[thread overview]
Message-ID: <5616D7BF.9030500@twiddle.net> (raw)
In-Reply-To: <20151008233810-mutt-send-email-mst@redhat.com>

On 10/09/2015 07:38 AM, Michael S. Tsirkin wrote:
> Any objections to merging as is, and renaming later
> as appropriate?

None.


r~
>
> On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote:
>> On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
>>> On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
>>>> Anonymous and file-backed RAM allocation are now almost exactly the same.
>>>>
>>>> Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
>>>> exec.c.
>>>>
>>>> Reported-by: Marc-André Lureau <mlureau@redhat.com>
>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
>>>> ---
>>>>
>>>> Changes from v1: add shared flag to get MAP_SHARED mappings
>>>> (for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
>>>>
>>>>   include/qemu/mmap-alloc.h | 10 +++++++
>>>>   exec.c                    | 47 +++++++------------------------
>>>>   util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
>>>>   util/oslib-posix.c        | 28 +++----------------
>>>>   util/Makefile.objs        |  2 +-
>>>>   5 files changed, 96 insertions(+), 62 deletions(-)
>>>>   create mode 100644 include/qemu/mmap-alloc.h
>>>>   create mode 100644 util/mmap-alloc.c
>>>>
>>>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
>>>> new file mode 100644
>>>> index 0000000..56388e6
>>>> --- /dev/null
>>>> +++ b/include/qemu/mmap-alloc.h
>>>> @@ -0,0 +1,10 @@
>>>> +#ifndef QEMU_MMAP_ALLOC
>>>> +#define QEMU_MMAP_ALLOC
>>>> +
>>>> +#include "qemu-common.h"
>>>> +
>>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
>>>> +
>>>> +void qemu_ram_munmap(void *ptr, size_t size);
>>>> +
>>>> +#endif
>>>> diff --git a/exec.c b/exec.c
>>>> index 7d90a52..4505dc7 100644
>>>> --- a/exec.c
>>>> +++ b/exec.c
>>>> @@ -55,6 +55,9 @@
>>>>   #include "exec/ram_addr.h"
>>>>
>>>>   #include "qemu/range.h"
>>>> +#ifndef _WIN32
>>>> +#include "qemu/mmap-alloc.h"
>>>> +#endif
>>>>
>>>>   //#define DEBUG_SUBPAGE
>>>>
>>>> @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
>>>>    */
>>>>   #define RAM_RESIZEABLE (1 << 2)
>>>>
>>>> -/* An extra page is mapped on top of this RAM.
>>>> +/* RAM is backed by an mmapped file.
>>>>    */
>>>> -#define RAM_EXTRA (1 << 3)
>>>> +#define RAM_FILE (1 << 3)
>>>>   #endif
>>>>
>>>>   struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
>>>> @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>       char *filename;
>>>>       char *sanitized_name;
>>>>       char *c;
>>>> -    void *ptr;
>>>> -    void *area = NULL;
>>>> +    void *area;
>>>>       int fd;
>>>>       uint64_t hpagesize;
>>>> -    uint64_t total;
>>>>       Error *local_err = NULL;
>>>> -    size_t offset;
>>>>
>>>>       hpagesize = gethugepagesize(path, &local_err);
>>>>       if (local_err) {
>>>> @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>       g_free(filename);
>>>>
>>>>       memory = ROUND_UP(memory, hpagesize);
>>>> -    total = memory + hpagesize;
>>>>
>>>>       /*
>>>>        * ftruncate is not supported by hugetlbfs in older
>>>> @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>>>           perror("ftruncate");
>>>>       }
>>>>
>>>> -    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
>>>> -                -1, 0);
>>>> -    if (ptr == MAP_FAILED) {
>>>> -        error_setg_errno(errp, errno,
>>>> -                         "unable to allocate memory range for hugepages");
>>>> -        close(fd);
>>>> -        goto error;
>>>> -    }
>>>> -
>>>> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
>>>> -
>>>> -    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
>>>> -                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
>>>> -                MAP_FIXED,
>>>> -                fd, 0);
>>>> +    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
>>>>       if (area == MAP_FAILED) {
>>>>           error_setg_errno(errp, errno,
>>>>                            "unable to map backing store for hugepages");
>>>> -        munmap(ptr, total);
>>>>           close(fd);
>>>>           goto error;
>>>>       }
>>>>
>>>> -    if (offset > 0) {
>>>> -        munmap(ptr, offset);
>>>> -    }
>>>> -    ptr += offset;
>>>> -    total -= offset;
>>>> -
>>>> -    if (total > memory + getpagesize()) {
>>>> -        munmap(ptr + memory + getpagesize(),
>>>> -               total - memory - getpagesize());
>>>> -    }
>>>> -
>>>>       if (mem_prealloc) {
>>>>           os_mem_prealloc(fd, area, memory);
>>>>       }
>>>> @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>>>>       new_block->used_length = size;
>>>>       new_block->max_length = size;
>>>>       new_block->flags = share ? RAM_SHARED : 0;
>>>> -    new_block->flags |= RAM_EXTRA;
>>>> +    new_block->flags |= RAM_FILE;
>>>>       new_block->host = file_ram_alloc(new_block, size,
>>>>                                        mem_path, errp);
>>>>       if (!new_block->host) {
>>>> @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
>>>>           xen_invalidate_map_cache_entry(block->host);
>>>>   #ifndef _WIN32
>>>>       } else if (block->fd >= 0) {
>>>> -        if (block->flags & RAM_EXTRA) {
>>>> -            munmap(block->host, block->max_length + getpagesize());
>>>> +        if (block->flags & RAM_FILE) {
>>>> +            qemu_ram_munmap(block->host, block->max_length);
>>>>           } else {
>>>>               munmap(block->host, block->max_length);
>>>>           }
>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>>>> new file mode 100644
>>>> index 0000000..e82cc94
>>>> --- /dev/null
>>>> +++ b/util/mmap-alloc.c
>>>> @@ -0,0 +1,71 @@
>>>> +/*
>>>> + * Support for RAM backed by mmaped host memory.
>>>> + *
>>>> + * Copyright (c) 2015 Red Hat, Inc.
>>>> + *
>>>> + * Authors:
>>>> + *  Michael S. Tsirkin <mst@redhat.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> + * later.  See the COPYING file in the top-level directory.
>>>> + */
>>>> +#include <qemu/mmap-alloc.h>
>>>> +#include <sys/types.h>
>>>> +#include <sys/mman.h>
>>>> +#include <assert.h>
>>>> +
>>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
>>>
>>> Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
>>> this is also used for non-anonymous hugepages via /dev/hugepage where we
>>> have multiple huge page sizes?
>>
>> Right. Further, passing in alignment makes it possible to
>> keep page size probing in exec.c which is the only user.
>>
>>
>>> Should we call this function something else, so that we can use it to
>>> allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?
>>
>>
>> I don't much care what do we call it - with two callers it would be easy
>> to rename down the road.
>> The reason I put in "RAM" is because it allocates a guard page
>> which only seems to make sense for RAM memory that can be mapped.
>>
>> But if you have a better name in mind, please let me know.
>>
>>
>>>> +{
>>>> +    /*
>>>> +     * Note: this always allocates at least one extra page of virtual address
>>>> +     * space, even if size is already aligned.
>>>> +     */
>>>> +    size_t total = size + align;
>>>> +    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>>>> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
>>>> +    void *ptr1;
>>>> +
>>>> +    if (ptr == MAP_FAILED) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /* Make sure align is a power of 2 */
>>>> +    assert(!(align & (align - 1)));
>>>> +    /* Always align to host page size */
>>>> +    assert(align >= getpagesize());
>>>
>>> Is qemu_real_host_page_size initialized yet?
>>> If not, should we move the initialization forward?
>>
>> I think it is.
>> Though qemu_real_host_page_size seems to only be needed
>> for portability, and this file is posix-specific.
>>
>> Anyway, we are moving code from util/oslib-posix.c and exec.c that
>> currently both call getpagesize in this function, it seems better to be
>> consistent.
>>
>> We can refactor afterwards, correct?
>>
>>
>>>> +
>>>> +    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
>>>> +                MAP_FIXED |
>>>> +                (fd == -1 ? MAP_ANONYMOUS : 0) |
>>>> +                (shared ? MAP_SHARED : MAP_PRIVATE),
>>>> +                fd, 0);
>>>
>>> If fd == -1, we really only need mprotect on the region, correct?
>>
>> Yes. But that's already like this in util/oslib-posix.c,
>> this is just re-factoring without logic changes.
>>
>> Any reason to prefer mprotect? That would be more LOC.
>>
>>>> +    if (ptr1 == MAP_FAILED) {
>>>> +        munmap(ptr, total);
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    ptr += offset;
>>>> +    total -= offset;
>>>> +
>>>> +    if (offset > 0) {
>>>> +        munmap(ptr - offset, offset);
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
>>>> +     * a guard page guarding against potential buffer overflows.
>>>> +     */
>>>> +    if (total > size + getpagesize()) {
>>>> +        munmap(ptr + size + getpagesize(), total - size - getpagesize());
>>>> +    }
>>>> +
>>>> +    return ptr;
>>>> +}

      reply	other threads:[~2015-10-08 21:13 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-01 12:58 [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code Michael S. Tsirkin
2015-10-02  0:48 ` Richard Henderson
2015-10-04  8:38   ` Michael S. Tsirkin
2015-10-08 20:38     ` Michael S. Tsirkin
2015-10-08 20:53       ` Richard Henderson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5616D7BF.9030500@twiddle.net \
    --to=rth@twiddle.net \
    --cc=mlureau@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).