From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:57791) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZkIVS-0004hV-Jv for qemu-devel@nongnu.org; Thu, 08 Oct 2015 17:13:44 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZkIVP-0006dh-8J for qemu-devel@nongnu.org; Thu, 08 Oct 2015 17:13:42 -0400 Received: from mail-pa0-x236.google.com ([2607:f8b0:400e:c03::236]:33894) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZkIVP-0006dY-00 for qemu-devel@nongnu.org; Thu, 08 Oct 2015 17:13:39 -0400 Received: by padhy16 with SMTP id hy16so64877667pad.1 for ; Thu, 08 Oct 2015 14:13:38 -0700 (PDT) Sender: Richard Henderson References: <1443704197-14329-1-git-send-email-mst@redhat.com> <560DD44D.9050309@twiddle.net> <20151004110510-mutt-send-email-mst@redhat.com> <20151008233810-mutt-send-email-mst@redhat.com> From: Richard Henderson Message-ID: <5616D7BF.9030500@twiddle.net> Date: Fri, 9 Oct 2015 07:53:19 +1100 MIME-Version: 1.0 In-Reply-To: <20151008233810-mutt-send-email-mst@redhat.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Michael S. Tsirkin" Cc: Paolo Bonzini , =?UTF-8?Q?Marc-Andr=c3=a9_Lureau?= , qemu-devel@nongnu.org On 10/09/2015 07:38 AM, Michael S. Tsirkin wrote: > Any objections to merging as is, and renaming later > as appropriate? None. r~ > > On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote: >> On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote: >>> On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote: >>>> Anonymous and file-backed RAM allocation are now almost exactly the same. >>>> >>>> Reduce code duplication by moving RAM mmap code out of oslib-posix.c and >>>> exec.c. >>>> >>>> Reported-by: Marc-André Lureau >>>> Signed-off-by: Michael S. Tsirkin >>>> Reviewed-by: Paolo Bonzini >>>> Acked-by: Paolo Bonzini >>>> --- >>>> >>>> Changes from v1: add shared flag to get MAP_SHARED mappings >>>> (for vhost-user), only set MAP_ANONYMOUS for anonymous RAM. >>>> >>>> include/qemu/mmap-alloc.h | 10 +++++++ >>>> exec.c | 47 +++++++------------------------ >>>> util/mmap-alloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ >>>> util/oslib-posix.c | 28 +++---------------- >>>> util/Makefile.objs | 2 +- >>>> 5 files changed, 96 insertions(+), 62 deletions(-) >>>> create mode 100644 include/qemu/mmap-alloc.h >>>> create mode 100644 util/mmap-alloc.c >>>> >>>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h >>>> new file mode 100644 >>>> index 0000000..56388e6 >>>> --- /dev/null >>>> +++ b/include/qemu/mmap-alloc.h >>>> @@ -0,0 +1,10 @@ >>>> +#ifndef QEMU_MMAP_ALLOC >>>> +#define QEMU_MMAP_ALLOC >>>> + >>>> +#include "qemu-common.h" >>>> + >>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); >>>> + >>>> +void qemu_ram_munmap(void *ptr, size_t size); >>>> + >>>> +#endif >>>> diff --git a/exec.c b/exec.c >>>> index 7d90a52..4505dc7 100644 >>>> --- a/exec.c >>>> +++ b/exec.c >>>> @@ -55,6 +55,9 @@ >>>> #include "exec/ram_addr.h" >>>> >>>> #include "qemu/range.h" >>>> +#ifndef _WIN32 >>>> +#include "qemu/mmap-alloc.h" >>>> +#endif >>>> >>>> //#define DEBUG_SUBPAGE >>>> >>>> @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; >>>> */ >>>> #define RAM_RESIZEABLE (1 << 2) >>>> >>>> -/* An extra page is mapped on top of this RAM. >>>> +/* RAM is backed by an mmapped file. >>>> */ >>>> -#define RAM_EXTRA (1 << 3) >>>> +#define RAM_FILE (1 << 3) >>>> #endif >>>> >>>> struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); >>>> @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, >>>> char *filename; >>>> char *sanitized_name; >>>> char *c; >>>> - void *ptr; >>>> - void *area = NULL; >>>> + void *area; >>>> int fd; >>>> uint64_t hpagesize; >>>> - uint64_t total; >>>> Error *local_err = NULL; >>>> - size_t offset; >>>> >>>> hpagesize = gethugepagesize(path, &local_err); >>>> if (local_err) { >>>> @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, >>>> g_free(filename); >>>> >>>> memory = ROUND_UP(memory, hpagesize); >>>> - total = memory + hpagesize; >>>> >>>> /* >>>> * ftruncate is not supported by hugetlbfs in older >>>> @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, >>>> perror("ftruncate"); >>>> } >>>> >>>> - ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, >>>> - -1, 0); >>>> - if (ptr == MAP_FAILED) { >>>> - error_setg_errno(errp, errno, >>>> - "unable to allocate memory range for hugepages"); >>>> - close(fd); >>>> - goto error; >>>> - } >>>> - >>>> - offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; >>>> - >>>> - area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, >>>> - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | >>>> - MAP_FIXED, >>>> - fd, 0); >>>> + area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); >>>> if (area == MAP_FAILED) { >>>> error_setg_errno(errp, errno, >>>> "unable to map backing store for hugepages"); >>>> - munmap(ptr, total); >>>> close(fd); >>>> goto error; >>>> } >>>> >>>> - if (offset > 0) { >>>> - munmap(ptr, offset); >>>> - } >>>> - ptr += offset; >>>> - total -= offset; >>>> - >>>> - if (total > memory + getpagesize()) { >>>> - munmap(ptr + memory + getpagesize(), >>>> - total - memory - getpagesize()); >>>> - } >>>> - >>>> if (mem_prealloc) { >>>> os_mem_prealloc(fd, area, memory); >>>> } >>>> @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, >>>> new_block->used_length = size; >>>> new_block->max_length = size; >>>> new_block->flags = share ? RAM_SHARED : 0; >>>> - new_block->flags |= RAM_EXTRA; >>>> + new_block->flags |= RAM_FILE; >>>> new_block->host = file_ram_alloc(new_block, size, >>>> mem_path, errp); >>>> if (!new_block->host) { >>>> @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) >>>> xen_invalidate_map_cache_entry(block->host); >>>> #ifndef _WIN32 >>>> } else if (block->fd >= 0) { >>>> - if (block->flags & RAM_EXTRA) { >>>> - munmap(block->host, block->max_length + getpagesize()); >>>> + if (block->flags & RAM_FILE) { >>>> + qemu_ram_munmap(block->host, block->max_length); >>>> } else { >>>> munmap(block->host, block->max_length); >>>> } >>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c >>>> new file mode 100644 >>>> index 0000000..e82cc94 >>>> --- /dev/null >>>> +++ b/util/mmap-alloc.c >>>> @@ -0,0 +1,71 @@ >>>> +/* >>>> + * Support for RAM backed by mmaped host memory. >>>> + * >>>> + * Copyright (c) 2015 Red Hat, Inc. >>>> + * >>>> + * Authors: >>>> + * Michael S. Tsirkin >>>> + * >>>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>>> + * later. See the COPYING file in the top-level directory. >>>> + */ >>>> +#include >>>> +#include >>>> +#include >>>> +#include >>>> + >>>> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) >>> >>> Do we need generic "align", or would a bool hugepage suffice? Or I suppose >>> this is also used for non-anonymous hugepages via /dev/hugepage where we >>> have multiple huge page sizes? >> >> Right. Further, passing in alignment makes it possible to >> keep page size probing in exec.c which is the only user. >> >> >>> Should we call this function something else, so that we can use it to >>> allocate hugepage aligned storage for code_gen_buffer and/or the tbs array? >> >> >> I don't much care what do we call it - with two callers it would be easy >> to rename down the road. >> The reason I put in "RAM" is because it allocates a guard page >> which only seems to make sense for RAM memory that can be mapped. >> >> But if you have a better name in mind, please let me know. >> >> >>>> +{ >>>> + /* >>>> + * Note: this always allocates at least one extra page of virtual address >>>> + * space, even if size is already aligned. >>>> + */ >>>> + size_t total = size + align; >>>> + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); >>>> + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; >>>> + void *ptr1; >>>> + >>>> + if (ptr == MAP_FAILED) { >>>> + return NULL; >>>> + } >>>> + >>>> + /* Make sure align is a power of 2 */ >>>> + assert(!(align & (align - 1))); >>>> + /* Always align to host page size */ >>>> + assert(align >= getpagesize()); >>> >>> Is qemu_real_host_page_size initialized yet? >>> If not, should we move the initialization forward? >> >> I think it is. >> Though qemu_real_host_page_size seems to only be needed >> for portability, and this file is posix-specific. >> >> Anyway, we are moving code from util/oslib-posix.c and exec.c that >> currently both call getpagesize in this function, it seems better to be >> consistent. >> >> We can refactor afterwards, correct? >> >> >>>> + >>>> + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, >>>> + MAP_FIXED | >>>> + (fd == -1 ? MAP_ANONYMOUS : 0) | >>>> + (shared ? MAP_SHARED : MAP_PRIVATE), >>>> + fd, 0); >>> >>> If fd == -1, we really only need mprotect on the region, correct? >> >> Yes. But that's already like this in util/oslib-posix.c, >> this is just re-factoring without logic changes. >> >> Any reason to prefer mprotect? That would be more LOC. >> >>>> + if (ptr1 == MAP_FAILED) { >>>> + munmap(ptr, total); >>>> + return NULL; >>>> + } >>>> + >>>> + ptr += offset; >>>> + total -= offset; >>>> + >>>> + if (offset > 0) { >>>> + munmap(ptr - offset, offset); >>>> + } >>>> + >>>> + /* >>>> + * Leave a single PROT_NONE page allocated after the RAM block, to serve as >>>> + * a guard page guarding against potential buffer overflows. >>>> + */ >>>> + if (total > size + getpagesize()) { >>>> + munmap(ptr + size + getpagesize(), total - size - getpagesize()); >>>> + } >>>> + >>>> + return ptr; >>>> +}