From mboxrd@z Thu Jan 1 00:00:00 1970 From: john cooper Subject: Re: patch: qemu + hugetlbfs.. Date: Thu, 10 Jul 2008 16:16:03 -0400 Message-ID: <48766E03.4090901@third-harmonic.com> References: <4873E400.4000409@third-harmonic.com> <4873F395.6030209@codemonkey.ws> <4874051A.8000802@third-harmonic.com> <48740F86.3050306@codemonkey.ws> <20080709170301.GA11439@dmt.cnet> <4874F156.2010708@codemonkey.ws> <48763B86.6060402@third-harmonic.com> <48764DAF.6060502@codemonkey.ws> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020907080702090409080400" Cc: Marcelo Tosatti , kvm@vger.kernel.org, john.cooper@redhat.com To: Anthony Liguori Return-path: Received: from dpc691978010.direcpc.com ([69.19.78.10]:45146 "EHLO anvil.third-harmonic.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751216AbYGJU27 (ORCPT ); Thu, 10 Jul 2008 16:28:59 -0400 In-Reply-To: <48764DAF.6060502@codemonkey.ws> Sender: kvm-owner@vger.kernel.org List-ID: This is a multi-part message in MIME format. --------------020907080702090409080400 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Anthony Liguori wrote: >> +#include >> > > I don't think this is necessary anymore. Depending on a Linux headers > breaks the QEMU build on other unices so it's a bad thing. It is no longer required, but see below. > hpage is a misnomer too as we aren't actually dependent on huge pages (this > code should work equally well for tmpfs). As it currently exists alloc_hpage_mem() is tied to the notion of huge page allocation as it will reference gethugepagesize() irrespective of *mem_path. So even in the case of tmpfs backed files, if the host kernel has been configured with CONFIG_HUGETLBFS we will wind up doing allocations of /dev/shm mapped files at /proc/meminfo:Hugepagesize granularity. Otherwise if HUGETLBFS is not configured gethugepagesize() returns zero and alloc_hpage_mem() itself will not perform the allocation. 
This is probably not what was intended, but it is unlikely to be much of a concern, since "-mem-path /dev/shm" is likely used only when debugging this flag and its associated logic. I don't think it is currently worth the trouble to correct purely for the sake of cleanliness, and doing so may drag in far more than the header file we've just removed above in order to deal with this architecture/config dependency. An updated patch is attached. -john -- john.cooper@third-harmonic.com --------------020907080702090409080400 Content-Type: text/plain; name="prealloc.diff-08071001" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="prealloc.diff-08071001" --- a/qemu/vl.c +++ b/qemu/vl.c @@ -234,6 +234,7 @@ int autostart = 1; int time_drift_fix = 0; unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; +int mem_prealloc = 1; /* force preallocation of physical target memory */ int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM @@ -7809,7 +7810,10 @@ static void help(int exitcode) #endif "-tdf inject timer interrupts that got lost\n" "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n" - "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n" + "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also\n" + " enables allocation of guest memory with huge pages\n" + "-mem-prealloc toggles preallocation of -mem-path backed physical memory\n" + " at startup. 
Default is enabled.\n" "-option-rom rom load a file, rom, into the option ROM space\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" @@ -7932,6 +7936,7 @@ enum { QEMU_OPTION_tdf, QEMU_OPTION_kvm_shadow_memory, QEMU_OPTION_mempath, + QEMU_OPTION_mem_prealloc }; typedef struct QEMUOption { @@ -8059,6 +8064,7 @@ const QEMUOption qemu_options[] = { { "startdate", HAS_ARG, QEMU_OPTION_startdate }, { "tb-size", HAS_ARG, QEMU_OPTION_tb_size }, { "mem-path", HAS_ARG, QEMU_OPTION_mempath }, + { "mem-prealloc", 0, QEMU_OPTION_mem_prealloc }, { NULL }, }; @@ -8276,11 +8282,13 @@ static int gethugepagesize(void) return hugepagesize; } -void *alloc_mem_area(unsigned long memory, const char *path) +/* attempt to allocate memory mmap'ed to mem_path + */ +void *alloc_hpage_mem(unsigned long memory, const char *path) { char *filename; void *area; - int fd; + int fd, flags; if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) return NULL; @@ -8308,26 +8316,27 @@ void *alloc_mem_area(unsigned long memor */ ftruncate(fd, memory); - area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - if (area == MAP_FAILED) { - perror("mmap"); - close(fd); - return NULL; - } - - return area; + /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED + * to sidestep this quirk. + */ + flags = mem_prealloc ? 
MAP_POPULATE|MAP_SHARED : MAP_PRIVATE; + area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0); + if (area != MAP_FAILED) + return (area); + perror("alloc_hpage_mem: can't mmap hugetlbfs pages"); + close(fd); + return (NULL); } -void *qemu_alloc_physram(unsigned long memory) +/* allocate guest memory as requested + */ +void *qemu_alloc_physram(unsigned long size) { - void *area = NULL; - if (mem_path) - area = alloc_mem_area(memory, mem_path); - if (!area) - area = qemu_vmalloc(memory); - - return area; + return (alloc_hpage_mem(size, mem_path)); + else + return (qemu_vmalloc(size)); } int main(int argc, char **argv) @@ -8962,6 +8971,9 @@ int main(int argc, char **argv) case QEMU_OPTION_mempath: mem_path = optarg; break; + case QEMU_OPTION_mem_prealloc: + mem_prealloc = !mem_prealloc; + break; case QEMU_OPTION_name: qemu_name = optarg; break; --------------020907080702090409080400--