From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andre Przywara Subject: [PATCH 2/3] KVM-userspace: allocate guest resources from different host nodes Date: Thu, 27 Nov 2008 23:27:04 +0100 Message-ID: <492F1EB8.7030303@amd.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------090805000104050906000102" Cc: kvm@vger.kernel.org To: Avi Kivity Return-path: Received: from outbound-dub.frontbridge.com ([213.199.154.16]:59561 "EHLO IE1EHSOBE004.bigfish.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751449AbYK0W1r (ORCPT ); Thu, 27 Nov 2008 17:27:47 -0500 Sender: kvm-owner@vger.kernel.org List-ID: --------------090805000104050906000102 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit According to the host node map given on the command line, the VCPUs are pinned to the respective node (allowing at least scheduling between the cores belonging to this node). The mmap'ed guest memory will be bound to the correct host nodes (this will of course not take effect until the memory actually faults in). Since libnuma is not that widespread (in default installations), I chose 'enable via configure' for now: --enable-numa will compile the parts in. Signed-off-by: Andre Przywara -- Andre Przywara AMD-Operating System Research Center (OSRC), Dresden, Germany Tel: +49 351 277-84917 ----to satisfy European Law for business letters: AMD Saxony Limited Liability Company & Co. KG, Wilschdorfer Landstr. 101, 01109 Dresden, Germany Register Court Dresden: HRA 4896, General Partner authorized to represent: AMD Saxony LLC (Wilmington, Delaware, US) General Manager of AMD Saxony LLC: Dr. Hans-R. 
Deppe, Thomas McCoy --------------090805000104050906000102 Content-Type: text/plain; name="kvmnuma_hostalloc.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="kvmnuma_hostalloc.patch" commit 4c67b88049d673e203ee4337cc70856c1983eb5a Author: Andre Przywara Date: Tue Nov 25 23:12:13 2008 +0100 allocate guest resources from different nodes diff --git a/qemu/Makefile.target b/qemu/Makefile.target index 05ace8e..690903e 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -698,6 +698,10 @@ LIBS += -lkvm DEPLIBS += ../libkvm/libkvm.a endif +ifdef CONFIG_NUMA +LIBS += -lnuma +endif + ifdef CONFIG_VNC_TLS CPPFLAGS += $(CONFIG_VNC_TLS_CFLAGS) LIBS += $(CONFIG_VNC_TLS_LIBS) diff --git a/qemu/configure b/qemu/configure index 18ef980..8ffa33d 100755 --- a/qemu/configure +++ b/qemu/configure @@ -114,6 +114,7 @@ curses="yes" aio="yes" nptl="yes" mixemu="no" +numa="no" bluez="yes" kvm="yes" kvm_cap_pit="no" @@ -387,6 +388,8 @@ for opt do ;; --enable-mixemu) mixemu="yes" ;; + --enable-numa) numa="yes" + ;; --disable-aio) aio="no" ;; --kerneldir=*) kerneldir="$optarg" @@ -483,6 +486,7 @@ echo " Available drivers: $audio_possible_drivers" echo " --audio-card-list=LIST set list of additional emulated audio cards" echo " Available cards: ac97 adlib cs4231a gus" echo " --enable-mixemu enable mixer emulation" +echo " --enable-numa enable NUMA support (host side)" echo " --disable-brlapi disable BrlAPI" echo " --disable-vnc-tls disable TLS encryption for VNC server" echo " --disable-curses disable curses output" @@ -1174,6 +1178,7 @@ echo "mingw32 support $mingw32" echo "Audio drivers $audio_drv_list" echo "Extra audio cards $audio_card_list" echo "Mixer emulation $mixemu" +echo "NUMA support $numa" echo "VNC TLS support $vnc_tls" if test "$vnc_tls" = "yes" ; then echo " TLS CFLAGS $vnc_tls_cflags" @@ -1408,6 +1413,10 @@ if test "$mixemu" = "yes" ; then echo "CONFIG_MIXEMU=yes" >> $config_mak echo "#define CONFIG_MIXEMU 1" >> $config_h fi +if test 
"$numa" = "yes" ; then + echo "CONFIG_NUMA=yes" >> $config_mak + echo "#define CONFIG_NUMA 1" >> $config_h +fi if test "$vnc_tls" = "yes" ; then echo "CONFIG_VNC_TLS=yes" >> $config_mak echo "CONFIG_VNC_TLS_CFLAGS=$vnc_tls_cflags" >> $config_mak diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c index 8b4cdd6..61384fd 100644 --- a/qemu/qemu-kvm.c +++ b/qemu/qemu-kvm.c @@ -28,6 +28,10 @@ int kvm_pit = 1; #include #include +#ifdef CONFIG_NUMA +#include +#endif + #define false 0 #define true 1 @@ -432,6 +436,13 @@ static void *ap_main_loop(void *_env) current_env = env; env->thread_id = kvm_get_thread_id(); + +#ifdef CONFIG_NUMA + if (numnumanodes > 0 && numa_available() != -1) + numa_run_on_node (hostnodes[env->cpu_index % numnumanodes] % + (numa_max_node() + 1)); +#endif + sigfillset(&signals); sigprocmask(SIG_BLOCK, &signals, NULL); kvm_create_vcpu(kvm_context, env->cpu_index); @@ -828,7 +839,7 @@ void kvm_cpu_unregister_physical_memory(target_phys_addr_t start_addr, kvm_unregister_memory_area(kvm_context, start_addr, size); } -int kvm_setup_guest_memory(void *area, unsigned long size) +int kvm_setup_guest_memory(void *area, unsigned long size, unsigned long ram) { int ret = 0; @@ -840,6 +851,18 @@ int kvm_setup_guest_memory(void *area, unsigned long size) if (ret) perror ("madvise"); +#ifdef CONFIG_NUMA + if (numnumanodes > 0 && numa_available() != -1) + { + unsigned long chunksize = ram / 1024 / 1024 / numnumanodes; + + int i; + for (i = 0; i < numnumanodes; ++i) + numa_tonode_memory ((char*)area + i * chunksize * 1024 * 1024, + chunksize * 1024 * 1024, hostnodes[i] % (numa_max_node() + 1)); + } +#endif + return ret; } diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h index 6da518a..53ee289 100644 --- a/qemu/qemu-kvm.h +++ b/qemu/qemu-kvm.h @@ -50,7 +50,7 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr, unsigned long size); void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size, int log); -int kvm_setup_guest_memory(void *area, 
unsigned long size); +int kvm_setup_guest_memory(void *area, unsigned long size, unsigned long ram); int kvm_qemu_create_memory_alias(uint64_t phys_start, uint64_t len, uint64_t target_phys); diff --git a/qemu/vl.c b/qemu/vl.c index 681b3de..ee09702 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -4677,7 +4677,7 @@ void *alloc_mem_area(size_t memory, unsigned long *len, const char *path) return area; } -void *qemu_alloc_physram(unsigned long memory) +void *qemu_alloc_physram(unsigned long memory, unsigned long ram) { void *area = NULL; unsigned long map_len = memory; @@ -4689,7 +4689,7 @@ void *qemu_alloc_physram(unsigned long memory) if (!area) area = qemu_vmalloc(memory); #ifdef USE_KVM - if (kvm_setup_guest_memory(area, map_len)) + if (kvm_setup_guest_memory(area, map_len, ram)) area = NULL; #endif return area; @@ -5671,7 +5671,7 @@ int main(int argc, char **argv) } } - phys_ram_base = qemu_alloc_physram(phys_ram_size); + phys_ram_base = qemu_alloc_physram(phys_ram_size, ram_size); if (!phys_ram_base) { fprintf(stderr, "Could not allocate physical memory\n"); exit(1); --------------090805000104050906000102--