commit 0bc93b19ba3132140d5b34746523d0c7c8169093
Author: Andre Przywara
Date:   Fri Dec 5 14:05:44 2008 +0100

    allocate guest resources from different host NUMA nodes

    [review fixups applied to the hunks below: the per-node memory offset
    now sums node_mem[j] instead of node_mem[i]; CPU mask tests use shifts
    that are wide enough for the mask type; a failed sched_getcpu() or
    numa_node_to_cpus() is no longer used as an index or mask; the probe
    and source #include lines name <numa.h> / <sched.h>; hunk line counts
    for qemu-kvm.c were adjusted for the added comments.]

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index 05ace8e..690903e 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -698,6 +698,10 @@ LIBS += -lkvm
 DEPLIBS += ../libkvm/libkvm.a
 endif
 
+ifdef CONFIG_NUMA
+LIBS += -lnuma
+endif
+
 ifdef CONFIG_VNC_TLS
 CPPFLAGS += $(CONFIG_VNC_TLS_CFLAGS)
 LIBS += $(CONFIG_VNC_TLS_LIBS)
diff --git a/qemu/configure b/qemu/configure
index 63a85d6..3e2c9f9 100755
--- a/qemu/configure
+++ b/qemu/configure
@@ -121,6 +121,7 @@ bluez="yes"
 kvm="yes"
 kvm_cap_pit="no"
 kvm_cap_device_assignment="no"
+getcpu="no"
 kerneldir=""
 aix="no"
 blobs="yes"
@@ -391,6 +392,8 @@ for opt do
   ;;
   --enable-mixemu) mixemu="yes"
   ;;
+  --disable-numa) numa="no"
+  ;;
   --disable-aio) aio="no"
   ;;
   --disable-blobs) blobs="no"
@@ -489,6 +492,7 @@ echo "                           Available drivers: $audio_possible_drivers"
 echo "  --audio-card-list=LIST   set list of additional emulated audio cards"
 echo "                           Available cards: ac97 adlib cs4231a gus"
 echo "  --enable-mixemu          enable mixer emulation"
+echo "  --disable-numa           disable NUMA support (host side)"
 echo "  --disable-brlapi         disable BrlAPI"
 echo "  --disable-vnc-tls        disable TLS encryption for VNC server"
 echo "  --disable-curses         disable curses output"
@@ -985,6 +989,29 @@ for drv in $audio_drv_list; do
 done
 
 ##########################################
+# libnuma probe
+
+if test -z "$numa" ; then
+  numa=no
+
+  cat > $TMPC << EOF
+#include <numa.h>
+int main(void) { return numa_available(); }
+EOF
+  if $cc ${ARCH_CFLAGS} -o $TMPE ${OS_CFLAGS} $TMPC -lnuma 2> /dev/null ; then
+    numa=yes
+  fi
+fi
+
+cat > $TMPC << EOF
+#include <sched.h>
+int main(void) { return sched_getcpu(); }
+EOF
+if $cc ${ARCH_CFLAGS} -o $TMPE ${OS_CFLAGS} $TMPC 2> /dev/null ; then
+  getcpu=yes
+fi
+
+##########################################
 # BrlAPI probe
 
 if test -z "$brlapi" ; then
@@ -1181,6 +1208,7 @@ echo "mingw32 support   $mingw32"
 echo "Audio drivers     $audio_drv_list"
 echo "Extra audio cards $audio_card_list"
 echo "Mixer emulation   $mixemu"
+echo "NUMA support      $numa"
 echo "VNC TLS support   $vnc_tls"
 if test "$vnc_tls" = "yes" ; then
     echo "    TLS CFLAGS    $vnc_tls_cflags"
@@ -1415,6 +1443,13 @@ if test "$mixemu" = "yes" ; then
   echo "CONFIG_MIXEMU=yes" >> $config_mak
   echo "#define CONFIG_MIXEMU 1" >> $config_h
 fi
+if test "$numa" = "yes" ; then
+  echo "CONFIG_NUMA=yes" >> $config_mak
+  echo "#define CONFIG_NUMA 1" >> $config_h
+fi
+if test "$getcpu" = "yes" ; then
+  echo "#define HAVE_GETCPU 1" >> $config_h
+fi
 if test "$vnc_tls" = "yes" ; then
   echo "CONFIG_VNC_TLS=yes" >> $config_mak
   echo "CONFIG_VNC_TLS_CFLAGS=$vnc_tls_cflags" >> $config_mak
diff --git a/qemu/hw/fw_cfg.h b/qemu/hw/fw_cfg.h
index ef8f378..b370e4e 100644
--- a/qemu/hw/fw_cfg.h
+++ b/qemu/hw/fw_cfg.h
@@ -8,6 +8,9 @@
 #define FW_CFG_NOGRAPHIC        0x04
 #define FW_CFG_NB_CPUS          0x05
 #define FW_CFG_MACHINE_ID       0x06
+#define FW_CFG_NUMA_NODES       0x07
+#define FW_CFG_NUMA_NODE_CPUS   0x08
+#define FW_CFG_NUMA_NODE_MEM    0x09
 #define FW_CFG_MAX_ENTRY        0x10
 
 #define FW_CFG_WRITE_CHANNEL    0x4000
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 6de460c..b723125 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -439,6 +439,12 @@ static void bochs_bios_init(void)
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_NUMA_NODES, numnumanodes);
+
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA_NODE_MEM, (uint8_t*)node_mem,
+        sizeof(node_mem[0]) * numnumanodes);
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA_NODE_CPUS, (uint8_t*)node_to_cpus,
+        sizeof(node_to_cpus[0]) * numnumanodes);
 }
 
 /* Generate an initial boot sector which sets state and jump to
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index a7cfa24..63afe85 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -28,6 +28,10 @@ int kvm_pit = 1;
 #include
 #include
 
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#endif
+
 #define false 0
 #define true 1
 
@@ -424,6 +428,44 @@ static int kvm_main_loop_cpu(CPUState *env)
     return 0;
 }
 
+#ifdef CONFIG_NUMA
+
+#ifndef HAVE_GETCPU
+/* No sched_getcpu(): report -1 so callers leave the host node unassigned. */
+static int get_cur_node (void)
+{
+    return -1;
+}
+#else /* HAVE_GETCPU */
+
+#define NUMA_MASK_SIZE 16
+/*
+ * Return the host NUMA node whose CPU mask contains the CPU this thread is
+ * currently running on; 0 if the CPU is unknown or matches no node's mask.
+ */
+static int get_cur_node (void)
+{
+    int cpunr, node, maskbits;
+    unsigned long maskbuf[NUMA_MASK_SIZE];
+
+    maskbits = sizeof(maskbuf[0]) * 8;
+    cpunr = sched_getcpu();
+    /* sched_getcpu() returns -1 on failure; never use that as an index */
+    if (cpunr < 0 || cpunr >= maskbits * NUMA_MASK_SIZE) return 0;
+    for (node = 0; node <= numa_max_node(); node++)
+    {
+        /* skip nodes whose mask could not be read (buffer left unset) */
+        if (numa_node_to_cpus (node, maskbuf, sizeof(maskbuf)) < 0) continue;
+        /* 1UL: a plain int shift is undefined for bit positions >= 31 */
+        if (maskbuf[cpunr / maskbits] & (1UL << (cpunr % maskbits)))
+            return node;
+    }
+    return 0;
+}
+#endif /* HAVE_GETCPU */
+
+#endif /* CONFIG_NUMA */
+
 static void *ap_main_loop(void *_env)
 {
     CPUState *env = _env;
@@ -432,6 +474,40 @@ static void *ap_main_loop(void *_env)
 
     current_env = env;
     env->thread_id = kvm_get_thread_id();
+
+#ifdef CONFIG_NUMA
+    /*
+     * Find the guest node whose CPU mask contains this VCPU.  If that node's
+     * hostnodes[] entry is still (uint64_t)-1, adopt the host node this
+     * thread currently runs on and place the guest node's slice of RAM
+     * there; then run this VCPU thread on the node's host node.
+     */
+    if (numnumanodes > 0 && numa_available() != -1)
+    {
+        int i;
+        for (i = 0; i < numnumanodes; i++) {
+            /* 64-bit constant: an int shift is undefined for index >= 31 */
+            if (!(node_to_cpus[i] & ((uint64_t)1 << env->cpu_index))) continue;
+            if (hostnodes[i] == (uint64_t)-1) {
+                int j;
+                unsigned long offset = 0;
+
+                hostnodes[i] = get_cur_node();
+                if (hostnodes[i] != (uint64_t)-1) {
+                    /* node i starts after the memory of nodes 0 .. i-1 */
+                    for (j = 0; j < i; ++j) offset += node_mem[j];
+                    numa_tonode_memory (phys_ram_base + offset,
+                        node_mem[i], hostnodes[i] % (numa_max_node() + 1));
+                }
+            }
+
+            if (hostnodes[i] != (uint64_t)-1)
+                numa_run_on_node (hostnodes[i] % (numa_max_node() + 1));
+            break;
+        }
+    }
+#endif
+
     sigfillset(&signals);
     sigprocmask(SIG_BLOCK, &signals, NULL);
     kvm_create_vcpu(kvm_context, env->cpu_index);
@@ -840,6 +916,25 @@ int kvm_setup_guest_memory(void *area, unsigned long size)
     if (ret)
         perror ("madvise");
 
+#ifdef CONFIG_NUMA
+    /*
+     * Place each guest node's consecutive slice of the area on the host
+     * node recorded in hostnodes[]; nodes still at (uint64_t)-1 are skipped.
+     */
+    if (numnumanodes > 0 && numa_available() != -1) {
+        unsigned long offset = 0;
+        int i;
+
+        for (i = 0; i < numnumanodes; ++i) {
+            if (hostnodes[i] != (uint64_t)-1) {
+                numa_tonode_memory ((char*)area + offset,
+                    node_mem[i], hostnodes[i] % (numa_max_node() + 1));
+            }
+            offset += node_mem[i];
+        }
+    }
+#endif
+
     return ret;
 }
 