From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andre Przywara Subject: [PATCH 1/3] v2: KVM-userspace: introduce -numa command line option Date: Fri, 5 Dec 2008 14:30:46 +0100 Message-ID: <49392D06.300@amd.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------040803020507010100090407" Cc: kvm@vger.kernel.org, "Daniel P. Berrange" To: Avi Kivity Return-path: Received: from outbound-dub.frontbridge.com ([213.199.154.16]:23133 "EHLO IE1EHSOBE006.bigfish.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750922AbYLENaV (ORCPT ); Fri, 5 Dec 2008 08:30:21 -0500 Sender: kvm-owner@vger.kernel.org List-ID: --------------040803020507010100090407 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit The attached patch parses the command line options given to -numa and passes them on to lower levels (namely qemu-kvm.c) Signed-off-by: Andre Przywara -- Andre Przywara AMD-Operating System Research Center (OSRC), Dresden, Germany Tel: +49 351 277-84917 ----to satisfy European Law for business letters: AMD Saxony Limited Liability Company & Co. KG, Wilschdorfer Landstr. 101, 01109 Dresden, Germany Register Court Dresden: HRA 4896, General Partner authorized to represent: AMD Saxony LLC (Wilmington, Delaware, US) General Manager of AMD Saxony LLC: Dr. Hans-R. 
Deppe, Thomas McCoy --------------040803020507010100090407 Content-Type: text/x-patch; name="kvmnuma_cmdline.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="kvmnuma_cmdline.patch" commit 7f9c2380d2d4e79da30dcc219cc4274f3dd36183 Author: Andre Przywara Date: Fri Dec 5 13:36:43 2008 +0100 introduce -numa command line option diff --git a/qemu/sysemu.h b/qemu/sysemu.h index 5abda5c..40868fc 100644 --- a/qemu/sysemu.h +++ b/qemu/sysemu.h @@ -99,6 +99,13 @@ extern int win2k_install_hack; extern int alt_grab; extern int usb_enabled; extern int smp_cpus; + +#define MAX_NODES 64 +extern int numnumanodes; +extern uint64_t hostnodes[MAX_NODES]; +extern uint64_t node_mem[MAX_NODES]; +extern uint64_t node_to_cpus[MAX_NODES]; + extern int cursor_hide; extern int graphic_rotate; extern int no_quit; diff --git a/qemu/vl.c b/qemu/vl.c index 0eea3b3..27a7cc4 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -228,6 +228,10 @@ int usb_enabled = 0; const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE]; int assigned_devices_index; int smp_cpus = 1; +int numnumanodes = 0; +uint64_t hostnodes[MAX_NODES]; +uint64_t node_mem[MAX_NODES]; +uint64_t node_to_cpus[MAX_NODES]; const char *vnc_display; int acpi_enabled = 1; int fd_bootchk = 1; @@ -4099,6 +4103,10 @@ static void help(int exitcode) "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n" "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n" "-option-rom rom load a file, rom, into the option ROM space\n" + "-numa nrnodes[,mem:size1[;size2..]][,cpu:cpu1[;cpu2..]][,pin:node1[;node2]]\n" + " create a multi NUMA node guest and optionally pin it to\n" + " to the given host nodes. 
If mem and cpu are omitted,\n" + " resources are split equally\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" #endif @@ -4196,6 +4204,7 @@ enum { QEMU_OPTION_usb, QEMU_OPTION_usbdevice, QEMU_OPTION_smp, + QEMU_OPTION_numa, QEMU_OPTION_vnc, QEMU_OPTION_no_acpi, QEMU_OPTION_curses, @@ -4323,6 +4332,7 @@ static const QEMUOption qemu_options[] = { { "win2k-hack", 0, QEMU_OPTION_win2k_hack }, { "usbdevice", HAS_ARG, QEMU_OPTION_usbdevice }, { "smp", HAS_ARG, QEMU_OPTION_smp }, + { "numa", HAS_ARG, QEMU_OPTION_numa}, { "vnc", HAS_ARG, QEMU_OPTION_vnc }, #ifdef CONFIG_CURSES { "curses", 0, QEMU_OPTION_curses }, @@ -4714,6 +4724,79 @@ static void termsig_setup(void) #endif +#define PARSE_FLAG_BITMASK 1 +#define PARSE_FLAG_SUFFIX 2 + +static int parse_to_array (const char *arg, uint64_t *array, + char delim, int maxentries, int flags) +{ +const char *s; +char *end; +int num; +unsigned long long int val,endval; + + num=0; + for (s=arg; s!=NULL; s = strchr (end, delim)) + { + if (*s == delim) s++; + val = strtoull (s, &end, 10); + if (end == s) {num++; continue;} + if (num >= maxentries) break; + switch (*end) + { + case 'g': + case 'G': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + case 'm': + case 'M': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + case 'k': + case 'K': + if (flags & PARSE_FLAG_SUFFIX) val *= 1024; + break; + case '-': + if (!(flags & PARSE_FLAG_BITMASK)) break; + s = end + 1; + endval = strtoull (s, &end, 10); + val = (1 << (endval + 1)) - (1 << val); + break; + default: + if (flags & PARSE_FLAG_BITMASK) val = 1 << val; + break; + } + array[num++] = val; + } + return num; +} + +static int parse_numa_args (const char *opt, uint64_t *hostnodes, + uint64_t *mems, uint64_t *cpus, int maxentries) +{ +const char *s; +char *arg, *val, *end; +int num; + + arg = strdup(opt); + s = strtok(arg, ","); + if (s == NULL) {free (arg); return -1;} + num = strtol (s, &end, 10); + if (s == end) {free (arg); return -1;} + while 
((s=strtok(NULL, ","))!=NULL) { + if ((val = strchr (s, ':'))) { + *val++ = 0; + if (!strcmp (s, "mem")) { + parse_to_array (val, mems, ';', maxentries, PARSE_FLAG_SUFFIX); + } else if (!strcmp (s, "cpu")) { + parse_to_array (val, cpus, ';', maxentries, PARSE_FLAG_BITMASK); + } else if (!strcmp (s, "pin")) { + parse_to_array (val, hostnodes, ';', maxentries, 0); + } + } + } + free (arg); + return num; +} + int main(int argc, char **argv) { #ifdef CONFIG_GDBSTUB @@ -4812,6 +4895,12 @@ int main(int argc, char **argv) parallel_devices[i] = NULL; parallel_device_index = 0; + for(i = 0; i < MAX_NODES; i++) { + hostnodes[i] = (uint64_t)-1; + node_to_cpus[i] = 0; + node_mem[i] = 0; + } + usb_devices_index = 0; assigned_devices_index = 0; @@ -5293,6 +5382,14 @@ int main(int argc, char **argv) exit(1); } break; + case QEMU_OPTION_numa: + numnumanodes = parse_numa_args (optarg, + hostnodes, node_mem, node_to_cpus, MAX_NODES); + if (numnumanodes < 0) { + fprintf(stderr, "Invalid number of NUMA nodes\n"); + exit(1); + } + break; case QEMU_OPTION_vnc: vnc_display = optarg; break; @@ -5445,6 +5542,24 @@ int main(int argc, char **argv) monitor_device = "stdio"; } + if (numnumanodes > 0) { + int i; + + if (numnumanodes > smp_cpus) + numnumanodes = smp_cpus; + + for (i = 0; i < numnumanodes; i++) if (node_mem[i] != 0) break; + if (i == numnumanodes) { + for (i = 0; i < numnumanodes; i++) + node_mem[i] = (ram_size / numnumanodes) & ~((1 << 20UL) - 1); + } + for (i = 0; i < numnumanodes; i++) if (node_to_cpus[i] != 0) break; + if (i == numnumanodes) { + for (i = 0; i < smp_cpus; i++) + node_to_cpus[i % numnumanodes] |= 1<