public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Andre Przywara <andre.przywara@amd.com>
To: <avi@redhat.com>, <anthony@codemonkey.ws>
Cc: <kvm@vger.kernel.org>, Andre Przywara <andre.przywara@amd.com>
Subject: [PATCH 4/4] NUMA: realize NUMA memory pinning
Date: Wed, 11 Aug 2010 15:52:18 +0200	[thread overview]
Message-ID: <1281534738-8310-5-git-send-email-andre.przywara@amd.com> (raw)
In-Reply-To: <1281534738-8310-1-git-send-email-andre.przywara@amd.com>

Bind each part of the guest's memory to the host node given in the
user-provided assignment. This uses Linux's mbind syscall (which is
wrapped only by libnuma) to apply the pinning right after the
allocation.
Failures are not fatal, but produce a warning.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
 hw/pc.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 1b24409..dbfc082 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -42,6 +42,15 @@
 #include "device-assignment.h"
 #include "kvm.h"
 
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#include <numaif.h>
+#ifndef MPOL_F_RELATIVE_NODES
+  #define MPOL_F_RELATIVE_NODES (1 << 14)
+  #define MPOL_F_STATIC_NODES (1 << 15)
+#endif
+#endif
+
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
 
@@ -882,6 +891,53 @@ void pc_cpus_init(const char *cpu_model)
     }
 }
 
+static void bind_numa(ram_addr_t ram_addr)
+{
+#ifdef CONFIG_NUMA
+    int i;
+    char* ram_ptr;
+    ram_addr_t len, ram_offset;
+    int bind_mode;
+
+    ram_ptr = qemu_get_ram_ptr(ram_addr);
+
+    ram_offset = 0;
+    for (i = 0; i < nb_numa_nodes; i++) {
+        len = numa_info[i].guest_mem;
+        if (numa_info[i].flags != 0) {
+            switch (numa_info[i].flags & NODE_HOST_POLICY_MASK) {
+            case NODE_HOST_BIND:
+                bind_mode = MPOL_BIND;
+                break;
+            case NODE_HOST_INTERLEAVE:
+                bind_mode = MPOL_INTERLEAVE;
+                break;
+            case NODE_HOST_PREFERRED:
+                bind_mode = MPOL_PREFERRED;
+                break;
+            default:
+                bind_mode = MPOL_DEFAULT;
+                break;
+            }
+            bind_mode |= (numa_info[i].flags & NODE_HOST_RELATIVE) ?
+                MPOL_F_RELATIVE_NODES : MPOL_F_STATIC_NODES;
+
+            /* This is a workaround for a long standing bug in Linux'
+             * mbind implementation, which cuts off the last specified
+             * node. To stay compatible should this bug be fixed, we
+             * specify one more node and zero this one out.
+             */
+            clear_bit(numa_num_configured_nodes() + 1, numa_info[i].host_mem);
+            if (mbind(ram_ptr + ram_offset, len, bind_mode,
+                numa_info[i].host_mem, numa_num_configured_nodes() + 1, 0))
+                    perror("mbind");
+        }
+        ram_offset += len;
+    }
+#endif
+    return;
+}
+
 void pc_memory_init(ram_addr_t ram_size,
                     const char *kernel_filename,
                     const char *kernel_cmdline,
@@ -919,6 +975,8 @@ void pc_memory_init(ram_addr_t ram_size,
     cpu_register_physical_memory(0x100000,
                  below_4g_mem_size - 0x100000,
                  ram_addr + 0x100000);
+    bind_numa(ram_addr);
+
 #if TARGET_PHYS_ADDR_BITS > 32
     cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
                                  ram_addr + below_4g_mem_size);
-- 
1.6.4



  parent reply	other threads:[~2010-08-11 13:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-11 13:52 [PATCH 0/4]: NUMA: add host binding Andre Przywara
2010-08-11 13:52 ` [PATCH 1/4] NUMA: change existing NUMA guest code to use new bitmap implementation Andre Przywara
2010-08-11 13:52 ` [PATCH 2/4] NUMA: add Linux libnuma detection Andre Przywara
2010-08-11 13:52 ` [PATCH 3/4] NUMA: parse new host dependent command line options Andre Przywara
2010-08-11 13:52 ` Andre Przywara [this message]
2010-08-23 18:59   ` [PATCH 4/4] NUMA: realize NUMA memory pinning Marcelo Tosatti
2010-08-23 19:27     ` Anthony Liguori
2010-08-23 21:16       ` Andre Przywara
2010-08-23 21:27         ` Anthony Liguori
2010-08-31 20:54           ` Andrew Theurer
2010-08-31 22:03             ` Anthony Liguori
2010-09-01  3:38               ` Andrew Theurer
2010-09-09 20:00               ` Andre Przywara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1281534738-8310-5-git-send-email-andre.przywara@amd.com \
    --to=andre.przywara@amd.com \
    --cc=anthony@codemonkey.ws \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox