All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
@ 2007-08-13 10:02 Andre Przywara
  2007-08-13 10:30 ` Keir Fraser
  0 siblings, 1 reply; 10+ messages in thread
From: Andre Przywara @ 2007-08-13 10:02 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 55 bytes --]

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

[-- Attachment #2: numa_hvm_guest2.patch --]
[-- Type: text/plain, Size: 7813 bytes --]

# HG changeset patch
# User andre.przywara@amd.com
# Date 1186492260 -7200
# Node ID e730c1207604414f6f2779cc6adb213e3c1362eb
# Parent  0534ec5aa830c665ac95bc0750a22cd6c5413733
made HVM memory allocation CPU aware

diff -r 0534ec5aa830 -r e730c1207604 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/ioemu/vl.c	Tue Aug 07 15:11:00 2007 +0200
@@ -6847,7 +6847,7 @@ int set_mm_mapping(int xc_handle, uint32
     }
 
     err = xc_domain_memory_populate_physmap(xc_handle, domid, nr_pages, 0,
-                                            address_bits, extent_start);
+                            address_bits, XENMEM_DEFAULT_CPU, extent_start);
     if (err) {
         fprintf(stderr, "Failed to populate physmap\n");
         return -1;
diff -r 0534ec5aa830 -r e730c1207604 tools/libxc/xc_dom_x86.c
--- a/tools/libxc/xc_dom_x86.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/libxc/xc_dom_x86.c	Tue Aug 07 15:11:00 2007 +0200
@@ -711,7 +711,7 @@ int arch_setup_meminit(struct xc_dom_ima
     /* allocate guest memory */
     rc = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
                                            dom->total_pages, 0, 0,
-                                           dom->p2m_host);
+                                           XENMEM_DEFAULT_CPU, dom->p2m_host);
     return rc;
 }
 
diff -r 0534ec5aa830 -r e730c1207604 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/libxc/xc_domain.c	Tue Aug 07 15:11:00 2007 +0200
@@ -506,6 +506,7 @@ int xc_domain_memory_populate_physmap(in
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
+                                          unsigned int cpu,
                                           xen_pfn_t *extent_start)
 {
     int err;
@@ -513,7 +514,8 @@ int xc_domain_memory_populate_physmap(in
         .nr_extents   = nr_extents,
         .extent_order = extent_order,
         .address_bits = address_bits,
-        .domid        = domid
+        .domid        = domid,
+        .cpu          = cpu
     };
     set_xen_guest_handle(reservation.extent_start, extent_start);
 
diff -r 0534ec5aa830 -r e730c1207604 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/libxc/xc_domain_restore.c	Tue Aug 07 15:11:00 2007 +0200
@@ -126,7 +126,7 @@ static int uncanonicalize_pagetable(int 
     /* Allocate the requisite number of mfns. */
     if ( nr_mfns &&
          (xc_domain_memory_populate_physmap(xc_handle, dom, nr_mfns, 0, 0,
-                                            p2m_batch) != 0) )
+                                     XENMEM_DEFAULT_CPU, p2m_batch) != 0) )
     { 
         ERROR("Failed to allocate memory for batch.!\n"); 
         errno = ENOMEM;
@@ -495,7 +495,7 @@ int xc_domain_restore(int xc_handle, int
         /* Now allocate a bunch of mfns for this batch */
         if ( nr_mfns &&
              (xc_domain_memory_populate_physmap(xc_handle, dom, nr_mfns, 0,
-                                                0, p2m_batch) != 0) )
+                             0, XENMEM_DEFAULT_CPU, p2m_batch) != 0) )
         { 
             ERROR("Failed to allocate memory for batch.!\n"); 
             errno = ENOMEM;
diff -r 0534ec5aa830 -r e730c1207604 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/libxc/xc_hvm_build.c	Tue Aug 07 15:11:00 2007 +0200
@@ -211,10 +211,11 @@ static int setup_guest(int xc_handle,
 
     /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
     rc = xc_domain_memory_populate_physmap(
-        xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
+        xc_handle, dom, 0xa0, 0, 0, XENMEM_DEFAULT_CPU, &page_array[0x00]);
     if ( rc == 0 )
         rc = xc_domain_memory_populate_physmap(
-            xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
+            xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
+            &page_array[0xc0]);
     if ( rc != 0 )
     {
         PERROR("Could not allocate memory for HVM guest.\n");
diff -r 0534ec5aa830 -r e730c1207604 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Tue Aug 07 14:28:18 2007 +0200
+++ b/tools/libxc/xenctrl.h	Tue Aug 07 15:11:00 2007 +0200
@@ -526,6 +526,7 @@ int xc_domain_memory_populate_physmap(in
                                       unsigned long nr_extents,
                                       unsigned int extent_order,
                                       unsigned int address_bits,
+                                      unsigned int cpu,
                                       xen_pfn_t *extent_start);
 
 int xc_domain_ioport_permission(int xc_handle,
diff -r 0534ec5aa830 -r e730c1207604 xen/common/memory.c
--- a/xen/common/memory.c	Tue Aug 07 14:28:18 2007 +0200
+++ b/xen/common/memory.c	Tue Aug 07 15:11:00 2007 +0200
@@ -30,6 +30,7 @@ struct memop_args {
     unsigned int nr_extents;   /* Number of extents to allocate or free. */
     unsigned int extent_order; /* Size of each extent. */
     unsigned int memflags;     /* Allocation flags. */
+    unsigned int cpu;          /* CPU (NUMA node) to take the mem from */
 
     /* INPUT/OUTPUT */
     unsigned int nr_done;    /* Number of extents processed so far. */
@@ -48,7 +49,7 @@ static void increase_reservation(struct 
     unsigned long i;
     xen_pfn_t mfn;
     struct domain *d = a->domain;
-    unsigned int cpu = select_local_cpu(d);
+    unsigned int cpu;
 
     if ( !guest_handle_is_null(a->extent_list) &&
          !guest_handle_okay(a->extent_list, a->nr_extents) )
@@ -57,6 +58,9 @@ static void increase_reservation(struct 
     if ( (a->extent_order != 0) &&
          !multipage_allocation_permitted(current->domain) )
         return;
+
+    if ( a->cpu == XENMEM_DEFAULT_CPU ) cpu = select_local_cpu(d);
+        else cpu = a->cpu;
 
     for ( i = a->nr_done; i < a->nr_extents; i++ )
     {
@@ -95,7 +99,7 @@ static void populate_physmap(struct memo
     unsigned long i, j;
     xen_pfn_t gpfn, mfn;
     struct domain *d = a->domain;
-    unsigned int cpu = select_local_cpu(d);
+    unsigned int cpu;
 
     if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
         return;
@@ -103,6 +107,9 @@ static void populate_physmap(struct memo
     if ( (a->extent_order != 0) &&
          !multipage_allocation_permitted(current->domain) )
         return;
+
+    if ( a->cpu == XENMEM_DEFAULT_CPU ) cpu = select_local_cpu(d);
+        else cpu = a->cpu;
 
     for ( i = a->nr_done; i < a->nr_extents; i++ )
     {
@@ -518,6 +525,7 @@ long do_memory_op(unsigned long cmd, XEN
         args.extent_list  = reservation.extent_start;
         args.nr_extents   = reservation.nr_extents;
         args.extent_order = reservation.extent_order;
+        args.cpu          = reservation.cpu;
         args.nr_done      = start_extent;
         args.preempted    = 0;
         args.memflags     = 0;
diff -r 0534ec5aa830 -r e730c1207604 xen/include/public/memory.h
--- a/xen/include/public/memory.h	Tue Aug 07 14:28:18 2007 +0200
+++ b/xen/include/public/memory.h	Tue Aug 07 15:11:00 2007 +0200
@@ -35,6 +35,7 @@
 #define XENMEM_increase_reservation 0
 #define XENMEM_decrease_reservation 1
 #define XENMEM_populate_physmap     6
+#define XENMEM_DEFAULT_CPU ((unsigned int)-1)
 struct xen_memory_reservation {
 
     /*
@@ -66,6 +67,7 @@ struct xen_memory_reservation {
      * Unprivileged domains can specify only DOMID_SELF.
      */
     domid_t        domid;
+    unsigned int   cpu;
 };
 typedef struct xen_memory_reservation xen_memory_reservation_t;
 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 10:02 [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call Andre Przywara
@ 2007-08-13 10:30 ` Keir Fraser
  2007-08-13 12:59   ` Christoph Egger
  0 siblings, 1 reply; 10+ messages in thread
From: Keir Fraser @ 2007-08-13 10:30 UTC (permalink / raw)
  To: Andre Przywara, xen-devel

On 13/8/07 11:02, "Andre Przywara" <andre.przywara@amd.com> wrote:

> @@ -35,6 +35,7 @@
>  #define XENMEM_increase_reservation 0
>  #define XENMEM_decrease_reservation 1
>  #define XENMEM_populate_physmap     6
> +#define XENMEM_DEFAULT_CPU ((unsigned int)-1)
>  struct xen_memory_reservation {
>  
>      /*
> @@ -66,6 +67,7 @@ struct xen_memory_reservation {
>       * Unprivileged domains can specify only DOMID_SELF.
>       */
>      domid_t        domid;
> +    unsigned int   cpu;
>  };

We cannot change the size of existing hypercall structures. In this case we
could steal bits from address_bits field and create a pair of 16-bit fields
from it. Also, a physical cpu id is not a great fit for this hypercall -- it
is meaningless to most guests who do not see the physical cpu map. Better to
pass a vcpu_id and let Xen work out the most appropriate physical cpu id
based on the vcpu's affinity. Or have a concept of per-guest 'virtual node
identifiers' and pass a 'uint16_t vnodeid'. The latter might actually be a
nice abstraction -- it'd be good to know other people's thoughts on this?

 -- Keir

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 10:30 ` Keir Fraser
@ 2007-08-13 12:59   ` Christoph Egger
  2007-08-13 14:00     ` Isaku Yamahata
  2007-08-13 14:06     ` Keir Fraser
  0 siblings, 2 replies; 10+ messages in thread
From: Christoph Egger @ 2007-08-13 12:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Andre Przywara, Keir Fraser

On Monday 13 August 2007 12:30:15 Keir Fraser wrote:
> On 13/8/07 11:02, "Andre Przywara" <andre.przywara@amd.com> wrote:
> > @@ -35,6 +35,7 @@
> >  #define XENMEM_increase_reservation 0
> >  #define XENMEM_decrease_reservation 1
> >  #define XENMEM_populate_physmap     6
> > +#define XENMEM_DEFAULT_CPU ((unsigned int)-1)
> >  struct xen_memory_reservation {
> >
> >      /*
> > @@ -66,6 +67,7 @@ struct xen_memory_reservation {
> >       * Unprivileged domains can specify only DOMID_SELF.
> >       */
> >      domid_t        domid;
> > +    unsigned int   cpu;
> >  };
>
> We cannot change the size of existing hypercall structures.

Except Xen bumps major version number to 4 ? :-)

You are worrying about PV guests that lag behind with syncing
public headers such as NetBSD/Xen ?

> In this case we  could steal bits from address_bits field and create a pair
> of 16-bit fields from it. Also, a physical cpu id is not a great fit for
> this hypercall --  it is meaningless to most guests who do not see the
> physical cpu map. 
> Better to pass a vcpu_id and let Xen work out the most appropriate physical
> cpu id based on the vcpu's affinity. Or have a concept of per-guest
> 'virtual node identifiers' and pass a 'uint16_t vnodeid'. The latter might
> actually be a nice abstraction -- it'd be good to know other people's
> thoughts on this?

Making struct xen_machphys_mapping NUMA-aware is also a no-go, right?
It would additionally need a min_mfn and a vnodeid member.

Oh, and how should the guest query how many vnodes exist?


-- 
AMD Saxony, Dresden, Germany
Operating System Research Center

Legal Information:
AMD Saxony Limited Liability Company & Co. KG
Sitz (Geschäftsanschrift):
   Wilschdorfer Landstr. 101, 01109 Dresden, Deutschland
Registergericht Dresden: HRA 4896
vertretungsberechtigter Komplementär:
   AMD Saxony LLC (Sitz Wilmington, Delaware, USA)
Geschäftsführer der AMD Saxony LLC:
   Dr. Hans-R. Deppe, Thomas McCoy

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 12:59   ` Christoph Egger
@ 2007-08-13 14:00     ` Isaku Yamahata
  2007-08-13 14:06     ` Keir Fraser
  1 sibling, 0 replies; 10+ messages in thread
From: Isaku Yamahata @ 2007-08-13 14:00 UTC (permalink / raw)
  To: Christoph Egger; +Cc: Andre Przywara, xen-devel, Keir Fraser

On Mon, Aug 13, 2007 at 02:59:31PM +0200, Christoph Egger wrote:
> > In this case we  could steal bits from address_bits field and create a pair
> > of 16-bit fields from it. Also, a physical cpu id is not a great fit for
> > this hypercall --  it is meaningless to most guests who do not see the
> > physical cpu map. 
> > Better to pass a vcpu_id and let Xen work out the most appropriate physical
> > cpu id based on the vcpu's affinity. Or have a concept of per-guest
> > 'virtual node identifiers' and pass a 'uint16_t vnodeid'. The latter might
> > actually be a nice abstraction -- it'd be good to know other people's
> > thoughts on this?
> 
> Making struct xen_machphys_mapping NUMA-aware is also a no-go, right?
> It would additionally need a min_mfn and a vnodeid member.
> 
> Oh, and how should the guest query how many vnode's exist?

Domain save/restore/dump-core also want to know that information.
Probably one approach is to introduce hypercalls or to store it
in xenstore. Another approach would be to introduce magic pages like
start_info, and embed it as reserved pages.
-- 
yamahata

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 12:59   ` Christoph Egger
  2007-08-13 14:00     ` Isaku Yamahata
@ 2007-08-13 14:06     ` Keir Fraser
  2007-08-13 20:49       ` Ryan Harper
  2007-08-15 10:13       ` Andre Przywara
  1 sibling, 2 replies; 10+ messages in thread
From: Keir Fraser @ 2007-08-13 14:06 UTC (permalink / raw)
  To: Christoph Egger, xen-devel; +Cc: Andre Przywara

On 13/8/07 13:59, "Christoph Egger" <Christoph.Egger@amd.com> wrote:

>> We cannot change the size of existing hypercall structures.
> 
> Except Xen bumps major version number to 4 ? :-)
> 
> You are worrying about PV guests that lag behind with syncing
> pulic headers such as NetBSD/Xen ?

It's not merely an API issue, it's an ABI compatibility issue. Existing
guests will provide structures that are too small (and thus have trailing
garbage, or potentially even cross over into an unmapped page causing
copy_from_guest() to fail). Also this particular structure is included
inside others (like struct xen_memory_exchange) and will change all the
field offsets. Not good.

> Making struct xen_machphys_mapping NUMA-aware is also a no-go, right?
> It would additionally need a min_mfn and a vnodeid member.

Actually I think it can stay as is. Guests are supposed to be robust against
unmapped holes in the m2p table. So we can continue to have one big virtual
address range covering all valid MFNs. This is only going to fail if virtual
address space is scarce compared with machine address space (e.g., we kind
of run up against this in a mild way with x86 PAE).

> Oh, and how should the guest query how many vnode's exist?

I think we should add topology discovery hypercalls. Xen needs to know this
stuff anyway, so we just provide a mechanism for guests to extract it. An
alternative is to start exporting virtual ACPI tables to PV guests.

 -- Keir

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 14:06     ` Keir Fraser
@ 2007-08-13 20:49       ` Ryan Harper
  2007-08-15 10:12         ` Andre Przywara
  2007-08-15 10:13       ` Andre Przywara
  1 sibling, 1 reply; 10+ messages in thread
From: Ryan Harper @ 2007-08-13 20:49 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Andre Przywara, Christoph Egger, xen-devel

* Keir Fraser <keir@xensource.com> [2007-08-13 09:08]:
> On 13/8/07 13:59, "Christoph Egger" <Christoph.Egger@amd.com> wrote:
> 
> >> We cannot change the size of existing hypercall structures.
> > 
> > Except Xen bumps major version number to 4 ? :-)
> > 
> > You are worrying about PV guests that lag behind with syncing
> > pulic headers such as NetBSD/Xen ?
> 
> It's not merely an API issue, it's an ABI compatibility issue. Existing
> guests will provide structures that are too small (and thus have trailing
> garbage, or potentially even cross over into an unmapped page causing
> copy_from_guest() to fail). Also this particular structure is included
> inside others (like struct xen_memory_exchange) and will change all the
> field offsets. Not good.
> 
> > Making struct xen_machphys_mapping NUMA-aware is also a no-go, right?
> > It would additionally need a min_mfn and a vnodeid member.
> 
> Actually I think it can stay as is. Guests are supposed to be robust against
> unmapped holes in the m2p table. So we can continue to have one big virtual
> address range covering all valid MFNs. This is only going to fail if virtual
> address space is scarce compared with machine address space (e.g., we kind
> of run up against this in a mild way with x86 PAE).
> 
> > Oh, and how should the guest query how many vnode's exist?
> 
> I think we should add topology discovery hypercalls. Xen needs to know this
> stuff anyway, so we just provide a mechanism for guests to extract it. An
> alternative is to start exporting virtual ACPI tables to PV guests.

One concern has been the static nature of the ACPI SRAT data versus the
dynamic ability of the vcpu to cpu mapping.  If the scheduler is
migrating the guest vcpu to various cpus, then the SRAT information is
likely to be incorrect.

That said, if one creates a vnode, and it sufficiently restricts the
vcpu affinity, then accurate SRAT information can be exported for the
guest to utilize.


-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 20:49       ` Ryan Harper
@ 2007-08-15 10:12         ` Andre Przywara
  2007-08-15 11:18           ` Andi Kleen
  0 siblings, 1 reply; 10+ messages in thread
From: Andre Przywara @ 2007-08-15 10:12 UTC (permalink / raw)
  To: Ryan Harper; +Cc: Christoph Egger, xen-devel, Keir Fraser

Ryan Harper wrote:
> One concern has been the static nature of the ACPI SRAT data versus the
> dynamic ability of the vcpu to cpu mapping.  If the scheduler is
> migrating the guest vcpu to various cpus, then the SRAT information is
> likely to be incorrect.
I think this is a problem even for the native OSes when you think of 
CPU- and/or memory-hotplugging. Although Linux can do CPU hotplugging, 
AFAIK NUMA isn't currently considered in this process. I think the most 
feasible approach would be to rebuild all affected structures when the 
hotplug event occurs. This will probably be considered quite rare and thus 
could be potentially more costly, so I think this is not something you want to 
do every time Xen decides to reschedule a VCPU. So IMHO pinning VCPUs to 
a certain node (actually all cores within this node) is OK for now.
> 
> That said, if one creates a vnode, and it sufficiently restricts the
> vcpu affinity, then accurate SRAT information can be exported for the
> guest to utilize.
My patch does this automatically. CPU affinity information from the 
config file is ignored and each VCPU's affinity is set to match the NUMA 
topology.

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG
Sitz (Geschäftsanschrift): Wilschdorfer Landstr. 101, 01109 Dresden, 
Deutschland
Registergericht Dresden: HRA 4896
vertretungsberechtigter Komplementär: AMD Saxony LLC (Sitz Wilmington, 
Delaware, USA)
Geschäftsführer der AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-13 14:06     ` Keir Fraser
  2007-08-13 20:49       ` Ryan Harper
@ 2007-08-15 10:13       ` Andre Przywara
  2007-08-15 10:43         ` Keir Fraser
  1 sibling, 1 reply; 10+ messages in thread
From: Andre Przywara @ 2007-08-15 10:13 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> I think we should add topology discovery hypercalls. Xen needs to know this
> stuff anyway, so we just provide a mechanism for guests to extract it. An
> alternative is to start exporting virtual ACPI tables to PV guests.
I will look at this next. The HVM approach seemed to be easier from my 
POV, but this NUMA propagation is also beneficial for PV guests. Maybe 
one should solve the whole NUMA-ballooning issue while looking at this.

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG
Sitz (Geschäftsanschrift): Wilschdorfer Landstr. 101, 01109 Dresden, 
Deutschland
Registergericht Dresden: HRA 4896
vertretungsberechtigter Komplementär: AMD Saxony LLC (Sitz Wilmington, 
Delaware, USA)
Geschäftsführer der AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-15 10:13       ` Andre Przywara
@ 2007-08-15 10:43         ` Keir Fraser
  0 siblings, 0 replies; 10+ messages in thread
From: Keir Fraser @ 2007-08-15 10:43 UTC (permalink / raw)
  To: Andre Przywara; +Cc: xen-devel




On 15/8/07 11:13, "Andre Przywara" <andre.przywara@amd.com> wrote:

>> I think we should add topology discovery hypercalls. Xen needs to know this
>> stuff anyway, so we just provide a mechanism for guests to extract it. An
>> alternative is to start exporting virtual ACPI tables to PV guests.
> I will look at this next. The HVM approach seemed to be easier from my
> POV, but this NUMA propagation is also benefical for PV guests. Maybe
> one should solve the whole NUMA-ballooning issue while looking at this.

Topology discovery hypercalls are the way to go imo. And to fix ballooning
it's just going to have to become numa-aware, by hooking into whatever numa
apis Linux provides internally.

 -- Keir

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call
  2007-08-15 10:12         ` Andre Przywara
@ 2007-08-15 11:18           ` Andi Kleen
  0 siblings, 0 replies; 10+ messages in thread
From: Andi Kleen @ 2007-08-15 11:18 UTC (permalink / raw)
  To: Andre Przywara; +Cc: Christoph Egger, Ryan Harper, Keir Fraser, xen-devel

"Andre Przywara" <andre.przywara@amd.com> writes:

> Ryan Harper wrote:
> > One concern has been the static nature of the ACPI SRAT data versus the
> > dynamic ability of the vcpu to cpu mapping.  If the scheduler is
> > migrating the guest vcpu to various cpus, then the SRAT information is
> > likely to be incorrect.
> I think this is a problem even for the native OSes when you think of
> CPU- and/or memory-hotplugging. Although Linux can do CPU hotplugging,
> AFAIK NUMA isn't currently considered in this process. 

IA64 (and I think PPC) Linux support node hotplug. Node hot unplug
is currently missing because the memory hotunplug support is not finished
yet. There is no interface to notify NUMA aware user space of topology
changes though.

x86 Linux currently doesn't but will assign new CPUs to existing
nodes as reported in SRAT.

> I think the
> most feasible approach would be to rebuild all affected structures
> when the hotplug event occurs. This will probably considered quite
> rare and thus could be potentially more costly, so I this is not
> something you want to do every time Xen decides to reschedule a
> VCPU. 

In the current Linux implementation just report all nodes at boot up
(even if they have little or no memory) and then you can add/remove CPUs to 
them as needed. 

When you migrate to another box with more nodes that likely won't work,
but that could be probably made configurable.

-Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-08-15 11:18 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-13 10:02 [PATCH 2/4] [HVM] introduce CPU affinity for allocate_physmap call Andre Przywara
2007-08-13 10:30 ` Keir Fraser
2007-08-13 12:59   ` Christoph Egger
2007-08-13 14:00     ` Isaku Yamahata
2007-08-13 14:06     ` Keir Fraser
2007-08-13 20:49       ` Ryan Harper
2007-08-15 10:12         ` Andre Przywara
2007-08-15 11:18           ` Andi Kleen
2007-08-15 10:13       ` Andre Przywara
2007-08-15 10:43         ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.