All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
@ 2008-07-04  7:57 Andre Przywara
  2008-07-04  9:52 ` Keir Fraser
  0 siblings, 1 reply; 9+ messages in thread
From: Andre Przywara @ 2008-07-04  7:57 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 881 bytes --]

This patch extends the memops hypercall in a compatible way to transport
a desired NUMA node number. The address_bits field will be limited to 8
bits and is now embedded in the mem_flags member, which additionally
contains the node number (limited to 8 bits). The node is stored with an
offset of one (see XENMEM_set_node), so leaving the node field at '0'
(currently the default) will revert to automatic node selection
(based on the currently scheduled node).

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: 01_numa_guest.patch --]
[-- Type: text/plain, Size: 3535 bytes --]

# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1215082871 -7200
# Node ID e308bd4e9179493e3897143bf6e5841c14b4f357
# Parent  20215b87d0f3587ed5e928c31b1df2596b15ae79
made memops hypercall NUMA capable

diff -r 20215b87d0f3 -r e308bd4e9179 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Thu Jul 03 10:44:13 2008 +0100
+++ b/tools/libxc/xc_domain.c	Thu Jul 03 13:01:11 2008 +0200
@@ -445,7 +445,7 @@
     struct xen_memory_reservation reservation = {
         .nr_extents   = nr_extents,
         .extent_order = extent_order,
-        .address_bits = address_bits,
+        .mem_flags    = XENMEM_addr_bits(address_bits),
         .domid        = domid
     };
 
@@ -478,7 +478,7 @@
     struct xen_memory_reservation reservation = {
         .nr_extents   = nr_extents,
         .extent_order = extent_order,
-        .address_bits = 0,
+        .mem_flags    = 0,
         .domid        = domid
     };
 
@@ -517,7 +517,7 @@
     struct xen_memory_reservation reservation = {
         .nr_extents   = nr_extents,
         .extent_order = extent_order,
-        .address_bits = address_bits,
+        .mem_flags    = XENMEM_addr_bits(address_bits),
         .domid        = domid
     };
     set_xen_guest_handle(reservation.extent_start, extent_start);
diff -r 20215b87d0f3 -r e308bd4e9179 xen/common/memory.c
--- a/xen/common/memory.c	Thu Jul 03 10:44:13 2008 +0100
+++ b/xen/common/memory.c	Thu Jul 03 13:01:11 2008 +0200
@@ -344,8 +344,8 @@
     }
     d = current->domain;
 
-    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
-        d, exch.out.address_bits ? : (BITS_PER_LONG+PAGE_SHIFT)));
+    memflags |= MEMF_bits(domain_clamp_alloc_bitsize( d,
+       XENMEM_addr_bits(exch.out.mem_flags) ? : (BITS_PER_LONG+PAGE_SHIFT)));
     memflags |= MEMF_node(domain_to_node(d));
 
     for ( i = (exch.nr_exchanged >> in_chunk_order);
@@ -521,14 +521,16 @@
         args.preempted    = 0;
         args.memflags     = 0;
 
-        if ( (reservation.address_bits != 0) &&
-             (reservation.address_bits <
+        if ( (XENMEM_addr_bits(reservation.mem_flags) != 0) &&
+             (XENMEM_addr_bits(reservation.mem_flags) <
               (get_order_from_pages(max_page) + PAGE_SHIFT)) )
         {
-            if ( reservation.address_bits <= PAGE_SHIFT )
+            if ( XENMEM_addr_bits(reservation.mem_flags) <= PAGE_SHIFT )
                 return start_extent;
-            args.memflags = MEMF_bits(reservation.address_bits);
+            args.memflags = MEMF_bits(XENMEM_addr_bits(reservation.mem_flags));
         }
+
+        args.memflags |= MEMF_node(XENMEM_get_node(reservation.mem_flags));
 
         if ( likely(reservation.domid == DOMID_SELF) )
         {
diff -r 20215b87d0f3 -r e308bd4e9179 xen/include/public/memory.h
--- a/xen/include/public/memory.h	Thu Jul 03 10:44:13 2008 +0100
+++ b/xen/include/public/memory.h	Thu Jul 03 13:01:11 2008 +0200
@@ -35,6 +35,11 @@
 #define XENMEM_increase_reservation 0
 #define XENMEM_decrease_reservation 1
 #define XENMEM_populate_physmap     6
+
+#define XENMEM_addr_bits(f) ((f)&0xFF)
+#define XENMEM_get_node(f) (((((f)&0xFF00)>>8)-1)&0xFF)
+#define XENMEM_set_node(n) (((((n)&0xFF)+1)&0xFF)<<8)
+
 struct xen_memory_reservation {
 
     /*
@@ -59,7 +64,7 @@
      * zero then the user has no addressing restriction.
      * This field is not used by XENMEM_decrease_reservation.
      */
-    unsigned int   address_bits;
+    unsigned int   mem_flags;
 
     /*
      * Domain whose reservation is being changed.

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04  7:57 [PATCH 1/4] hvm: NUMA guest: extend memops hypercall Andre Przywara
@ 2008-07-04  9:52 ` Keir Fraser
  2008-07-04 11:14   ` Andre Przywara
  0 siblings, 1 reply; 9+ messages in thread
From: Keir Fraser @ 2008-07-04  9:52 UTC (permalink / raw)
  To: Andre Przywara, xen-devel

On 4/7/08 08:57, "Andre Przywara" <andre.przywara@amd.com> wrote:

> This patch extends the memops hypercall in a compatible way to transport
> a desired NUMA node number. The address_bits field will be limited to 8
> bits and is now embedded in the mem_flags member, which additionally
> contains the node number (limited to 8 bit). Passing a node number of
> '0' (currently the default) will revert to automatic node selection
> (based on currently scheduled node).
> 
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>

Should unprivileged domUs be allowed to specify the NUMA node they allocate
from, regardless of whether they even run there? Seems like a breakage of
guest isolation to me.

 -- Keir

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04  9:52 ` Keir Fraser
@ 2008-07-04 11:14   ` Andre Przywara
  2008-07-04 11:59     ` Keir Fraser
  0 siblings, 1 reply; 9+ messages in thread
From: Andre Przywara @ 2008-07-04 11:14 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1286 bytes --]

Keir Fraser wrote:
> On 4/7/08 08:57, "Andre Przywara" <andre.przywara@amd.com> wrote:
> 
>> This patch extends the memops hypercall in a compatible way to transport
>> a desired NUMA node number. The address_bits field will be limited to 8
>> bits and is now embedded in the mem_flags member, which additionally
>> contains the node number (limited to 8 bit). Passing a node number of
>> '0' (currently the default) will revert to automatic node selection
>> (based on currently scheduled node).
> 
> Should unprivileged domUs be allowed to specify the NUMA node they allocate
> from, regardless of whether they even run there? Seems like a breakage of
> guest isolation to me.
Good catch (I code in HVM land most of the time), I hope this small 
(attached) patch fixes this.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: 01a_numa_guest.patch --]
[-- Type: text/plain, Size: 535 bytes --]

diff -r aa69281c1ecf xen/common/memory.c
--- a/xen/common/memory.c	Thu Jul 03 13:20:35 2008 +0200
+++ b/xen/common/memory.c	Fri Jul 04 13:13:12 2008 +0200
@@ -528,7 +528,8 @@
             args.memflags = MEMF_bits(XENMEM_addr_bits(reservation.mem_flags));
         }
 
-        args.memflags |= MEMF_node(XENMEM_get_node(reservation.mem_flags));
+        if (IS_PRIV(current->domain))
+            args.memflags |= MEMF_node(XENMEM_get_node(reservation.mem_flags));
 
         if ( likely(reservation.domid == DOMID_SELF) )
         {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 11:14   ` Andre Przywara
@ 2008-07-04 11:59     ` Keir Fraser
  2008-07-04 12:48       ` Andre Przywara
  0 siblings, 1 reply; 9+ messages in thread
From: Keir Fraser @ 2008-07-04 11:59 UTC (permalink / raw)
  To: Andre Przywara; +Cc: xen-devel

On 4/7/08 12:14, "Andre Przywara" <andre.przywara@amd.com> wrote:

>> Should unprivileged domUs be allowed to specify the NUMA node they allocate
>> from, regardless of whether they even run there? Seems like a breakage of
>> guest isolation to me.
> Good catch (I code in HVM land most of the time), I hope this small
> (attached) patch fixes this.

Looking some more, I still don't see that this patch can work. Don't the
subfunctions in memory.c go and OR in MEMF_node() values on top of what the
caller may have specified??

 -- Keir

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 11:59     ` Keir Fraser
@ 2008-07-04 12:48       ` Andre Przywara
  2008-07-04 14:54         ` Keir Fraser
  0 siblings, 1 reply; 9+ messages in thread
From: Andre Przywara @ 2008-07-04 12:48 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 4/7/08 12:14, "Andre Przywara" <andre.przywara@amd.com> wrote:
> 
>>> Should unprivileged domUs be allowed to specify the NUMA node they allocate
>>> from, regardless of whether they even run there? Seems like a breakage of
>>> guest isolation to me.
>> Good catch (I code in HVM land most of the time), I hope this small
>> (attached) patch fixes this.
> 
> Looking some more, I still don't see that this patch can work. Don't the
> subfunctions in memory.c go and OR in MEMF_node() values on top of what the
> caller may have specified??
Maybe I don't get your question right, but the only place where the
caller-specified node number is used is the line I handled in the last
patch. Later on, the subfunctions only use the memflags member of struct
memop_args, not struct xen_memory_reservation. If the node number is not
specified (or is blocked), it will later be determined by looking at the
currently scheduled pCPU (and thus node), but this is the current behavior anyway.

Regards,
Andre.

-- 
Andre Przywara
AMD-OSRC (Dresden)
Tel: x84917

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 12:48       ` Andre Przywara
@ 2008-07-04 14:54         ` Keir Fraser
  2008-07-04 15:28           ` Andre Przywara
  0 siblings, 1 reply; 9+ messages in thread
From: Keir Fraser @ 2008-07-04 14:54 UTC (permalink / raw)
  To: Andre Przywara; +Cc: xen-devel

On 4/7/08 13:48, "Andre Przywara" <andre.przywara@amd.com> wrote:

>> Looking some more, I still don't see that this patch can work. Don't the
>> subfunctions in memory.c go and OR in MEMF_node() values on top of what the
>> caller may have specified??
> Maybe I don't get your question right, but the only part where the
> caller specified node number is used is the line I handled in the last
> patch. Later they only use the member memflags of struct memop_args, not
> struct xen_memory_reservation. If the node number is not specified (or
> blocked), it will be later determined by looking at the current
> scheduled pCPU (and thus node), but this is the current behavior anyway.

Take common/memory.c:populate_physmap() as a specific example. It
unconditionally specifies MEMF_node() in its invocation of
alloc_domheap_pages(), regardless of whether its caller has already
specified a node in the memop_args structure that is passed into it.

 -- Keir

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 14:54         ` Keir Fraser
@ 2008-07-04 15:28           ` Andre Przywara
  2008-07-04 15:34             ` Keir Fraser
  0 siblings, 1 reply; 9+ messages in thread
From: Andre Przywara @ 2008-07-04 15:28 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

Keir Fraser wrote:
> On 4/7/08 13:48, "Andre Przywara" <andre.przywara@amd.com> wrote:
> 
>>> Looking some more, I still don't see that this patch can work. Don't the
>>> subfunctions in memory.c go and OR in MEMF_node() values on top of what the
>>> caller may have specified??
>> Maybe I don't get your question right, but the only part where the
>> caller specified node number is used is the line I handled in the last
>> patch. Later they only use the member memflags of struct memop_args, not
>> struct xen_memory_reservation. If the node number is not specified (or
>> blocked), it will be later determined by looking at the current
>> scheduled pCPU (and thus node), but this is the current behavior anyway.
> 
> Take common/memory.c:populate_physmap() as a specific example. It
> unconditionally specifies MEMF_node() in its invocation of
> alloc_domheap_pages(), regardless of whether its caller has already
> specified a node in the memop_args structure that is passed into it.
Have you applied the patches correctly? From 02_numa_guest.patch:
@@ -115,7 +113,7 @@
              goto out;

          page = alloc_domheap_pages(
-            d, a->extent_order, a->memflags | MEMF_node(node));
+            d, a->extent_order, a->memflags);
          if ( unlikely(page == NULL) )
          {
              gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"

The other use of MEMF_node is in exchange_memory, which is not given any 
NUMA node info from the caller, so this is correct.

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 15:28           ` Andre Przywara
@ 2008-07-04 15:34             ` Keir Fraser
  2008-07-04 23:12               ` Andre Przywara
  0 siblings, 1 reply; 9+ messages in thread
From: Keir Fraser @ 2008-07-04 15:34 UTC (permalink / raw)
  To: Andre Przywara; +Cc: xen-devel

On 4/7/08 16:28, "Andre Przywara" <andre.przywara@amd.com> wrote:

> Have you applied the patches correctly? From 02_numa_guest.patch:
> @@ -115,7 +113,7 @@
>               goto out;
> 
>           page = alloc_domheap_pages(
> -            d, a->extent_order, a->memflags | MEMF_node(node));
> +            d, a->extent_order, a->memflags);
>           if ( unlikely(page == NULL) )
>           {
>               gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
> 
> The other use of MEMF_node is in exchange_memory, which is not given any
> NUMA node info from the caller, so this is correct.

When sent a patch sequence I expect the patches to apply and work
independently (when applied one-by-one in order).

Anyway, your second patch changes the default NUMA allocation policy from
"allocate on the home node of the domain" to "allocate on the node I'm
currently executing on". That would seem a net loss for PV guests (whose
builder will not be explicitly specifying the NUMA node for allocations).

 -- Keir

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] hvm: NUMA guest: extend memops hypercall
  2008-07-04 15:34             ` Keir Fraser
@ 2008-07-04 23:12               ` Andre Przywara
  0 siblings, 0 replies; 9+ messages in thread
From: Andre Przywara @ 2008-07-04 23:12 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1907 bytes --]

Keir Fraser wrote:
> On 4/7/08 16:28, "Andre Przywara" <andre.przywara@amd.com> wrote:
> 
>> Have you applied the patches correctly? From 02_numa_guest.patch:
>> @@ -115,7 +113,7 @@
>>               goto out;
>>
>>           page = alloc_domheap_pages(
>> -            d, a->extent_order, a->memflags | MEMF_node(node));
>> +            d, a->extent_order, a->memflags);
>>           if ( unlikely(page == NULL) )
>>           {
>>               gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
>>
>> The other use of MEMF_node is in exchange_memory, which is not given any
>> NUMA node info from the caller, so this is correct.
> 
> When sent a patch sequence I expect the patches to apply and work
> independently (when applied one-by-one in order).
IMHO that is what they do (besides the issue below, patches 1 and 2 are
more or less refactoring without functional changes), but anyway...

> Anyway, your second patch changes the default NUMA allocation policy from
> allocate on home node for the domain to allocate on node I'm currently
> executing on. That would seem a net loss for PV guests (whose builder will
> not be explicitly specifying the numa node for allocations).
Right you are, I missed the subtle difference between the two (the
code isn't as clear as your sentence). The patch below should fix this
(by catching NUMA_NO_NODE while struct domain * is still available).
If there are no further issues, I will resend the patches.

Regards,
Andre.

-- 
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: 02a_numa_guest.patch --]
[-- Type: text/plain, Size: 455 bytes --]

diff -r a3d712538f1b xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Fri Jul 04 15:57:01 2008 +0200
+++ b/xen/common/page_alloc.c	Sat Jul 05 01:09:48 2008 +0200
@@ -792,6 +792,8 @@ struct page_info *alloc_domheap_pages(
 
     ASSERT(!in_irq());
 
+    if ( node == NUMA_NO_NODE ) node = domain_to_node (d);
+
     bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
     if ( bits <= (PAGE_SHIFT + 1) )
         return NULL;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-07-04 23:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-04  7:57 [PATCH 1/4] hvm: NUMA guest: extend memops hypercall Andre Przywara
2008-07-04  9:52 ` Keir Fraser
2008-07-04 11:14   ` Andre Przywara
2008-07-04 11:59     ` Keir Fraser
2008-07-04 12:48       ` Andre Przywara
2008-07-04 14:54         ` Keir Fraser
2008-07-04 15:28           ` Andre Przywara
2008-07-04 15:34             ` Keir Fraser
2008-07-04 23:12               ` Andre Przywara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.