All of lore.kernel.org
 help / color / mirror / Atom feed
* comment request: dom0 dma on large memory systems
@ 2005-06-03  7:35 Scott Parish
  2005-06-03  8:58 ` Keir Fraser
  0 siblings, 1 reply; 7+ messages in thread
From: Scott Parish @ 2005-06-03  7:35 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1404 bytes --]

purpose:

The patches I'm working on are taking more work than I was expecting; this
email's purpose is to get confirmation that the solution I'm working on
is the preferred one.


problem:

On x86_64 with 6gig ram, dom0's initial allocation is from memory
above the pci hole (referred to as "high memory" in this email) if
dom0_mem is set to 2g or higher. The only problem is that most io/dma
devices (non-dac) can only dma to the first 32bits worth of machine
addresses--thus for some configurations, dom0 has no memory which is
dma-able.

In my experience, this shows up as dom0 being unable to find any
partitions on scan and panicking because it can't mount root.


proposed solution to xen:

The attached patch is a (non-functional) prototype of my proposed
solution for Xen. The idea is to add a third MEMZONE for
DMA-able memory and change the alloc_domheap_pages() interface so the
caller can request only DMA memory if needed. Finally, internal to
common/page_alloc.c, split memory between MEMZONE_DOM and MEMZONE_DMADOM
based on the address.


proposed solution to dom0:

Dom0 might have memory allocated to it from both high and low memory.
The easiest solution would probably be to scan for and preallocate a
chunk of memory that will work for dma, something like what pci-gart.c
does. (or work on letting pci-gart.c work under xen)


sRp

-- 
Scott Parish
Signed-off-by: srparish@us.ibm.com

[-- Attachment #2: memzone-dmadom.diff --]
[-- Type: text/plain, Size: 9141 bytes --]

diff -rN -u -p old-xen-64-4/xen/arch/x86/domain_build.c new-xen-64-4/xen/arch/x86/domain_build.c
--- old-xen-64-4/xen/arch/x86/domain_build.c	2005-06-01 20:04:19.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/domain_build.c	2005-06-03 04:54:47.000000000 +0000
@@ -57,13 +57,14 @@ boolean_param("dom0_translate", opt_dom0
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 
-static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+static struct pfn_info *alloc_largest(struct domain *d, unsigned long max,
+                                      unsigned long flags)
 {
     struct pfn_info *page;
     unsigned int order = get_order(max * PAGE_SIZE);
     if ( (max & (max-1)) != 0 )
         order--;
-    while ( (page = alloc_domheap_pages(d, order)) == NULL )
+    while ( (page = alloc_domheap_pages(d, order, flags)) == NULL )
         if ( order-- == 0 )
             break;
     return page;
@@ -143,7 +144,7 @@ int construct_dom0(struct domain *d,
         nr_pages = avail_domheap_pages() +
             ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
             ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
-    if ( (page = alloc_largest(d, nr_pages)) == NULL )
+    if ( (page = alloc_largest(d, nr_pages, ALLOC_DMADOM_ONLY)) == NULL )
         panic("Not enough RAM for DOM0 reservation.\n");
     alloc_start = page_to_phys(page);
     alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
@@ -554,7 +555,8 @@ int construct_dom0(struct domain *d,
     }
     while ( pfn < nr_pages )
     {
-        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+        if ( (page = alloc_largest(d, nr_pages - d->tot_pages,
+                                   ALLOC_DOM_ANY)) == NULL )
             panic("Not enough RAM for DOM0 reservation.\n");
         while ( pfn < d->tot_pages )
         {
diff -rN -u -p old-xen-64-4/xen/arch/x86/x86_32/mm.c new-xen-64-4/xen/arch/x86/x86_32/mm.c
--- old-xen-64-4/xen/arch/x86/x86_32/mm.c	2005-06-02 18:10:30.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/x86_32/mm.c	2005-06-03 03:36:49.000000000 +0000
@@ -82,7 +82,8 @@ void __init paging_init(void)
         mpt_size = 4*1024*1024;
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
-        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+              						   ALLOC_DOM_ANY) == NULL )
             panic("Not enough memory to bootstrap Xen.\n");
         idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] =
             l2e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_PSE);
diff -rN -u -p old-xen-64-4/xen/arch/x86/x86_64/mm.c new-xen-64-4/xen/arch/x86/x86_64/mm.c
--- old-xen-64-4/xen/arch/x86/x86_64/mm.c	2005-06-01 20:04:19.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/x86_64/mm.c	2005-06-03 03:36:51.000000000 +0000
@@ -99,7 +99,7 @@ void __init paging_init(void)
      */
     for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
     {
-        pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER);
+        pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, ALLOC_DOM_ANY);
         if ( pg == NULL )
             panic("Not enough memory for m2p table\n");
         map_pages_to_xen(
diff -rN -u -p old-xen-64-4/xen/common/dom_mem_ops.c new-xen-64-4/xen/common/dom_mem_ops.c
--- old-xen-64-4/xen/common/dom_mem_ops.c	2005-05-28 08:49:39.000000000 +0000
+++ new-xen-64-4/xen/common/dom_mem_ops.c	2005-06-03 03:36:53.000000000 +0000
@@ -55,7 +55,8 @@ alloc_dom_mem(struct domain *d, 
     {
         PREEMPT_CHECK(MEMOP_increase_reservation);
 
-        if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) )
+        if ( unlikely((page = alloc_domheap_pages(d, extent_order,
+                                                  ALLOC_DOM_ANY)) == NULL) )
         {
             DPRINTK("Could not allocate a frame\n");
             return i;
diff -rN -u -p old-xen-64-4/xen/common/page_alloc.c new-xen-64-4/xen/common/page_alloc.c
--- old-xen-64-4/xen/common/page_alloc.c	2005-05-25 18:41:08.000000000 +0000
+++ new-xen-64-4/xen/common/page_alloc.c	2005-06-03 05:39:17.000000000 +0000
@@ -210,11 +210,17 @@ unsigned long alloc_boot_pages(unsigned 
  */
 
 #define MEMZONE_XEN 0
 #define MEMZONE_DOM 1
-#define NR_ZONES    2
+#define MEMZONE_DMADOM 2
+#define NR_ZONES    3
+
+
+#define MAX_DMADOM_PFN 0xFFFFF
+#define pfn_dom_zone_type(_pfn)                                 \
+    (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
 
 /* Up to 2^20 pages can be allocated at once. */
 #define MAX_ORDER 20
 static struct list_head heap[NR_ZONES][MAX_ORDER+1];
 
 static unsigned long avail[NR_ZONES];
@@ -239,8 +248,8 @@ void end_boot_allocator(void)
         next_free = !allocated_in_map(i+1);
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
-            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
+            free_heap_pages(pfn_dom_zone_type(i), pfn_to_page(i), 0);
     }
 }
 
@@ -476,14 +486,21 @@ void init_domheap_pages(unsigned long ps
 {
     ASSERT(!in_irq());
 
-    ps = round_pgup(ps);
-    pe = round_pgdown(pe);
+    ps = round_pgup(ps) >> PAGE_SHIFT;
+    pe = round_pgdown(pe) >> PAGE_SHIFT;
 
-    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
+    if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
+        init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
+        init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
+                        pe - MAX_DMADOM_PFN);
+    }
+    else
+        init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
 }
 
 
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
+struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
+                                     unsigned int flags)
 {
     struct pfn_info *pg;
     unsigned long mask = 0;
@@ -491,8 +508,12 @@ struct pfn_info *alloc_domheap_pages(str
 
     ASSERT(!in_irq());
 
-    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
-        return NULL;
+    pg = NULL;
+    if (! (flags & ALLOC_DMADOM_ONLY))
+        pg = alloc_heap_pages(MEMZONE_DOM, order);
+    if (pg == NULL)
+        if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) )
+            return NULL;
 
     for ( i = 0; i < (1 << order); i++ )
     {
@@ -523,7 +544,7 @@ struct pfn_info *alloc_domheap_pages(str
         DPRINTK("...or the domain is dying (%d)\n", 
                 !!test_bit(_DOMF_dying, &d->domain_flags));
         spin_unlock(&d->page_alloc_lock);
-        free_heap_pages(MEMZONE_DOM, pg, order);
+        free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         return NULL;
     }
 
@@ -588,7 +609,7 @@ void free_domheap_pages(struct pfn_info 
 
         if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
         {
-            free_heap_pages(MEMZONE_DOM, pg, order);
+            free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         }
         else
         {
@@ -608,7 +629,7 @@ void free_domheap_pages(struct pfn_info 
     else
     {
         /* Freeing an anonymous domain-heap page. */
-        free_heap_pages(MEMZONE_DOM, pg, order);
+        free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         drop_dom_ref = 0;
     }
 
@@ -619,7 +640,7 @@ void free_domheap_pages(struct pfn_info 
 
 unsigned long avail_domheap_pages(void)
 {
-    return avail[MEMZONE_DOM];
+    return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM];
 }
 
 
@@ -668,7 +689,7 @@ static void page_scrub_softirq(void)
             p = map_domain_mem(page_to_phys(pg));
             clear_page(p);
             unmap_domain_mem(p);
-            free_heap_pages(MEMZONE_DOM, pg, 0);
+            free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, 0);
         }
     } while ( (NOW() - start) < MILLISECS(1) );
 }
diff -rN -u -p old-xen-64-4/xen/include/xen/mm.h new-xen-64-4/xen/include/xen/mm.h
--- old-xen-64-4/xen/include/xen/mm.h	2005-06-01 21:43:01.000000000 +0000
+++ new-xen-64-4/xen/include/xen/mm.h	2005-06-03 03:36:28.000000000 +0000
@@ -32,12 +32,16 @@ void free_xenheap_pages(unsigned long p,
 
 /* Domain suballocator. These functions are *not* interrupt-safe.*/
 void init_domheap_pages(unsigned long ps, unsigned long pe);
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order);
+struct pfn_info *alloc_domheap_pages(
+    struct domain *d, unsigned int order, unsigned int flags);
 void free_domheap_pages(struct pfn_info *pg, unsigned int order);
 unsigned long avail_domheap_pages(void);
-#define alloc_domheap_page(_d) (alloc_domheap_pages(_d,0))
+#define alloc_domheap_page(_d) (alloc_domheap_pages(_d,0,ALLOC_DOM_ANY))
 #define free_domheap_page(_p) (free_domheap_pages(_p,0))
 
+#define ALLOC_DMADOM_ONLY 1
+#define ALLOC_DOM_ANY 0
+
 /* Automatic page scrubbing for dead domains. */
 extern struct list_head page_scrub_list;
 #define page_scrub_schedule_work()              \


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: comment request: dom0 dma on large memory systems
  2005-06-03  7:35 Scott Parish
@ 2005-06-03  8:58 ` Keir Fraser
  0 siblings, 0 replies; 7+ messages in thread
From: Keir Fraser @ 2005-06-03  8:58 UTC (permalink / raw)
  To: Scott Parish; +Cc: xen-devel

Yes, something like this is obviously required, and introducing a 
concept of zones seems a sensible way to go. I haven't looked at your 
patch in great detail but it looks along the right lines. :-)

  -- Keir

On 3 Jun 2005, at 08:35, Scott Parish wrote:

> The attached patch is a (non-functional) prototype of my proposed
> solution to the xen. The idea is to add a third MEMZONE for
> dma'able memory, change the alloc_domheap_pages() interface so the
> caller can request only dma memory if needed. Finally, internal to
> common/page_alloc.c, split memory between MEMZONE_DOM and 
> MEMZONE_DMADOM
> based on the address.
>
>
> proposed solution to dom0:
>
> Dom0 might have memory allocated to it from both high and low memory.
> The easiest solution would probably be to scan for and preallocate a
> chunk of memory that will work for dma, something like what pci-gart.c
> does. (or work on letting pci-gart.c work under xen)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: comment request: dom0 dma on large memory systems
@ 2005-06-03  9:01 Ian Pratt
  0 siblings, 0 replies; 7+ messages in thread
From: Ian Pratt @ 2005-06-03  9:01 UTC (permalink / raw)
  To: Keir Fraser, Scott Parish; +Cc: xen-devel

> Yes, something like this is obviously required, and 
> introducing a concept of zones seems a sensible way to go. I 
> haven't looked at your patch in great detail but it looks 
> along the right lines. :-)

While working up the patch it's worth bearing in mind the closely
related NUMA issue: We'll want a separate heap for each node's memory,
and a way of requesting a preferred node in the alloc function, falling
back to a 'close' node if req'd.

It might even be worth adding in a zone for ISA DMA memory, so we could
get some old PCMCIA cards working, though probably not worth it.

Ian

 
>   -- Keir
> 
> On 3 Jun 2005, at 08:35, Scott Parish wrote:
> 
> > The attached patch is a (non-functional) prototype of my proposed 
> > solution to the xen. The idea is to add a third MEMZONE for 
> dma'able 
> > memory, change the alloc_domheap_pages() interface so the 
> caller can 
> > request only dma memory if needed. Finally, internal to 
> > common/page_alloc.c, split memory between MEMZONE_DOM and 
> > MEMZONE_DMADOM based on the address.
> >
> >
> > proposed solution to dom0:
> >
> > Dom0 might have memory allocated to it from both high and 
> low memory.
> > The easiest solution would probably be to scan for and 
> preallocate a 
> > chunk of memory that will work for dma, something like what 
> pci-gart.c 
> > does. (or work on letting pci-gart.c work under xen)
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: comment request: dom0 dma on large memory systems
@ 2005-06-04  3:48 Tian, Kevin
  2005-06-04  3:59 ` Scott Parish
  0 siblings, 1 reply; 7+ messages in thread
From: Tian, Kevin @ 2005-06-04  3:48 UTC (permalink / raw)
  To: Scott Parish, xen-devel

----Original Message-----
>From: xen-devel-bounces@lists.xensource.com
>[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Scott
Parish
>Sent: Friday, June 03, 2005 3:35 PM
>
>On x86_64 with 6gig ram, dom0's initial allocation is from memory
>above the pci hole (referred to as "high memory" in this email) if
>dom0_mem is set to 2g or higher. The only problem is that most io/dma
>devices (non-dac) can only dma to the first 32bits worth of machine
>addresses--thus for some configurations, dom0 has no memory which is
>dma-able.

IIRC, 2 or 3 months ago, Keir said that the default memory allocation for
Dom0 is all available memory, and that CP then has to decrease it via the
balloon interface before creating other domains. If this still holds true,
I'm not sure whether the above problem still exists, since all available
memory, both <4G and >4G, belongs to Dom0 then (Xen itself only
consumes a small chunk). However, after looking at your patch and then
the source, it seems that only the largest available order, which must be
contiguous, is allocated to Dom0 currently. So did I misunderstand this
concept? If it really only means the largest contiguous chunk, then your
patch definitely shoots straight at the real problem on all 64-bit
platforms. ;-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: comment request: dom0 dma on large memory systems
  2005-06-04  3:48 comment request: dom0 dma on large memory systems Tian, Kevin
@ 2005-06-04  3:59 ` Scott Parish
  0 siblings, 0 replies; 7+ messages in thread
From: Scott Parish @ 2005-06-04  3:59 UTC (permalink / raw)
  To: Tian, Kevin; +Cc: xen-devel, Scott Parish

On Sat, Jun 04, 2005 at 11:48:16AM +0800, Tian, Kevin wrote:

> ----Original Message-----
> >From: xen-devel-bounces@lists.xensource.com
> >[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Scott
> Parish
> >Sent: Friday, June 03, 2005 3:35 PM
> >
> >On x86_64 with 6gig ram, dom0's initial allocation is from memory
> >above the pci hole (referred to as "high memory" in this email) if
> >dom0_mem is set to 2g or higher. The only problem is that most io/dma
> >devices (non-dac) can only dma to the first 32bits worth of machine
> >addresses--thus for some configurations, dom0 has no memory which is
> >dma-able.
> 
> IIRC, 2 or 3 months ago, Keir said that default memory allocation for
> Dom0 is all available memory. And then CP has to decrease by balloon
> interface before creating other domains. If this still holds true, I'm
> not sure whether above problem still exists, since all avail memory
> including both <4G and >4G belonging to Dom0 then. (XEN itself only
> consumes a small trunk). However after looking at your patch and then
> the source, it seems that only the max available order, meaning must be
> continuous, is allocated to Dom0 currently. So did I misunderstand this
> concept? If it really only means maximum continuous trunk, then you
> patch definitely shoots straight on the real problem on all 64bit
> platform. ;-)

Right, there are several hacks around this problem; a couple I've
thought of are:

  + enforce dom0 take all memory
  + drop the max order size for MEMZONEs to 18 (in which case
    alloc_largest should always allocate from the lower memory)
  + prealloc X amount of low memory (128M for instance) and add
    it into the dom0 allocation

You nailed it when you mentioned driver domains (next email); the long
term goal is to make sure we're able to support them and hopefully avoid
the hogging of that memory unnecessarily for non-dma uses. Thanks for
noticing ;^)  

(i was also glad Ian brought up numa, i had forgotten about it and this
is probably a good time to think about that while i'm tearing up this
code)

sRp

-- 
Scott Parish
Signed-off-by: srparish@us.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: comment request: dom0 dma on large memory systems
@ 2005-06-04  4:12 Tian, Kevin
  0 siblings, 0 replies; 7+ messages in thread
From: Tian, Kevin @ 2005-06-04  4:12 UTC (permalink / raw)
  To: Tian, Kevin, Scott Parish, xen-devel

Aside from the following question (which is just about the concept), making
Xen DMA-aware is really necessary, especially for a driver domain
N which has direct physical device access. Currently that domain N
invokes the generic increase_reservation interface to get machine-contiguous
pages. However, that generic interface has no awareness of the 4G
limitation of old DMA controllers on 64-bit platforms. So such a patch is
really necessary not only for domain0, but also for all domains which have
a direct relationship with physical devices. The hypercall interface
may also need to change a bit to carry a flag indicating whether DMA-able
memory is required.

Thanks,
Kevin

>-----Original Message-----
>From: xen-devel-bounces@lists.xensource.com
>[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Tian, Kevin
>Sent: Saturday, June 04, 2005 11:48 AM
>To: Scott Parish; xen-devel@lists.xensource.com
>Subject: RE: [Xen-devel] comment request: dom0 dma on large memory
>systems
>
>----Original Message-----
>>From: xen-devel-bounces@lists.xensource.com
>>[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Scott
>Parish
>>Sent: Friday, June 03, 2005 3:35 PM
>>
>>On x86_64 with 6gig ram, dom0's initial allocation is from memory
>>above the pci hole (referred to as "high memory" in this email) if
>>dom0_mem is set to 2g or higher. The only problem is that most io/dma
>>devices (non-dac) can only dma to the first 32bits worth of machine
>>addresses--thus for some configurations, dom0 has no memory which is
>>dma-able.
>
>IIRC, 2 or 3 months ago, Keir said that default memory allocation for
>Dom0 is all available memory. And then CP has to decrease by balloon
>interface before creating other domains. If this still holds true, I'm
>not sure whether above problem still exists, since all avail memory
>including both <4G and >4G belonging to Dom0 then. (XEN itself only
>consumes a small trunk). However after looking at your patch and then
>the source, it seems that only the max available order, meaning must be
>continuous, is allocated to Dom0 currently. So did I misunderstand this
>concept? If it really only means maximum continuous trunk, then you
>patch definitely shoots straight on the real problem on all 64bit
>platform. ;-)
>
>Thanks,
>Kevin
>
>_______________________________________________
>Xen-devel mailing list
>Xen-devel@lists.xensource.com
>http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: comment request: dom0 dma on large memory systems
@ 2005-06-04  5:44 Tian, Kevin
  0 siblings, 0 replies; 7+ messages in thread
From: Tian, Kevin @ 2005-06-04  5:44 UTC (permalink / raw)
  To: Scott Parish; +Cc: xen-devel

>-----Original Message-----
>From: Scott Parish [mailto:srparish@us.ibm.com]
>Sent: Saturday, June 04, 2005 12:00 PM
>To: Tian, Kevin
>
>>
>> IIRC, 2 or 3 months ago, Keir said that default memory allocation for
>> Dom0 is all available memory. And then CP has to decrease by balloon
>> interface before creating other domains. If this still holds true,
I'm
>> not sure whether above problem still exists, since all avail memory
>> including both <4G and >4G belonging to Dom0 then. (XEN itself only
>> consumes a small trunk). However after looking at your patch and then
>> the source, it seems that only the max available order, meaning must
be
>> continuous, is allocated to Dom0 currently. So did I misunderstand
this
>> concept? If it really only means maximum continuous trunk, then you
>> patch definitely shoots straight on the real problem on all 64bit
>> platform. ;-)
>
>Right, there are several hacks around this problem, a couple i've
>thought of are:
>
>  + enforce dom0 take all memory

Just a rough thought. If dom0 can take all the memory, one alternative
is to rely on dom0 to support DMA-related allocation. In that case, dom0
can handle all internal requests itself, without the HV's intervention. Then
a component similar to the balloon driver resides within dom0 to handle
requests from other driver domains. A new event channel will be created
to pass zone information about requests between Dom0 and DomN. Then when
a driver domain wants to allocate DMA-able pages, the request will go to
dom0 instead of the HV. Finally, the balloon-like driver will allocate
DMA-able pages from Dom0's memory allocator and then update the driver
domain's mapping table. Yes, this adds some overhead from more context
switches. But the HV can pre-request a DMA pool from dom0 to accelerate
the process.

NUMA is somewhat different, and I don't yet have a clear picture of whether
this direction applies to it. ;-P

>  + drop the max order size for MEMZONEs to 18 (in which case
>    alloc_largest should always allocate from the lower memory)
>  + prealloc X amount of low memory (128M for instance) and add
>    it into the dom0 allocation

IMO, adding zone info as your patch does is better than simple hacks.

>
>You nailed it when you mentioned driver domains (next email); the long
>term goal is to make sure we're able to support them and hopefully
avoid
>the hogging of that memory unnecessarily for non-dma uses. Thanks for
>noticing ;^)
>
>(i was also glad Ian brought up numa, i had forgotten about it and this
>is probably a good time to think about that while i'm tearing up this
>code)
>
>sRp

Yep. ;v)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2005-06-04  5:44 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-04  3:48 comment request: dom0 dma on large memory systems Tian, Kevin
2005-06-04  3:59 ` Scott Parish
  -- strict thread matches above, loose matches on Subject: below --
2005-06-04  5:44 Tian, Kevin
2005-06-04  4:12 Tian, Kevin
2005-06-03  9:01 Ian Pratt
2005-06-03  7:35 Scott Parish
2005-06-03  8:58 ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.