All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Scott Parish" <srparish@us.ibm.com>
To: xen-devel@lists.xensource.com
Subject: comment request: dom0 dma on large memory systems
Date: Fri, 3 Jun 2005 07:35:01 +0000	[thread overview]
Message-ID: <20050603073500.GL9951@us.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1404 bytes --]

purpose:

The patches I'm working on are taking more work than I was expecting; this
email's purpose is to get confirmation that the solution I'm working on
is the preferred one.


problem:

On x86_64 with 6 GB of RAM, dom0's initial allocation is from memory
above the PCI hole (referred to as "high memory" in this email) if
dom0_mem is set to 2 GB or higher. The problem is that most I/O devices
(non-DAC) can only DMA to the first 32 bits' worth of machine addresses,
i.e. below 4 GB -- thus, for some configurations, dom0 has no memory
that is DMA-able.

In my experience, this shows up as dom0 being unable to find any
partitions on scan and then panicking because it can't mount root.


proposed solution to xen:

The attached patch is a (non-functional) prototype of my proposed
solution for Xen. The idea is to add a third MEMZONE for DMA-able
memory and to change the alloc_domheap_pages() interface so the
caller can request only DMA memory if needed. Finally, internal to
common/page_alloc.c, memory is split between MEMZONE_DOM and
MEMZONE_DMADOM based on the address.


proposed solution to dom0:

Dom0 might have memory allocated to it from both high and low memory.
The easiest solution would probably be to scan for and preallocate a
chunk of memory that will work for DMA, something like what pci-gart.c
does (or to work on getting pci-gart.c to work under Xen).


sRp

-- 
Scott Parish
Signed-off-by: srparish@us.ibm.com

[-- Attachment #2: memzone-dmadom.diff --]
[-- Type: text/plain, Size: 9141 bytes --]

diff -rN -u -p old-xen-64-4/xen/arch/x86/domain_build.c new-xen-64-4/xen/arch/x86/domain_build.c
--- old-xen-64-4/xen/arch/x86/domain_build.c	2005-06-01 20:04:19.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/domain_build.c	2005-06-03 04:54:47.000000000 +0000
@@ -57,13 +57,14 @@ boolean_param("dom0_translate", opt_dom0
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 
-static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+static struct pfn_info *alloc_largest(struct domain *d, unsigned long max,
+                                      unsigned long flags)
 {
     struct pfn_info *page;
     unsigned int order = get_order(max * PAGE_SIZE);
     if ( (max & (max-1)) != 0 )
         order--;
-    while ( (page = alloc_domheap_pages(d, order)) == NULL )
+    while ( (page = alloc_domheap_pages(d, order, flags)) == NULL )
         if ( order-- == 0 )
             break;
     return page;
@@ -143,7 +144,7 @@ int construct_dom0(struct domain *d,
         nr_pages = avail_domheap_pages() +
             ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
             ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
-    if ( (page = alloc_largest(d, nr_pages)) == NULL )
+    if ( (page = alloc_largest(d, nr_pages, ALLOC_DMADOM_ONLY)) == NULL )
         panic("Not enough RAM for DOM0 reservation.\n");
     alloc_start = page_to_phys(page);
     alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
@@ -554,7 +555,8 @@ int construct_dom0(struct domain *d,
     }
     while ( pfn < nr_pages )
     {
-        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+        if ( (page = alloc_largest(d, nr_pages - d->tot_pages,
+                                   ALLOC_DOM_ANY)) == NULL )
             panic("Not enough RAM for DOM0 reservation.\n");
         while ( pfn < d->tot_pages )
         {
diff -rN -u -p old-xen-64-4/xen/arch/x86/x86_32/mm.c new-xen-64-4/xen/arch/x86/x86_32/mm.c
--- old-xen-64-4/xen/arch/x86/x86_32/mm.c	2005-06-02 18:10:30.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/x86_32/mm.c	2005-06-03 03:36:49.000000000 +0000
@@ -82,7 +82,8 @@ void __init paging_init(void)
         mpt_size = 4*1024*1024;
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
-        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+              						   ALLOC_DOM_ANY) == NULL )
             panic("Not enough memory to bootstrap Xen.\n");
         idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] =
             l2e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_PSE);
diff -rN -u -p old-xen-64-4/xen/arch/x86/x86_64/mm.c new-xen-64-4/xen/arch/x86/x86_64/mm.c
--- old-xen-64-4/xen/arch/x86/x86_64/mm.c	2005-06-01 20:04:19.000000000 +0000
+++ new-xen-64-4/xen/arch/x86/x86_64/mm.c	2005-06-03 03:36:51.000000000 +0000
@@ -99,7 +99,7 @@ void __init paging_init(void)
      */
     for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
     {
-        pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER);
+        pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, ALLOC_DOM_ANY);
         if ( pg == NULL )
             panic("Not enough memory for m2p table\n");
         map_pages_to_xen(
diff -rN -u -p old-xen-64-4/xen/common/dom_mem_ops.c new-xen-64-4/xen/common/dom_mem_ops.c
--- old-xen-64-4/xen/common/dom_mem_ops.c	2005-05-28 08:49:39.000000000 +0000
+++ new-xen-64-4/xen/common/dom_mem_ops.c	2005-06-03 03:36:53.000000000 +0000
@@ -55,7 +55,8 @@ alloc_dom_mem(struct domain *d, 
     {
         PREEMPT_CHECK(MEMOP_increase_reservation);
 
-        if ( unlikely((page = alloc_domheap_pages(d, extent_order)) == NULL) )
+        if ( unlikely((page = alloc_domheap_pages(d, extent_order,
+                                                  ALLOC_DOM_ANY)) == NULL) )
         {
             DPRINTK("Could not allocate a frame\n");
             return i;
diff -rN -u -p old-xen-64-4/xen/common/page_alloc.c new-xen-64-4/xen/common/page_alloc.c
--- old-xen-64-4/xen/common/page_alloc.c	2005-05-25 18:41:08.000000000 +0000
+++ new-xen-64-4/xen/common/page_alloc.c	2005-06-03 05:39:17.000000000 +0000
@@ -210,11 +210,17 @@ unsigned long alloc_boot_pages(unsigned 
  */
 
 #define MEMZONE_XEN 0
 #define MEMZONE_DOM 1
-#define NR_ZONES    2
+#define MEMZONE_DMADOM 2
+#define NR_ZONES    3
+
+
+#define MAX_DMADOM_PFN 0xFFFFF
+#define pfn_dom_zone_type(_pfn)                                 \
+    (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
 
 /* Up to 2^20 pages can be allocated at once. */
 #define MAX_ORDER 20
 static struct list_head heap[NR_ZONES][MAX_ORDER+1];
 
 static unsigned long avail[NR_ZONES];
@@ -239,8 +248,8 @@ void end_boot_allocator(void)
         next_free = !allocated_in_map(i+1);
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
-            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
+            free_heap_pages(pfn_dom_zone_type(i), pfn_to_page(i), 0);
     }
 }
 
@@ -476,14 +486,21 @@ void init_domheap_pages(unsigned long ps
 {
     ASSERT(!in_irq());
 
-    ps = round_pgup(ps);
-    pe = round_pgdown(pe);
+    ps = round_pgup(ps) >> PAGE_SHIFT;
+    pe = round_pgdown(pe) >> PAGE_SHIFT;
 
-    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
+    if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
+        init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
+        init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
+                        pe - MAX_DMADOM_PFN);
+    }
+    else
+        init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
 }
 
 
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
+struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
+                                     unsigned int flags)
 {
     struct pfn_info *pg;
     unsigned long mask = 0;
@@ -491,8 +508,12 @@ struct pfn_info *alloc_domheap_pages(str
 
     ASSERT(!in_irq());
 
-    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
-        return NULL;
+    pg = NULL;
+    if (! (flags & ALLOC_DMADOM_ONLY))
+        pg = alloc_heap_pages(MEMZONE_DOM, order);
+    if (pg == NULL)
+        if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) )
+            return NULL;
 
     for ( i = 0; i < (1 << order); i++ )
     {
@@ -523,7 +544,7 @@ struct pfn_info *alloc_domheap_pages(str
         DPRINTK("...or the domain is dying (%d)\n", 
                 !!test_bit(_DOMF_dying, &d->domain_flags));
         spin_unlock(&d->page_alloc_lock);
-        free_heap_pages(MEMZONE_DOM, pg, order);
+        free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         return NULL;
     }
 
@@ -588,7 +609,7 @@ void free_domheap_pages(struct pfn_info 
 
         if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
         {
-            free_heap_pages(MEMZONE_DOM, pg, order);
+            free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         }
         else
         {
@@ -608,7 +629,7 @@ void free_domheap_pages(struct pfn_info 
     else
     {
         /* Freeing an anonymous domain-heap page. */
-        free_heap_pages(MEMZONE_DOM, pg, order);
+        free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, order);
         drop_dom_ref = 0;
     }
 
@@ -619,7 +640,7 @@ void free_domheap_pages(struct pfn_info 
 
 unsigned long avail_domheap_pages(void)
 {
-    return avail[MEMZONE_DOM];
+    return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM];
 }
 
 
@@ -668,7 +689,7 @@ static void page_scrub_softirq(void)
             p = map_domain_mem(page_to_phys(pg));
             clear_page(p);
             unmap_domain_mem(p);
-            free_heap_pages(MEMZONE_DOM, pg, 0);
+            free_heap_pages(pfn_dom_zone_type(page_to_pfn(pg)), pg, 0);
         }
     } while ( (NOW() - start) < MILLISECS(1) );
 }
diff -rN -u -p old-xen-64-4/xen/include/xen/mm.h new-xen-64-4/xen/include/xen/mm.h
--- old-xen-64-4/xen/include/xen/mm.h	2005-06-01 21:43:01.000000000 +0000
+++ new-xen-64-4/xen/include/xen/mm.h	2005-06-03 03:36:28.000000000 +0000
@@ -32,12 +32,16 @@ void free_xenheap_pages(unsigned long p,
 
 /* Domain suballocator. These functions are *not* interrupt-safe.*/
 void init_domheap_pages(unsigned long ps, unsigned long pe);
-struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order);
+struct pfn_info *alloc_domheap_pages(
+    struct domain *d, unsigned int order, unsigned int flags);
 void free_domheap_pages(struct pfn_info *pg, unsigned int order);
 unsigned long avail_domheap_pages(void);
-#define alloc_domheap_page(_d) (alloc_domheap_pages(_d,0))
+#define alloc_domheap_page(_d) (alloc_domheap_pages(_d,0,ALLOC_DOM_ANY))
 #define free_domheap_page(_p) (free_domheap_pages(_p,0))
 
+#define ALLOC_DMADOM_ONLY 1
+#define ALLOC_DOM_ANY 0
+
 /* Automatic page scrubbing for dead domains. */
 extern struct list_head page_scrub_list;
 #define page_scrub_schedule_work()              \


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2005-06-03  7:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-06-03  7:35 Scott Parish [this message]
2005-06-03  8:58 ` comment request: dom0 dma on large memory systems Keir Fraser
  -- strict thread matches above, loose matches on Subject: below --
2005-06-03  9:01 Ian Pratt
2005-06-04  3:48 Tian, Kevin
2005-06-04  3:59 ` Scott Parish
2005-06-04  4:12 Tian, Kevin
2005-06-04  5:44 Tian, Kevin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050603073500.GL9951@us.ibm.com \
    --to=srparish@us.ibm.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.