All of lore.kernel.org
 help / color / mirror / Atom feed
* [1/3] [XEN] gnttab: Add new op unmap_and_replace
@ 2007-05-29  4:55 Herbert Xu
  2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
                   ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Herbert Xu @ 2007-05-29  4:55 UTC (permalink / raw)
  To: Keir Fraser, Xen Development Mailing List

Hi Keir:

Here is a repost of the patches to eventually get rid of netloop.
As stated last time I've added code to deal with outstanding DMA
transfers when copying grant pages in the backend.  That code is
in the 2nd patch of the series.

[XEN] gnttab: Add new op unmap_and_replace

The operation unmap_and_replace is an extension of unmap_grant_ref.
A new argument in the form of a virtual address (for PV) is given.
Instead of modifying the PTE for the mapped grant table entry to
null, we change it to the PTE for the new address.  In turn we
point the new address to null.

As it stands grant table entries once mapped cannot be
remapped by the guest OS (it can however perform a new
mapping on the same entry but that is within our control).
Therefore it's safe to manipulate the mapped PTE entry to
redirect it to a normal page where we've copied the contents.

It's intended to be used as follows:

1) map_grant_ref to v1
2) ...
3) alloc page at v2
4) copy the page at v1 to v2
5) unmap_and_replace v1 with v2

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Thu May 10 11:56:29 2007 +1000
@@ -135,4 +135,19 @@ gnttab_set_unmap_op(struct gnttab_unmap_
 	unmap->dev_bus_addr = 0;
 }
 
+static inline void
+gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, maddr_t addr,
+		      maddr_t new_addr, grant_handle_t handle)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		unmap->host_addr = __pa(addr);
+		unmap->new_addr = __pa(new_addr);
+	} else {
+		unmap->host_addr = addr;
+		unmap->new_addr = new_addr;
+	}
+
+	unmap->handle = handle;
+}
+
 #endif /* __ASM_GNTTAB_H__ */
diff -r 3ef0510e44d0 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Tue May 08 10:21:23 2007 +0100
+++ b/xen/arch/ia64/xen/mm.c	Thu May 10 11:56:29 2007 +1000
@@ -63,7 +63,7 @@
  *     assign_domain_page_replace()
  *   - cmpxchg p2m entry
  *     assign_domain_page_cmpxchg_rel()
- *     destroy_grant_host_mapping()
+ *     replace_grant_host_mapping()
  *     steal_page()
  *     zap_domain_page_one()
  *   - read p2m entry
@@ -133,7 +133,7 @@
  * - races between p2m entry update and tlb insert
  *   This is a race between reading/writing the p2m entry.
  *   reader: vcpu_itc_i(), vcpu_itc_d(), ia64_do_page_fault(), vcpu_fc()
- *   writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(), 
+ *   writer: assign_domain_page_cmpxchg_rel(), replace_grant_host_mapping(), 
  *           steal_page(), zap_domain_page_one()
  * 
  *   For example, vcpu_itc_i() is about to insert tlb by calling
@@ -151,7 +151,7 @@
  *   This is a race between reading/writing the p2m entry.
  *   reader: vcpu_get_domain_bundle(), vmx_get_domain_bundle(),
  *           efi_emulate_get_time()
- *   writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(), 
+ *   writer: assign_domain_page_cmpxchg_rel(), replace_grant_host_mapping(), 
  *           steal_page(), zap_domain_page_one()
  * 
  *   A page which assigned to a domain can be de-assigned by another vcpu.
@@ -1509,8 +1509,8 @@ create_grant_host_mapping(unsigned long 
 
 // grant table host unmapping
 int
-destroy_grant_host_mapping(unsigned long gpaddr,
-               unsigned long mfn, unsigned int flags)
+replace_grant_host_mapping(unsigned long gpaddr,
+	       unsigned long mfn, unsigned long new_gpaddr, unsigned int flags)
 {
     struct domain* d = current->domain;
     unsigned long gpfn = gpaddr >> PAGE_SHIFT;
@@ -1521,6 +1521,11 @@ destroy_grant_host_mapping(unsigned long
     pte_t old_pte;
     struct page_info* page = mfn_to_page(mfn);
 
+    if (new_gpaddr) {
+        gdprintk(XENLOG_INFO, "%s: new_gpaddr 0x%lx\n", __func__, new_gpaddr);
+    	return GNTST_general_error;
+    }
+
     if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
         gdprintk(XENLOG_INFO, "%s: flags 0x%x\n", __func__, flags);
         return GNTST_general_error;
@@ -1568,7 +1573,7 @@ destroy_grant_host_mapping(unsigned long
     BUG_ON(pte_pgc_allocated(old_pte));
     domain_page_flush_and_put(d, gpaddr, pte, old_pte, page);
 
-    perfc_incr(destroy_grant_host_mapping);
+    perfc_incr(replace_grant_host_mapping);
     return GNTST_okay;
 }
 
diff -r 3ef0510e44d0 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c	Tue May 08 10:21:23 2007 +0100
+++ b/xen/arch/powerpc/mm.c	Thu May 10 11:56:29 2007 +1000
@@ -183,9 +183,16 @@ int create_grant_host_mapping(
     return create_grant_va_mapping(addr, frame, current);
 }
 
-int destroy_grant_host_mapping(
-    unsigned long addr, unsigned long frame, unsigned int flags)
-{
+int replace_grant_host_mapping(
+    unsigned long addr, unsigned long frame, unsigned long new_addr,
+    unsigned int flags)
+{
+    if (new_addr)
+        printk("%s: new_addr not supported\n", __func__);
+        BUG();
+        return GNTST_general_error;
+    }
+
     if (flags & GNTMAP_contains_pte) {
         printk("%s: GNTMAP_contains_pte not supported\n", __func__);
         BUG();
diff -r 3ef0510e44d0 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Tue May 08 10:21:23 2007 +0100
+++ b/xen/arch/x86/mm.c	Thu May 10 11:56:29 2007 +1000
@@ -2619,8 +2619,8 @@ static int create_grant_va_mapping(
     return GNTST_okay;
 }
 
-static int destroy_grant_va_mapping(
-    unsigned long addr, unsigned long frame, struct vcpu *v)
+static int replace_grant_va_mapping(
+    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v)
 {
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
@@ -2644,7 +2644,7 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
+    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
@@ -2654,6 +2654,12 @@ static int destroy_grant_va_mapping(
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
+}
+
+static int destroy_grant_va_mapping(
+    unsigned long addr, unsigned long frame, struct vcpu *v)
+{
+    return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
 }
 
 int create_grant_host_mapping(
@@ -2671,12 +2677,48 @@ int create_grant_host_mapping(
     return create_grant_va_mapping(addr, pte, current);
 }
 
-int destroy_grant_host_mapping(
-    uint64_t addr, unsigned long frame, unsigned int flags)
-{
+int replace_grant_host_mapping(
+    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags)
+{
+    l1_pgentry_t *pl1e, ol1e;
+    unsigned long gl1mfn;
+    int rc;
+    
     if ( flags & GNTMAP_contains_pte )
-        return destroy_grant_pte_mapping(addr, frame, current->domain);
-    return destroy_grant_va_mapping(addr, frame, current);
+    {
+	if (!new_addr)
+	    return destroy_grant_pte_mapping(addr, frame, current->domain);
+
+	MEM_LOG("Unsupported grant table operation");
+	return GNTST_general_error;
+    }
+
+    if (!new_addr)
+	return destroy_grant_va_mapping(addr, frame, current);
+
+    pl1e = guest_map_l1e(current, new_addr, &gl1mfn);
+    if ( !pl1e )
+    {
+        MEM_LOG("Could not find L1 PTE for address %lx",
+                (unsigned long)new_addr);
+        return GNTST_general_error;
+    }
+    ol1e = *pl1e;
+
+    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, current)) )
+    {
+        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
+        guest_unmap_l1e(current, pl1e);
+        return GNTST_general_error;
+    }
+
+    guest_unmap_l1e(current, pl1e);
+
+    rc = replace_grant_va_mapping(addr, frame, ol1e, current);
+    if ( rc && !paging_mode_refcounts(current->domain) )
+        put_page_from_l1e(ol1e, current->domain);
+
+    return rc;
 }
 
 int steal_page(
diff -r 3ef0510e44d0 xen/common/grant_table.c
--- a/xen/common/grant_table.c	Tue May 08 10:21:23 2007 +0100
+++ b/xen/common/grant_table.c	Thu May 10 11:56:29 2007 +1000
@@ -58,6 +58,16 @@ union grant_combo {
     } shorts;
 };
 
+/* Used to share code between unmap_grant_ref and unmap_and_replace. */
+struct gnttab_unmap_common {
+    uint64_t host_addr;
+    uint64_t dev_bus_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+
+    int16_t status;
+};
+
 #define PIN_FAIL(_lbl, _rc, _f, _a...)          \
     do {                                        \
         gdprintk(XENLOG_WARNING, _f, ## _a );   \
@@ -397,8 +407,8 @@ gnttab_map_grant_ref(
 }
 
 static void
-__gnttab_unmap_grant_ref(
-    struct gnttab_unmap_grant_ref *op)
+__gnttab_unmap_common(
+    struct gnttab_unmap_common *op)
 {
     domid_t          dom;
     grant_ref_t      ref;
@@ -477,8 +487,8 @@ __gnttab_unmap_grant_ref(
 
     if ( (op->host_addr != 0) && (flags & GNTMAP_host_map) )
     {
-        if ( (rc = destroy_grant_host_mapping(op->host_addr,
-                                              frame, flags)) < 0 )
+        if ( (rc = replace_grant_host_mapping(op->host_addr,
+                                              frame, op->new_addr, flags)) < 0 )
             goto unmap_out;
 
         ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
@@ -518,6 +528,20 @@ __gnttab_unmap_grant_ref(
     rcu_unlock_domain(rd);
 }
 
+static void
+__gnttab_unmap_grant_ref(
+    struct gnttab_unmap_grant_ref *op)
+{
+    struct gnttab_unmap_common common = {
+	.host_addr = op->host_addr,
+	.dev_bus_addr = op->dev_bus_addr,
+	.handle = op->handle,
+    };
+
+    __gnttab_unmap_common(&common);
+    op->status = common.status;
+}
+
 static long
 gnttab_unmap_grant_ref(
     XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t) uop, unsigned int count)
@@ -530,6 +554,44 @@ gnttab_unmap_grant_ref(
         if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) )
             goto fault;
         __gnttab_unmap_grant_ref(&op);
+        if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) )
+            goto fault;
+    }
+
+    flush_tlb_mask(current->domain->domain_dirty_cpumask);
+    return 0;
+
+fault:
+    flush_tlb_mask(current->domain->domain_dirty_cpumask);
+    return -EFAULT;    
+}
+
+static void
+__gnttab_unmap_and_replace(
+    struct gnttab_unmap_and_replace *op)
+{
+    struct gnttab_unmap_common common = {
+	.host_addr = op->host_addr,
+	.new_addr = op->new_addr,
+	.handle = op->handle,
+    };
+
+    __gnttab_unmap_common(&common);
+    op->status = common.status;
+}
+
+static long
+gnttab_unmap_and_replace(
+    XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t) uop, unsigned int count)
+{
+    int i;
+    struct gnttab_unmap_and_replace op;
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) )
+            goto fault;
+        __gnttab_unmap_and_replace(&op);
         if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) )
             goto fault;
     }
@@ -1203,6 +1265,21 @@ do_grant_table_op(
         if ( unlikely(!grant_operation_permitted(d)) )
             goto out;
         rc = gnttab_unmap_grant_ref(unmap, count);
+        break;
+    }
+    case GNTTABOP_unmap_and_replace:
+    {
+        XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t) unmap =
+            guest_handle_cast(uop, gnttab_unmap_and_replace_t);
+        if ( unlikely(!guest_handle_okay(unmap, count)) )
+            goto out;
+        rc = -EPERM;
+        if ( unlikely(!grant_operation_permitted(d)) )
+            goto out;
+        rc = -ENOSYS;
+        if ( unlikely(!replace_grant_supported()) )
+            goto out;
+        rc = gnttab_unmap_and_replace(unmap, count);
         break;
     }
     case GNTTABOP_setup_table:
diff -r 3ef0510e44d0 xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h	Tue May 08 10:21:23 2007 +0100
+++ b/xen/include/asm-ia64/grant_table.h	Thu May 10 11:56:29 2007 +1000
@@ -9,7 +9,7 @@
 
 // for grant map/unmap
 int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
-int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
+int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags);
 
 // for grant transfer
 void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
@@ -67,4 +67,9 @@ static inline void gnttab_clear_flag(uns
 #define gnttab_release_put_page(page)           put_page((page))
 #define gnttab_release_put_page_and_type(page)  put_page_and_type((page))
 
+static inline int replace_grant_supported(void)
+{
+    return 0;
+}
+
 #endif /* __ASM_GRANT_TABLE_H__ */
diff -r 3ef0510e44d0 xen/include/asm-powerpc/grant_table.h
--- a/xen/include/asm-powerpc/grant_table.h	Tue May 08 10:21:23 2007 +0100
+++ b/xen/include/asm-powerpc/grant_table.h	Thu May 10 11:56:29 2007 +1000
@@ -35,8 +35,9 @@ extern long pte_remove(ulong flags, ulon
 
 int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags);
-int destroy_grant_host_mapping(
-    unsigned long addr, unsigned long frame, unsigned int flags);
+int replace_grant_host_mapping(
+    unsigned long addr, unsigned long frame, unsigned long new_addr,
+    unsigned int flags);
 
 #define gnttab_create_shared_page(d, t, i)                               \
     do {                                                                 \
@@ -82,4 +83,8 @@ static inline uint cpu_foreign_map_order
 #define gnttab_release_put_page_and_type(page)  do { } while (0)
 #endif
 
+static inline int replace_grant_supported(void)
+{
+    return 0;
+}
 #endif  /* __ASM_PPC_GRANT_TABLE_H__ */
diff -r 3ef0510e44d0 xen/include/asm-x86/grant_table.h
--- a/xen/include/asm-x86/grant_table.h	Tue May 08 10:21:23 2007 +0100
+++ b/xen/include/asm-x86/grant_table.h	Thu May 10 11:56:29 2007 +1000
@@ -15,8 +15,8 @@
  */
 int create_grant_host_mapping(
     uint64_t addr, unsigned long frame, unsigned int flags);
-int destroy_grant_host_mapping(
-    uint64_t addr, unsigned long frame, unsigned int flags);
+int replace_grant_host_mapping(
+    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags);
 
 #define gnttab_create_shared_page(d, t, i)                               \
     do {                                                                 \
@@ -48,4 +48,9 @@ static inline void gnttab_clear_flag(uns
         /* Done implicitly when page tables are destroyed. */   \
     } while (0)
 
+static inline int replace_grant_supported(void)
+{
+    return 1;
+}
+
 #endif /* __ASM_GRANT_TABLE_H__ */
diff -r 3ef0510e44d0 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h	Tue May 08 10:21:23 2007 +0100
+++ b/xen/include/public/grant_table.h	Thu May 10 11:56:29 2007 +1000
@@ -327,6 +327,29 @@ struct gnttab_query_size {
 };
 typedef struct gnttab_query_size gnttab_query_size_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace    7
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
 
 
 /*

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-05-29  4:55 [1/3] [XEN] gnttab: Add new op unmap_and_replace Herbert Xu
@ 2007-05-29  4:55 ` Herbert Xu
  2007-06-06  6:18   ` Isaku Yamahata
  2007-06-07  8:48   ` was " Isaku Yamahata
  2007-05-29  4:55 ` [3/3] [NET] back: Add lazy copying Herbert Xu
  2007-05-30 11:50 ` [1/3] [XEN] gnttab: Add new op unmap_and_replace Keir Fraser
  2 siblings, 2 replies; 29+ messages in thread
From: Herbert Xu @ 2007-05-29  4:55 UTC (permalink / raw)
  To: Keir Fraser, Xen Development Mailing List

Hi:

[LINUX] gnttab: Add basic DMA tracking

This patch adds basic tracking of outstanding DMA requests on
grant table entries marked as PageForeign.

When a PageForeign struct page is about to be mapped for DMA,
we set its map count to 1 (or zero in actual value).  This is
then checked for when we need to free a grant table entry early
to ensure that we don't free an entry that's currently used for
DMA.

So any entry that has been marked for DMA will not be freed early.

If the unmapping API had a struct page (which exists for the sg
case) then we could do this properly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> 

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Wed May 16 22:31:20 2007 +1000
@@ -15,6 +15,7 @@
 #include <linux/version.h>
 #include <asm/io.h>
 #include <xen/balloon.h>
+#include <xen/gnttab.h>
 #include <asm/swiotlb.h>
 #include <asm/tlbflush.h>
 #include <asm-i386/mach-xen/asm/swiotlb.h>
@@ -90,7 +91,7 @@ dma_map_sg(struct device *hwdev, struct 
 	} else {
 		for (i = 0; i < nents; i++ ) {
 			sg[i].dma_address =
-				page_to_bus(sg[i].page) + sg[i].offset;
+				gnttab_dma_map_page(sg[i].page) + sg[i].offset;
 			sg[i].dma_length  = sg[i].length;
 			BUG_ON(!sg[i].page);
 			IOMMU_BUG_ON(address_needs_mapping(
@@ -108,9 +109,15 @@ dma_unmap_sg(struct device *hwdev, struc
 dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
 	     enum dma_data_direction direction)
 {
+	int i;
+
 	BUG_ON(direction == DMA_NONE);
 	if (swiotlb)
 		swiotlb_unmap_sg(hwdev, sg, nents, direction);
+	else {
+		for (i = 0; i < nents; i++ )
+			gnttab_dma_unmap_page(sg[i].dma_address);
+	}
 }
 EXPORT_SYMBOL(dma_unmap_sg);
 
@@ -127,7 +134,7 @@ dma_map_page(struct device *dev, struct 
 		dma_addr = swiotlb_map_page(
 			dev, page, offset, size, direction);
 	} else {
-		dma_addr = page_to_bus(page) + offset;
+		dma_addr = gnttab_dma_map_page(page) + offset;
 		IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
 	}
 
@@ -142,6 +149,8 @@ dma_unmap_page(struct device *dev, dma_a
 	BUG_ON(direction == DMA_NONE);
 	if (swiotlb)
 		swiotlb_unmap_page(dev, dma_address, size, direction);
+	else
+		gnttab_dma_unmap_page(dma_address);
 }
 EXPORT_SYMBOL(dma_unmap_page);
 #endif /* CONFIG_HIGHMEM */
@@ -326,7 +335,8 @@ dma_map_single(struct device *dev, void 
 	if (swiotlb) {
 		dma = swiotlb_map_single(dev, ptr, size, direction);
 	} else {
-		dma = virt_to_bus(ptr);
+		dma = gnttab_dma_map_page(virt_to_page(ptr)) +
+		      offset_in_page(ptr);
 		IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
 		IOMMU_BUG_ON(address_needs_mapping(dev, dma));
 	}
@@ -344,6 +354,8 @@ dma_unmap_single(struct device *dev, dma
 		BUG();
 	if (swiotlb)
 		swiotlb_unmap_single(dev, dma_addr, size, direction);
+	else
+		gnttab_dma_unmap_page(dma_addr);
 }
 EXPORT_SYMBOL(dma_unmap_single);
 
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed May 16 22:31:20 2007 +1000
@@ -25,14 +25,13 @@
 #include <asm/pci.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
+#include <xen/gnttab.h>
 #include <xen/interface/memory.h>
 
 int swiotlb;
 EXPORT_SYMBOL(swiotlb);
 
 #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
-
-#define SG_ENT_PHYS_ADDRESS(sg)	(page_to_bus((sg)->page) + (sg)->offset)
 
 /*
  * Maximum allowable number of contiguous slabs to map,
@@ -468,7 +467,8 @@ dma_addr_t
 dma_addr_t
 swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
 {
-	dma_addr_t dev_addr = virt_to_bus(ptr);
+	dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
+			      offset_in_page(ptr);
 	void *map;
 	struct phys_addr buffer;
 
@@ -486,6 +486,7 @@ swiotlb_map_single(struct device *hwdev,
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
+	gnttab_dma_unmap_page(dev_addr);
 	buffer.page   = virt_to_page(ptr);
 	buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
 	map = map_single(hwdev, buffer, size, dir);
@@ -513,6 +514,8 @@ swiotlb_unmap_single(struct device *hwde
 	BUG_ON(dir == DMA_NONE);
 	if (in_swiotlb_aperture(dev_addr))
 		unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
+	else
+		gnttab_dma_unmap_page(dev_addr);
 }
 
 /*
@@ -571,8 +574,10 @@ swiotlb_map_sg(struct device *hwdev, str
 	BUG_ON(dir == DMA_NONE);
 
 	for (i = 0; i < nelems; i++, sg++) {
-		dev_addr = SG_ENT_PHYS_ADDRESS(sg);
+		dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;
+
 		if (address_needs_mapping(hwdev, dev_addr)) {
+			gnttab_dma_unmap_page(dev_addr);
 			buffer.page   = sg->page;
 			buffer.offset = sg->offset;
 			map = map_single(hwdev, buffer, sg->length, dir);
@@ -605,10 +610,12 @@ swiotlb_unmap_sg(struct device *hwdev, s
 	BUG_ON(dir == DMA_NONE);
 
 	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+		if (in_swiotlb_aperture(sg->dma_address))
 			unmap_single(hwdev, 
 				     (void *)bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
+		else
+			gnttab_dma_unmap_page(sg->dma_address);
 }
 
 /*
@@ -627,7 +634,7 @@ swiotlb_sync_sg_for_cpu(struct device *h
 	BUG_ON(dir == DMA_NONE);
 
 	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+		if (in_swiotlb_aperture(sg->dma_address))
 			sync_single(hwdev,
 				    (void *)bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir);
@@ -642,7 +649,7 @@ swiotlb_sync_sg_for_device(struct device
 	BUG_ON(dir == DMA_NONE);
 
 	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+		if (in_swiotlb_aperture(sg->dma_address))
 			sync_single(hwdev,
 				    (void *)bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir);
@@ -659,8 +666,9 @@ swiotlb_map_page(struct device *hwdev, s
 	dma_addr_t dev_addr;
 	char *map;
 
-	dev_addr = page_to_bus(page) + offset;
+	dev_addr = gnttab_dma_map_page(page) + offset;
 	if (address_needs_mapping(hwdev, dev_addr)) {
+		gnttab_dma_unmap_page(dev_addr);
 		buffer.page   = page;
 		buffer.offset = offset;
 		map = map_single(hwdev, buffer, size, direction);
@@ -681,6 +689,8 @@ swiotlb_unmap_page(struct device *hwdev,
 	BUG_ON(direction == DMA_NONE);
 	if (in_swiotlb_aperture(dma_address))
 		unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
+	else
+		gnttab_dma_unmap_page(dma_address);
 }
 
 #endif
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed May 16 22:31:20 2007 +1000
@@ -490,6 +490,128 @@ static int gnttab_map(unsigned int start
 	return 0;
 }
 
+static void gnttab_page_free(struct page *page)
+{
+	if (page->mapping) {
+		put_page((struct page *)page->mapping);
+		page->mapping = NULL;
+	}
+
+	ClearPageForeign(page);
+	gnttab_reset_grant_page(page);
+	put_page(page);
+}
+
+/*
+ * Must not be called with IRQs off.  This should only be used on the
+ * slow path.
+ *
+ * Copy a foreign granted page to local memory.
+ */
+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
+{
+	struct gnttab_unmap_and_replace unmap;
+	mmu_update_t mmu;
+	struct page *page;
+	struct page *new_page;
+	void *new_addr;
+	void *addr;
+	paddr_t pfn;
+	maddr_t mfn;
+	maddr_t new_mfn;
+	int err;
+
+	page = *pagep;
+	if (!get_page_unless_zero(page))
+		return -ENOENT;
+
+	err = -ENOMEM;
+	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!new_page)
+		goto out;
+
+	new_addr = page_address(new_page);
+	addr = page_address(page);
+	memcpy(new_addr, addr, PAGE_SIZE);
+
+	pfn = page_to_pfn(page);
+	mfn = pfn_to_mfn(pfn);
+	new_mfn = virt_to_mfn(new_addr);
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		set_phys_to_machine(pfn, new_mfn);
+		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
+
+		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu.val = pfn;
+		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
+		BUG_ON(err);
+	}
+
+	gnttab_set_replace_op(&unmap, (unsigned long)addr,
+			      (unsigned long)new_addr, ref);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					&unmap, 1);
+	BUG_ON(err);
+	BUG_ON(unmap.status);
+
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	set_bit(PG_foreign, &new_page->flags);
+	*pagep = new_page;
+
+	SetPageForeign(page, gnttab_page_free);
+	page->mapping = NULL;
+
+	/*
+	 * Ensure that there is a barrier between setting the p2m entry
+	 * and checking the map count.  See gnttab_dma_map_page.
+	 */
+	smp_mb();
+
+	/* Has the page been DMA-mapped? */
+	if (unlikely(page_mapped(page))) {
+		err = -EBUSY;
+		page->mapping = (void *)new_page;
+	}
+
+out:
+	put_page(page);
+	return err;
+
+}
+EXPORT_SYMBOL(gnttab_copy_grant_page);
+
+/*
+ * Keep track of foreign pages marked as PageForeign so that we don't
+ * return them to the remote domain prematurely.
+ *
+ * PageForeign pages are pinned down by increasing their mapcount.
+ *
+ * All other pages are simply returned as is.
+ */
+maddr_t gnttab_dma_map_page(struct page *page)
+{
+	maddr_t mfn = pfn_to_mfn(page_to_pfn(page)), mfn2;
+
+	if (!PageForeign(page))
+		return mfn << PAGE_SHIFT;
+
+	if (mfn_to_local_pfn(mfn) < max_mapnr)
+		return mfn << PAGE_SHIFT;
+
+	atomic_set(&page->_mapcount, 0);
+
+	/* This barrier corresponds to the one in gnttab_copy_grant_page. */
+	smp_mb();
+
+	/* Has this page been copied in the mean time? */
+	mfn2 = pfn_to_mfn(page_to_pfn(page));
+
+	return mfn2 << PAGE_SHIFT;
+}
+
 int gnttab_resume(void)
 {
 	if (max_nr_grant_frames() < nr_grant_frames)
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Wed May 16 22:31:20 2007 +1000
@@ -39,6 +39,7 @@
 
 #include <asm/hypervisor.h>
 #include <asm/maddr.h> /* maddr_t */
+#include <linux/mm.h>
 #include <xen/interface/grant_table.h>
 #include <xen/features.h>
 
@@ -101,6 +102,19 @@ void gnttab_grant_foreign_transfer_ref(g
 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
 				       unsigned long pfn);
 
+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep);
+maddr_t gnttab_dma_map_page(struct page *page);
+
+static inline void gnttab_dma_unmap_page(maddr_t mfn)
+{
+}
+
+static inline void gnttab_reset_grant_page(struct page *page)
+{
+	init_page_count(page);
+	reset_page_mapcount(page);
+}
+
 int gnttab_suspend(void);
 int gnttab_resume(void);

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [3/3] [NET] back: Add lazy copying
  2007-05-29  4:55 [1/3] [XEN] gnttab: Add new op unmap_and_replace Herbert Xu
  2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
@ 2007-05-29  4:55 ` Herbert Xu
  2007-05-30 11:50 ` [1/3] [XEN] gnttab: Add new op unmap_and_replace Keir Fraser
  2 siblings, 0 replies; 29+ messages in thread
From: Herbert Xu @ 2007-05-29  4:55 UTC (permalink / raw)
  To: Keir Fraser, Xen Development Mailing List

Hi:

[NET] back: Add lazy copying

This patch adds lazy copying using the new unmap_and_replace grant
table operation.

We keep a list of pending entries sorted by arrival order.  We'll
process this list every time net_tx_action is invoked.  We ensure
that net_tx_action is invoked within one second of the arrival of
the first packet in the list.

When we process the list any entry that has been around for more
than half a second is copied.  This allows up to free the grant
table entry and return it to domU.

If the new grant table operation is not available (e.g., old HV
or architectures that don't support it yet) we simply copy each
packet as we receive them using skb_linearize.  We also disable
SG/TSO if this is the case.

By default the new code is disabled.  In order to enable it,
the module needs to be loaded with the argument copy_skb=1.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> 

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Wed May 16 22:31:20 2007 +1000
@@ -114,6 +114,14 @@ typedef struct netif_st {
 #define netback_carrier_off(netif)	((netif)->carrier = 0)
 #define netback_carrier_ok(netif)	((netif)->carrier)
 
+enum {
+	NETBK_DONT_COPY_SKB,
+	NETBK_DELAYED_COPY_SKB,
+	NETBK_ALWAYS_COPY_SKB,
+};
+
+extern int netbk_copy_skb_mode;
+
 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed May 16 22:31:20 2007 +1000
@@ -49,6 +49,11 @@ struct netbk_rx_meta {
 	int copy:1;
 };
 
+struct netbk_tx_pending_inuse {
+	struct list_head list;
+	unsigned long alloc_time;
+};
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
@@ -68,15 +73,21 @@ static DECLARE_TASKLET(net_rx_tasklet, n
 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
 
 static struct timer_list net_timer;
+static struct timer_list netbk_tx_pending_timer;
 
 #define MAX_PENDING_REQS 256
 
 static struct sk_buff_head rx_queue;
 
 static struct page **mmap_pages;
+static inline unsigned long idx_to_pfn(unsigned int idx)
+{
+	return page_to_pfn(mmap_pages[idx]);
+}
+
 static inline unsigned long idx_to_kaddr(unsigned int idx)
 {
-	return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
 }
 
 #define PKT_PROT_LEN 64
@@ -95,6 +106,10 @@ static u16 dealloc_ring[MAX_PENDING_REQS
 static u16 dealloc_ring[MAX_PENDING_REQS];
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
+/* Doubly-linked list of in-use pending entries. */
+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+static LIST_HEAD(pending_inuse_head);
+
 static struct sk_buff_head tx_queue;
 
 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
@@ -107,6 +122,13 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
+
+/* Setting this allows the safe use of this driver without netloop. */
+static int MODPARM_copy_skb;
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+int netbk_copy_skb_mode;
 
 static inline unsigned long alloc_mfn(void)
 {
@@ -719,6 +741,11 @@ static void net_alarm(unsigned long unus
 	tasklet_schedule(&net_rx_tasklet);
 }
 
+static void netbk_tx_pending_timeout(unsigned long unused)
+{
+	tasklet_schedule(&net_tx_tasklet);
+}
+
 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
 {
 	netif_t *netif = netdev_priv(dev);
@@ -812,46 +839,97 @@ static void tx_credit_callback(unsigned 
 	netif_schedule_work(netif);
 }
 
+static inline int copy_pending_req(PEND_RING_IDX pending_idx)
+{
+	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+				      &mmap_pages[pending_idx]);
+}
+
 inline static void net_tx_action_dealloc(void)
 {
+	struct netbk_tx_pending_inuse *inuse, *n;
 	gnttab_unmap_grant_ref_t *gop;
 	u16 pending_idx;
 	PEND_RING_IDX dc, dp;
 	netif_t *netif;
 	int ret;
+	LIST_HEAD(list);
 
 	dc = dealloc_cons;
-	dp = dealloc_prod;
-
-	/* Ensure we see all indexes enqueued by netif_idx_release(). */
-	smp_rmb();
+	gop = tx_unmap_ops;
 
 	/*
 	 * Free up any grants we have finished using
 	 */
-	gop = tx_unmap_ops;
-	while (dc != dp) {
-		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-		gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
-				    GNTMAP_host_map,
-				    grant_tx_handle[pending_idx]);
-		gop++;
-	}
+	do {
+		dp = dealloc_prod;
+
+		/* Ensure we see all indices enqueued by netif_idx_release(). */
+		smp_rmb();
+
+		while (dc != dp) {
+			unsigned long pfn;
+
+			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+			list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+			pfn = idx_to_pfn(pending_idx);
+			/* Already unmapped? */
+			if (!phys_to_machine_mapping_valid(pfn))
+				continue;
+
+			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
+					    GNTMAP_host_map,
+					    grant_tx_handle[pending_idx]);
+			gop++;
+		}
+
+		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+		    list_empty(&pending_inuse_head))
+			break;
+
+		/* Copy any entries that have been pending for too long. */
+		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
+			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+				break;
+
+			switch (copy_pending_req(inuse - pending_inuse)) {
+			case 0:
+				list_move_tail(&inuse->list, &list);
+				continue;
+			case -EBUSY:
+				list_del_init(&inuse->list);
+				continue;
+			case -ENOENT:
+				continue;
+			}
+
+			break;
+		}
+	} while (dp != dealloc_prod);
+
+	dealloc_cons = dc;
+
 	ret = HYPERVISOR_grant_table_op(
 		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
 	BUG_ON(ret);
 
-	while (dealloc_cons != dp) {
-		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+	list_for_each_entry_safe(inuse, n, &list, list) {
+		pending_idx = inuse - pending_inuse;
 
 		netif = pending_tx_info[pending_idx].netif;
 
 		make_tx_response(netif, &pending_tx_info[pending_idx].req, 
 				 NETIF_RSP_OKAY);
 
+		/* Ready for next use. */
+		gnttab_reset_grant_page(mmap_pages[pending_idx]);
+
 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 
 		netif_put(netif);
+
+		list_del_init(&inuse->list);
 	}
 }
 
@@ -1023,6 +1101,11 @@ static void netbk_fill_frags(struct sk_b
 		unsigned long pending_idx;
 
 		pending_idx = (unsigned long)frag->page;
+
+		pending_inuse[pending_idx].alloc_time = jiffies;
+		list_add_tail(&pending_inuse[pending_idx].list,
+			      &pending_inuse_head);
+
 		txp = &pending_tx_info[pending_idx].req;
 		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
 		frag->size = txp->size;
@@ -1311,8 +1394,24 @@ static void net_tx_action(unsigned long 
 		netif->stats.rx_bytes += skb->len;
 		netif->stats.rx_packets++;
 
+		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+		    unlikely(skb_linearize(skb))) {
+			DPRINTK("Can't linearize skb in net_tx_action.\n");
+			kfree_skb(skb);
+			continue;
+		}
+
 		netif_rx(skb);
 		netif->dev->last_rx = jiffies;
+	}
+
+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+	    !list_empty(&pending_inuse_head)) {
+		struct netbk_tx_pending_inuse *oldest;
+
+		oldest = list_entry(pending_inuse_head.next,
+				    struct netbk_tx_pending_inuse, list);
+		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
 	}
 }
 
@@ -1333,9 +1432,6 @@ static void netif_idx_release(u16 pendin
 
 static void netif_page_release(struct page *page)
 {
-	/* Ready for next use. */
-	init_page_count(page);
-
 	netif_idx_release(netif_page_index(page));
 }
 
@@ -1457,6 +1553,10 @@ static int __init netback_init(void)
 	net_timer.data = 0;
 	net_timer.function = net_alarm;
 
+	init_timer(&netbk_tx_pending_timer);
+	netbk_tx_pending_timer.data = 0;
+	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+
 	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
 	if (mmap_pages == NULL) {
 		printk("%s: out of memory\n", __FUNCTION__);
@@ -1467,6 +1567,7 @@ static int __init netback_init(void)
 		page = mmap_pages[i];
 		SetPageForeign(page, netif_page_release);
 		netif_page_index(page) = i;
+		INIT_LIST_HEAD(&pending_inuse[i].list);
 	}
 
 	pending_cons = 0;
@@ -1476,6 +1577,15 @@ static int __init netback_init(void)
 
 	spin_lock_init(&net_schedule_list_lock);
 	INIT_LIST_HEAD(&net_schedule_list);
+
+	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+	if (MODPARM_copy_skb) {
+		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					      NULL, 0))
+			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+		else
+			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+	}
 
 	netif_xenbus_init();
 
diff -r 3ef0510e44d0 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Tue May 08 10:21:23 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed May 16 22:31:20 2007 +1000
@@ -62,6 +62,7 @@ static int netback_probe(struct xenbus_d
 	const char *message;
 	struct xenbus_transaction xbt;
 	int err;
+	int sg;
 	struct backend_info *be = kzalloc(sizeof(struct backend_info),
 					  GFP_KERNEL);
 	if (!be) {
@@ -73,6 +74,10 @@ static int netback_probe(struct xenbus_d
 	be->dev = dev;
 	dev->dev.driver_data = be;
 
+	sg = 1;
+	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+		sg = 0;
+
 	do {
 		err = xenbus_transaction_start(&xbt);
 		if (err) {
@@ -80,14 +85,14 @@ static int netback_probe(struct xenbus_d
 			goto fail;
 		}
 
-		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
 		if (err) {
 			message = "writing feature-sg";
 			goto abort_transaction;
 		}
 
 		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-				    "%d", 1);
+				    "%d", sg);
 		if (err) {
 			message = "writing feature-gso-tcpv4";
 			goto abort_transaction;

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-05-29  4:55 [1/3] [XEN] gnttab: Add new op unmap_and_replace Herbert Xu
  2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
  2007-05-29  4:55 ` [3/3] [NET] back: Add lazy copying Herbert Xu
@ 2007-05-30 11:50 ` Keir Fraser
  2007-05-30 13:09   ` Herbert Xu
  2007-06-03  2:01   ` Herbert Xu
  2 siblings, 2 replies; 29+ messages in thread
From: Keir Fraser @ 2007-05-30 11:50 UTC (permalink / raw)
  To: Herbert Xu, Xen Development Mailing List

On 29/5/07 05:55, "Herbert Xu" <herbert@gondor.apana.org.au> wrote:

> Here is a repost of the patches to eventually get rid of netloop.
> As stated last time I've added code to deal with outstanding DMA
> transfers when copying grant pages in the backend.  That code is
> in the 2nd patch of the series.

All applied now, thanks. Will you provide a patch to fix our network scripts
to get rid of netloop setup, and remove netloop from our kernel patches (or
at least disable in default configs)?

 Thanks,
 Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-05-30 11:50 ` [1/3] [XEN] gnttab: Add new op unmap_and_replace Keir Fraser
@ 2007-05-30 13:09   ` Herbert Xu
  2007-06-03  2:01   ` Herbert Xu
  1 sibling, 0 replies; 29+ messages in thread
From: Herbert Xu @ 2007-05-30 13:09 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List

On Wed, May 30, 2007 at 12:50:35PM +0100, Keir Fraser wrote:
>
> All applied now, thanks. Will you provide a patch to fix our network scripts
> to get rid of netloop setup, and remove netloop from our kernel patches (or
> at least disable in default configs)?

Sure, I'll rediff the one that we've been using so far for xen-unstable.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-05-30 11:50 ` [1/3] [XEN] gnttab: Add new op unmap_and_replace Keir Fraser
  2007-05-30 13:09   ` Herbert Xu
@ 2007-06-03  2:01   ` Herbert Xu
  2007-06-04 12:29     ` Keir Fraser
  1 sibling, 1 reply; 29+ messages in thread
From: Herbert Xu @ 2007-06-03  2:01 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Daniel P. Berrange

On Wed, May 30, 2007 at 12:50:35PM +0100, Keir Fraser wrote:
> 
> All applied now, thanks. Will you provide a patch to fix our network scripts
> to get rid of netloop setup, and remove netloop from our kernel patches (or
> at least disable in default configs)?

Here it is.  BTW, the public xen-unstable tree is still dated May 17
so if any changes have been made since then in your tree then this may
not apply cleanly.

[NET] Remove netloop and make copy_skb the default

This patch changes the default setting of copy_skb to true.
It also removes the netloop device from the Xen bridge setup.

These two changes can be used without each other with little
impact.  Having only the copy_skb change means a slight overhead
in that we may copy things twice.  Having only the latter means
that packets may be held indefinitely in dom0.  However, that
can already happen anyway for packets delayed on the way to a
physical NIC rather than delayed after going through netloop,
as well as setups which do not use bridging at all.

The scripts are partly based on work by Daniel P. Berrange.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r d83621c3a6cc linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Sun Jun 03 11:49:01 2007 +1000
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Sun Jun 03 11:56:31 2007 +1000
@@ -124,7 +124,7 @@ static unsigned int alloc_index = 0;
 static unsigned int alloc_index = 0;
 
 /* Setting this allows the safe use of this driver without netloop. */
-static int MODPARM_copy_skb;
+static int MODPARM_copy_skb = 1;
 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
 
diff -r d83621c3a6cc tools/examples/network-bridge
--- a/tools/examples/network-bridge	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/examples/network-bridge	Sun Jun 03 11:56:31 2007 +1000
@@ -5,9 +5,10 @@
 # The script name to use is defined in /etc/xen/xend-config.sxp
 # in the network-script field.
 #
-# This script creates a bridge (default xenbr${vifnum}), adds a device
-# (default eth${vifnum}) to it, copies the IP addresses from the device
-# to the bridge and adjusts the routes accordingly.
+# This script creates a bridge (default ${netdev}), adds a device
+# (defaults to the device on the default gateway route) to it, copies
+# the IP addresses from the device to the bridge and adjusts the routes
+# accordingly.
 #
 # If all goes well, this should ensure that networking stays up.
 # However, some configurations are upset by this, especially
@@ -20,31 +21,27 @@
 #
 # Vars:
 #
-# vifnum     Virtual device number to use (default 0). Numbers >=8
-#            require the netback driver to have nloopbacks set to a
-#            higher value than its default of 8.
-# bridge     The bridge to use (default xenbr${vifnum}).
-# netdev     The interface to add to the bridge (default eth${vifnum}).
+# bridge     The bridge to use (default ${netdev}).
+# netdev     The interface to add to the bridge (default gateway device).
 # antispoof  Whether to use iptables to prevent spoofing (default no).
 #
 # Internal Vars:
 # pdev="p${netdev}"
-# vdev="veth${vifnum}"
-# vif0="vif0.${vifnum}"
+# tdev=tmpbridge
 #
 # start:
-# Creates the bridge
-# Copies the IP and MAC addresses from netdev to vdev
+# Creates the bridge as tdev
+# Copies the IP and MAC addresses from pdev to bridge
 # Renames netdev to be pdev 
-# Renames vdev to be netdev 
-# Enslaves pdev, vdev to bridge
+# Renames tdev to bridge
+# Enslaves pdev to bridge
 #
 # stop:
-# Removes netdev from the bridge
-# Transfers addresses, routes from netdev to pdev
-# Renames netdev to vdev
+# Removes pdev from the bridge
+# Transfers addresses, routes from bridge to pdev
+# Renames bridge to tdev
 # Renames pdev to netdev 
-# Deletes bridge
+# Deletes tdev
 #
 # status:
 # Print addresses, interfaces, routes
@@ -59,15 +56,13 @@ findCommand "$@"
 findCommand "$@"
 evalVariables "$@"
 
-vifnum=${vifnum:-$(ip route list | awk '/^default / { print $NF }' | sed 's/^[^0-9]*//')}
-vifnum=${vifnum:-0}
-bridge=${bridge:-xenbr${vifnum}}
-netdev=${netdev:-eth${vifnum}}
+netdev=${netdev:-$(ip route list | awk '/^default / { print $NF }' |
+		   sed 's/.* dev //')}
+bridge=${bridge:-${netdev}}
 antispoof=${antispoof:-no}
 
 pdev="p${netdev}"
-vdev="veth${vifnum}"
-vif0="vif0.${vifnum}"
+tdev=tmpbridge
 
 get_ip_info() {
     addr_pfx=`ip addr show dev $1 | egrep '^ *inet' | sed -e 's/ *inet //' -e 's/ .*//'`
@@ -157,7 +152,6 @@ antispoofing () {
     iptables -P FORWARD DROP
     iptables -F FORWARD
     iptables -A FORWARD -m physdev --physdev-in ${pdev} -j ACCEPT
-    iptables -A FORWARD -m physdev --physdev-in ${vif0} -j ACCEPT
 }
 
 # Usage: show_status dev bridge
@@ -184,53 +178,27 @@ op_start () {
     fi
 
     if link_exists "$pdev"; then
-	# The device is already up.
-	return
-    fi
-    if link_exists veth0 && ! link_exists "$vdev"; then
-	echo "
-Link $vdev is missing.
-This may be because you have reached the limit of the number of interfaces
-that the loopback driver supports.  If the loopback driver is a module, you
-may raise this limit by passing it as a parameter (nloopbacks=<N>); if the
-driver is compiled statically into the kernel, then you may set the parameter
-using netloop.nloopbacks=<N> on the domain 0 kernel command line.
-" >&2
-	exit 1
-    fi
-
-    create_bridge ${bridge}
-
-    if link_exists "$vdev"; then
-	mac=`ip link show ${netdev} | grep 'link\/ether' | sed -e 's/.*ether \(..:..:..:..:..:..\).*/\1/'`
-	preiftransfer ${netdev}
-	transfer_addrs ${netdev} ${vdev}
-	if ! ifdown ${netdev}; then
-	    # If ifdown fails, remember the IP details.
-	    get_ip_info ${netdev}
-	    ip link set ${netdev} down
-	    ip addr flush ${netdev}
-	fi
-	ip link set ${netdev} name ${pdev}
-	ip link set ${vdev} name ${netdev}
-
-	setup_bridge_port ${pdev}
-	setup_bridge_port ${vif0}
-	ip link set ${netdev} addr ${mac} arp on
-
-	ip link set ${bridge} up
-	add_to_bridge  ${bridge} ${vif0}
-	add_to_bridge2 ${bridge} ${pdev}
-	do_ifup ${netdev}
-    else
-	ip link set ${bridge} arp on
-	ip link set ${bridge} multicast on
-	# old style without ${vdev}
-	transfer_addrs  ${netdev} ${bridge}
-	transfer_routes ${netdev} ${bridge}
-	# Attach the real interface to the bridge.
-	add_to_bridge ${bridge} ${netdev}
-    fi
+        # The device is already up.
+        return
+    fi
+
+    create_bridge ${tdev}
+
+    preiftransfer ${netdev}
+    transfer_addrs ${netdev} ${tdev}
+    if ! ifdown ${netdev}; then
+	# If ifdown fails, remember the IP details.
+	get_ip_info ${netdev}
+	ip link set ${netdev} down
+	ip addr flush ${netdev}
+    fi
+    ip link set ${netdev} name ${pdev}
+    ip link set ${tdev} name ${bridge}
+
+    setup_bridge_port ${pdev}
+
+    add_to_bridge2 ${bridge} ${pdev}
+    do_ifup ${bridge}
 
     if [ ${antispoof} = 'yes' ] ; then
 	antispoofing
@@ -245,31 +213,21 @@ op_stop () {
 	return
     fi
 
-    if link_exists "$pdev"; then
-	ip link set dev ${vif0} down
-	mac=`ip link show ${netdev} | grep 'link\/ether' | sed -e 's/.*ether \(..:..:..:..:..:..\).*/\1/'`
-	transfer_addrs ${netdev} ${pdev}
-	if ! ifdown ${netdev}; then
-	    get_ip_info ${netdev}
-	fi
-	ip link set ${netdev} down arp off
-	ip link set ${netdev} addr fe:ff:ff:ff:ff:ff
-	ip link set ${pdev} down
-	ip addr flush ${netdev}
-	ip link set ${pdev} addr ${mac} arp on
-
-	brctl delif ${bridge} ${pdev}
-	brctl delif ${bridge} ${vif0}
-	ip link set ${bridge} down
-
-	ip link set ${netdev} name ${vdev}
-	ip link set ${pdev} name ${netdev}
-	do_ifup ${netdev}
-    else
-	transfer_routes ${bridge} ${netdev}
-	ip link set ${bridge} down
-    fi
-    brctl delbr ${bridge}
+    transfer_addrs ${bridge} ${pdev}
+    if ! ifdown ${bridge}; then
+	get_ip_info ${bridge}
+    fi
+    ip link set ${pdev} down
+    ip addr flush ${bridge}
+
+    brctl delif ${bridge} ${pdev}
+    ip link set ${bridge} down
+
+    ip link set ${bridge} name ${tdev}
+    ip link set ${pdev} name ${netdev}
+    do_ifup ${netdev}
+
+    brctl delbr ${tdev}
 }
 
 # adds $dev to $bridge but waits for $dev to be in running state first
diff -r d83621c3a6cc tools/examples/vif-bridge
--- a/tools/examples/vif-bridge	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/examples/vif-bridge	Sun Jun 03 11:56:31 2007 +1000
@@ -44,6 +44,32 @@ then
   then
      fatal "Could not find bridge, and none was specified"
   fi
+else
+  #
+  # Old style bridge setup with netloop, used to have a bridge name
+  # of xenbrX, enslaving pethX and vif0.X, and then configuring
+  # eth0.
+  #
+  # New style bridge setup does not use netloop, so the bridge name
+  # is ethX and the physical device is enslaved pethX
+  #
+  # So if...
+  #
+  #   - User asks for xenbrX
+  #   - AND xenbrX doesn't exist
+  #   - AND there is a ethX device which is a bridge
+  #
+  # ..then we translate xenbrX to ethX
+  #
+  # This lets old config files work without modification
+  #
+  if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
+  then
+     if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
+     then
+        bridge="eth${bridge#xenbr}"
+     fi
+  fi
 fi
 
 RET=0
@@ -68,7 +94,7 @@ handle_iptable
 handle_iptable
 
 log debug "Successful vif-bridge $command for $vif, bridge $bridge."
-if [ "$command" = "online" ]
+if [ "$command" == "online" ]
 then
   success
 fi
diff -r d83621c3a6cc tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/examples/xen-network-common.sh	Sun Jun 03 11:56:31 2007 +1000
@@ -90,8 +90,6 @@ find_dhcpd_init_file()
 }
 
 # configure interfaces which act as pure bridge ports:
-#  - make quiet: no arp, no multicast (ipv6 autoconf)
-#  - set mac address to fe:ff:ff:ff:ff:ff
 setup_bridge_port() {
     local dev="$1"
 
@@ -99,9 +97,6 @@ setup_bridge_port() {
     ip link set ${dev} down
 
     # ... and configure it
-    ip link set ${dev} arp off
-    ip link set ${dev} multicast off
-    ip link set ${dev} addr fe:ff:ff:ff:ff:ff
     ip addr flush ${dev}
 }
 
@@ -114,15 +109,7 @@ create_bridge () {
 	brctl addbr ${bridge}
 	brctl stp ${bridge} off
 	brctl setfd ${bridge} 0
-        ip link set ${bridge} arp off
-        ip link set ${bridge} multicast off
     fi
-
-    # A small MTU disables IPv6 (and therefore IPv6 addrconf).
-    mtu=$(ip link show ${bridge} | sed -n 's/.* mtu \([0-9]\+\).*/\1/p')
-    ip link set ${bridge} mtu 68
-    ip link set ${bridge} up
-    ip link set ${bridge} mtu ${mtu:-1500}
 }
 
 # Usage: add_to_bridge bridge dev
diff -r d83621c3a6cc tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/examples/xend-config.sxp	Sun Jun 03 11:56:31 2007 +1000
@@ -116,9 +116,7 @@
 ##
 # To bridge network traffic, like this:
 #
-# dom0: fake eth0 -> vif0.0 -+
-#                            |
-#                          bridge -> real eth0 -> the network
+# dom0: ----------------- bridge -> real eth0 -> the network
 #                            |
 # domU: fake eth0 -> vifN.0 -+
 #
diff -r d83621c3a6cc tools/ioemu/patches/qemu-target-i386-dm
--- a/tools/ioemu/patches/qemu-target-i386-dm	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/ioemu/patches/qemu-target-i386-dm	Sun Jun 03 11:56:31 2007 +1000
@@ -1405,8 +1405,8 @@ Index: ioemu/target-i386-dm/qemu-ifup
 Index: ioemu/target-i386-dm/qemu-ifup
 ===================================================================
 --- /dev/null	1970-01-01 00:00:00.000000000 +0000
-+++ ioemu/target-i386-dm/qemu-ifup	2007-05-11 10:01:09.000000000 +0100
-@@ -0,0 +1,9 @@
++++ ioemu/target-i386-dm/qemu-ifup	2007-06-03 11:50:25.000000000 +1000
+@@ -0,0 +1,37 @@
 +#!/bin/sh
 +
 +#. /etc/rc.d/init.d/functions
@@ -1414,5 +1414,33 @@ Index: ioemu/target-i386-dm/qemu-ifup
 +
 +echo 'config qemu network with xen bridge for ' $*
 +
++bridge=$2
++
++#
++# Old style bridge setup with netloop, used to have a bridge name
++# of xenbrX, enslaving pethX and vif0.X, and then configuring
++# eth0.
++#
++# New style bridge setup does not use netloop, so the bridge name
++# is ethX and the physical device is enslaved pethX
++#
++# So if...
++#
++#   - User asks for xenbrX
++#   - AND xenbrX doesn't exist
++#   - AND there is a ethX device which is a bridge
++#
++# ..then we translate xenbrX to ethX
++#
++# This lets old config files work without modification
++#
++if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
++then
++   if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
++   then
++      bridge="eth${bridge#xenbr}"
++   fi
++fi
++
 +ifconfig $1 0.0.0.0 up
-+brctl addif $2 $1
++brctl addif $bridge $1
diff -r d83621c3a6cc tools/ioemu/target-i386-dm/qemu-ifup
--- a/tools/ioemu/target-i386-dm/qemu-ifup	Sun Jun 03 11:49:01 2007 +1000
+++ b/tools/ioemu/target-i386-dm/qemu-ifup	Sun Jun 03 11:56:31 2007 +1000
@@ -5,5 +5,33 @@
 
 echo 'config qemu network with xen bridge for ' $*
 
+bridge=$2
+
+#
+# Old style bridge setup with netloop, used to have a bridge name
+# of xenbrX, enslaving pethX and vif0.X, and then configuring
+# eth0.
+#
+# New style bridge setup does not use netloop, so the bridge name
+# is ethX and the physical device is enslaved pethX
+#
+# So if...
+#
+#   - User asks for xenbrX
+#   - AND xenbrX doesn't exist
+#   - AND there is a ethX device which is a bridge
+#
+# ..then we translate xenbrX to ethX
+#
+# This lets old config files work without modification
+#
+if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
+then
+   if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
+   then
+      bridge="eth${bridge#xenbr}"
+   fi
+fi
+
 ifconfig $1 0.0.0.0 up
-brctl addif $2 $1
+brctl addif $bridge $1

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-03  2:01   ` Herbert Xu
@ 2007-06-04 12:29     ` Keir Fraser
  2007-06-04 12:39       ` Herbert Xu
  2007-06-04 13:12       ` Daniel P. Berrange
  0 siblings, 2 replies; 29+ messages in thread
From: Keir Fraser @ 2007-06-04 12:29 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Daniel P. Berrange


> [NET] Remove netloop and make copy_skb the default

Applied, thanks. But there still seem to be references to xenbr0 in the
tools/ directory. And when I start xend I end up with two bridges, one
called eth0 (which I think is the proper one?) and also a xenbr0 (probably
bogus?). PV guests seem to by default correctly attach to the eth0 bridge,
but HVM guests are ending up on this goes-nowhere xenbr0 bridge. Probably
because it's still the hardcoded default bridge in qemu-dm?

 -- Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 12:29     ` Keir Fraser
@ 2007-06-04 12:39       ` Herbert Xu
  2007-06-04 13:15         ` Keir Fraser
  2007-06-04 13:12       ` Daniel P. Berrange
  1 sibling, 1 reply; 29+ messages in thread
From: Herbert Xu @ 2007-06-04 12:39 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Daniel P. Berrange

On Mon, Jun 04, 2007 at 01:29:27PM +0100, Keir Fraser wrote:
> 
> > [NET] Remove netloop and make copy_skb the default
> 
> Applied, thanks. But there still seem to be references to xenbr0 in the
> tools/ directory. And when I start xend I end up with two bridges, one
> called eth0 (which I think is the proper one?) and also a xenbr0 (probably
> bogus?). PV guests seem to by default correctly attach to the eth0 bridge,
> but HVM guests are ending up on this goes-nowhere xenbr0 bridge. Probably
> because it's still the hardcoded default bridge in qemu-dm?

That's weird.

There is only one call to 'brctl addbr' and that's via network-bridge.
So if xend's only been started once then I can't see how you could end
up with two bridges.

Can you add a set -x to xen-network-common.sh to see who's creating
two bridges?

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 12:29     ` Keir Fraser
  2007-06-04 12:39       ` Herbert Xu
@ 2007-06-04 13:12       ` Daniel P. Berrange
  2007-06-04 13:18         ` Keir Fraser
  1 sibling, 1 reply; 29+ messages in thread
From: Daniel P. Berrange @ 2007-06-04 13:12 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Herbert Xu

On Mon, Jun 04, 2007 at 01:29:27PM +0100, Keir Fraser wrote:
> 
> > [NET] Remove netloop and make copy_skb the default
> 
> Applied, thanks. But there still seem to be references to xenbr0 in the
> tools/ directory. And when I start xend I end up with two bridges, one
> called eth0 (which I think is the proper one?) and also a xenbr0 (probably
> bogus?). PV guests seem to by default correctly attach to the eth0 bridge,
> but HVM guests are ending up on this goes-nowhere xenbr0 bridge. Probably
> because it's still the hardcoded default bridge in qemu-dm?

Is that XenD itself keeping around bogus cached state about your network
and re-creating xenbr0 from this ?  Try rm -rf on /var/lib/xend/state
and reboot. The only places we reference xenbr0 is in the vif-bridge
and qemu-ifup scripts where we do auto-translation from xenbr0 to eth0
for back-compatability with old guest configs.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 12:39       ` Herbert Xu
@ 2007-06-04 13:15         ` Keir Fraser
  2007-06-04 13:22           ` Daniel P. Berrange
  0 siblings, 1 reply; 29+ messages in thread
From: Keir Fraser @ 2007-06-04 13:15 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Daniel P. Berrange

On 4/6/07 13:39, "Herbert Xu" <herbert@gondor.apana.org.au> wrote:

> That's weird.
> 
> There is only one call to 'brctl addbr' and that's via network-bridge.
> So if xend's only been started once then I can't see how you could end
> up with two bridges.
> 
> Can you add a set -x to xen-network-common.sh to see who's creating
> two bridges?

It's also invoked by create_bridge() inside xend. So I had to blow away
/var/lib/xend/state and that's fixed the problem.

Anyway, all seems better now except for some hotplug slowness on my test
machine for the first VM I create after rebooting the host (possibly a stale
lock file that I'm having to wait to time out).

However, there *are* still a lot of references to xenbr0 in the tools/
directory, including in our example configs. Should we get rid of these
references, or change to 'bridge=eth0'?

 -- Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 13:12       ` Daniel P. Berrange
@ 2007-06-04 13:18         ` Keir Fraser
  2007-06-04 13:29           ` Daniel P. Berrange
  0 siblings, 1 reply; 29+ messages in thread
From: Keir Fraser @ 2007-06-04 13:18 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Xen Development Mailing List, Herbert Xu

On 4/6/07 14:12, "Daniel P. Berrange" <berrange@redhat.com> wrote:

> Is that XenD itself keeping around bogus cached state about your network
> and re-creating xenbr0 from this ?  Try rm -rf on /var/lib/xend/state
> and reboot. The only places we reference xenbr0 is in the vif-bridge
> and qemu-ifup scripts where we do auto-translation from xenbr0 to eth0
> for back-compatability with old guest configs.

Oh, I see. In that case I think everything is now fine. Perhaps we should
leave the xenbr0 references as is then: at least it's clear we're talking
about a bridge in the contexts where we use it, as opposed to an interface
for local packet delivery.

 -- Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 13:15         ` Keir Fraser
@ 2007-06-04 13:22           ` Daniel P. Berrange
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel P. Berrange @ 2007-06-04 13:22 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Herbert Xu

On Mon, Jun 04, 2007 at 02:15:20PM +0100, Keir Fraser wrote:
> On 4/6/07 13:39, "Herbert Xu" <herbert@gondor.apana.org.au> wrote:
> 
> > That's weird.
> > 
> > There is only one call to 'brctl addbr' and that's via network-bridge.
> > So if xend's only been started once then I can't see how you could end
> > up with two bridges.
> > 
> > Can you add a set -x to xen-network-common.sh to see who's creating
> > two bridges?
> 
> It's also invoked by create_bridge() inside xend. So I had to blow away
> /var/lib/xend/state and that's fixed the problem.
> 
> Anyway, all seems better now except for some hotplug slowness on my test
> machine for the first VM I create after rebooting the host (possibly a stale
> lock file that I'm having to wait to time out).
> 
> However, there *are* still a lot of references to xenbr0 in the tools/
> directory, including in our example configs. Should we get rid of these
> references, or change to 'bridge=eth0'?

Yeah, any examples should be updated to refer to 'eth0' really.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 13:18         ` Keir Fraser
@ 2007-06-04 13:29           ` Daniel P. Berrange
  2007-06-04 14:12             ` Gerd Hoffmann
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel P. Berrange @ 2007-06-04 13:29 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Herbert Xu

On Mon, Jun 04, 2007 at 02:18:13PM +0100, Keir Fraser wrote:
> On 4/6/07 14:12, "Daniel P. Berrange" <berrange@redhat.com> wrote:
> 
> > Is that XenD itself keeping around bogus cached state about your network
> > and re-creating xenbr0 from this ?  Try rm -rf on /var/lib/xend/state
> > and reboot. The only places we reference xenbr0 is in the vif-bridge
> > and qemu-ifup scripts where we do auto-translation from xenbr0 to eth0
> > for back-compatability with old guest configs.
> 
> Oh, I see. In that case I think everything is now fine. Perhaps we should
> leave the xenbr0 references as is then: at least it's clear we're talking
> about a bridge in the contexts where we use it, as opposed to an interface
> for local packet delivery.

The reason we renamed it to 'eth0' in Fedora is so edge closer towards a
single unified network setup that is applicable for any virtualization
tech.  In Fedora 8 we're going to remove network-bridge from the default
config completely and just use regular init scripts for configuring
the bridge. 

eg, have /etc/sysconfig/network-scripts/ifcfg-peth0 defining the physical
interface config thus:

  DEVICE=peth0
  ONBOOT=yes
  BRIDGE=eth0
  HWADDR=XX:XX:XX:XX:XX:XX


And the bridge device config /etc/sysconfig/network-scripts/ifcfg-eth0

  DEVICE=eth0
  BOOTPROTO=dhcp
  ONBOOT=yes
  TYPE=Bridge

This will make it much easier for people who want to use bonding, or VLANs
and any other funky networking config in Dom0, and the same configs will
work for both Xen & KVM. Hence we wanted to have a generic 'eth0' and 'peth0'
rather than 'xenbr0' and 'eth0'. Of course there's plenty of other naming
schemes too, but figured since Xen already had the idea of a 'p' prefix for
identifying a physical device that seemed reasonable to keep.

Regards,
Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 13:29           ` Daniel P. Berrange
@ 2007-06-04 14:12             ` Gerd Hoffmann
  2007-06-05 16:17               ` Keir Fraser
  0 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2007-06-04 14:12 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Xen Development Mailing List, Keir Fraser, Herbert Xu

   Hi,

> This will make it much easier for people who want to use bonding, or VLANs
> and any other funky networking config in Dom0, and the same configs will
> work for both Xen & KVM. Hence we wanted to have a generic 'eth0' and 'peth0'
> rather than 'xenbr0' and 'eth0'. Of course there's plenty of other naming
> schemes too, but figured since Xen already had the idea of a 'p' prefix for
> identifying a physical device that seemed reasonable to keep.

I'd rather name the things what they are.  Name the ethernet interface 
eth0, name the bridge xenbr0 or virtbr0 or just br0.  Drop peth0.  The 
only reason for the renaming was to trick the distro setup scripts by 
doing "ifdown eth0; bridge-setup (with eth0 -> peth0 and veth0 -> eth0); 
ifup eth0".

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-04 14:12             ` Gerd Hoffmann
@ 2007-06-05 16:17               ` Keir Fraser
  2007-06-05 16:36                 ` Daniel P. Berrange
  2007-06-06  7:50                 ` Gerd Hoffmann
  0 siblings, 2 replies; 29+ messages in thread
From: Keir Fraser @ 2007-06-05 16:17 UTC (permalink / raw)
  To: Gerd Hoffmann, Daniel P. Berrange
  Cc: Xen Development Mailing List, Herbert Xu

On 4/6/07 15:12, "Gerd Hoffmann" <kraxel@redhat.com> wrote:

> I'd rather name the things what they are.  Name the ethernet interface
> eth0, name the bridge xenbr0 or virtbr0 or just br0.  Drop peth0.  The
> only reason for the renaming was to trick the distro setup scripts by
> doing "ifdown eth0; bridge-setup (with eth0 -> peth0 and veth0 -> eth0);
> ifup eth0".

That trick still applies though, right? Just with xenbr0->eth0. The decision
to use or not use this trick seems orthogonal to whether or not we have the
veth0/vif0.0 loopback.

 -- Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-05 16:17               ` Keir Fraser
@ 2007-06-05 16:36                 ` Daniel P. Berrange
  2007-06-06  7:54                   ` Gerd Hoffmann
  2007-06-06  7:50                 ` Gerd Hoffmann
  1 sibling, 1 reply; 29+ messages in thread
From: Daniel P. Berrange @ 2007-06-05 16:36 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Gerd Hoffmann, Herbert Xu

On Tue, Jun 05, 2007 at 05:17:34PM +0100, Keir Fraser wrote:
> On 4/6/07 15:12, "Gerd Hoffmann" <kraxel@redhat.com> wrote:
> 
> > I'd rather name the things what they are.  Name the ethernet interface
> > eth0, name the bridge xenbr0 or virtbr0 or just br0.  Drop peth0.  The
> > only reason for the renaming was to trick the distro setup scripts by
> > doing "ifdown eth0; bridge-setup (with eth0 -> peth0 and veth0 -> eth0);
> > ifup eth0".
> 
> That trick still applies though, right? Just with xenbr0->eth0. The decision
> to use or not use this trick seems orthogonal to whether or not we have the
> veth0/vif0.0 loopback.

Yeah, I think the key point was that whatever interface you end up using
to configure the IP addr on should be eth0. So if the scripts configure
the IP on the bridge, then the bridge must be called eth0. This is so
that the admin's post-ifup scripts for eth0 get run correctly.  If you
further changed vif-bridge so that it kept the IP addr on the physical
interface instead, you could keep it called eth0 and name the bridge as
something else. Now that netloop isn't involved, either of those two
IP config scenarios should work fine.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
@ 2007-06-06  6:18   ` Isaku Yamahata
  2007-06-06  8:37     ` Herbert Xu
  2007-06-07  8:48   ` was " Isaku Yamahata
  1 sibling, 1 reply; 29+ messages in thread
From: Isaku Yamahata @ 2007-06-06  6:18 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Keir Fraser

On Tue, May 29, 2007 at 02:55:28PM +1000, Herbert Xu wrote:
> Hi:

Hi Herbert.
I'm now trying to adopt unmap_and_replace on ia64.
I have some issues related to dma tracking.

This patch treats maddr_t as same as dma_addr_t.
But this isn't true for auto trasnlated physmap mode (i.e. IA64).
The grant table api shouldn't be involved in dma address
conversions, I suppose. The conversion should be done in dma api
implementation.
Thus dma api implementation can reduce address conversion.
On ia64, dma address conversion needs hypercall so that we want to 
avoid the conversion as much as possible.


How about introducing something like the followings and
moving gnttab_dma_map_page(), gnttab_dma_unmap_page()
to x86 (or dma api implementation) specific file?

void __gnttab_dma_map_page(stuct page* page)
{
       atomic_set(&page->_mapcount, 0);

       /* This barrier corresponds to the one in gnttab_copy_grant_page. */
       smp_mb();
}

void __gnttab_dma_unmap_page(struct page* page)
{
}


There are some comments below.

> diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c
> --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Tue May 08 10:21:23 2007 +0100
> +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Wed May 16 22:31:20 2007 +1000
   [snip]
> @@ -326,7 +335,8 @@ dma_map_single(struct device *dev, void 
>  	if (swiotlb) {
>  		dma = swiotlb_map_single(dev, ptr, size, direction);
>  	} else {
> -		dma = virt_to_bus(ptr);
> +		dma = gnttab_dma_map_page(virt_to_page(ptr)) +
> +		      offset_in_page(ptr);
>  		IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
>  		IOMMU_BUG_ON(address_needs_mapping(dev, dma));
>  	}
> @@ -344,6 +354,8 @@ dma_unmap_single(struct device *dev, dma
>  		BUG();
>  	if (swiotlb)
>  		swiotlb_unmap_single(dev, dma_addr, size, direction);
> +	else
> +		gnttab_dma_unmap_page(dma_addr);
>  }
>  EXPORT_SYMBOL(dma_unmap_single);

Is it assumed that size <= PAGE_SIZE?
Or should we iterate on the pointer with PAGE_SIZE?
Am I missing anything else?


> diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
> --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Tue May 08 10:21:23 2007 +0100
> +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed May 16 22:31:20 2007 +1000
   ...
> @@ -468,7 +467,8 @@ dma_addr_t
>  dma_addr_t
>  swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
>  {
> -	dma_addr_t dev_addr = virt_to_bus(ptr);
> +	dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
> +			      offset_in_page(ptr);
>  	void *map;
>  	struct phys_addr buffer;
>  
> @@ -486,6 +486,7 @@ swiotlb_map_single(struct device *hwdev,
>  	/*
>  	 * Oh well, have to allocate and map a bounce buffer.
>  	 */
> +	gnttab_dma_unmap_page(dev_addr);
>  	buffer.page   = virt_to_page(ptr);
>  	buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
>  	map = map_single(hwdev, buffer, size, dir);
> @@ -513,6 +514,8 @@ swiotlb_unmap_single(struct device *hwde
>  	BUG_ON(dir == DMA_NONE);
>  	if (in_swiotlb_aperture(dev_addr))
>  		unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
> +	else
> +		gnttab_dma_unmap_page(dev_addr);
>  }

Ditto.


> +/*
> + * Must not be called with IRQs off.  This should only be used on the
> + * slow path.
> + *
> + * Copy a foreign granted page to local memory.
> + */
> +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
> +{
> +	struct gnttab_unmap_and_replace unmap;
> +	mmu_update_t mmu;
> +	struct page *page;
> +	struct page *new_page;
> +	void *new_addr;
> +	void *addr;
> +	paddr_t pfn;
> +	maddr_t mfn;
> +	maddr_t new_mfn;
> +	int err;
> +
> +	page = *pagep;
> +	if (!get_page_unless_zero(page))
> +		return -ENOENT;
> +
> +	err = -ENOMEM;
> +	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> +	if (!new_page)
> +		goto out;
> +
> +	new_addr = page_address(new_page);
> +	addr = page_address(page);
> +	memcpy(new_addr, addr, PAGE_SIZE);
> +
> +	pfn = page_to_pfn(page);
> +	mfn = pfn_to_mfn(pfn);
> +	new_mfn = virt_to_mfn(new_addr);
> +
> +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +		set_phys_to_machine(pfn, new_mfn);
> +		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
> +
> +		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
> +		mmu.val = pfn;
> +		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
> +		BUG_ON(err);
> +	}
> +
> +	gnttab_set_replace_op(&unmap, (unsigned long)addr,
> +			      (unsigned long)new_addr, ref);
> +
> +	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
> +					&unmap, 1);
> +	BUG_ON(err);
> +	BUG_ON(unmap.status);
> +
> +	new_page->mapping = page->mapping;
> +	new_page->index = page->index;
> +	set_bit(PG_foreign, &new_page->flags);
> +	*pagep = new_page;
> +
> +	SetPageForeign(page, gnttab_page_free);
> +	page->mapping = NULL;
> +
> +	/*
> +	 * Ensure that there is a barrier between setting the p2m entry
> +	 * and checking the map count.  See gnttab_dma_map_page.
> +	 */
> +	smp_mb();
> +
> +	/* Has the page been DMA-mapped? */
> +	if (unlikely(page_mapped(page))) {
> +		err = -EBUSY;
> +		page->mapping = (void *)new_page;

While DMA might be going on. the grant table mapped page is unmapped here.
Since the page isn't referenced by this backend domain from the xen VMM
point of view, the front end domain can be destroyed. (e.g. by xm destroy)
So the page can be freed and reused for other purpose even though
DMA is still being processed.
The new user of the page (probably another domain) can be upset.
Is this true?

Such a case is very rare and it won't be an issue in practice.
I just want to confirm my understanding.


> +	}
> +
> +out:
> +	put_page(page);
> +	return err;
> +
> +}
> +EXPORT_SYMBOL(gnttab_copy_grant_page);

-- 
yamahata

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-05 16:17               ` Keir Fraser
  2007-06-05 16:36                 ` Daniel P. Berrange
@ 2007-06-06  7:50                 ` Gerd Hoffmann
  1 sibling, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2007-06-06  7:50 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen Development Mailing List, Daniel P. Berrange, Herbert Xu

Keir Fraser wrote:
> On 4/6/07 15:12, "Gerd Hoffmann" <kraxel@redhat.com> wrote:
> 
>> I'd rather name the things what they are.  Name the ethernet interface
>> eth0, name the bridge xenbr0 or virtbr0 or just br0.  Drop peth0.  The
>> only reason for the renaming was to trick the distro setup scripts by
>> doing "ifdown eth0; bridge-setup (with eth0 -> peth0 and veth0 -> eth0);
>> ifup eth0".
> 
> That trick still applies though, right? Just with xenbr0->eth0. The decision
> to use or not use this trick seems orthogonal to whether or not we have the
> veth0/vif0.0 loopback.

With the loopback being gone it is _much_ easier to have the 
distribution network scripts setting up the bridge because the setup can 
look the same no matter whenever a xen kernel or a native kernel has 
been booted.

And have the bridge setup explicitly wired up in the distro 
configuration is much better and cleaner than playing tricks with device 
naming IMHO.  That way you don't confuse the distro network setup tools. 
  Also more advanced setups involving vlan and other fancy stuff are 
much easier to handle then.

Thats why I'd like to see all device naming tricks being dropped.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-05 16:36                 ` Daniel P. Berrange
@ 2007-06-06  7:54                   ` Gerd Hoffmann
  2007-06-12 17:10                     ` Keir Fraser
  0 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2007-06-06  7:54 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Xen Development Mailing List, Keir Fraser, Herbert Xu

   Hi,

> Yeah, I think the key point was that whatever interface you end up using
> to configure the IP addr on should be eth0.

That trick is only needed if xend creates the bridge setup instead of 
letting the distro network scripts do that.

cheers,
   Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-06-06  6:18   ` Isaku Yamahata
@ 2007-06-06  8:37     ` Herbert Xu
  2007-06-06 12:35       ` Herbert Xu
  2007-06-07  8:36       ` [2/3] [LINUX] gnttab: Add basic DMA tracking Isaku Yamahata
  0 siblings, 2 replies; 29+ messages in thread
From: Herbert Xu @ 2007-06-06  8:37 UTC (permalink / raw)
  To: Isaku Yamahata; +Cc: Xen Development Mailing List, Keir Fraser

On Wed, Jun 06, 2007 at 03:18:29PM +0900, Isaku Yamahata wrote:
> 
> This patch treats maddr_t as same as dma_addr_t.

Thanks I'll look into this.
 
> > diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c
> > --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Tue May 08 10:21:23 2007 +0100
> > +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Wed May 16 22:31:20 2007 +1000
>    [snip]
> > @@ -326,7 +335,8 @@ dma_map_single(struct device *dev, void 
> >  	if (swiotlb) {
> >  		dma = swiotlb_map_single(dev, ptr, size, direction);
> >  	} else {
> > -		dma = virt_to_bus(ptr);
> > +		dma = gnttab_dma_map_page(virt_to_page(ptr)) +
> > +		      offset_in_page(ptr);
> >  		IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
> >  		IOMMU_BUG_ON(address_needs_mapping(dev, dma));
> >  	}
> > @@ -344,6 +354,8 @@ dma_unmap_single(struct device *dev, dma
> >  		BUG();
> >  	if (swiotlb)
> >  		swiotlb_unmap_single(dev, dma_addr, size, direction);
> > +	else
> > +		gnttab_dma_unmap_page(dma_addr);
> >  }
> >  EXPORT_SYMBOL(dma_unmap_single);
> 
> Is it assumed that size <= PAGE_SIZE?
> Or should we iterate on the pointer with PAGE_SIZE?
> Am I missing anything else?

In this case it's a BUG if the entry crosses a page boundary.

> > diff -r 3ef0510e44d0 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
> > --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Tue May 08 10:21:23 2007 +0100
> > +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed May 16 22:31:20 2007 +1000
>    ...
> > @@ -468,7 +467,8 @@ dma_addr_t
> >  dma_addr_t
> >  swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
> >  {
> > -	dma_addr_t dev_addr = virt_to_bus(ptr);
> > +	dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
> > +			      offset_in_page(ptr);
> >  	void *map;
> >  	struct phys_addr buffer;
> >  
> > @@ -486,6 +486,7 @@ swiotlb_map_single(struct device *hwdev,
> >  	/*
> >  	 * Oh well, have to allocate and map a bounce buffer.
> >  	 */
> > +	gnttab_dma_unmap_page(dev_addr);
> >  	buffer.page   = virt_to_page(ptr);
> >  	buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
> >  	map = map_single(hwdev, buffer, size, dir);
> > @@ -513,6 +514,8 @@ swiotlb_unmap_single(struct device *hwde
> >  	BUG_ON(dir == DMA_NONE);
> >  	if (in_swiotlb_aperture(dev_addr))
> >  		unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
> > +	else
> > +		gnttab_dma_unmap_page(dev_addr);
> >  }
> 
> Ditto.

It either crosses a page boundary, in which case we use the bounce
buffer, or it doesn't and this works fine.

> > +/*
> > + * Must not be called with IRQs off.  This should only be used on the
> > + * slow path.
> > + *
> > + * Copy a foreign granted page to local memory.
> > + */
> > +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
> > +{
> > +	struct gnttab_unmap_and_replace unmap;
> > +	mmu_update_t mmu;
> > +	struct page *page;
> > +	struct page *new_page;
> > +	void *new_addr;
> > +	void *addr;
> > +	paddr_t pfn;
> > +	maddr_t mfn;
> > +	maddr_t new_mfn;
> > +	int err;
> > +
> > +	page = *pagep;
> > +	if (!get_page_unless_zero(page))
> > +		return -ENOENT;
> > +
> > +	err = -ENOMEM;
> > +	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
> > +	if (!new_page)
> > +		goto out;
> > +
> > +	new_addr = page_address(new_page);
> > +	addr = page_address(page);
> > +	memcpy(new_addr, addr, PAGE_SIZE);
> > +
> > +	pfn = page_to_pfn(page);
> > +	mfn = pfn_to_mfn(pfn);
> > +	new_mfn = virt_to_mfn(new_addr);
> > +
> > +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> > +		set_phys_to_machine(pfn, new_mfn);
> > +		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
> > +
> > +		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
> > +		mmu.val = pfn;
> > +		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
> > +		BUG_ON(err);
> > +	}
> > +
> > +	gnttab_set_replace_op(&unmap, (unsigned long)addr,
> > +			      (unsigned long)new_addr, ref);
> > +
> > +	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
> > +					&unmap, 1);
> > +	BUG_ON(err);
> > +	BUG_ON(unmap.status);
> > +
> > +	new_page->mapping = page->mapping;
> > +	new_page->index = page->index;
> > +	set_bit(PG_foreign, &new_page->flags);
> > +	*pagep = new_page;
> > +
> > +	SetPageForeign(page, gnttab_page_free);
> > +	page->mapping = NULL;
> > +
> > +	/*
> > +	 * Ensure that there is a barrier between setting the p2m entry
> > +	 * and checking the map count.  See gnttab_dma_map_page.
> > +	 */
> > +	smp_mb();
> > +
> > +	/* Has the page been DMA-mapped? */
> > +	if (unlikely(page_mapped(page))) {
> > +		err = -EBUSY;
> > +		page->mapping = (void *)new_page;
> 
> While DMA might be going on. the grant table mapped page is unmapped here.
> Since the page isn't referenced by this backend domain from the xen VMM
> point of view, the front end domain can be destroyed. (e.g. by xm destroy)
> So the page can be freed and reused for other purpose even though
> DMA is still being processed.
> The new user of the page (probably another domain) can be upset.
> Is this true?

Good catch.  If the frontend freed the page we'd be in trouble.  I suppose
we need a better solution.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-06-06  8:37     ` Herbert Xu
@ 2007-06-06 12:35       ` Herbert Xu
  2007-06-07  4:32         ` [LINUX] gnttab: Fix copy_grant_page race with seqlock Herbert Xu
  2007-06-07  8:36       ` [2/3] [LINUX] gnttab: Add basic DMA tracking Isaku Yamahata
  1 sibling, 1 reply; 29+ messages in thread
From: Herbert Xu @ 2007-06-06 12:35 UTC (permalink / raw)
  To: Isaku Yamahata; +Cc: Xen Development Mailing List, Keir Fraser

On Wed, Jun 06, 2007 at 06:37:36PM +1000, Herbert Xu wrote:
>
> > While DMA might be going on. the grant table mapped page is unmapped here.
> > Since the page isn't referenced by this backend domain from the xen VMM
> > point of view, the front end domain can be destroyed. (e.g. by xm destroy)
> > So the page can be freed and reused for other purpose even though
> > DMA is still being processed.
> > The new user of the page (probably another domain) can be upset.
> > Is this true?
> 
> Good catch.  If the frontend freed the page we'd be in trouble.  I suppose
> we need a better solution.

Here's how we can resolve this race condition using a seqlock.  Please
be warned that I haven't even compiled it yet.  I'll post this again
once I've tested it tomorrow.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r bd3d6b4c52ec linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Fri Jun 01 14:50:52 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed Jun 06 22:32:55 2007 +1000
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/seqlock.h>
 #include <xen/interface/xen.h>
 #include <xen/gnttab.h>
 #include <asm/pgtable.h>
@@ -62,6 +63,8 @@ static struct grant_entry *shared;
 static struct grant_entry *shared;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
+
+static DEFINE_SEQLOCK(gnttab_dma_lock);
 
 static int gnttab_expand(unsigned int req_entries);
 
@@ -492,11 +495,6 @@ static int gnttab_map(unsigned int start
 
 static void gnttab_page_free(struct page *page)
 {
-	if (page->mapping) {
-		put_page((struct page *)page->mapping);
-		page->mapping = NULL;
-	}
-
 	ClearPageForeign(page);
 	gnttab_reset_grant_page(page);
 	put_page(page);
@@ -538,8 +536,30 @@ int gnttab_copy_grant_page(grant_ref_t r
 	mfn = pfn_to_mfn(pfn);
 	new_mfn = virt_to_mfn(new_addr);
 
+	write_seqlock(&gnttab_dma_lock);
+
+	/* Has the page been DMA-mapped? */
+	if (unlikely(page_mapped(page)))
+		write_sequnlock(&gnttab_dma_lock);
+		put_page(new_page);
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		set_phys_to_machine(pfn, new_mfn);
+
+	gnttab_set_replace_op(&unmap, (unsigned long)addr,
+			      (unsigned long)new_addr, ref);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					&unmap, 1);
+	BUG_ON(err);
+	BUG_ON(unmap.status);
+
+	write_sequnlock(&gnttab_dma_lock);
+
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		set_phys_to_machine(pfn, new_mfn);
 		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
 
 		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -548,14 +568,6 @@ int gnttab_copy_grant_page(grant_ref_t r
 		BUG_ON(err);
 	}
 
-	gnttab_set_replace_op(&unmap, (unsigned long)addr,
-			      (unsigned long)new_addr, ref);
-
-	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-					&unmap, 1);
-	BUG_ON(err);
-	BUG_ON(unmap.status);
-
 	new_page->mapping = page->mapping;
 	new_page->index = page->index;
 	set_bit(PG_foreign, &new_page->flags);
@@ -564,22 +576,9 @@ int gnttab_copy_grant_page(grant_ref_t r
 	SetPageForeign(page, gnttab_page_free);
 	page->mapping = NULL;
 
-	/*
-	 * Ensure that there is a barrier between setting the p2m entry
-	 * and checking the map count.  See gnttab_dma_map_page.
-	 */
-	smp_mb();
-
-	/* Has the page been DMA-mapped? */
-	if (unlikely(page_mapped(page))) {
-		err = -EBUSY;
-		page->mapping = (void *)new_page;
-	}
-
 out:
 	put_page(page);
 	return err;
-
 }
 EXPORT_SYMBOL(gnttab_copy_grant_page);
 
@@ -593,23 +592,27 @@ EXPORT_SYMBOL(gnttab_copy_grant_page);
  */
 maddr_t gnttab_dma_map_page(struct page *page)
 {
-	maddr_t mfn = pfn_to_mfn(page_to_pfn(page)), mfn2;
+	maddr_t maddr = page_to_bus(page);
+	unsigned int seq;
 
 	if (!PageForeign(page))
-		return mfn << PAGE_SHIFT;
-
-	if (mfn_to_local_pfn(mfn) < max_mapnr)
-		return mfn << PAGE_SHIFT;
-
-	atomic_set(&page->_mapcount, 0);
-
-	/* This barrier corresponds to the one in gnttab_copy_grant_page. */
-	smp_mb();
-
-	/* Has this page been copied in the mean time? */
-	mfn2 = pfn_to_mfn(page_to_pfn(page));
-
-	return mfn2 << PAGE_SHIFT;
+		return maddr;
+
+	do {
+		seq = read_seqbegin(&gnttab_dma_lock);
+		maddr = page_to_bus(page);
+
+		/* Has it become a local MFN? */
+		if (pfn_valid(mfn_to_local_pfn(maddr >> PAGE_SHIFT)))
+			break;
+
+		atomic_set(&page->_mapcount, 0);
+
+		/* Make _mapcount visible before read_seqretry. */
+		smp_mb();
+	} while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
+
+	return maddr;
 }
 
 int gnttab_resume(void)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [LINUX] gnttab: Fix copy_grant_page race with seqlock
  2007-06-06 12:35       ` Herbert Xu
@ 2007-06-07  4:32         ` Herbert Xu
  0 siblings, 0 replies; 29+ messages in thread
From: Herbert Xu @ 2007-06-07  4:32 UTC (permalink / raw)
  To: Isaku Yamahata; +Cc: Xen Development Mailing List, Keir Fraser

On Wed, Jun 06, 2007 at 10:35:26PM +1000, Herbert Xu wrote:
> 
> Here's how we can resolve this race condition using a seqlock.  Please
> be warned that I haven't even compiled it yet.  I'll post this again
> once I've tested it tomorrow.

OK, now tested.

[LINUX] gnttab: Fix copy_grant_page race with seqlock

Previously gnttab_copy_grant_page would always unmap the grant table
entry, even if DMA operations were outstanding.  This would allow a
hostile guest to free a page still used by DMA to the hypervisor.

This patch fixes this by making sure that we don't free the grant table
entry if a DMA operation has taken place.  To achieve this a seqlock is
used to synchronise the DMA operations and copy_grant_page.

The DMA operations use the read side of the seqlock so performance should
be largely unaffected.

Thanks to Isaku Yamahata for noticing the race condition.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff -r bd3d6b4c52ec linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Fri Jun 01 14:50:52 2007 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 07 13:33:35 2007 +1000
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/seqlock.h>
 #include <xen/interface/xen.h>
 #include <xen/gnttab.h>
 #include <asm/pgtable.h>
@@ -62,6 +63,8 @@ static struct grant_entry *shared;
 static struct grant_entry *shared;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
+
+static DEFINE_SEQLOCK(gnttab_dma_lock);
 
 static int gnttab_expand(unsigned int req_entries);
 
@@ -492,11 +495,6 @@ static int gnttab_map(unsigned int start
 
 static void gnttab_page_free(struct page *page)
 {
-	if (page->mapping) {
-		put_page((struct page *)page->mapping);
-		page->mapping = NULL;
-	}
-
 	ClearPageForeign(page);
 	gnttab_reset_grant_page(page);
 	put_page(page);
@@ -538,8 +536,33 @@ int gnttab_copy_grant_page(grant_ref_t r
 	mfn = pfn_to_mfn(pfn);
 	new_mfn = virt_to_mfn(new_addr);
 
+	write_seqlock(&gnttab_dma_lock);
+
+	/* Make seq visible before checking page_mapped. */
+	smp_mb();
+
+	/* Has the page been DMA-mapped? */
+	if (unlikely(page_mapped(page))) {
+		write_sequnlock(&gnttab_dma_lock);
+		put_page(new_page);
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		set_phys_to_machine(pfn, new_mfn);
+
+	gnttab_set_replace_op(&unmap, (unsigned long)addr,
+			      (unsigned long)new_addr, ref);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					&unmap, 1);
+	BUG_ON(err);
+	BUG_ON(unmap.status);
+
+	write_sequnlock(&gnttab_dma_lock);
+
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		set_phys_to_machine(pfn, new_mfn);
 		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
 
 		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -548,14 +571,6 @@ int gnttab_copy_grant_page(grant_ref_t r
 		BUG_ON(err);
 	}
 
-	gnttab_set_replace_op(&unmap, (unsigned long)addr,
-			      (unsigned long)new_addr, ref);
-
-	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-					&unmap, 1);
-	BUG_ON(err);
-	BUG_ON(unmap.status);
-
 	new_page->mapping = page->mapping;
 	new_page->index = page->index;
 	set_bit(PG_foreign, &new_page->flags);
@@ -564,22 +579,9 @@ int gnttab_copy_grant_page(grant_ref_t r
 	SetPageForeign(page, gnttab_page_free);
 	page->mapping = NULL;
 
-	/*
-	 * Ensure that there is a barrier between setting the p2m entry
-	 * and checking the map count.  See gnttab_dma_map_page.
-	 */
-	smp_mb();
-
-	/* Has the page been DMA-mapped? */
-	if (unlikely(page_mapped(page))) {
-		err = -EBUSY;
-		page->mapping = (void *)new_page;
-	}
-
 out:
 	put_page(page);
 	return err;
-
 }
 EXPORT_SYMBOL(gnttab_copy_grant_page);
 
@@ -593,23 +595,27 @@ EXPORT_SYMBOL(gnttab_copy_grant_page);
  */
 maddr_t gnttab_dma_map_page(struct page *page)
 {
-	maddr_t mfn = pfn_to_mfn(page_to_pfn(page)), mfn2;
+	maddr_t maddr = page_to_bus(page);
+	unsigned int seq;
 
 	if (!PageForeign(page))
-		return mfn << PAGE_SHIFT;
-
-	if (mfn_to_local_pfn(mfn) < max_mapnr)
-		return mfn << PAGE_SHIFT;
-
-	atomic_set(&page->_mapcount, 0);
-
-	/* This barrier corresponds to the one in gnttab_copy_grant_page. */
-	smp_mb();
-
-	/* Has this page been copied in the mean time? */
-	mfn2 = pfn_to_mfn(page_to_pfn(page));
-
-	return mfn2 << PAGE_SHIFT;
+		return maddr;
+
+	do {
+		seq = read_seqbegin(&gnttab_dma_lock);
+		maddr = page_to_bus(page);
+
+		/* Has it become a local MFN? */
+		if (pfn_valid(mfn_to_local_pfn(maddr >> PAGE_SHIFT)))
+			break;
+
+		atomic_set(&page->_mapcount, 0);
+
+		/* Make _mapcount visible before read_seqretry. */
+		smp_mb();
+	} while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
+
+	return maddr;
 }
 
 int gnttab_resume(void)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-06-06  8:37     ` Herbert Xu
  2007-06-06 12:35       ` Herbert Xu
@ 2007-06-07  8:36       ` Isaku Yamahata
  2007-06-07  8:44         ` Isaku Yamahata
  1 sibling, 1 reply; 29+ messages in thread
From: Isaku Yamahata @ 2007-06-07  8:36 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Keir Fraser, xen-ia64-devel

[-- Attachment #1: Type: text/plain, Size: 180 bytes --]

On Wed, Jun 06, 2007 at 06:37:36PM +1000, Herbert Xu wrote:
> > This patch treats maddr_t as same as dma_addr_t.
> Thanks I'll look into this.

How about this patch?

-- 
yamahata

[-- Attachment #2: 15203_9fcca2df865e_separate_dma_address_conversion.patch --]
[-- Type: text/x-diff, Size: 4944 bytes --]

# HG changeset patch
# User yamahata@valinux.co.jp
# Date 1181203954 -32400
# Node ID 9fcca2df865e61f96d6f32eb2b5f1a1aa4fe9099
# Parent  1c4aac883f56c707306b57221c20a0d12549e4ad
separated out dma address conversion part from grant table dma map api
into arch specific file.
sorted out confusing maddr_t with dma_addr_t.
PATCHNAME: separate_dma_address_conversion

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

diff -r 1c4aac883f56 -r 9fcca2df865e linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Thu Jun 07 17:03:56 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Thu Jun 07 17:12:34 2007 +0900
@@ -19,6 +19,7 @@
 #include <asm/swiotlb.h>
 #include <asm/tlbflush.h>
 #include <asm-i386/mach-xen/asm/swiotlb.h>
+#include <asm-i386/mach-xen/asm/gnttab_dma.h>
 #include <asm/bug.h>
 
 #ifdef __x86_64__
diff -r 1c4aac883f56 -r 9fcca2df865e linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Thu Jun 07 17:03:56 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Thu Jun 07 17:12:34 2007 +0900
@@ -27,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <xen/interface/memory.h>
+#include <asm-i386/mach-xen/asm/gnttab_dma.h>
 
 int swiotlb;
 EXPORT_SYMBOL(swiotlb);
diff -r 1c4aac883f56 -r 9fcca2df865e linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 07 17:03:56 2007 +0900
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 07 17:12:34 2007 +0900
@@ -593,20 +593,19 @@ EXPORT_SYMBOL(gnttab_copy_grant_page);
  *
  * All other pages are simply returned as is.
  */
-maddr_t gnttab_dma_map_page(struct page *page)
-{
-	maddr_t maddr = page_to_bus(page);
+void __gnttab_dma_map_page(struct page *page,
+			   int (*local_pfn)(struct page *page))
+{
+	extern seqlock_t gnttab_dma_lock;
 	unsigned int seq;
 
-	if (!PageForeign(page))
-		return maddr;
+	if (!is_running_on_xen() || !PageForeign(page))
+		return;
 
 	do {
 		seq = read_seqbegin(&gnttab_dma_lock);
-		maddr = page_to_bus(page);
-
-		/* Has it become a local MFN? */
-		if (pfn_valid(mfn_to_local_pfn(maddr >> PAGE_SHIFT)))
+
+		if (local_pfn && (*local_pfn)(page))
 			break;
 
 		atomic_set(&page->_mapcount, 0);
@@ -614,8 +613,6 @@ maddr_t gnttab_dma_map_page(struct page 
 		/* Make _mapcount visible before read_seqretry. */
 		smp_mb();
 	} while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
-
-	return maddr;
 }
 
 int gnttab_resume(void)
diff -r 1c4aac883f56 -r 9fcca2df865e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/gnttab_dma.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/gnttab_dma.h	Thu Jun 07 17:12:34 2007 +0900
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _ASM_I386_GNTTAB_DMA_H
+#define _ASM_I386_GNTTAB_DMA_H
+
+static inline int gnttab_dma_local_pfn(struct page *page)
+{
+	return pfn_valid(mfn_to_local_pfn(pfn_to_mfn(page_to_pfn(page))));
+}
+
+static inline dma_addr_t gnttab_dma_map_page(struct page *page)
+{
+	__gnttab_dma_map_page(page, &gnttab_dma_local_pfn);
+	return page_to_bus(page);
+}
+
+static inline void gnttab_dma_unmap_page(dma_addr_t dma_address)
+{
+	__gnttab_dma_unmap_page(virt_to_page(bus_to_virt(dma_address)));
+}
+
+#endif /* _ASM_I386_GNTTAB_DMA_H */
diff -r 1c4aac883f56 -r 9fcca2df865e linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Thu Jun 07 17:03:56 2007 +0900
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Thu Jun 07 17:12:34 2007 +0900
@@ -103,9 +103,9 @@ void gnttab_grant_foreign_transfer_ref(g
 				       unsigned long pfn);
 
 int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep);
-maddr_t gnttab_dma_map_page(struct page *page);
-
-static inline void gnttab_dma_unmap_page(maddr_t mfn)
+void __gnttab_dma_map_page(struct page *page,
+			   int (*local_pfn)(struct page *page));
+static inline void __gnttab_dma_unmap_page(struct page *page)
 {
 }
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-06-07  8:36       ` [2/3] [LINUX] gnttab: Add basic DMA tracking Isaku Yamahata
@ 2007-06-07  8:44         ` Isaku Yamahata
  0 siblings, 0 replies; 29+ messages in thread
From: Isaku Yamahata @ 2007-06-07  8:44 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Keir Fraser, xen-ia64-devel

[-- Attachment #1: Type: text/plain, Size: 581 bytes --]


I attached the patch of ia64 linux part for xen-ia64-unstable.hg with
your patches.
While this patch isn't for commit yet, you may want to see it.

Thanks,

On Thu, Jun 07, 2007 at 05:36:37PM +0900, Isaku Yamahata wrote:
> On Wed, Jun 06, 2007 at 06:37:36PM +1000, Herbert Xu wrote:
> > > This patch treats maddr_t as same as dma_addr_t.
> > Thanks I'll look into this.
> 
> How about this patch?
> 
> -- 
> yamahata


> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel


-- 
yamahata

[-- Attachment #2: 15175_ce1c94602516_ia64_dma_tracking_support.patch --]
[-- Type: text/x-diff, Size: 12734 bytes --]

# HG changeset patch
# User yamahata@valinux.co.jp
# Date 1181205552 -32400
# Node ID ce1c94602516444bcadd38f9678d8abd0f3147c6
# Parent  ca58c12a9dff5732c5a164f48b9abf1724539b84
ia64 dma tracking support
PATCHNAME: ia64_dma_tracking_support

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
--- a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c	Thu Jun 07 17:39:12 2007 +0900
@@ -42,6 +42,10 @@
 #include <asm/system.h>		/* wmb() */
 
 #include <asm/acpi-ext.h>
+#ifdef CONFIG_XEN
+#include <xen/gnttab.h>
+#include <asm/gnttab_dma.h>
+#endif
 
 #define PFX "IOC: "
 
@@ -901,7 +905,19 @@ sba_map_single(struct device *dev, void 
 	unsigned long flags;
 #endif
 #ifdef ALLOW_IOV_BYPASS
+#ifdef CONFIG_XEN
+	unsigned long pci_addr;
+	void* tmp_addr = addr - PAGE_SIZE;
+	size_t tmp_size = size + PAGE_SIZE;
+	do {
+		tmp_addr += PAGE_SIZE;
+		tmp_size -= PAGE_SIZE;
+		gnttab_dma_use_page(virt_to_page(addr));
+	} while (tmp_size > PAGE_SIZE);
+	pci_addr = virt_to_bus(addr);
+#else
 	unsigned long pci_addr = virt_to_bus(addr);
+#endif
 
 	ASSERT(to_pci_dev(dev)->dma_mask);
 	/*
@@ -994,6 +1010,48 @@ sba_mark_clean(struct ioc *ioc, dma_addr
 }
 #endif
 
+#ifdef CONFIG_XEN
+static void
+sba_gnttab_dma_unmap_page(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	u32    iovp = (u32) SBA_IOVP(ioc,iova);
+	int    off = PDIR_INDEX(iovp);
+	int    i;
+	size_t step_size = max(PAGE_SIZE, iovp_size);
+
+	if (!is_running_on_xen())
+		return;
+	
+	for (;;) {
+		size_t unmap_pages =
+			(min(step_size, size) + PAGE_SIZE - 1) / PAGE_SIZE;
+		dma_addr_t dma_address = ioc->pdir_base[off] &
+			~0xE000000000000FFFULL;
+		for (i = 0; i < unmap_pages; i++) {
+			gnttab_dma_unmap_page(dma_address);
+			dma_address += PAGE_SIZE;
+		}
+
+		if (size <= step_size)
+			break;
+		off += step_size / iovp_size;
+		size -= step_size;
+	}
+}
+
+static void
+sba_gnttab_dma_use_sg(struct scatterlist *sglist, int nents)
+{
+	int i;
+
+	if (!is_running_on_xen())
+		return;
+
+	for (i = 0; i < nents; i++)
+		gnttab_dma_use_page(sglist[i].page);
+}
+#endif
+
 /**
  * sba_unmap_single - unmap one IOVA and free resources
  * @dev: instance of PCI owned by the driver that's asking.
@@ -1003,7 +1061,11 @@ sba_mark_clean(struct ioc *ioc, dma_addr
  *
  * See Documentation/DMA-mapping.txt
  */
+#ifdef CONFIG_XEN
+static void __sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir, int gnttab_unmap)
+#else
 void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+#endif
 {
 	struct ioc *ioc;
 #if DELAYED_RESOURCE_CNT > 0
@@ -1016,7 +1078,8 @@ void sba_unmap_single(struct device *dev
 	ASSERT(ioc);
 
 #ifdef ALLOW_IOV_BYPASS
-	if (likely((iova & ioc->imask) != ioc->ibase)) {
+	if (likely((iova & ioc->imask) != ioc->ibase) &&
+	    !range_straddles_page_boundary(bus_to_virt(iova), size)) {
 		/*
 		** Address does not fall w/in IOVA, must be bypassing
 		*/
@@ -1027,6 +1090,17 @@ void sba_unmap_single(struct device *dev
 			mark_clean(bus_to_virt(iova), size);
 		}
 #endif
+#ifdef CONFIG_XEN
+		if (gnttab_unmap) {
+			for (;;) {
+				gnttab_dma_unmap_page(iova);
+				if (size <= PAGE_SIZE)
+					break;
+				iova += PAGE_SIZE;
+				size -= PAGE_SIZE;
+			}
+		}
+#endif
 		return;
 	}
 #endif
@@ -1043,7 +1117,11 @@ void sba_unmap_single(struct device *dev
 	if (dir == DMA_FROM_DEVICE)
 		sba_mark_clean(ioc, iova, size);
 #endif
-
+#ifdef CONFIG_XEN
+	if (gnttab_unmap)
+		sba_gnttab_dma_unmap_page(ioc, iova, size);
+#endif
+	
 #if DELAYED_RESOURCE_CNT > 0
 	spin_lock_irqsave(&ioc->saved_lock, flags);
 	d = &(ioc->saved[ioc->saved_cnt]);
@@ -1071,6 +1149,14 @@ void sba_unmap_single(struct device *dev
 #endif /* DELAYED_RESOURCE_CNT == 0 */
 }
 
+#ifdef CONFIG_XEN
+void
+sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+{
+	__sba_unmap_single(dev, iova, size, dir, 1);
+}
+#endif
+
 
 /**
  * sba_alloc_coherent - allocate/map shared mem for DMA
@@ -1427,7 +1513,11 @@ int sba_map_sg(struct device *dev, struc
 	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
 		for (sg = sglist ; filled < nents ; filled++, sg++){
 			sg->dma_length = sg->length;
+#ifdef CONFIG_XEN
+			sg->dma_address = gnttab_dma_map_page(sg->page) + sg->offset;
+#else
 			sg->dma_address = virt_to_bus(sba_sg_address(sg));
+#endif
 		}
 		return filled;
 	}
@@ -1450,6 +1540,10 @@ int sba_map_sg(struct device *dev, struc
 #endif
 
 	prefetch(ioc->res_hint);
+
+#ifdef CONFIG_XEN
+	sba_gnttab_dma_use_sg(sglist, nents);
+#endif
 
 	/*
 	** First coalesce the chunks and allocate I/O pdir space
@@ -1517,8 +1611,12 @@ void sba_unmap_sg (struct device *dev, s
 #endif
 
 	while (nents && sglist->dma_length) {
-
+#ifdef CONFIG_XEN
+		__sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir, 0);
+		__gnttab_dma_unmap_page(sglist->page);
+#else
 		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
+#endif
 		sglist++;
 		nents--;
 	}
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c	Thu Jun 07 17:39:12 2007 +0900
@@ -33,6 +33,8 @@
 #include <linux/bootmem.h>
 
 #ifdef CONFIG_XEN
+#include <xen/gnttab.h>
+#include <asm/gnttab_dma.h>
 /*
  * What DMA mask should Xen use to remap the bounce buffer pool?  Most
  * reports seem to indicate 30 bits is sufficient, except maybe for old
@@ -597,7 +599,7 @@ dma_addr_t
 dma_addr_t
 swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
 {
-	unsigned long dev_addr = virt_to_bus(ptr);
+	unsigned long dev_addr = gnttab_dma_map_virt(ptr);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -610,6 +612,7 @@ swiotlb_map_single(struct device *hwdev,
 	    !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
 		return dev_addr;
 
+	__gnttab_dma_unmap_page(virt_to_page(ptr));
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
@@ -672,8 +675,11 @@ swiotlb_unmap_single(struct device *hwde
 	BUG_ON(dir == DMA_NONE);
 	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
 		unmap_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
+	else {
+		gnttab_dma_unmap_page(dev_addr);
+		if (dir == DMA_FROM_DEVICE)
+			mark_clean(dma_addr, size);
+	}
 }
 
 /*
@@ -774,9 +780,11 @@ swiotlb_map_sg(struct device *hwdev, str
 
 	for (i = 0; i < nelems; i++, sg++) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_bus(addr);
+		dev_addr = gnttab_dma_map_virt(addr);
 		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
-			void *map = map_single(hwdev, addr, sg->length, dir);
+			void *map;
+			gnttab_dma_unmap_page(dev_addr);
+			map = map_single(hwdev, addr, sg->length, dir);
 			sg->dma_address = virt_to_bus(map);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
@@ -808,8 +816,12 @@ swiotlb_unmap_sg(struct device *hwdev, s
 	for (i = 0; i < nelems; i++, sg++)
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
 			unmap_single(hwdev, (void *) bus_to_virt(sg->dma_address), sg->dma_length, dir);
-		else if (dir == DMA_FROM_DEVICE)
-			mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+		else {
+			gnttab_dma_unmap_page(sg->dma_address);
+			if (dir == DMA_FROM_DEVICE)
+				mark_clean(SG_ENT_VIRT_ADDRESS(sg),
+					   sg->dma_length);
+		}
 }
 
 /*
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c	Thu Jun 07 17:39:12 2007 +0900
@@ -105,6 +105,7 @@ xencommize_grant_table_op(unsigned int c
 	switch (cmd) {
 	case GNTTABOP_map_grant_ref:
 	case GNTTABOP_unmap_grant_ref:
+	case GNTTABOP_unmap_and_replace:
 		break;
 	case GNTTABOP_setup_table:
 	{
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c	Thu Jun 07 17:39:12 2007 +0900
@@ -70,6 +70,9 @@ xencommize_mini_grant_table_op(struct xe
 	case GNTTABOP_unmap_grant_ref:
 		argsize = sizeof(struct gnttab_unmap_grant_ref);
 		break;
+	case GNTTABOP_unmap_and_replace:
+		argsize = sizeof(struct gnttab_unmap_and_replace);
+		break;
 	case GNTTABOP_setup_table:
 	{
 		struct gnttab_setup_table *setup = op;
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c	Thu Jun 07 17:39:12 2007 +0900
@@ -25,6 +25,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <asm/scatterlist.h>
+#include <xen/gnttab.h>
+#include <asm/gnttab_dma.h>
 
 #define IOMMU_BUG_ON(test)					\
 do {								\
@@ -57,7 +59,7 @@ xen_map_sg(struct device *hwdev, struct 
 	int i;
 
 	for (i = 0 ; i < nents ; i++) {
-		sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+		sg[i].dma_address = gnttab_dma_map_page(sg[i].page) + sg[i].offset;
 		sg[i].dma_length  = sg[i].length;
 
 		IOMMU_BUG_ON(address_needs_mapping(hwdev, sg[i].dma_address));
@@ -71,6 +73,9 @@ xen_unmap_sg(struct device *hwdev, struc
 xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
 	     int direction)
 {
+	int i;
+	for (i = 0; i < nents; i++)
+		__gnttab_dma_unmap_page(sg[i].page);
 }
 EXPORT_SYMBOL(xen_unmap_sg);
 
@@ -128,7 +133,7 @@ xen_map_single(struct device *dev, void 
 xen_map_single(struct device *dev, void *ptr, size_t size,
 	       int direction)
 {
-	dma_addr_t dma_addr = virt_to_bus(ptr);
+	dma_addr_t dma_addr = gnttab_dma_map_virt(ptr);
 
 	IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
 	IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
@@ -141,5 +146,6 @@ xen_unmap_single(struct device *dev, dma
 xen_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 		 int direction)
 {
+	gnttab_dma_unmap_page(dma_addr);
 }
 EXPORT_SYMBOL(xen_unmap_single);
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/include/asm-ia64/gnttab_dma.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/gnttab_dma.h	Thu Jun 07 17:39:12 2007 +0900
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _ASM_IA64_GNTTAB_DMA_H
+#define _ASM_IA64_GNTTAB_DMA_H
+
+/* caller must get dma address after calling this function */
+static inline void gnttab_dma_use_page(struct page *page)
+{
+	__gnttab_dma_map_page(page, NULL);
+}
+
+static inline dma_addr_t gnttab_dma_map_page(struct page *page)
+{
+	gnttab_dma_use_page(page);
+	return page_to_bus(page);
+}
+
+static inline dma_addr_t gnttab_dma_map_virt(void *ptr)
+{
+	return gnttab_dma_map_page(virt_to_page(ptr)) + offset_in_page(ptr);
+}
+
+static inline void gnttab_dma_unmap_page(dma_addr_t dma_address)
+{
+	__gnttab_dma_unmap_page(virt_to_page(bus_to_virt(dma_address)));
+}
+
+#endif /* _ASM_IA64_GNTTAB_DMA_H */
diff -r ca58c12a9dff -r ce1c94602516 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Thu Jun 07 17:22:17 2007 +0900
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Thu Jun 07 17:39:12 2007 +0900
@@ -422,4 +422,7 @@ xencomm_arch_hypercall_fpswa_revision(st
 #define HYPERVISOR_suspend xencomm_hypercall_suspend
 #define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op
 
+/* to compile gnttab_copy_grant_page() in drivers/xen/core/gnttab.c */
+#define HYPERVISOR_mmu_update(req, count, success_count, domid) ({BUG();0;})
+
 #endif /* __HYPERCALL_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* was Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
  2007-06-06  6:18   ` Isaku Yamahata
@ 2007-06-07  8:48   ` Isaku Yamahata
  2007-06-07  8:51     ` Herbert Xu
  1 sibling, 1 reply; 29+ messages in thread
From: Isaku Yamahata @ 2007-06-07  8:48 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Xen Development Mailing List, Keir Fraser, xen-ia64-devel

[-- Attachment #1: Type: text/plain, Size: 333 bytes --]

On Tue, May 29, 2007 at 02:55:28PM +1000, Herbert Xu wrote:
> +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
> +{
...
> +	paddr_t pfn;
> +	maddr_t mfn;
> +	maddr_t new_mfn;

The type of page frame number is unsigned long.
Especially paddr_t isn't defined in ia64 so that it causes compilation error.

-- 
yamahata

[-- Attachment #2: 15202_1c4aac883f56_use_unsigned_long_instead_of_maddr_t_or_paddr_t.patch --]
[-- Type: text/x-diff, Size: 1328 bytes --]

# HG changeset patch
# User yamahata@valinux.co.jp
# Date 1181203436 -32400
# Node ID 1c4aac883f56c707306b57221c20a0d12549e4ad
# Parent  dc6a6311dd6fd530ca0ea8088fe2f56eb5cd02c1
The type of page frame number (pfn or mfn) is unsigned long in other places.
Use unsigned long for consistency.
PATCHNAME: use_unsigned_long_instead_of_maddr_t_or_paddr_t

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

diff -r dc6a6311dd6f -r 1c4aac883f56 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 07 16:55:44 2007 +0900
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 07 17:03:56 2007 +0900
@@ -514,9 +514,9 @@ int gnttab_copy_grant_page(grant_ref_t r
 	struct page *new_page;
 	void *new_addr;
 	void *addr;
-	paddr_t pfn;
-	maddr_t mfn;
-	maddr_t new_mfn;
+	unsigned long pfn;
+	unsigned long mfn;
+	unsigned long new_mfn;
 	int err;
 
 	page = *pagep;
@@ -565,7 +565,7 @@ int gnttab_copy_grant_page(grant_ref_t r
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
 
-		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu.ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
 		mmu.val = pfn;
 		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
 		BUG_ON(err);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: was Re: [2/3] [LINUX] gnttab: Add basic DMA tracking
  2007-06-07  8:48   ` was " Isaku Yamahata
@ 2007-06-07  8:51     ` Herbert Xu
  0 siblings, 0 replies; 29+ messages in thread
From: Herbert Xu @ 2007-06-07  8:51 UTC (permalink / raw)
  To: Isaku Yamahata; +Cc: Xen Development Mailing List, Keir Fraser, xen-ia64-devel

On Thu, Jun 07, 2007 at 05:48:48PM +0900, Isaku Yamahata wrote:
> 
> The type of page frame number is unsigned long.
> Especially paddr_t isn't defined in ia64 so that it causes compilation error.

unsigned long breaks in a 32-bit guest on a 64-bit host.  Granted
this code isn't currently used in such a situation but it's still
good to be consistent.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-06  7:54                   ` Gerd Hoffmann
@ 2007-06-12 17:10                     ` Keir Fraser
  2007-06-14  7:41                       ` Gerd Hoffmann
  0 siblings, 1 reply; 29+ messages in thread
From: Keir Fraser @ 2007-06-12 17:10 UTC (permalink / raw)
  To: Gerd Hoffmann, Daniel P. Berrange
  Cc: Xen Development Mailing List, Keir Fraser, Herbert Xu

On 6/6/07 08:54, "Gerd Hoffmann" <kraxel@redhat.com> wrote:

>> Yeah, I think the key point was that whatever interface you end up using
>> to configure the IP addr on should be eth0.
> 
> That trick is only needed if xend creates the bridge setup instead of
> letting the distro network scripts do that.

How would we do that in the distro-agnostic Xen tree?

 -- Keir

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-12 17:10                     ` Keir Fraser
@ 2007-06-14  7:41                       ` Gerd Hoffmann
  2007-06-14 11:01                         ` Daniel P. Berrange
  0 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2007-06-14  7:41 UTC (permalink / raw)
  To: Keir Fraser
  Cc: Keir Fraser, Xen Development Mailing List, Daniel P. Berrange,
	Herbert Xu

Keir Fraser wrote:
> On 6/6/07 08:54, "Gerd Hoffmann" <kraxel@redhat.com> wrote:
> 
>>> Yeah, I think the key point was that whatever interface you end up using
>>> to configure the IP addr on should be eth0.
>> That trick is only needed if xend creates the bridge setup instead of
>> letting the distro network scripts do that.
> 
> How would we do that in the distro-agnostic Xen tree?

Documentation and sample config files?

Sure, the down side is that "xend start" alone isn't enougth to make the 
network fly.  But that doesn't work that well, anything more complex 
than "single ethernet interface" isn't covered by the scripts anyway.

cheers,

   Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [1/3] [XEN] gnttab: Add new op unmap_and_replace
  2007-06-14  7:41                       ` Gerd Hoffmann
@ 2007-06-14 11:01                         ` Daniel P. Berrange
  0 siblings, 0 replies; 29+ messages in thread
From: Daniel P. Berrange @ 2007-06-14 11:01 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Xen Development Mailing List, Keir Fraser, Herbert Xu

On Thu, Jun 14, 2007 at 09:41:55AM +0200, Gerd Hoffmann wrote:
> Keir Fraser wrote:
> >On 6/6/07 08:54, "Gerd Hoffmann" <kraxel@redhat.com> wrote:
> >
> >>>Yeah, I think the key point was that whatever interface you end up using
> >>>to configure the IP addr on should be eth0.
> >>That trick is only needed if xend creates the bridge setup instead of
> >>letting the distro network scripts do that.
> >
> >How would we do that in the distro-agnostic Xen tree?
> 
> Documentation and sample config files?
> 
> Sure, the down side is that "xend start" alone isn't enougth to make the 
> network fly.  But that doesn't work that well, anything more complex 
> than "single ethernet interface" isn't covered by the scripts anyway.

Even with a mere single ethernet interface, having 'xend start' mess with 
networking breaks anyone using iSCSI, AOE or NFS root/boot too.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2007-06-14 11:01 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-29  4:55 [1/3] [XEN] gnttab: Add new op unmap_and_replace Herbert Xu
2007-05-29  4:55 ` [2/3] [LINUX] gnttab: Add basic DMA tracking Herbert Xu
2007-06-06  6:18   ` Isaku Yamahata
2007-06-06  8:37     ` Herbert Xu
2007-06-06 12:35       ` Herbert Xu
2007-06-07  4:32         ` [LINUX] gnttab: Fix copy_grant_page race with seqlock Herbert Xu
2007-06-07  8:36       ` [2/3] [LINUX] gnttab: Add basic DMA tracking Isaku Yamahata
2007-06-07  8:44         ` Isaku Yamahata
2007-06-07  8:48   ` was " Isaku Yamahata
2007-06-07  8:51     ` Herbert Xu
2007-05-29  4:55 ` [3/3] [NET] back: Add lazy copying Herbert Xu
2007-05-30 11:50 ` [1/3] [XEN] gnttab: Add new op unmap_and_replace Keir Fraser
2007-05-30 13:09   ` Herbert Xu
2007-06-03  2:01   ` Herbert Xu
2007-06-04 12:29     ` Keir Fraser
2007-06-04 12:39       ` Herbert Xu
2007-06-04 13:15         ` Keir Fraser
2007-06-04 13:22           ` Daniel P. Berrange
2007-06-04 13:12       ` Daniel P. Berrange
2007-06-04 13:18         ` Keir Fraser
2007-06-04 13:29           ` Daniel P. Berrange
2007-06-04 14:12             ` Gerd Hoffmann
2007-06-05 16:17               ` Keir Fraser
2007-06-05 16:36                 ` Daniel P. Berrange
2007-06-06  7:54                   ` Gerd Hoffmann
2007-06-12 17:10                     ` Keir Fraser
2007-06-14  7:41                       ` Gerd Hoffmann
2007-06-14 11:01                         ` Daniel P. Berrange
2007-06-06  7:50                 ` Gerd Hoffmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.