All of lore.kernel.org
 help / color / mirror / Atom feed
* netif & grant tables
@ 2005-07-01  2:28 Matt Chapman
  2005-07-01 10:08 ` Vincent Hanquez
  2005-07-01 20:29 ` Stefan Berger
  0 siblings, 2 replies; 13+ messages in thread
From: Matt Chapman @ 2005-07-01  2:28 UTC (permalink / raw)
  To: xen-devel

Hi,

I'm currently looking at getting domU networking working
on IA64, and to do this I need to make netback/netfront
use grant tables.

I'm told that there's already a patch floating around,
can someone tell me where to find it?

Matt

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01  2:28 netif & grant tables Matt Chapman
@ 2005-07-01 10:08 ` Vincent Hanquez
  2005-07-01 20:29 ` Stefan Berger
  1 sibling, 0 replies; 13+ messages in thread
From: Vincent Hanquez @ 2005-07-01 10:08 UTC (permalink / raw)
  To: Matt Chapman; +Cc: xen-devel

On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
> 
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?

Hi Matt,

Here is the patch for having grant tables with netback and netfront.

diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig	2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
 	  dedicated device-driver domain, or your master control domain
 	  (domain 0), then you almost certainly want to say Y here.
 
+config XEN_NETDEV_GRANT_TX
+        bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+        default y
+        help
+          This introduces the use of grant tables as a data exhange mechanism
+          between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+        bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+        default y
+        help
+          This introduces the use of grant tables as a data exhange mechanism
+          between the frontend and backend network drivers.
+
 config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
 	bool "Pipelined transmitter (DANGEROUS)"
 	depends on XEN_NETDEV_FRONTEND
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	2005-06-02 15:02:46 +01:00
@@ -165,8 +165,14 @@
     u16           flags;
 
     flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /*
+     * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
+     * if gnttab_donate executes without interruption???
+     */
+#else
     ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
     /*
      * If a transfer is committed then wait for the frame address to appear.
      * Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h	2005-06-02 15:02:46 +01:00
@@ -50,6 +50,9 @@
     /* Private indexes into shared ring. */
     NETIF_RING_IDX rx_req_cons;
     NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
     NETIF_RING_IDX tx_req_cons;
     NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
 
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	2005-06-02 15:02:46 +01:00
@@ -18,6 +18,24 @@
 #include <linux/delay.h>
 #endif
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+	int i;
+
+	printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+	for (i = 0; i < 20; i++) {
+		printk("%02x", p[i]);
+	}
+	printk("\n");
+}
+#endif
+#endif
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
@@ -41,7 +59,9 @@
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
 static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
     return mfn;
 }
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
@@ -102,6 +137,7 @@
         BUG();
     spin_unlock_irqrestore(&mfn_lock, flags);
 }
+#endif
 
 static inline void maybe_schedule_tx_action(void)
 {
@@ -160,7 +196,17 @@
         dev_kfree_skb(skb);
         skb = nskb;
     }
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+           netif->rx->req_prod,
+           netif->rx_req_cons,
+           netif->rx->ring[
+		   MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+           netif->rx->ring[
+		   MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
     netif->rx_req_cons++;
     netif_get(netif);
 
@@ -201,7 +247,11 @@
     u16 size, id, evtchn;
     multicall_entry_t *mcl;
     mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_op_t *gop;
+#else
     struct mmuext_op *mmuext;
+#endif
     unsigned long vdata, mdata, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
@@ -212,7 +262,12 @@
 
     mcl = rx_mcl;
     mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
+
     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
             skb_queue_head(&rx_queue, skb);
             break;
         }
-
         /*
          * Set the new P2M table entry before reassigning the old data page.
          * Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
         mcl->args[2] = 0;
         mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+        gop->u.donate.domid = netif->domid;
+        gop->u.donate.handle = netif->rx->ring[
+        MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+        netif->rx_resp_prod_copy++;
+        gop++;
+#else
         mcl->op = __HYPERVISOR_mmuext_op;
         mcl->args[0] = (unsigned long)mmuext;
         mcl->args[1] = 1;
@@ -251,13 +313,16 @@
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
         mmuext->mfn = mdata >> PAGE_SHIFT;
         mmuext++;
-
+#endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         mmu->val = __pa(vdata) >> PAGE_SHIFT;  
         mmu++;
 
         __skb_queue_tail(&rxq, skb);
 
+#ifdef DEBUG_GRANT
+        dump_packet('a', mdata, vdata);
+#endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
             break;
@@ -273,12 +338,24 @@
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
     mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
     mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+                                           grant_rx_op, gop - grant_rx_op))) {
+        BUG();
+    }
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
         mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
                    ((unsigned long)skb->data & ~PAGE_MASK));
-        
+#endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
 
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if (unlikely(gop->u.donate.status != 0)) {
+            BUG();
+        }
+#else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
             free_mfn(mdata >> PAGE_SHIFT);
             status = NETIF_RSP_ERROR;
         }
-
+#endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
         if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
 
         netif_put(netif);
         dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl++;
+        gop++;
+#else
         mcl += 2;
         mmuext += 1;
+#endif
     }
 
     while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
     netif_schedule_work(netif);
 }
 
+/* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
     struct list_head *ent;
@@ -415,13 +505,36 @@
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_op_t *gop;
+#else
     multicall_entry_t *mcl;
+#endif
     PEND_RING_IDX dc, dp;
     unsigned int data_len;
 
+
     if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
         goto skip_dealloc;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /*
+     * Free up any grants we have finished using
+     */
+    gop = grant_tx_op;
+    while (dc != dp) {
+        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+        gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.unmap_grant_ref.dev_bus_addr = 0;
+        gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+        gop++;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+#else
     mcl = tx_mcl;
     while ( dc != dp )
     {
@@ -438,11 +551,14 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( dealloc_cons != dp )
     {
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         /* The update_va_mapping() must not fail. */
         if ( unlikely(mcl[0].result != 0) )
             BUG();
+#endif
 
         pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
 
@@ -466,11 +582,17 @@
         
         netif_put(netif);
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         mcl++;
+#endif
     }
 
  skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gop = grant_tx_op;
+#else
     mcl = tx_mcl;
+#endif
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
     {
@@ -492,7 +614,6 @@
         rmb(); /* Ensure that we see the request before we copy it. */
         memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, 
                sizeof(txreq));
-
         /* Credit-based scheduling. */
         if ( txreq.size > netif->remaining_credit )
         {
@@ -572,13 +693,20 @@
 
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.map_grant_ref.dom = netif->domid;
+        gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+        gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+        gop++;
+#else
         mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
         mcl[0].args[0] = MMAP_VADDR(pending_idx);
         mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
         mcl[0].args[2] = 0;
         mcl[0].args[3] = netif->domid;
         mcl++;
+#endif
 
         memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
         pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
 
         pending_cons++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+            break;
+#else
         /* Filled the batch queue? */
         if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
             break;
+#endif
     }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gop == grant_tx_op) {
+        return;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+    gop = grant_tx_op;
+#else
     if ( mcl == tx_mcl )
         return;
 
@@ -600,6 +743,7 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
     {
         pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 
         /* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+            printk(KERN_ALERT "#### netback grant fails\n");
+            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+            netif_put(netif);
+            kfree_skb(skb);
+            gop++;
+            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+            continue;
+        }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+                             FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+        grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
         if ( unlikely(mcl[0].result != 0) )
         {
             DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
             FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
 
         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 
@@ -627,7 +786,6 @@
         memcpy(skb->data, 
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                data_len);
-
         if ( data_len < txreq.size )
         {
             /* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
         netif_rx(skb);
         netif->dev->last_rx = jiffies;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop++;
+#else
         mcl++;
+#endif
     }
 }
 
@@ -781,6 +943,12 @@
         return 0;
 
     printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    printk("#### netback rx using grant tables\n");
+#endif
 
     /* We can increase reservation by this much in net_rx_action(). */
     balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	2005-06-02 15:02:46 +01:00
@@ -54,6 +54,25 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+    unsigned char *p = (unsigned char *)ap;
+    int i;
+    
+    printk(KERN_ALERT "#### rx_poll   %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
 #endif
@@ -82,6 +101,21 @@
 #define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
 #endif
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF	(0xFFFF)
+#endif
+
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 
@@ -322,6 +356,14 @@
         for (i = np->tx_resp_cons; i != prod; i++) {
             id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+            if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+                printk(KERN_ALERT "netfront: query foreign access\n");
+            }
+            gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+            gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+            grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
             ADD_ID_TO_FREELIST(np->tx_skbs, id);
             dev_kfree_skb_irq(skb);
         }
@@ -356,6 +398,9 @@
     struct sk_buff *skb;
     int i, batch_target;
     NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    int ref;
+#endif
 
     if (unlikely(np->backend_state != BEST_CONNECTED))
         return;
@@ -388,7 +433,16 @@
         np->rx_skbs[id] = skb;
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-        
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+            printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+            BUG();
+        }
+        grant_rx_ref[id] = ref;
+        gnttab_grant_foreign_transfer_ref(ref, rdomid,
+        virt_to_machine(skb->head) >> PAGE_SHIFT);
+        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
         rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
 
 	/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
     struct net_private *np = netdev_priv(dev);
     netif_tx_request_t *tx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    unsigned int ref;
+    unsigned long mfn;
+#endif
 
     if (unlikely(np->tx_full)) {
         printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
     tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
 
     tx->id   = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+        printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+        BUG();
+    }
+    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+    gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    grant_tx_ref[id] = ref;
+#else
     tx->addr = virt_to_machine(skb->data);
+#endif
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -532,6 +601,10 @@
     int work_done, budget, more_to_do = 1;
     struct sk_buff_head rxq;
     unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    unsigned long mfn;
+    grant_ref_t ref;
+#endif
 
     spin_lock(&np->rx_lock);
 
@@ -544,7 +617,6 @@
 
     if ((budget = *pbudget) > dev->quota)
         budget = dev->quota;
-
     rp = np->rx->resp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
@@ -552,7 +624,6 @@
 		    (i != rp) && (work_done < budget);
 		    i++, work_done++) {
         rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
         /*
          * An error here is very odd. Usually indicates a backend bug,
          * low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
             continue;
         }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        ref = grant_rx_ref[rx->id];
+        grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+        mfn = gnttab_end_foreign_transfer(ref);
+        gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         /* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        skb->data = skb->head + rx->addr;
+#else
         skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
         skb->len  = rx->status;
         skb->tail = skb->data + skb->len;
 
@@ -582,18 +665,33 @@
         np->stats.rx_bytes += rx->status;
 
         /* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
         mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
         mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
         mmu++;
         mcl->op = __HYPERVISOR_update_va_mapping;
         mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
         mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
         mcl->args[2] = 0;
         mcl++;
 
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+            mfn;
+#else
             rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%08x mfn=%08x ref=%04x\n",
+               skb->data, mfn, ref);
+#endif
         __skb_queue_tail(&rxq, skb);
     }
 
@@ -612,6 +710,11 @@
     }
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+         printk(KERN_ALERT "#### rx_poll     dequeue vdata=%08x mfn=%08x\n",
+                skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+         dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
         /*
          * Enough room in skbuff for the data we were passed? Also, Linux 
          * expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
 			unlikely((skb->data - skb->head) < 16)) {
             nskb = NULL;
 
+
             /* Only copy the packet if it fits in the current MTU. */
             if (skb->len <= (dev->mtu + ETH_HLEN)) {
                 if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
         
         /* Set the shared-info area, which is hidden behind the real data. */
         init_skb_shinfo(skb);
-
         /* Ethernet-specific work. Delayed to here as it peeks the header. */
         skb->protocol = eth_type_trans(skb, dev);
 
@@ -923,6 +1026,9 @@
     network_connect(dev, status);
     np->evtchn = status->evtchn;
     np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    rdomid = status->domid;
+#endif
     (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
     netctrl_connected_count();
     (void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
     np->rx_max_target = RX_MAX_TARGET;
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+    for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
         np->tx_skbs[i] = (void *)(i+1);
-    for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
+    for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
         np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
 
     dev->open            = network_open;
     dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
 
     if (xen_start_info.flags & SIF_INITDOMAIN)
         return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                      &gref_tx_head, &gref_tx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                      &gref_rx_head, &gref_rx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
 
     if ((err = xennet_proc_init()) != 0)
         return err;
@@ -1290,6 +1420,16 @@
     return err;
 }
 
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
+}
+
 static void vif_suspend(struct net_private *np)
 {
     /* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
 #endif
 
 module_init(netif_init);
+module_exit(netif_exit);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c	2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c	2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
 }
 #endif
 
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+    struct domain *d = current->domain;
+    struct domain *e;
+    struct pfn_info *page;
+    u32 _d, _nd, x, y;
+    int i;
+    int result = GNTST_okay;
+
+    for (i = 0; i < count; i++) {
+        gnttab_donate_t *gop = &uop[i].u.donate;
+#if GRANT_DEBUG
+        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+               i, gop->mfn, gop->domid, gop->handle);
+#endif
+        page = &frame_table[gop->mfn];
+
+        if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
+            printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+            printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            gop->status = GNTST_bad_domain;
+            continue;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while
+         * there is just one benign reference to the page
+         * (PGC_allocated). If that reference disappears then the
+         * deallocation routine will safely spin.
+         */
+        _d  = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y   = page->count_info;
+        do {
+            x = y;
+            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                        d, d->id, unpickle_domptr(_nd), x, 
+                        page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                return 0;
+            }
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } while (unlikely(_nd != _d) || unlikely(y != x));
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now
+         * anonymous), so noone else is spinning to try to delete
+         * this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation
+         * headroom.  Also, a domain mustn't have PGC_allocated
+         * pages when it is dying.
+         */
+#ifdef GRANT_DEBUG
+        if (unlikely(e->tot_pages >= e->max_pages)) {
+            printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
+                   e->tot_pages, e->max_pages);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+            printk("gnttab_donate: target domain is dying\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#else
+        ASSERT(e->tot_pages <= e->max_pages);
+        if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+            unlikely(e->tot_pages == e->max_pages) ||
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or "
+                    "provided a bad grant ref (%08x), or is dying (%p).\n",
+                    e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#endif
+        /* Okay, add the page to 'e'. */
+        if (unlikely(e->tot_pages++ == 0)) {
+            get_knownalive_domain(e);
+        }
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /*
+         * Transfer is all done: tell the guest about its new page
+         * frame.
+         */
+        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+        
+        put_domain(e);
+
+        gop->status = GNTST_okay;
+    }
+    return result;
+}
+
 long 
 do_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
         rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
         break;
 #endif
+    case GNTTABOP_donate:
+        if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+                                      sizeof(gnttab_op_t)))) {
+            goto out;
+        }
+        rc = gnttab_donate(uop, count);
+        break;
     default:
         rc = -ENOSYS;
         break;
@@ -1066,6 +1213,10 @@
     }
     sha->frame = __mfn_to_gpfn(rd, frame);
     sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+    printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+           ref, frame, pfn, sha->frame);
+#endif
     wmb();
     sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
 
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h	2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h	2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
     s16         status;               /* 2: GNTST_* */
 } PACKED gnttab_dump_table_t; /* 4 bytes */
 
+/*
+ * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain.  The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate                4
+typedef struct {
+    memory_t    mfn;		      /*  0 */
+    domid_t     domid;		      /*  4 */
+    u16         handle;               /*  8 */
+    s16         status;               /*  10: GNTST_* */
+    u32         __pad;
+} PACKED gnttab_donate_t;	      /*  14 bytes */
 
 /*
  * Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
         gnttab_unmap_grant_ref_t  unmap_grant_ref;
         gnttab_setup_table_t      setup_table;
         gnttab_dump_table_t       dump_table;
+        gnttab_donate_t           donate;
         u8                        __dummy[24];
     } PACKED u;
 } PACKED gnttab_op_t; /* 32 bytes */
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h	2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h	2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
 
 typedef struct {
     u16       id;    /*  0: Echoed in response message.        */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    grant_ref_t gref;	/* 2: Reference to incoming granted frame */
+#endif
 } PACKED netif_rx_request_t; /* 2 bytes */
 
 typedef struct {
-    memory_t addr;   /*  0: Machine address of packet.              */
+    u32      addr;   /*  0: Offset in page of start of received packet  */
     MEMORY_PADDING;
     u16      csum_valid:1; /* Protocol checksum is validated?       */
     u16      id:15;  /*  8:  */

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01  2:28 netif & grant tables Matt Chapman
  2005-07-01 10:08 ` Vincent Hanquez
@ 2005-07-01 20:29 ` Stefan Berger
  2005-07-01 20:40   ` Matt Chapman
  1 sibling, 1 reply; 13+ messages in thread
From: Stefan Berger @ 2005-07-01 20:29 UTC (permalink / raw)
  To: Matt Chapman; +Cc: xen-devel

xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:

> Hi,
> 
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.

You *probably* won't get this to work right out of the box. On i386 it 
fails due to dom Us not becoming privileged and so a check like IS_PRIV() 
fails in xen/common/grant_table.c line 692 and probably somewhere else 
also. The question is how this should be fixed. Should the HV call to 
create a domain receive an additional parameter including flags that 
should be set in a domain, such as for example the _DOMF_privileged? 
Currently this flag only seems to be set in one place for dom 0.

The quick fix is:
add 
        set_bit(_DOMF_privileged, &d->domain_flags) 

before the 'return d' in do_createdomain() in xen/common/domain.c -> it 
will make all domains privileged 

To compile the backends into a domU I had to activate 
CONFIG_XEN_PRIVILGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the .config 
file of the dom U kernel for having alloc_empty_lowmem_region() compiled 
into th kernel (arch/xen/i386/mm/hypervisor.c). Is this call to 
alloc_empty_lowmem_region() necessary or would another memory allocation 
routine work as well. All the backends seem to use it, though.

   Stefan
 
> 
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?
> 
> Matt
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01 20:29 ` Stefan Berger
@ 2005-07-01 20:40   ` Matt Chapman
  2005-07-01 21:07     ` Stefan Berger
  0 siblings, 1 reply; 13+ messages in thread
From: Matt Chapman @ 2005-07-01 20:40 UTC (permalink / raw)
  To: Stefan Berger; +Cc: xen-devel

Hi Stefan,

I'm not trying to run the backend or any real drivers in domU,
only the frontend.

Matt


On Fri, Jul 01, 2005 at 04:29:20PM -0400, Stefan Berger wrote:
> xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:
> 
> > Hi,
> > 
> > I'm currently looking at getting domU networking working
> > on IA64, and to do this I need to make netback/netfront
> > use grant tables.
> 
> You *probably* won't get this to work right out of the box. On i386 it 
> fails due to dom Us not becoming privileged and so a check like IS_PRIV() 
> fails in xen/common/grant_table.c line 692 and probably somewhere else 
> also. The question is how this should be fixed. Should the HV call to 
> create a domain receive an additional parameter including flags that 
> should be set in a domain, such as for example the _DOMF_privileged? 
> Currently this flag only seems to be set in one place for dom 0.
> 
> The quick fix is:
> add 
>         set_bit(_DOMF_privileged, &d->domain_flags) 
> 
> before the 'return d' in do_createdomain() in xen/common/domain.c -> it 
> will make all domains privileged 
> 
> To compile the backends into a domU I had to activate 
> CONFIG_XEN_PRIVILGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the .config 
> file of the dom U kernel for having alloc_empty_lowmem_region() compiled 
> into th kernel (arch/xen/i386/mm/hypervisor.c). Is this call to 
> alloc_empty_lowmem_region() necessary or would another memory allocation 
> routine work as well. All the backends seem to use it, though.
> 
>    Stefan
>  
> > 
> > I'm told that there's already a patch floating around,
> > can someone tell me where to find it?
> > 
> > Matt
> > 
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xensource.com
> > http://lists.xensource.com/xen-devel
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01 20:40   ` Matt Chapman
@ 2005-07-01 21:07     ` Stefan Berger
  2005-07-01 22:19       ` Matt Chapman
  2005-07-02  1:56       ` Mark Williamson
  0 siblings, 2 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-01 21:07 UTC (permalink / raw)
  To: Matt Chapman; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 2608 bytes --]

xen-devel-bounces@lists.xensource.com wrote on 07/01/2005 03:40:31 PM:

> Hi Stefan,
> 
> I'm not trying to run the backend or any real drivers in domU,
> only the frontend.

Just curious: On IA64 you must have grant tables to get networking 
working?

If someone has the matching problem for my solution, then let me know. :-)
Otherwise I think the problem of making domains privileged should really 
be solved - probably starting somewhere in XEN-D.

   Stefan

> 
> Matt
> 
> 
> On Fri, Jul 01, 2005 at 04:29:20PM -0400, Stefan Berger wrote:
> > xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:
> > 
> > > Hi,
> > > 
> > > I'm currently looking at getting domU networking working
> > > on IA64, and to do this I need to make netback/netfront
> > > use grant tables.
> > 
> > You *probably* won't get this to work right out of the box. On i386 it 

> > fails due to dom Us not becoming privileged and so a check like 
IS_PRIV() 
> > fails in xen/common/grant_table.c line 692 and probably somewhere else 

> > also. The question is how this should be fixed. Should the HV call to 
> > create a domain receive an additional parameter including flags that 
> > should be set in a domain, such as for example the _DOMF_privileged? 
> > Currently this flag only seems to be set in one place for dom 0.
> > 
> > The quick fix is:
> > add 
> >         set_bit(_DOMF_privileged, &d->domain_flags) 
> > 
> > before the 'return d' in do_createdomain() in xen/common/domain.c -> 
it 
> > will make all domains privileged 
> > 
> > To compile the backends into a domU I had to activate 
> > CONFIG_XEN_PRIVILGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the 
.config 
> > file of the dom U kernel for having alloc_empty_lowmem_region() 
compiled 
> > into th kernel (arch/xen/i386/mm/hypervisor.c). Is this call to 
> > alloc_empty_lowmem_region() necessary or would another memory 
allocation 
> > routine work as well. All the backends seem to use it, though.
> > 
> >    Stefan
> > 
> > > 
> > > I'm told that there's already a patch floating around,
> > > can someone tell me where to find it?
> > > 
> > > Matt
> > > 
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xensource.com
> > > http://lists.xensource.com/xen-devel
> > 
> > 
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xensource.com
> > http://lists.xensource.com/xen-devel
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

[-- Attachment #1.2: Type: text/html, Size: 3497 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01 21:07     ` Stefan Berger
@ 2005-07-01 22:19       ` Matt Chapman
  2005-07-02  1:56       ` Mark Williamson
  1 sibling, 0 replies; 13+ messages in thread
From: Matt Chapman @ 2005-07-01 22:19 UTC (permalink / raw)
  To: Stefan Berger; +Cc: xen-devel

On Fri, Jul 01, 2005 at 04:07:05PM -0500, Stefan Berger wrote:
> xen-devel-bounces@lists.xensource.com wrote on 07/01/2005 03:40:31 PM:
> 
> > Hi Stefan,
> > 
> > I'm not trying to run the backend or any real drivers in domU,
> > only the frontend.
> 
> Just curious: On IA64 you must have grant tables to get networking 
> working?

Well, on IA64 domU doesn't know about real machine addresses (ala
shadow mode on x86).  The grant table mechanism is the easiest way
to enable sharing of pages - grant references provide a common
namespace for referring to a page.

Matt

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-01 21:07     ` Stefan Berger
  2005-07-01 22:19       ` Matt Chapman
@ 2005-07-02  1:56       ` Mark Williamson
  2005-07-02  3:15         ` Stefan Berger
  1 sibling, 1 reply; 13+ messages in thread
From: Mark Williamson @ 2005-07-02  1:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Stefan Berger, Matt Chapman

> If someone has the matching problem for my solution, then let me know. :-)
> Otherwise I think the problem of making domains privileged should really
> be solved - probably starting somewhere in XEN-D.

There should probably be a flag you pass down from the config.  The current 
hack people use is to give the domain access to a PCI device but not compile 
in the drivers.  Driver domains are privileged at the moment, so it works :-S

With full grant tables support, full privilege is not necessary, just a grant 
from the other party.  That's probably the nicest long term solution and can 
also hook in with a suitable IO-TLB to provide protection against rogue DMAs.

Cheers,
Mark

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-02  1:56       ` Mark Williamson
@ 2005-07-02  3:15         ` Stefan Berger
  2005-07-02 15:34           ` Mark Williamson
  0 siblings, 1 reply; 13+ messages in thread
From: Stefan Berger @ 2005-07-02  3:15 UTC (permalink / raw)
  To: Mark Williamson; +Cc: xen-devel, Matt Chapman

Mark Williamson <mark.williamson@cl.cam.ac.uk> wrote on 07/01/2005 
09:56:26 PM:

> > If someone has the matching problem for my solution, then let me know. 
:-)
> > Otherwise I think the problem of making domains privileged should 
really
> > be solved - probably starting somewhere in XEN-D.
> 
> There should probably be a flag you pass down from the config.  The 
current 

It could be done implicitly, meaning that if you give a domain a backend 
(netif/blkif), that privilege flag will automatically be set by XEN-D and 
used when creating the domain, or explicitly where one specifies the 
flag(s) to set in the VM config file.

> hack people use is to give the domain access to a PCI device but not 
compile 
> in the drivers.  Driver domains are privileged at the moment, so it 
works :-S

>From what I can see this does not work anymore - I used to do that also. 
Passing a PCI device to a partition results in an error since the 
xc_physdev_pci_access_modify call ends in an error.
> 
> With full grant tables support, full privilege is not necessary, just a 
grant 
> from the other party.  That's probably the nicest long term solution and 
can 
> also hook in with a suitable IO-TLB to provide protection against rogue 
DMAs.

I am not sure how 'privilege' is defined. The privilege does so far not 
only mean to do dom 0 ops, but seems to also limit guest domains of doing 
other things - like the backend problem I see. I agree, though, that for 
grant table support a backend should not need privileges.

> 
> Cheers,
> Mark

Cheers,
   Stefan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-02  3:15         ` Stefan Berger
@ 2005-07-02 15:34           ` Mark Williamson
  2005-07-03 22:12             ` Stefan Berger
  0 siblings, 1 reply; 13+ messages in thread
From: Mark Williamson @ 2005-07-02 15:34 UTC (permalink / raw)
  To: Stefan Berger; +Cc: xen-devel, Matt Chapman

> It could be done implicitly, meaning that if you give a domain a backend
> (netif/blkif), that privilege flag will automatically be set by XEN-D and
> used when creating the domain, or explicitly where one specifies the
> flag(s) to set in the VM config file.

Doing it implicitly would probably be sensible.

> From what I can see this does not work anymore - I used to do that also.
> Passing a PCI device to a partition results in an error since the
> xc_physdev_pci_access_modify call ends in an error.

Assigning PCI devices is broken in unstable at the moment.  It'll be coming 
back at some stage.

> I am not sure how 'privilege' is defined.

Very coarsely at present: IIRC right now domain who's got access to a PCI 
device is as privileged as dom0.  This means they're allowed to map memory of 
other domains, do dom0 ops, etc.

Grant tables will enable us to deprivilege guests somewhat, then we'll split 
privileges down into more fine-grained capabilities.

Cheers,
Mark

> The privilege does so far not 
> only mean to do dom 0 ops, but seems to also limit guest domains of doing
> other things - like the backend problem I see. I agree, though, that for
> grant table support a backend should not need privileges.
>
> > Cheers,
> > Mark
>
> Cheers,
>    Stefan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-02 15:34           ` Mark Williamson
@ 2005-07-03 22:12             ` Stefan Berger
  0 siblings, 0 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-03 22:12 UTC (permalink / raw)
  To: Mark Williamson; +Cc: xen-devel, Matt Chapman

xen-devel-bounces@lists.xensource.com wrote on 07/02/2005 11:34:58 AM:

> > It could be done implicitly, meaning that if you give a domain a 
backend
> > (netif/blkif), that privilege flag will automatically be set by XEN-D 
and
> > used when creating the domain, or explicitly where one specifies the
> > flag(s) to set in the VM config file.
> 
> Doing it implicitly would probably be sensible.
> 
> > From what I can see this does not work anymore - I used to do that 
also.
> > Passing a PCI device to a partition results in an error since the
> > xc_physdev_pci_access_modify call ends in an error.
> 
> Assigning PCI devices is broken in unstable at the moment.  It'll be 
coming 
> back at some stage.
> 
> > I am not sure how 'privilege' is defined.
> 
> Very coarsely at present: IIRC right now domain who's got access to a 
PCI 
> device is as privileged as dom0.  This means they're allowed to map 
memory of 
> other domains, do dom0 ops, etc.
> 
> Grant tables will enable us to deprivilege guests somewhat, then we'll 
split 
> privileges down into more fine-grained capabilities.
> 
Setting the privileged bit in a user domain gets grant tables to work: 
should this bit be set for those kind of domains or rather the IS_PRIV() 
test be removed from the call path which basically would allow all user 
domains to do mapping by default?

    Stefan

> Cheers,
> Mark
> 
> > The privilege does so far not 
> > only mean to do dom 0 ops, but seems to also limit guest domains of 
doing
> > other things - like the backend problem I see. I agree, though, that 
for
> > grant table support a backend should not need privileges.
> >
> > > Cheers,
> > > Mark
> >
> > Cheers,
> >    Stefan
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
       [not found] <200507040226.18182.mark.williamson@cl.cam.ac.uk>
@ 2005-07-04 18:43 ` Stefan Berger
  2005-07-04 18:44   ` Keir Fraser
  2005-07-04 19:11   ` Mark A. Williamson
  0 siblings, 2 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-04 18:43 UTC (permalink / raw)
  To: Mark Williamson, xen-devel

Mark Williamson <mark.williamson@cl.cam.ac.uk> wrote on 07/03/2005 
09:26:18 PM:

> [off list]
> 
> > Setting the privileged bit in a user domain gets grant tables to work:
> > should this bit be set for those kind of domains or rather the 
IS_PRIV()
> > test be removed from the call path which basically would allow all 
user
> > domains to do mapping by default?
> 
> I'm not clear what you mean here - AFAIK grant tables don't require 
IS_PRIV() 
> in order to work.  Or am I misunderstanding?

You are right, it's not the grant tables per se that need the privileged 
bit to be set, but other functions need it and keep backends from working.

Here are two code paths from the network backend:

from netback/interface.c calls 
 
linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c:direct_remap_area_pages() 
calls
      HYPERVISOR_mmu_update calls
         xen/arch/x86/mm.c:do_mmu_update calls
           set_foreigndom() which has an IS_PRIV() in the path
 
-> The direct_remap_area_pages call fails if a domain does not have the 
privilege bit set.


netback/netback.c: alloc_mfn() calls
   HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list, 
MAX_MFN_ALLOC, 0);
     xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a 
IS_PRIV() in the path



 Stefan


> 
> What I meant was that since grant tables are an explicit capability (you 
can 
> only map a page of another dom if it gave you an explicit grant) there's 
no 
> need for mappings in the IO path to require special privileges at all. 
If 
> someone gave you a grant, they must trust you enough to access that 
page.
> 
> Cheers,
> Mark
> 
> >     Stefan
> >
> > > Cheers,
> > > Mark
> > >
> > > > The privilege does so far not
> > > > only mean to do dom 0 ops, but seems to also limit guest domains 
of
> >
> > doing
> >
> > > > other things - like the backend problem I see. I agree, though, 
that
> >
> > for
> >
> > > > grant table support a backend should not need privileges.
> > > >
> > > > > Cheers,
> > > > > Mark
> > > >
> > > > Cheers,
> > > >    Stefan
> > >
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xensource.com
> > > http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-04 18:43 ` Stefan Berger
@ 2005-07-04 18:44   ` Keir Fraser
  2005-07-04 19:11   ` Mark A. Williamson
  1 sibling, 0 replies; 13+ messages in thread
From: Keir Fraser @ 2005-07-04 18:44 UTC (permalink / raw)
  To: Stefan Berger; +Cc: xen-devel, Mark Williamson


On 4 Jul 2005, at 19:43, Stefan Berger wrote:

> linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c: 
> direct_remap_area_pages()
> calls
>       HYPERVISOR_mmu_update calls
>          xen/arch/x86/mm.c:do_mmu_update calls
>            set_foreigndom() which has an IS_PRIV() in the path
>
> -> The direct_remap_area_pages call fails if a domain does not have the
> privilege bit set.

The backend driver parts that use this function need cleaning up to use  
a grant reference instead.

> netback/netback.c: alloc_mfn() calls
>    HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list,
> MAX_MFN_ALLOC, 0);
>      xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a
> IS_PRIV() in the path

It only disallows you from adjusting others' reservations. You can  
still adjust your own.

  -- Keir

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: netif & grant tables
  2005-07-04 18:43 ` Stefan Berger
  2005-07-04 18:44   ` Keir Fraser
@ 2005-07-04 19:11   ` Mark A. Williamson
  1 sibling, 0 replies; 13+ messages in thread
From: Mark A. Williamson @ 2005-07-04 19:11 UTC (permalink / raw)
  To: Stefan Berger; +Cc: xen-devel

> You are right, it's not the grant tables per se that need the privileged
> bit to be set, but other functions need it and keep backends from working.
>
> Here are two code paths from the network backend:

I suspected it might be something like this.  There's no reason for those to 
require privilege either: they can fairly trivially be converted to use grant 
tables too.  Once it's fully grant-table-ified it shouldn't be necessary to 
make such domains be privileged.

Full grant tables support also a pre-req for the point to point "snappable 
frontend" connections directly between domains with high bandwidth 
requirements.

Cheers,
Mark

> from netback/interface.c calls
>
> linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c:direct_remap_area_pages(
>) calls
>       HYPERVISOR_mmu_update calls
>          xen/arch/x86/mm.c:do_mmu_update calls
>            set_foreigndom() which has an IS_PRIV() in the path
>
> -> The direct_remap_area_pages call fails if a domain does not have the
> privilege bit set.
>
>
> netback/netback.c: alloc_mfn() calls
>    HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list,
> MAX_MFN_ALLOC, 0);
>      xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a
> IS_PRIV() in the path
>
>
>
>  Stefan
>
> > What I meant was that since grant tables are an explicit capability (you
>
> can
>
> > only map a page of another dom if it gave you an explicit grant) there's
>
> no
>
> > need for mappings in the IO path to require special privileges at all.
>
> If
>
> > someone gave you a grant, they must trust you enough to access that
>
> page.
>
> > Cheers,
> > Mark
> >
> > >     Stefan
> > >
> > > > Cheers,
> > > > Mark
> > > >
> > > > > The privilege does so far not
> > > > > only mean to do dom 0 ops, but seems to also limit guest domains
>
> of
>
> > > doing
> > >
> > > > > other things - like the backend problem I see. I agree, though,
>
> that
>
> > > for
> > >
> > > > > grant table support a backend should not need privileges.
> > > > >
> > > > > > Cheers,
> > > > > > Mark
> > > > >
> > > > > Cheers,
> > > > >    Stefan
> > > >
> > > > _______________________________________________
> > > > Xen-devel mailing list
> > > > Xen-devel@lists.xensource.com
> > > > http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2005-07-04 19:11 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-07-01  2:28 netif & grant tables Matt Chapman
2005-07-01 10:08 ` Vincent Hanquez
2005-07-01 20:29 ` Stefan Berger
2005-07-01 20:40   ` Matt Chapman
2005-07-01 21:07     ` Stefan Berger
2005-07-01 22:19       ` Matt Chapman
2005-07-02  1:56       ` Mark Williamson
2005-07-02  3:15         ` Stefan Berger
2005-07-02 15:34           ` Mark Williamson
2005-07-03 22:12             ` Stefan Berger
     [not found] <200507040226.18182.mark.williamson@cl.cam.ac.uk>
2005-07-04 18:43 ` Stefan Berger
2005-07-04 18:44   ` Keir Fraser
2005-07-04 19:11   ` Mark A. Williamson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.