From: Vincent Hanquez <vincent.hanquez@cl.cam.ac.uk>
To: Matt Chapman <matthewc@hp.com>
Cc: xen-devel@lists.xensource.com
Subject: Re: netif & grant tables
Date: Fri, 1 Jul 2005 12:08:42 +0200 [thread overview]
Message-ID: <20050701100842.GA23792@snarc.org> (raw)
In-Reply-To: <20050701022854.GA32494@kirby.fc.hp.com>
On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
>
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?
Hi Matt,
Here is the patch for having grant tables with netback and netfront.
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
+config XEN_NETDEV_GRANT_TX
+ bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exhange mechanism
+ between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+ bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exhange mechanism
+ between the frontend and backend network drivers.
+
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
depends on XEN_NETDEV_FRONTEND
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
@@ -165,8 +165,14 @@
u16 flags;
flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /*
+ * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
+ * if gnttab_donate executes without interruption???
+ */
+#else
ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
@@ -50,6 +50,9 @@
/* Private indexes into shared ring. */
NETIF_RING_IDX rx_req_cons;
NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
NETIF_RING_IDX tx_req_cons;
NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
@@ -18,6 +18,24 @@
#include <linux/delay.h>
#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
static PEND_RING_IDX dealloc_prod, dealloc_cons;
static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
return mfn;
}
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
unsigned long flags;
@@ -102,6 +137,7 @@
BUG();
spin_unlock_irqrestore(&mfn_lock, flags);
}
+#endif
static inline void maybe_schedule_tx_action(void)
{
@@ -160,7 +196,17 @@
dev_kfree_skb(skb);
skb = nskb;
}
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+ printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+ netif->rx->req_prod,
+ netif->rx_req_cons,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
netif->rx_req_cons++;
netif_get(netif);
@@ -201,7 +247,11 @@
u16 size, id, evtchn;
multicall_entry_t *mcl;
mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_op_t *gop;
+#else
struct mmuext_op *mmuext;
+#endif
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -212,7 +262,12 @@
mcl = rx_mcl;
mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
+
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
skb_queue_head(&rx_queue, skb);
break;
}
-
/*
* Set the new P2M table entry before reassigning the old data page.
* Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
mcl->args[2] = 0;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+ gop->u.donate.domid = netif->domid;
+ gop->u.donate.handle = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+#else
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)mmuext;
mcl->args[1] = 1;
@@ -251,13 +313,16 @@
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
mmuext->mfn = mdata >> PAGE_SHIFT;
mmuext++;
-
+#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(vdata) >> PAGE_SHIFT;
mmu++;
__skb_queue_tail(&rxq, skb);
+#ifdef DEBUG_GRANT
+ dump_packet('a', mdata, vdata);
+#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
break;
@@ -273,12 +338,24 @@
mcl->args[3] = DOMID_SELF;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+ grant_rx_op, gop - grant_rx_op))) {
+ BUG();
+ }
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(gop->u.donate.status != 0)) {
+ BUG();
+ }
+#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
status = NETIF_RSP_ERROR;
}
-
+#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
netif_put(netif);
dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl++;
+ gop++;
+#else
mcl += 2;
mmuext += 1;
+#endif
}
while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
netif_schedule_work(netif);
}
+/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
struct list_head *ent;
@@ -415,13 +505,36 @@
netif_tx_request_t txreq;
u16 pending_idx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_op_t *gop;
+#else
multicall_entry_t *mcl;
+#endif
PEND_RING_IDX dc, dp;
unsigned int data_len;
+
if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
goto skip_dealloc;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = grant_tx_op;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.unmap_grant_ref.dev_bus_addr = 0;
+ gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+#else
mcl = tx_mcl;
while ( dc != dp )
{
@@ -438,11 +551,14 @@
BUG();
mcl = tx_mcl;
+#endif
while ( dealloc_cons != dp )
{
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
/* The update_va_mapping() must not fail. */
if ( unlikely(mcl[0].result != 0) )
BUG();
+#endif
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -466,11 +582,17 @@
netif_put(netif);
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
mcl++;
+#endif
}
skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop = grant_tx_op;
+#else
mcl = tx_mcl;
+#endif
while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list) )
{
@@ -492,7 +614,6 @@
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-
/* Credit-based scheduling. */
if ( txreq.size > netif->remaining_credit )
{
@@ -572,13 +693,20 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.map_grant_ref.dom = netif->domid;
+ gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+ gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+ gop++;
+#else
mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
mcl[0].args[0] = MMAP_VADDR(pending_idx);
mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
mcl[0].args[2] = 0;
mcl[0].args[3] = netif->domid;
mcl++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
pending_cons++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+ break;
+#else
/* Filled the batch queue? */
if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
break;
+#endif
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gop == grant_tx_op) {
+ return;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+ gop = grant_tx_op;
+#else
if ( mcl == tx_mcl )
return;
@@ -600,6 +743,7 @@
BUG();
mcl = tx_mcl;
+#endif
while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
{
pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ gop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+ grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
@@ -627,7 +786,6 @@
memcpy(skb->data,
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
data_len);
-
if ( data_len < txreq.size )
{
/* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
netif_rx(skb);
netif->dev->last_rx = jiffies;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop++;
+#else
mcl++;
+#endif
}
}
@@ -781,6 +943,12 @@
return 0;
printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ printk("#### netback rx using grant tables\n");
+#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
@@ -54,6 +54,25 @@
#include <asm/page.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+ unsigned char *p = (unsigned char *)ap;
+ int i;
+
+ printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+ for (i = 0; i < 20; i++) {
+ printk("%02x", p[i]);
+ }
+ printk("\n");
+}
+#endif
+#endif
+
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
@@ -82,6 +101,21 @@
#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -322,6 +356,14 @@
for (i = np->tx_resp_cons; i != prod; i++) {
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+ printk(KERN_ALERT "netfront: query foreign access\n");
+ }
+ gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+ grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
ADD_ID_TO_FREELIST(np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -356,6 +398,9 @@
struct sk_buff *skb;
int i, batch_target;
NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ int ref;
+#endif
if (unlikely(np->backend_state != BEST_CONNECTED))
return;
@@ -388,7 +433,16 @@
np->rx_skbs[id] = skb;
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+ BUG();
+ }
+ grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, rdomid,
+ virt_to_machine(skb->head) >> PAGE_SHIFT);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
struct net_private *np = netdev_priv(dev);
netif_tx_request_t *tx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ unsigned int ref;
+ unsigned long mfn;
+#endif
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+ BUG();
+ }
+ mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+ tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ grant_tx_ref[id] = ref;
+#else
tx->addr = virt_to_machine(skb->data);
+#endif
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -532,6 +601,10 @@
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ unsigned long mfn;
+ grant_ref_t ref;
+#endif
spin_lock(&np->rx_lock);
@@ -544,7 +617,6 @@
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
-
rp = np->rx->resp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -552,7 +624,6 @@
(i != rp) && (work_done < budget);
i++, work_done++) {
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
continue;
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ ref = grant_rx_ref[rx->id];
+ grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+ mfn = gnttab_end_foreign_transfer(ref);
+ gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
/* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ skb->data = skb->head + rx->addr;
+#else
skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
skb->len = rx->status;
skb->tail = skb->data + skb->len;
@@ -582,18 +665,33 @@
np->stats.rx_bytes += rx->status;
/* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
mcl->args[2] = 0;
mcl++;
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mfn;
+#else
rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n",
+ skb->data, mfn, ref);
+#endif
__skb_queue_tail(&rxq, skb);
}
@@ -612,6 +710,11 @@
}
while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+ skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
* expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
unlikely((skb->data - skb->head) < 16)) {
nskb = NULL;
+
/* Only copy the packet if it fits in the current MTU. */
if (skb->len <= (dev->mtu + ETH_HLEN)) {
if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
/* Set the shared-info area, which is hidden behind the real data. */
init_skb_shinfo(skb);
-
/* Ethernet-specific work. Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -923,6 +1026,9 @@
network_connect(dev, status);
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ rdomid = status->domid;
+#endif
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
netctrl_connected_count();
(void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
np->rx_max_target = RX_MAX_TARGET;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)(i+1);
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
dev->open = network_open;
dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
if (xen_start_info.flags & SIF_INITDOMAIN)
return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head, &gref_tx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head, &gref_rx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
if ((err = xennet_proc_init()) != 0)
return err;
@@ -1290,6 +1420,16 @@
return err;
}
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
+}
+
static void vif_suspend(struct net_private *np)
{
/* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
#endif
module_init(netif_init);
+module_exit(netif_exit);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
}
#endif
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+ struct domain *d = current->domain;
+ struct domain *e;
+ struct pfn_info *page;
+ u32 _d, _nd, x, y;
+ int i;
+ int result = GNTST_okay;
+
+ for (i = 0; i < count; i++) {
+ gnttab_donate_t *gop = &uop[i].u.donate;
+#if GRANT_DEBUG
+ printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+ i, gop->mfn, gop->domid, gop->handle);
+#endif
+ page = &frame_table[gop->mfn];
+
+ if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+ printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+ printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+ gop->status = GNTST_bad_virt_addr;
+ continue;
+ }
+ if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+ printk("gnttab_donate: can't find domain %d\n", gop->domid);
+ gop->status = GNTST_bad_domain;
+ continue;
+ }
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while
+ * there is just one benign reference to the page
+ * (PGC_allocated). If that reference disappears then the
+ * deallocation routine will safely spin.
+ */
+ _d = pickle_domptr(d);
+ _nd = page->u.inuse._domain;
+ y = page->count_info;
+ do {
+ x = y;
+ if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+ (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+ printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+ " caf=%08x, taf=%08x\n", page_to_pfn(page),
+ d, d->id, unpickle_domptr(_nd), x,
+ page->u.inuse.type_info);
+ spin_unlock(&d->page_alloc_lock);
+ put_domain(e);
+ return 0;
+ }
+ __asm__ __volatile__(
+ LOCK_PREFIX "cmpxchg8b %2"
+ : "=d" (_nd), "=a" (y),
+ "=m" (*(volatile u64 *)(&page->count_info))
+ : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+ } while (unlikely(_nd != _d) || unlikely(y != x));
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now
+ * anonymous), so noone else is spinning to try to delete
+ * this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ spin_lock(&e->page_alloc_lock);
+
+ /*
+ * Check that 'e' will accept the page and has reservation
+ * headroom. Also, a domain mustn't have PGC_allocated
+ * pages when it is dying.
+ */
+#ifdef GRANT_DEBUG
+ if (unlikely(e->tot_pages >= e->max_pages)) {
+ printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n",
+ e->tot_pages, e->max_pages);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+ printk("gnttab_donate: target domain is dying\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+ if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#else
+ ASSERT(e->tot_pages <= e->max_pages);
+ if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+ unlikely(e->tot_pages == e->max_pages) ||
+ unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+ printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or "
+ "provided a bad grant ref (%08x), or is dying (%p).\n",
+ e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+ spin_unlock(&e->page_alloc_lock);
+ put_domain(e);
+ result = GNTST_general_error;
+ break;
+ }
+#endif
+ /* Okay, add the page to 'e'. */
+ if (unlikely(e->tot_pages++ == 0)) {
+ get_knownalive_domain(e);
+ }
+ list_add_tail(&page->list, &e->page_list);
+ page_set_owner(page, e);
+
+ spin_unlock(&e->page_alloc_lock);
+
+ /*
+ * Transfer is all done: tell the guest about its new page
+ * frame.
+ */
+ gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+ put_domain(e);
+
+ gop->status = GNTST_okay;
+ }
+ return result;
+}
+
long
do_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+ sizeof(gnttab_op_t)))) {
+ goto out;
+ }
+ rc = gnttab_donate(uop, count);
+ break;
default:
rc = -ENOSYS;
break;
@@ -1066,6 +1213,10 @@
}
sha->frame = __mfn_to_gpfn(rd, frame);
sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+ printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+ ref, frame, pfn, sha->frame);
+#endif
wmb();
sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
s16 status; /* 2: GNTST_* */
} PACKED gnttab_dump_table_t; /* 4 bytes */
+/*
+ * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain. The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+ memory_t mfn; /* 0 */
+ domid_t domid; /* 4 */
+ u16 handle; /* 8 */
+ s16 status; /* 10: GNTST_* */
+ u32 __pad;
+} PACKED gnttab_donate_t; /* 14 bytes */
/*
* Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
gnttab_unmap_grant_ref_t unmap_grant_ref;
gnttab_setup_table_t setup_table;
gnttab_dump_table_t dump_table;
+ gnttab_donate_t donate;
u8 __dummy[24];
} PACKED u;
} PACKED gnttab_op_t; /* 32 bytes */
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
typedef struct {
u16 id; /* 0: Echoed in response message. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_ref_t gref; /* 2: Reference to incoming granted frame */
+#endif
} PACKED netif_rx_request_t; /* 2 bytes */
typedef struct {
- memory_t addr; /* 0: Machine address of packet. */
+ u32 addr; /* 0: Offset in page of start of received packet */
MEMORY_PADDING;
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15; /* 8: */
next prev parent reply other threads:[~2005-07-01 10:08 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-07-01 2:28 netif & grant tables Matt Chapman
2005-07-01 10:08 ` Vincent Hanquez [this message]
2005-07-01 20:29 ` Stefan Berger
2005-07-01 20:40 ` Matt Chapman
2005-07-01 21:07 ` Stefan Berger
2005-07-01 22:19 ` Matt Chapman
2005-07-02 1:56 ` Mark Williamson
2005-07-02 3:15 ` Stefan Berger
2005-07-02 15:34 ` Mark Williamson
2005-07-03 22:12 ` Stefan Berger
[not found] <200507040226.18182.mark.williamson@cl.cam.ac.uk>
2005-07-04 18:43 ` Stefan Berger
2005-07-04 18:44 ` Keir Fraser
2005-07-04 19:11 ` Mark A. Williamson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050701100842.GA23792@snarc.org \
--to=vincent.hanquez@cl.cam.ac.uk \
--cc=matthewc@hp.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.