* netif & grant tables
@ 2005-07-01 2:28 Matt Chapman
2005-07-01 10:08 ` Vincent Hanquez
2005-07-01 20:29 ` Stefan Berger
0 siblings, 2 replies; 13+ messages in thread
From: Matt Chapman @ 2005-07-01 2:28 UTC (permalink / raw)
To: xen-devel
Hi,
I'm currently looking at getting domU networking working
on IA64, and to do this I need to make netback/netfront
use grant tables.
I'm told that there's already a patch floating around,
can someone tell me where to find it?
Matt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 2:28 netif & grant tables Matt Chapman
@ 2005-07-01 10:08 ` Vincent Hanquez
2005-07-01 20:29 ` Stefan Berger
1 sibling, 0 replies; 13+ messages in thread
From: Vincent Hanquez @ 2005-07-01 10:08 UTC (permalink / raw)
To: Matt Chapman; +Cc: xen-devel
On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
>
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?
Hi Matt,
Here is the patch for having grant tables with netback and netfront.
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig 2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
dedicated device-driver domain, or your master control domain
(domain 0), then you almost certainly want to say Y here.
+config XEN_NETDEV_GRANT_TX
+ bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+ bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+ default y
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend network drivers.
+
config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
bool "Pipelined transmitter (DANGEROUS)"
depends on XEN_NETDEV_FRONTEND
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c 2005-06-02 15:02:46 +01:00
@@ -165,8 +165,14 @@
u16 flags;
flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ /*
+ * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
+ * if gnttab_donate executes without interruption???
+ */
+#else
ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
/*
* If a transfer is committed then wait for the frame address to appear.
* Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h 2005-06-02 15:02:46 +01:00
@@ -50,6 +50,9 @@
/* Private indexes into shared ring. */
NETIF_RING_IDX rx_req_cons;
NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
NETIF_RING_IDX tx_req_cons;
NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c 2005-06-02 15:02:46 +01:00
@@ -18,6 +18,24 @@
#include <linux/delay.h>
#endif
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+/*
+ * Debug helper: log a one-character tag, an address, and the first 20
+ * bytes at p as hex.
+ *
+ * The call sites in this file are guarded by DEBUG_GRANT, so the
+ * definition is compiled when either macro is defined; guarding it with
+ * GRANT_DEBUG alone leaves a DEBUG_GRANT-only build without a definition.
+ *
+ * tag  - character identifying the call site (low 8 bits are printed).
+ * addr - address to print alongside the dump.
+ * p    - start of the bytes to dump; 20 bytes are read from it.
+ */
+#if defined(GRANT_DEBUG) || defined(DEBUG_GRANT)
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+    int i;
+
+    printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
static PEND_RING_IDX dealloc_prod, dealloc_cons;
static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
return mfn;
}
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
unsigned long flags;
@@ -102,6 +137,7 @@
BUG();
spin_unlock_irqrestore(&mfn_lock, flags);
}
+#endif
static inline void maybe_schedule_tx_action(void)
{
@@ -160,7 +196,17 @@
dev_kfree_skb(skb);
skb = nskb;
}
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+ printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+ netif->rx->req_prod,
+ netif->rx_req_cons,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+ netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
netif->rx_req_cons++;
netif_get(netif);
@@ -201,7 +247,11 @@
u16 size, id, evtchn;
multicall_entry_t *mcl;
mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_op_t *gop;
+#else
struct mmuext_op *mmuext;
+#endif
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -212,7 +262,12 @@
mcl = rx_mcl;
mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
+
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
skb_queue_head(&rx_queue, skb);
break;
}
-
/*
* Set the new P2M table entry before reassigning the old data page.
* Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
mcl->args[2] = 0;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+ gop->u.donate.domid = netif->domid;
+ gop->u.donate.handle = netif->rx->ring[
+ MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+ netif->rx_resp_prod_copy++;
+ gop++;
+#else
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)mmuext;
mcl->args[1] = 1;
@@ -251,13 +313,16 @@
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
mmuext->mfn = mdata >> PAGE_SHIFT;
mmuext++;
-
+#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(vdata) >> PAGE_SHIFT;
mmu++;
__skb_queue_tail(&rxq, skb);
+#ifdef DEBUG_GRANT
+ dump_packet('a', mdata, vdata);
+#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
break;
@@ -273,12 +338,24 @@
mcl->args[3] = DOMID_SELF;
mcl++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+ grant_rx_op, gop - grant_rx_op))) {
+ BUG();
+ }
+ gop = grant_rx_op;
+#else
mmuext = rx_mmuext;
+#endif
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (unlikely(gop->u.donate.status != 0)) {
+ BUG();
+ }
+#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
status = NETIF_RSP_ERROR;
}
-
+#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
netif_put(netif);
dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl++;
+ gop++;
+#else
mcl += 2;
mmuext += 1;
+#endif
}
while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
netif_schedule_work(netif);
}
+/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
struct list_head *ent;
@@ -415,13 +505,36 @@
netif_tx_request_t txreq;
u16 pending_idx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_op_t *gop;
+#else
multicall_entry_t *mcl;
+#endif
PEND_RING_IDX dc, dp;
unsigned int data_len;
+
if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
goto skip_dealloc;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ /*
+ * Free up any grants we have finished using
+ */
+ gop = grant_tx_op;
+ while (dc != dp) {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.unmap_grant_ref.dev_bus_addr = 0;
+ gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+ grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+ gop++;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+#else
mcl = tx_mcl;
while ( dc != dp )
{
@@ -438,11 +551,14 @@
BUG();
mcl = tx_mcl;
+#endif
while ( dealloc_cons != dp )
{
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
/* The update_va_mapping() must not fail. */
if ( unlikely(mcl[0].result != 0) )
BUG();
+#endif
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -466,11 +582,17 @@
netif_put(netif);
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
mcl++;
+#endif
}
skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop = grant_tx_op;
+#else
mcl = tx_mcl;
+#endif
while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list) )
{
@@ -492,7 +614,6 @@
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-
/* Credit-based scheduling. */
if ( txreq.size > netif->remaining_credit )
{
@@ -572,13 +693,20 @@
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+ gop->u.map_grant_ref.dom = netif->domid;
+ gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+ gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+ gop++;
+#else
mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
mcl[0].args[0] = MMAP_VADDR(pending_idx);
mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
mcl[0].args[2] = 0;
mcl[0].args[3] = netif->domid;
mcl++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
pending_cons++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+ break;
+#else
/* Filled the batch queue? */
if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
break;
+#endif
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gop == grant_tx_op) {
+ return;
+ }
+ if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ grant_tx_op, gop - grant_tx_op))) {
+ BUG();
+ }
+ gop = grant_tx_op;
+#else
if ( mcl == tx_mcl )
return;
@@ -600,6 +743,7 @@
BUG();
mcl = tx_mcl;
+#endif
while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
{
pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+ printk(KERN_ALERT "#### netback grant fails\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ gop++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+ grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
@@ -627,7 +786,6 @@
memcpy(skb->data,
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
data_len);
-
if ( data_len < txreq.size )
{
/* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
netif_rx(skb);
netif->dev->last_rx = jiffies;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gop++;
+#else
mcl++;
+#endif
}
}
@@ -781,6 +943,12 @@
return 0;
printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ printk("#### netback rx using grant tables\n");
+#endif
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c 2005-06-02 15:02:46 +01:00
@@ -54,6 +54,25 @@
#include <asm/page.h>
#include <asm/uaccess.h>
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+/*
+ * Debug helper: log a one-character tag, an address, and the first 20
+ * bytes found at address ap as hex.
+ *
+ * ap is carried as unsigned long (the call site already casts the
+ * pointer to unsigned long) so that the address is not truncated on
+ * targets where pointers are wider than 32 bits.
+ *
+ * tag  - character identifying the call site (low 8 bits are printed).
+ * addr - address to print alongside the dump.
+ * ap   - address of the bytes to dump; 20 bytes are read from it.
+ */
+static void
+dump_packet(int tag, u32 addr, unsigned long ap)
+{
+    const unsigned char *p = (const unsigned char *)ap;
+    int i;
+
+    printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
@@ -82,6 +101,21 @@
#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
@@ -322,6 +356,14 @@
for (i = np->tx_resp_cons; i != prod; i++) {
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+ printk(KERN_ALERT "netfront: query foreign access\n");
+ }
+ gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+ grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
ADD_ID_TO_FREELIST(np->tx_skbs, id);
dev_kfree_skb_irq(skb);
}
@@ -356,6 +398,9 @@
struct sk_buff *skb;
int i, batch_target;
NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ int ref;
+#endif
if (unlikely(np->backend_state != BEST_CONNECTED))
return;
@@ -388,7 +433,16 @@
np->rx_skbs[id] = skb;
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+ BUG();
+ }
+ grant_rx_ref[id] = ref;
+ gnttab_grant_foreign_transfer_ref(ref, rdomid,
+ virt_to_machine(skb->head) >> PAGE_SHIFT);
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
/* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
struct net_private *np = netdev_priv(dev);
netif_tx_request_t *tx;
NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ unsigned int ref;
+ unsigned long mfn;
+#endif
if (unlikely(np->tx_full)) {
printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+ printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+ BUG();
+ }
+ mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+ tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ grant_tx_ref[id] = ref;
+#else
tx->addr = virt_to_machine(skb->data);
+#endif
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -532,6 +601,10 @@
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ unsigned long mfn;
+ grant_ref_t ref;
+#endif
spin_lock(&np->rx_lock);
@@ -544,7 +617,6 @@
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
-
rp = np->rx->resp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -552,7 +624,6 @@
(i != rp) && (work_done < budget);
i++, work_done++) {
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
continue;
}
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ ref = grant_rx_ref[rx->id];
+ grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+ mfn = gnttab_end_foreign_transfer(ref);
+ gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
skb = np->rx_skbs[rx->id];
ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
/* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ skb->data = skb->head + rx->addr;
+#else
skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
skb->len = rx->status;
skb->tail = skb->data + skb->len;
@@ -582,18 +665,33 @@
np->stats.rx_bytes += rx->status;
/* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
mcl->args[2] = 0;
mcl++;
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ mfn;
+#else
rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n",
+ skb->data, mfn, ref);
+#endif
__skb_queue_tail(&rxq, skb);
}
@@ -612,6 +710,11 @@
}
while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+ printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+ skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
/*
* Enough room in skbuff for the data we were passed? Also, Linux
* expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
unlikely((skb->data - skb->head) < 16)) {
nskb = NULL;
+
/* Only copy the packet if it fits in the current MTU. */
if (skb->len <= (dev->mtu + ETH_HLEN)) {
if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
/* Set the shared-info area, which is hidden behind the real data. */
init_skb_shinfo(skb);
-
/* Ethernet-specific work. Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
@@ -923,6 +1026,9 @@
network_connect(dev, status);
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+ rdomid = status->domid;
+#endif
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
netctrl_connected_count();
(void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
np->rx_max_target = RX_MAX_TARGET;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)(i+1);
- for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
+ for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+ }
dev->open = network_open;
dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
if (xen_start_info.flags & SIF_INITDOMAIN)
return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+ &gref_tx_head, &gref_tx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+ &gref_rx_head, &gref_rx_terminal) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ return 1;
+ }
+ printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
if ((err = xennet_proc_init()) != 0)
return err;
@@ -1290,6 +1420,16 @@
return err;
}
+static void netif_exit(void) /* Exit hook registered with module_exit() at the end of this file. */
+{ /* Hands the tx/rx grant-reference pools back via gnttab_free_grant_references(). */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head); /* same count as the tx gnttab_alloc_grant_references() call */
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head); /* same count as the rx gnttab_alloc_grant_references() call */
+#endif
+} /* NOTE(review): the init path returns early for SIF_INITDOMAIN before allocating; presumably this must not free in that case -- confirm. */
+
static void vif_suspend(struct net_private *np)
{
/* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
#endif
module_init(netif_init);
+module_exit(netif_exit);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c 2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
}
#endif
+/*
+ * gnttab_donate: give machine frames owned by the calling domain to
+ * foreign domains that have registered a transfer grant for them.
+ *
+ * uop   - array of operations; only u.donate is used.  Inputs are mfn
+ *         (frame to give away), domid (recipient) and handle (the
+ *         recipient's grant reference); status is written back.
+ * count - number of entries in uop.
+ *
+ * Per-entry validation failures (Xen heap frame, invalid pfn, unknown
+ * domain) set that entry's status and move on to the next entry.  Any
+ * later failure stops processing and is reported through the return
+ * value, which is GNTST_okay only if no such failure occurred.
+ *
+ * NOTE(review): the ownership swap uses x86 cmpxchg8b inline asm even
+ * though this file lives under xen/common; other architectures will
+ * presumably need their own implementation -- confirm.
+ */
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+    struct domain *d = current->domain;
+    struct domain *e;
+    struct pfn_info *page;
+    u32 _d, _nd, x, y;
+    int i;
+    int result = GNTST_okay;
+
+    for (i = 0; i < count; i++) {
+        gnttab_donate_t *gop = &uop[i].u.donate;
+        /*
+         * NOTE(review): this guard is "#if" while the ones further down
+         * are "#ifdef"; they differ if GRANT_DEBUG is defined as 0 or
+         * defined empty -- confirm which form is intended.
+         */
+#if GRANT_DEBUG
+        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+               i, gop->mfn, gop->domid, gop->handle);
+#endif
+        /*
+         * NOTE(review): the frame_table entry is inspected by
+         * IS_XEN_HEAP_FRAME() before pfn_valid() has vetted the mfn;
+         * presumably the two checks should be swapped -- confirm.
+         */
+        page = &frame_table[gop->mfn];
+
+        if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+            printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+            printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        /* Takes a reference on 'e'; every path below must put_domain(e). */
+        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            gop->status = GNTST_bad_domain;
+            continue;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while
+         * there is just one benign reference to the page
+         * (PGC_allocated). If that reference disappears then the
+         * deallocation routine will safely spin.
+         */
+        _d = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y = page->count_info;
+        do {
+            x = y;
+            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                       " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                       d, d->id, unpickle_domptr(_nd), x,
+                       page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                /*
+                 * Must not return 0 here: the netback caller treats a
+                 * zero return and a zero status as success, and would
+                 * carry on as if the frame had been transferred.
+                 */
+                gop->status = GNTST_general_error;
+                return GNTST_general_error;
+            }
+            /*
+             * 64-bit compare-and-swap on the 8 bytes starting at
+             * count_info: expected value is {x, _d}, replacement is
+             * {x, NULL}, i.e. the count is kept and the owner cleared.
+             * Presumably u.inuse._domain directly follows count_info
+             * in struct pfn_info -- confirm.
+             */
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } while (unlikely(_nd != _d) || unlikely(y != x));
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now
+         * anonymous), so noone else is spinning to try to delete
+         * this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation
+         * headroom. Also, a domain mustn't have PGC_allocated
+         * pages when it is dying.
+         *
+         * NOTE(review): on the failure paths below the page has already
+         * been unlinked from 'd' and is not linked to 'e'; it looks as
+         * if the frame is left without an owner -- confirm.
+         */
+#ifdef GRANT_DEBUG
+        if (unlikely(e->tot_pages >= e->max_pages)) {
+            printk("gnttab_donate: no headroom tot_pages=%d max_pages=%d\n",
+                   e->tot_pages, e->max_pages);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+            printk("gnttab_donate: target domain is dying\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#else
+        ASSERT(e->tot_pages <= e->max_pages);
+        if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+            unlikely(e->tot_pages == e->max_pages) ||
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: Transferee has no reservation headroom (%d,%d), or "
+                   "provided a bad grant ref (%08x), or is dying (%p).\n",
+                   e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#endif
+        /* Okay, add the page to 'e'. */
+        if (unlikely(e->tot_pages++ == 0)) {
+            /* First page of 'e': take an extra domain reference. */
+            get_knownalive_domain(e);
+        }
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /*
+         * Transfer is all done: tell the guest about its new page
+         * frame.
+         */
+        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+        put_domain(e);
+
+        gop->status = GNTST_okay;
+    }
+    return result;
+}
+
long
do_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
break;
#endif
+ case GNTTABOP_donate:
+ if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+ sizeof(gnttab_op_t)))) {
+ goto out;
+ }
+ rc = gnttab_donate(uop, count);
+ break;
default:
rc = -ENOSYS;
break;
@@ -1066,6 +1213,10 @@
}
sha->frame = __mfn_to_gpfn(rd, frame);
sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+ printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+ ref, frame, pfn, sha->frame);
+#endif
wmb();
sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h 2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
s16 status; /* 2: GNTST_* */
} PACKED gnttab_dump_table_t; /* 4 bytes */
+/*
+ * GNTTABOP_donate: Donate the frame <mfn> to the foreign domain <domid>.
+ * The foreign domain has previously registered the details of the
+ * transfer; these are identified by <handle>, a grant reference.
+ * <status> is written back with a GNTST_* code.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+ memory_t mfn; /* 0: machine frame to give away */
+ domid_t domid; /* 4: recipient domain */
+ u16 handle; /* originally marked 8; NOTE(review): the 14-byte packed total implies domid_t is 2 bytes, i.e. offset 6 -- confirm */
+ s16 status; /* GNTST_*; originally marked 10, offset 8 under the same assumption -- confirm */
+ u32 __pad; /* explicit padding */
+} PACKED gnttab_donate_t; /* 14 bytes */
/*
* Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
gnttab_unmap_grant_ref_t unmap_grant_ref;
gnttab_setup_table_t setup_table;
gnttab_dump_table_t dump_table;
+ gnttab_donate_t donate;
u8 __dummy[24];
} PACKED u;
} PACKED gnttab_op_t; /* 32 bytes */
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h 2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
typedef struct {
u16 id; /* 0: Echoed in response message. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+ grant_ref_t gref; /* 2: Reference to incoming granted frame */
+#endif
} PACKED netif_rx_request_t; /* 2 bytes without gref; larger when CONFIG_XEN_NETDEV_GRANT_RX adds it. NOTE(review): both ends of the ring must be built with the same setting -- confirm. */
typedef struct {
- memory_t addr; /* 0: Machine address of packet. */
+ u32 addr; /* 0: Offset in page of start of received packet */
MEMORY_PADDING;
u16 csum_valid:1; /* Protocol checksum is validated? */
u16 id:15; /* 8: */
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 2:28 netif & grant tables Matt Chapman
2005-07-01 10:08 ` Vincent Hanquez
@ 2005-07-01 20:29 ` Stefan Berger
2005-07-01 20:40 ` Matt Chapman
1 sibling, 1 reply; 13+ messages in thread
From: Stefan Berger @ 2005-07-01 20:29 UTC (permalink / raw)
To: Matt Chapman; +Cc: xen-devel
xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:
> Hi,
>
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
You *probably* won't get this to work right out of the box. On i386 it
fails because domUs are not privileged, so a check like IS_PRIV()
fails in xen/common/grant_table.c line 692 and probably somewhere else
as well. The question is how this should be fixed. Should the hypervisor
call that creates a domain receive an additional parameter with flags that
should be set in the domain, for example _DOMF_privileged?
Currently this flag only seems to be set in one place, for dom0.
The quick fix is:
add
set_bit(_DOMF_privileged, &d->domain_flags)
before the 'return d' in do_createdomain() in xen/common/domain.c -> it
will make all domains privileged
To compile the backends into a domU I had to enable
CONFIG_XEN_PRIVILEGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the .config
file of the domU kernel so that alloc_empty_lowmem_region() is compiled
into the kernel (arch/xen/i386/mm/hypervisor.c). Is this call to
alloc_empty_lowmem_region() necessary, or would another memory allocation
routine work as well? All the backends seem to use it, though.
Stefan
>
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?
>
> Matt
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 20:29 ` Stefan Berger
@ 2005-07-01 20:40 ` Matt Chapman
2005-07-01 21:07 ` Stefan Berger
0 siblings, 1 reply; 13+ messages in thread
From: Matt Chapman @ 2005-07-01 20:40 UTC (permalink / raw)
To: Stefan Berger; +Cc: xen-devel
Hi Stefan,
I'm not trying to run the backend or any real drivers in domU,
only the frontend.
Matt
On Fri, Jul 01, 2005 at 04:29:20PM -0400, Stefan Berger wrote:
> xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:
>
> > Hi,
> >
> > I'm currently looking at getting domU networking working
> > on IA64, and to do this I need to make netback/netfront
> > use grant tables.
>
> You *probably* won't get this to work right out of the box. On i386 it
> fails due to dom Us not becoming privileged and so a check like IS_PRIV()
> fails in xen/common/grant_table.c line 692 and probably somewhere else
> also. The question is how this should be fixed. Should the HV call to
> create a domain receive an additional parameter including flags that
> should be set in a domain, such as for example the _DOMF_privileged?
> Currently this flag only seems to be set in one place for dom 0.
>
> The quick fix is:
> add
> set_bit(_DOMF_privileged, &d->domain_flags)
>
> before the 'return d' in do_createdomain() in xen/common/domain.c -> it
> will make all domains privileged
>
> To compile the backends into a domU I had to activate
> CONFIG_XEN_PRIVILEGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the .config
> file of the dom U kernel for having alloc_empty_lowmem_region() compiled
> into the kernel (arch/xen/i386/mm/hypervisor.c). Is this call to
> alloc_empty_lowmem_region() necessary or would another memory allocation
> routine work as well? All the backends seem to use it, though.
>
> Stefan
>
> >
> > I'm told that there's already a patch floating around,
> > can someone tell me where to find it?
> >
> > Matt
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xensource.com
> > http://lists.xensource.com/xen-devel
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 20:40 ` Matt Chapman
@ 2005-07-01 21:07 ` Stefan Berger
2005-07-01 22:19 ` Matt Chapman
2005-07-02 1:56 ` Mark Williamson
0 siblings, 2 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-01 21:07 UTC (permalink / raw)
To: Matt Chapman; +Cc: xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 2608 bytes --]
xen-devel-bounces@lists.xensource.com wrote on 07/01/2005 03:40:31 PM:
> Hi Stefan,
>
> I'm not trying to run the backend or any real drivers in domU,
> only the frontend.
Just curious: On IA64 you must have grant tables to get networking
working?
If someone has the matching problem for my solution, then let me know. :-)
Otherwise I think the problem of making domains privileged should really
be solved - probably starting somewhere in XEN-D.
Stefan
>
> Matt
>
>
> On Fri, Jul 01, 2005 at 04:29:20PM -0400, Stefan Berger wrote:
> > xen-devel-bounces@lists.xensource.com wrote on 06/30/2005 10:28:54 PM:
> >
> > > Hi,
> > >
> > > I'm currently looking at getting domU networking working
> > > on IA64, and to do this I need to make netback/netfront
> > > use grant tables.
> >
> > You *probably* won't get this to work right out of the box. On i386 it
> > fails due to dom Us not becoming privileged and so a check like IS_PRIV()
> > fails in xen/common/grant_table.c line 692 and probably somewhere else
> > also. The question is how this should be fixed. Should the HV call to
> > create a domain receive an additional parameter including flags that
> > should be set in a domain, such as for example the _DOMF_privileged?
> > Currently this flag only seems to be set in one place for dom 0.
> >
> > The quick fix is:
> > add
> > set_bit(_DOMF_privileged, &d->domain_flags)
> >
> > before the 'return d' in do_createdomain() in xen/common/domain.c -> it
> > will make all domains privileged
> >
> > To compile the backends into a domU I had to activate
> > CONFIG_XEN_PRIVILEGED_GUEST and CONFIG_XEN_PHYSDEV_ACCESS in the .config
> > file of the dom U kernel for having alloc_empty_lowmem_region() compiled
> > into the kernel (arch/xen/i386/mm/hypervisor.c). Is this call to
> > alloc_empty_lowmem_region() necessary or would another memory allocation
> > routine work as well? All the backends seem to use it, though.
> >
> > Stefan
> >
> > >
> > > I'm told that there's already a patch floating around,
> > > can someone tell me where to find it?
> > >
> > > Matt
> > >
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xensource.com
> > > http://lists.xensource.com/xen-devel
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xensource.com
> > http://lists.xensource.com/xen-devel
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 3497 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 21:07 ` Stefan Berger
@ 2005-07-01 22:19 ` Matt Chapman
2005-07-02 1:56 ` Mark Williamson
1 sibling, 0 replies; 13+ messages in thread
From: Matt Chapman @ 2005-07-01 22:19 UTC (permalink / raw)
To: Stefan Berger; +Cc: xen-devel
On Fri, Jul 01, 2005 at 04:07:05PM -0500, Stefan Berger wrote:
> xen-devel-bounces@lists.xensource.com wrote on 07/01/2005 03:40:31 PM:
>
> > Hi Stefan,
> >
> > I'm not trying to run the backend or any real drivers in domU,
> > only the frontend.
>
> Just curious: On IA64 you must have grant tables to get networking
> working?
Well, on IA64 domU doesn't know about real machine addresses (ala
shadow mode on x86). The grant table mechanism is the easiest way
to enable sharing of pages - grant references provide a common
namespace for referring to a page.
Matt
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-01 21:07 ` Stefan Berger
2005-07-01 22:19 ` Matt Chapman
@ 2005-07-02 1:56 ` Mark Williamson
2005-07-02 3:15 ` Stefan Berger
1 sibling, 1 reply; 13+ messages in thread
From: Mark Williamson @ 2005-07-02 1:56 UTC (permalink / raw)
To: xen-devel; +Cc: Stefan Berger, Matt Chapman
> If someone has the matching problem for my solution, then let me know. :-)
> Otherwise I think the problem of making domains privileged should really
> be solved - probably starting somewhere in XEN-D.
There should probably be a flag you pass down from the config. The current
hack people use is to give the domain access to a PCI device but not compile
in the drivers. Driver domains are privileged at the moment, so it works :-S
With full grant tables support, full privilege is not necessary, just a grant
from the other party. That's probably the nicest long term solution and can
also hook in with a suitable IO-TLB to provide protection against rogue DMAs.
Cheers,
Mark
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-02 1:56 ` Mark Williamson
@ 2005-07-02 3:15 ` Stefan Berger
2005-07-02 15:34 ` Mark Williamson
0 siblings, 1 reply; 13+ messages in thread
From: Stefan Berger @ 2005-07-02 3:15 UTC (permalink / raw)
To: Mark Williamson; +Cc: xen-devel, Matt Chapman
Mark Williamson <mark.williamson@cl.cam.ac.uk> wrote on 07/01/2005
09:56:26 PM:
> > If someone has the matching problem for my solution, then let me know. :-)
> > Otherwise I think the problem of making domains privileged should really
> > be solved - probably starting somewhere in XEN-D.
>
> There should probably be a flag you pass down from the config. The current
It could be done implicitly, meaning that if you give a domain a backend
(netif/blkif), that privilege flag will automatically be set by XEN-D and
used when creating the domain, or explicitly where one specifies the
flag(s) to set in the VM config file.
> hack people use is to give the domain access to a PCI device but not compile
> in the drivers. Driver domains are privileged at the moment, so it works :-S
From what I can see this does not work anymore - I used to do that also.
Passing a PCI device to a partition results in an error since the
xc_physdev_pci_access_modify call ends in an error.
>
> With full grant tables support, full privilege is not necessary, just a grant
> from the other party. That's probably the nicest long term solution and can
> also hook in with a suitable IO-TLB to provide protection against rogue DMAs.
I am not sure how 'privilege' is defined. So far, privilege does not only
mean being allowed to do dom 0 ops; its absence also seems to keep guest
domains from doing other things - like the backend problem I see. I agree,
though, that for grant table support a backend should not need privileges.
>
> Cheers,
> Mark
Cheers,
Stefan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-02 3:15 ` Stefan Berger
@ 2005-07-02 15:34 ` Mark Williamson
2005-07-03 22:12 ` Stefan Berger
0 siblings, 1 reply; 13+ messages in thread
From: Mark Williamson @ 2005-07-02 15:34 UTC (permalink / raw)
To: Stefan Berger; +Cc: xen-devel, Matt Chapman
> It could be done implicitly, meaning that if you give a domain a backend
> (netif/blkif), that privilege flag will automatically be set by XEN-D and
> used when creating the domain, or explicitly where one specifies the
> flag(s) to set in the VM config file.
Doing it implicitly would probably be sensible.
> From what I can see this does not work anymore - I used to do that also.
> Passing a PCI device to a partition results in an error since the
> xc_physdev_pci_access_modify call ends in an error.
Assigning PCI devices is broken in unstable at the moment. It'll be coming
back at some stage.
> I am not sure how 'privilege' is defined.
Very coarsely at present: IIRC right now any domain that's got access to a PCI
device is as privileged as dom0. This means such a domain is allowed to map
memory of other domains, do dom0 ops, etc.
Grant tables will enable us to deprivilege guests somewhat, then we'll split
privileges down into more fine-grained capabilities.
Cheers,
Mark
> The privilege does so far not
> only mean to do dom 0 ops, but seems to also limit guest domains of doing
> other things - like the backend problem I see. I agree, though, that for
> grant table support a backend should not need privileges.
>
> > Cheers,
> > Mark
>
> Cheers,
> Stefan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-02 15:34 ` Mark Williamson
@ 2005-07-03 22:12 ` Stefan Berger
0 siblings, 0 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-03 22:12 UTC (permalink / raw)
To: Mark Williamson; +Cc: xen-devel, Matt Chapman
xen-devel-bounces@lists.xensource.com wrote on 07/02/2005 11:34:58 AM:
> > It could be done implicitly, meaning that if you give a domain a
backend
> > (netif/blkif), that privilege flag will automatically be set by XEN-D
and
> > used when creating the domain, or explicitly where one specifies the
> > flag(s) to set in the VM config file.
>
> Doing it implicitly would probably be sensible.
>
> > From what I can see this does not work anymore - I used to do that
also.
> > Passing a PCI device to a partition results in an error since the
> > xc_physdev_pci_access_modify call ends in an error.
>
> Assigning PCI devices is broken in unstable at the moment. It'll be
coming
> back at some stage.
>
> > I am not sure how 'privilege' is defined.
>
> Very coarsely at present: IIRC right now domain who's got access to a
PCI
> device is as privileged as dom0. This means they're allowed to map
memory of
> other domains, do dom0 ops, etc.
>
> Grant tables will enable us to deprivilege guests somewhat, then we'll
split
> privileges down into more fine-grained capabilities.
>
Setting the privileged bit in a user domain gets grant tables to work:
should this bit be set for those kinds of domains, or should the IS_PRIV()
test rather be removed from the call path, which basically would allow all
user domains to do mapping by default?
Stefan
> Cheers,
> Mark
>
> > The privilege does so far not
> > only mean to do dom 0 ops, but seems to also limit guest domains of
doing
> > other things - like the backend problem I see. I agree, though, that
for
> > grant table support a backend should not need privileges.
> >
> > > Cheers,
> > > Mark
> >
> > Cheers,
> > Stefan
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
[not found] <200507040226.18182.mark.williamson@cl.cam.ac.uk>
@ 2005-07-04 18:43 ` Stefan Berger
2005-07-04 18:44 ` Keir Fraser
2005-07-04 19:11 ` Mark A. Williamson
0 siblings, 2 replies; 13+ messages in thread
From: Stefan Berger @ 2005-07-04 18:43 UTC (permalink / raw)
To: Mark Williamson, xen-devel
Mark Williamson <mark.williamson@cl.cam.ac.uk> wrote on 07/03/2005
09:26:18 PM:
> [off list]
>
> > Setting the privileged bit in a user domain gets grant tables to work:
> > should this bit be set for those kind of domains or rather the
IS_PRIV()
> > test be removed from the call path which basically would allow all
user
> > domains to do mapping by default?
>
> I'm not clear what you mean here - AFAIK grant tables don't require
IS_PRIV()
> in order to work. Or am I misunderstanding?
You are right, it's not the grant tables per se that need the privileged
bit to be set, but other functions need it and keep backends from working.
Here are two code paths from the network backend:
from netback/interface.c calls
linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c:direct_remap_area_pages()
calls
HYPERVISOR_mmu_update calls
xen/arch/x86/mm.c:do_mmu_update calls
set_foreigndom() which has an IS_PRIV() in the path
-> The direct_remap_area_pages call fails if a domain does not have the
privilege bit set.
netback/netback.c: alloc_mfn() calls
HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list,
MAX_MFN_ALLOC, 0);
xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a
IS_PRIV() in the path
Stefan
>
> What I meant was that since grant tables are an explicit capability (you
can
> only map a page of another dom if it gave you an explicit grant) there's
no
> need for mappings in the IO path to require special privileges at all.
If
> someone gave you a grant, they must trust you enough to access that
page.
>
> Cheers,
> Mark
>
> > Stefan
> >
> > > Cheers,
> > > Mark
> > >
> > > > The privilege does so far not
> > > > only mean to do dom 0 ops, but seems to also limit guest domains
of
> >
> > doing
> >
> > > > other things - like the backend problem I see. I agree, though,
that
> >
> > for
> >
> > > > grant table support a backend should not need privileges.
> > > >
> > > > > Cheers,
> > > > > Mark
> > > >
> > > > Cheers,
> > > > Stefan
> > >
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xensource.com
> > > http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-04 18:43 ` Stefan Berger
@ 2005-07-04 18:44 ` Keir Fraser
2005-07-04 19:11 ` Mark A. Williamson
1 sibling, 0 replies; 13+ messages in thread
From: Keir Fraser @ 2005-07-04 18:44 UTC (permalink / raw)
To: Stefan Berger; +Cc: xen-devel, Mark Williamson
On 4 Jul 2005, at 19:43, Stefan Berger wrote:
> linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c:
> direct_remap_area_pages()
> calls
> HYPERVISOR_mmu_update calls
> xen/arch/x86/mm.c:do_mmu_update calls
> set_foreigndom() which has an IS_PRIV() in the path
>
> -> The direct_remap_area_pages call fails if a domain does not have the
> privilege bit set.
The backend driver parts that use this function need cleaning up to use
a grant reference instead.
> netback/netback.c: alloc_mfn() calls
> HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list,
> MAX_MFN_ALLOC, 0);
> xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a
> IS_PRIV() in the path
It only disallows you from adjusting others' reservations. You can
still adjust your own.
-- Keir
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: netif & grant tables
2005-07-04 18:43 ` Stefan Berger
2005-07-04 18:44 ` Keir Fraser
@ 2005-07-04 19:11 ` Mark A. Williamson
1 sibling, 0 replies; 13+ messages in thread
From: Mark A. Williamson @ 2005-07-04 19:11 UTC (permalink / raw)
To: Stefan Berger; +Cc: xen-devel
> You are right, it's not the grant tables per se that need the privileged
> bit to be set, but other functions need it and keep backends from working.
>
> Here are two code paths from the network backend:
I suspected it might be something like this. There's no reason for those to
require privilege either: they can fairly trivially be converted to use grant
tables too. Once it's fully grant-table-ified it shouldn't be necessary to
make such domains be privileged.
Full grant tables support is also a pre-req for the point to point "snappable
frontend" connections directly between domains with high bandwidth
requirements.
Cheers,
Mark
> from netback/interface.c calls
>
> linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c:direct_remap_area_pages()
> calls
> HYPERVISOR_mmu_update calls
> xen/arch/x86/mm.c:do_mmu_update calls
> set_foreigndom() which has an IS_PRIV() in the path
>
> -> The direct_remap_area_pages call fails if a domain does not have the
> privilege bit set.
>
>
> netback/netback.c: alloc_mfn() calls
> HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, mfn_list,
> MAX_MFN_ALLOC, 0);
> xen/common/dom_mem_ops.c:do_dom_mem_op() is called which has a
> IS_PRIV() in the path
>
>
>
> Stefan
>
> > What I meant was that since grant tables are an explicit capability (you
>
> can
>
> > only map a page of another dom if it gave you an explicit grant) there's
>
> no
>
> > need for mappings in the IO path to require special privileges at all.
>
> If
>
> > someone gave you a grant, they must trust you enough to access that
>
> page.
>
> > Cheers,
> > Mark
> >
> > > Stefan
> > >
> > > > Cheers,
> > > > Mark
> > > >
> > > > > The privilege does so far not
> > > > > only mean to do dom 0 ops, but seems to also limit guest domains
>
> of
>
> > > doing
> > >
> > > > > other things - like the backend problem I see. I agree, though,
>
> that
>
> > > for
> > >
> > > > > grant table support a backend should not need privileges.
> > > > >
> > > > > > Cheers,
> > > > > > Mark
> > > > >
> > > > > Cheers,
> > > > > Stefan
> > > >
> > > > _______________________________________________
> > > > Xen-devel mailing list
> > > > Xen-devel@lists.xensource.com
> > > > http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2005-07-04 19:11 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-07-01 2:28 netif & grant tables Matt Chapman
2005-07-01 10:08 ` Vincent Hanquez
2005-07-01 20:29 ` Stefan Berger
2005-07-01 20:40 ` Matt Chapman
2005-07-01 21:07 ` Stefan Berger
2005-07-01 22:19 ` Matt Chapman
2005-07-02 1:56 ` Mark Williamson
2005-07-02 3:15 ` Stefan Berger
2005-07-02 15:34 ` Mark Williamson
2005-07-03 22:12 ` Stefan Berger
[not found] <200507040226.18182.mark.williamson@cl.cam.ac.uk>
2005-07-04 18:43 ` Stefan Berger
2005-07-04 18:44 ` Keir Fraser
2005-07-04 19:11 ` Mark A. Williamson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.