* [PATCH 2/4] xen/netback: Split one page pool into two(tx/rx) page pool.
From: Annie Li @ 2012-11-15 7:04 UTC (permalink / raw)
To: xen-devel, netdev, konrad.wilk, Ian.Campbell; +Cc: annie.li
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
For the tx path, this implementation simplifies the search for a grant
page in the page pool based on its grant reference.
Signed-off-by: Annie Li <annie.li@oracle.com>
---
drivers/net/xen-netback/common.h | 14 ++++++++++----
drivers/net/xen-netback/interface.c | 12 ++++++++----
drivers/net/xen-netback/netback.c | 15 +++++++++------
3 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index a85cac6..02c8573 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -47,8 +47,6 @@
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (XEN_NETIF_TX_RING_SIZE + XEN_NETIF_RX_RING_SIZE)
struct xen_netbk;
@@ -111,8 +109,16 @@ struct xenvif {
wait_queue_head_t waiting_to_free;
- struct persistent_entry *persistent_gnt[MAXIMUM_OUTSTANDING_BLOCK_REQS];
- unsigned int persistent_gntcnt;
+ struct persistent_entry *persistent_tx_gnt[XEN_NETIF_TX_RING_SIZE];
+
+ /*
+ * 2*XEN_NETIF_RX_RING_SIZE is for the case of each head/fragment page
+ * using 2 copy operations.
+ */
+ struct persistent_entry *persistent_rx_gnt[2*XEN_NETIF_RX_RING_SIZE];
+
+ unsigned int persistent_tx_gntcnt;
+ unsigned int persistent_rx_gntcnt;
};
static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 226d159..ecbe116 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -300,7 +300,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
return ERR_PTR(err);
}
- vif->persistent_gntcnt = 0;
+ vif->persistent_tx_gntcnt = 0;
+ vif->persistent_rx_gntcnt = 0;
netdev_dbg(dev, "Successfully created xenvif\n");
return vif;
@@ -385,9 +386,12 @@ void xenvif_disconnect(struct xenvif *vif)
unregister_netdev(vif->dev);
xen_netbk_unmap_frontend_rings(vif);
- if (vif->persistent_grant)
- xenvif_free_grants(vif->persistent_gnt,
- vif->persistent_gntcnt);
+ if (vif->persistent_grant) {
+ xenvif_free_grants(vif->persistent_tx_gnt,
+ vif->persistent_tx_gntcnt);
+ xenvif_free_grants(vif->persistent_rx_gnt,
+ vif->persistent_rx_gntcnt);
+ }
free_netdev(vif->dev);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a26d3fc..ec59c73 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -208,14 +208,17 @@ grant_memory_copy_op(unsigned int cmd, void *vuop, unsigned int count,
BUG_ON(cmd != GNTTABOP_copy);
for (i = 0; i < count; i++) {
- if (tx_pool)
+ if (tx_pool) {
vif = netbk->gnttab_tx_vif[i];
- else
+ gnt_count = &vif->persistent_tx_gntcnt;
+ gnt_total = XEN_NETIF_TX_RING_SIZE;
+ pers_entry = vif->persistent_tx_gnt;
+ } else {
vif = netbk->gnttab_rx_vif[i];
-
- pers_entry = vif->persistent_gnt;
- gnt_count = &vif->persistent_gntcnt;
- gnt_total = MAXIMUM_OUTSTANDING_BLOCK_REQS;
+ gnt_count = &vif->persistent_rx_gntcnt;
+ gnt_total = 2*XEN_NETIF_RX_RING_SIZE;
+ pers_entry = vif->persistent_rx_gnt;
+ }
if (vif->persistent_grant) {
void *saddr, *daddr;
--
1.7.3.4
^ permalink raw reply related
* [PATCH 3/4] Xen/netfront: Implement persistent grant in netfront.
From: Annie Li @ 2012-11-15 7:05 UTC (permalink / raw)
To: xen-devel, netdev, konrad.wilk, Ian.Campbell; +Cc: annie.li
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
Tx/rx page pools are maintained. A new grant is mapped and put into the
pool; unmapping only happens when releasing/removing the device.
Signed-off-by: Annie Li <annie.li@oracle.com>
---
drivers/net/xen-netfront.c | 372 +++++++++++++++++++++++++++++++++++++-------
1 files changed, 315 insertions(+), 57 deletions(-)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 0ebbb19..17b81c0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -79,6 +79,13 @@ struct netfront_stats {
struct u64_stats_sync syncp;
};
+struct gnt_list {
+ grant_ref_t gref;
+ struct page *gnt_pages;
+ void *gnt_target;
+ struct gnt_list *tail;
+};
+
struct netfront_info {
struct list_head list;
struct net_device *netdev;
@@ -109,6 +116,10 @@ struct netfront_info {
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
+ struct gnt_list *tx_grant[NET_TX_RING_SIZE];
+ struct gnt_list *tx_gnt_list;
+ unsigned int tx_gnt_cnt;
+
spinlock_t rx_lock ____cacheline_aligned_in_smp;
struct xen_netif_rx_front_ring rx;
int rx_ring_ref;
@@ -126,6 +137,10 @@ struct netfront_info {
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+ struct gnt_list *rx_grant[NET_RX_RING_SIZE];
+ struct gnt_list *rx_gnt_list;
+ unsigned int rx_gnt_cnt;
+
unsigned long rx_pfn_array[NET_RX_RING_SIZE];
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
@@ -134,6 +149,7 @@ struct netfront_info {
struct netfront_stats __percpu *stats;
unsigned long rx_gso_checksum_fixup;
+ u8 persistent_gnt:1;
};
struct netfront_rx_info {
@@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
return ref;
}
+static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ struct gnt_list *gntlist = np->rx_grant[i];
+ np->rx_grant[i] = NULL;
+
+ return gntlist;
+}
+
#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
@@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev)
netif_wake_queue(dev);
}
+static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev,
+ unsigned long mfn, void *vaddr,
+ unsigned int id,
+ grant_ref_t ref)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ grant_ref_t gnt_ref;
+ struct gnt_list *gnt_list_entry;
+
+ if (np->persistent_gnt && np->rx_gnt_cnt) {
+ gnt_list_entry = np->rx_gnt_list;
+ np->rx_gnt_list = np->rx_gnt_list->tail;
+ np->rx_gnt_cnt--;
+
+ gnt_list_entry->gnt_target = vaddr;
+ gnt_ref = gnt_list_entry->gref;
+ np->rx_grant[id] = gnt_list_entry;
+ } else {
+ struct page *page;
+
+ BUG_ON(!np->persistent_gnt && np->rx_gnt_cnt);
+ if (!ref)
+ gnt_ref =
+ gnttab_claim_grant_reference(&np->gref_rx_head);
+ else
+ gnt_ref = ref;
+ BUG_ON((signed short)gnt_ref < 0);
+
+ if (np->persistent_gnt) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ if (!ref)
+ gnttab_release_grant_reference(
+ &np->gref_rx_head, ref);
+ return -ENOMEM;
+ }
+ mfn = pfn_to_mfn(page_to_pfn(page));
+
+ gnt_list_entry = kmalloc(sizeof(struct gnt_list),
+ GFP_KERNEL);
+ if (!gnt_list_entry) {
+ __free_page(page);
+ if (!ref)
+ gnttab_release_grant_reference(
+ &np->gref_rx_head, ref);
+ return -ENOMEM;
+ }
+ gnt_list_entry->gref = gnt_ref;
+ gnt_list_entry->gnt_pages = page;
+ gnt_list_entry->gnt_target = vaddr;
+
+ np->rx_grant[id] = gnt_list_entry;
+ }
+
+ gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id,
+ mfn, 0);
+ }
+ np->grant_rx_ref[id] = gnt_ref;
+
+ return gnt_ref;
+}
+
static void xennet_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
@@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev)
int i, batch_target, notify;
RING_IDX req_prod = np->rx.req_prod_pvt;
grant_ref_t ref;
- unsigned long pfn;
- void *vaddr;
struct xen_netif_rx_request *req;
if (unlikely(!netif_carrier_ok(dev)))
@@ -306,19 +392,16 @@ no_skb:
BUG_ON(np->rx_skbs[id]);
np->rx_skbs[id] = skb;
- ref = gnttab_claim_grant_reference(&np->gref_rx_head);
- BUG_ON((signed short)ref < 0);
- np->grant_rx_ref[id] = ref;
+ page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
- pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
- vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
+ ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
+ page_address(page), id, 0);
+ if ((signed short)ref < 0) {
+ __skb_queue_tail(&np->rx_batch, skb);
+ break;
+ }
req = RING_GET_REQUEST(&np->rx, req_prod + i);
- gnttab_grant_foreign_access_ref(ref,
- np->xbdev->otherend_id,
- pfn_to_mfn(pfn),
- 0);
-
req->id = id;
req->gref = ref;
}
@@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev)
id = txrsp->id;
skb = np->tx_skbs[id].skb;
- if (unlikely(gnttab_query_foreign_access(
- np->grant_tx_ref[id]) != 0)) {
- printk(KERN_ALERT "xennet_tx_buf_gc: warning "
- "-- grant still in use by backend "
- "domain.\n");
- BUG();
+
+ if (np->persistent_gnt) {
+ struct gnt_list *gnt_list_entry;
+
+ gnt_list_entry = np->tx_grant[id];
+ BUG_ON(!gnt_list_entry);
+
+ gnt_list_entry->tail = np->tx_gnt_list;
+ np->tx_gnt_list = gnt_list_entry;
+ np->tx_gnt_cnt++;
+ } else {
+ if (unlikely(gnttab_query_foreign_access(
+ np->grant_tx_ref[id]) != 0)) {
+ printk(KERN_ALERT "xennet_tx_buf_gc: warning "
+ "-- grant still in use by backend "
+ "domain.\n");
+ BUG();
+ }
+
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[id]);
}
- gnttab_end_foreign_access_ref(
- np->grant_tx_ref[id], GNTMAP_readonly);
- gnttab_release_grant_reference(
- &np->gref_tx_head, np->grant_tx_ref[id]);
np->grant_tx_ref[id] = GRANT_INVALID_REF;
add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
dev_kfree_skb_irq(skb);
@@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev)
xennet_maybe_wake_tx(dev);
}
+static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev,
+ unsigned long mfn,
+ unsigned int id)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ grant_ref_t ref;
+ struct page *granted_page;
+
+ if (np->persistent_gnt && np->tx_gnt_cnt) {
+ struct gnt_list *gnt_list_entry;
+
+ gnt_list_entry = np->tx_gnt_list;
+ np->tx_gnt_list = np->tx_gnt_list->tail;
+ np->tx_gnt_cnt--;
+
+ ref = gnt_list_entry->gref;
+ np->tx_grant[id] = gnt_list_entry;
+ } else {
+ struct gnt_list *gnt_list_entry;
+
+ BUG_ON(!np->persistent_gnt && np->tx_gnt_cnt);
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+
+ if (np->persistent_gnt) {
+ granted_page = alloc_page(GFP_KERNEL);
+ if (!granted_page) {
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, ref);
+ return -ENOMEM;
+ }
+
+ mfn = pfn_to_mfn(page_to_pfn(granted_page));
+ gnt_list_entry = kmalloc(sizeof(struct gnt_list),
+ GFP_KERNEL);
+ if (!gnt_list_entry) {
+ __free_page(granted_page);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, ref);
+ return -ENOMEM;
+ }
+
+ gnt_list_entry->gref = ref;
+ gnt_list_entry->gnt_pages = granted_page;
+ np->tx_grant[id] = gnt_list_entry;
+ }
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+ mfn, 0);
+ }
+
+ return ref;
+}
+
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
struct xen_netif_tx_request *tx)
{
@@ -421,6 +570,9 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
unsigned int len = skb_headlen(skb);
unsigned int id;
grant_ref_t ref;
+ struct gnt_list *gnt_list_entry;
+ void *out_addr;
+ void *in_addr;
int i;
/* While the header overlaps a page boundary (including being
@@ -436,17 +588,19 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
np->tx_skbs[id].skb = skb_get(skb);
tx = RING_GET_REQUEST(&np->tx, prod++);
tx->id = id;
- ref = gnttab_claim_grant_reference(&np->gref_tx_head);
- BUG_ON((signed short)ref < 0);
-
mfn = virt_to_mfn(data);
- gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
- mfn, GNTMAP_readonly);
-
+ ref = xennet_alloc_tx_ref(dev, mfn, id);
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = offset;
tx->size = len;
tx->flags = 0;
+ if (np->persistent_gnt) {
+ gnt_list_entry = np->tx_grant[id];
+ out_addr = page_address(gnt_list_entry->gnt_pages);
+ in_addr = (void *)((unsigned long)data
+ & ~(PAGE_SIZE-1));
+ memcpy(out_addr, in_addr, PAGE_SIZE);
+ }
}
/* Grant backend access to each skb fragment page. */
@@ -459,17 +613,19 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
np->tx_skbs[id].skb = skb_get(skb);
tx = RING_GET_REQUEST(&np->tx, prod++);
tx->id = id;
- ref = gnttab_claim_grant_reference(&np->gref_tx_head);
- BUG_ON((signed short)ref < 0);
-
mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag)));
- gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
- mfn, GNTMAP_readonly);
-
+ ref = xennet_alloc_tx_ref(dev, mfn, id);
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = frag->page_offset;
tx->size = skb_frag_size(frag);
tx->flags = 0;
+ if (np->persistent_gnt) {
+ gnt_list_entry = np->tx_grant[id];
+ out_addr = page_address(gnt_list_entry->gnt_pages);
+ in_addr = (void *)((unsigned long)page_address(
+ skb_frag_page(frag)) & ~(PAGE_SIZE-1));
+ memcpy(out_addr, in_addr, PAGE_SIZE);
+ }
}
np->tx.req_prod_pvt = prod;
@@ -491,6 +647,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int offset = offset_in_page(data);
unsigned int len = skb_headlen(skb);
unsigned long flags;
+ struct gnt_list *gnt_list_entry;
+ void *out_addr;
+ void *in_addr;
frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
@@ -517,16 +676,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx = RING_GET_REQUEST(&np->tx, i);
tx->id = id;
- ref = gnttab_claim_grant_reference(&np->gref_tx_head);
- BUG_ON((signed short)ref < 0);
mfn = virt_to_mfn(data);
- gnttab_grant_foreign_access_ref(
- ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
+ ref = xennet_alloc_tx_ref(dev, mfn, id);
tx->gref = np->grant_tx_ref[id] = ref;
tx->offset = offset;
tx->size = len;
extra = NULL;
+ if (np->persistent_gnt) {
+ gnt_list_entry = np->tx_grant[id];
+ out_addr = page_address(gnt_list_entry->gnt_pages);
+ in_addr = (void *)((unsigned long)data & ~(PAGE_SIZE-1));
+ memcpy(out_addr, in_addr, PAGE_SIZE);
+ }
+
tx->flags = 0;
if (skb->ip_summed == CHECKSUM_PARTIAL)
/* local packet? */
@@ -595,13 +758,17 @@ static int xennet_close(struct net_device *dev)
}
static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
- grant_ref_t ref)
+ grant_ref_t ref, RING_IDX cons)
{
int new = xennet_rxidx(np->rx.req_prod_pvt);
BUG_ON(np->rx_skbs[new]);
np->rx_skbs[new] = skb;
np->grant_rx_ref[new] = ref;
+
+ if (np->persistent_gnt)
+ np->rx_grant[new] = xennet_get_rx_grant(np, cons);
+
RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
np->rx.req_prod_pvt++;
@@ -644,7 +811,7 @@ static int xennet_get_extras(struct netfront_info *np,
skb = xennet_get_rx_skb(np, cons);
ref = xennet_get_rx_ref(np, cons);
- xennet_move_rx_slot(np, skb, ref);
+ xennet_move_rx_slot(np, skb, ref, cons);
} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
np->rx.rsp_cons = cons;
@@ -665,6 +832,12 @@ static int xennet_get_responses(struct netfront_info *np,
int frags = 1;
int err = 0;
unsigned long ret;
+ struct gnt_list *gnt_list_entry;
+
+ if (np->persistent_gnt) {
+ gnt_list_entry = xennet_get_rx_grant(np, cons);
+ BUG_ON(!gnt_list_entry);
+ }
if (rx->flags & XEN_NETRXF_extra_info) {
err = xennet_get_extras(np, extras, rp);
@@ -677,7 +850,7 @@ static int xennet_get_responses(struct netfront_info *np,
if (net_ratelimit())
dev_warn(dev, "rx->offset: %x, size: %u\n",
rx->offset, rx->status);
- xennet_move_rx_slot(np, skb, ref);
+ xennet_move_rx_slot(np, skb, ref, cons);
err = -EINVAL;
goto next;
}
@@ -695,11 +868,29 @@ static int xennet_get_responses(struct netfront_info *np,
goto next;
}
- ret = gnttab_end_foreign_access_ref(ref, 0);
- BUG_ON(!ret);
-
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
-
+ if (!np->persistent_gnt) {
+ ret = gnttab_end_foreign_access_ref(ref, 0);
+ BUG_ON(!ret);
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ } else {
+ struct page *grant_page;
+ void *grant_target;
+
+ grant_page = gnt_list_entry->gnt_pages;
+ grant_target = gnt_list_entry->gnt_target;
+ BUG_ON(grant_page == 0);
+ BUG_ON(grant_target == 0);
+
+ if (rx->status > 0)
+ memcpy(grant_target+rx->offset,
+ page_address(grant_page)+rx->offset,
+ rx->status); /* status encodes size */
+
+ gnt_list_entry->gref = ref;
+ gnt_list_entry->tail = np->rx_gnt_list;
+ np->rx_gnt_list = gnt_list_entry;
+ np->rx_gnt_cnt++;
+ }
__skb_queue_tail(list, skb);
next:
@@ -716,6 +907,10 @@ next:
rx = RING_GET_RESPONSE(&np->rx, cons + frags);
skb = xennet_get_rx_skb(np, cons + frags);
ref = xennet_get_rx_ref(np, cons + frags);
+ if (np->persistent_gnt) {
+ gnt_list_entry = xennet_get_rx_grant(np, cons + frags);
+ BUG_ON(!gnt_list_entry);
+ }
frags++;
}
@@ -1090,16 +1285,32 @@ static void xennet_release_tx_bufs(struct netfront_info *np)
struct sk_buff *skb;
int i;
+ if (np->persistent_gnt) {
+ struct gnt_list *gnt_list_entry;
+
+ while (np->tx_gnt_list) {
+ gnt_list_entry = np->tx_gnt_list;
+ np->tx_gnt_list = np->tx_gnt_list->tail;
+ gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ gnttab_release_grant_reference(&np->gref_tx_head,
+ gnt_list_entry->gref);
+
+ __free_page(gnt_list_entry->gnt_pages);
+ kfree(gnt_list_entry);
+ }
+ }
+
for (i = 0; i < NET_TX_RING_SIZE; i++) {
/* Skip over entries which are actually freelist references */
if (skb_entry_is_link(&np->tx_skbs[i]))
continue;
-
skb = np->tx_skbs[i].skb;
- gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
- GNTMAP_readonly);
- gnttab_release_grant_reference(&np->gref_tx_head,
- np->grant_tx_ref[i]);
+ if (!np->persistent_gnt) {
+ gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
+ GNTMAP_readonly);
+ gnttab_release_grant_reference(&np->gref_tx_head,
+ np->grant_tx_ref[i]);
+ }
np->grant_tx_ref[i] = GRANT_INVALID_REF;
add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
dev_kfree_skb_irq(skb);
@@ -1124,6 +1335,20 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
spin_lock_bh(&np->rx_lock);
+ if (np->persistent_gnt) {
+ struct gnt_list *gnt_list_entry;
+
+ while (np->rx_gnt_list) {
+ gnt_list_entry = np->rx_gnt_list;
+ np->rx_gnt_list = np->rx_gnt_list->tail;
+ gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ gnttab_release_grant_reference(&np->gref_rx_head,
+ gnt_list_entry->gref);
+ __free_page(gnt_list_entry->gnt_pages);
+ kfree(gnt_list_entry);
+ }
+ }
+
for (id = 0; id < NET_RX_RING_SIZE; id++) {
ref = np->grant_rx_ref[id];
if (ref == GRANT_INVALID_REF) {
@@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
}
skb = np->rx_skbs[id];
- mfn = gnttab_end_foreign_transfer_ref(ref);
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ if (!np->persistent_gnt) {
+ mfn = gnttab_end_foreign_transfer_ref(ref);
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ }
np->grant_rx_ref[id] = GRANT_INVALID_REF;
if (0 == mfn) {
@@ -1607,6 +1834,13 @@ again:
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants",
+ "%u", info->persistent_gnt);
+ if (err) {
+ message = "writing feature-persistent-grants";
+ xenbus_dev_fatal(dev, err, "%s", message);
+ }
+
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == -EAGAIN)
@@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev)
grant_ref_t ref;
struct xen_netif_rx_request *req;
unsigned int feature_rx_copy;
+ int ret, val;
err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
"feature-rx-copy", "%u", &feature_rx_copy);
@@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev)
return -ENODEV;
}
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-persistent-grants", "%u", &val);
+ if (err != 1)
+ val = 0;
+
+ np->persistent_gnt = !!val;
+
err = talk_to_netback(np->xbdev, np);
if (err)
return err;
@@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev)
spin_lock_bh(&np->rx_lock);
spin_lock_irq(&np->tx_lock);
+ np->tx_gnt_cnt = 0;
+ np->rx_gnt_cnt = 0;
+
/* Step 1: Discard all pending TX packet fragments. */
xennet_release_tx_bufs(np);
+ if (np->persistent_gnt) {
+ struct gnt_list *gnt_list_entry;
+
+ while (np->rx_gnt_list) {
+ gnt_list_entry = np->rx_gnt_list;
+ np->rx_gnt_list = np->rx_gnt_list->tail;
+ gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ __free_page(gnt_list_entry->gnt_pages);
+ kfree(gnt_list_entry);
+ }
+ }
+
/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
skb_frag_t *frag;
@@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev)
frag = &skb_shinfo(skb)->frags[0];
page = skb_frag_page(frag);
- gnttab_grant_foreign_access_ref(
- ref, np->xbdev->otherend_id,
- pfn_to_mfn(page_to_pfn(page)),
- 0);
+ ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
+ page_address(page), requeue_idx, ref);
+ if ((signed short)ret < 0)
+ break;
+
req->gref = ref;
req->id = requeue_idx;
--
1.7.3.4
^ permalink raw reply related
* [PATCH 4/4] fix code indent issue in xen-netfront.
From: Annie Li @ 2012-11-15 7:05 UTC (permalink / raw)
To: xen-devel, netdev, konrad.wilk, Ian.Campbell; +Cc: annie.li
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
Signed-off-by: Annie Li <annie.li@oracle.com>
---
drivers/net/xen-netfront.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 17b81c0..66bb29f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -202,7 +202,7 @@ static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
}
static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
- RING_IDX ri)
+ RING_IDX ri)
{
int i = xennet_rxidx(ri);
grant_ref_t ref = np->grant_rx_ref[i];
@@ -1420,7 +1420,7 @@ static void xennet_uninit(struct net_device *dev)
}
static netdev_features_t xennet_fix_features(struct net_device *dev,
- netdev_features_t features)
+ netdev_features_t features)
{
struct netfront_info *np = netdev_priv(dev);
int val;
@@ -1447,7 +1447,7 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
}
static int xennet_set_features(struct net_device *dev,
- netdev_features_t features)
+ netdev_features_t features)
{
if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
netdev_info(dev, "Reducing MTU because no SG offload");
--
1.7.3.4
^ permalink raw reply related
* RE: Latest 3.6.6 are not compiling due tg3 network driver, hwmon_device_unregister
From: Nithin Sujir @ 2012-11-15 7:22 UTC (permalink / raw)
To: David Rientjes, Paul Gortmaker
Cc: Denys Fedoryshchenko, Michael Chan, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <alpine.DEB.2.00.1211141929510.14414@chino.kir.corp.google.com>
> -----Original Message-----
> From: David Rientjes [mailto:rientjes@google.com]
> Sent: Wednesday, November 14, 2012 7:31 PM
> To: Nithin Sujir; Paul Gortmaker
> Cc: Denys Fedoryshchenko; Michael Chan; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: Latest 3.6.6 are not compiling due tg3 network driver,
> hwmon_device_unregister
>
> On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
>
> > This was fixed by
> >
> > commit de0a41484c47d783dd4d442914815076aa2caac2
> > Author: Paul Gortmaker <paul.gortmaker@windriver.com>
> > Date: Mon Oct 1 11:43:49 2012 -0400
> >
> > tg3: unconditionally select HWMON support when tg3 is enabled.
> >
>
> Would you mind submitting this for stable by following the procedure
> described in Documentation/stable_kernel_rules.txt?
Seems to have been taken into stable already.
http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=commitdiff;h=aecdc33e111b2c447b622e287c6003726daa1426
Nithin.
^ permalink raw reply
* Re: [PATCH v7] Network driver for the Armada 370 and Armada XP ARM Marvell SoCs
From: Thomas Petazzoni @ 2012-11-15 7:33 UTC (permalink / raw)
To: David Miller
Cc: romieu, kernel, netdev, linux-arm-kernel, jason, andrew,
gregory.clement, alior, dima
In-Reply-To: <20121114.215922.1242337385782766449.davem@davemloft.net>
David,
On Wed, 14 Nov 2012 21:59:22 -0500 (EST), David Miller wrote:
> From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> Date: Wed, 14 Nov 2012 15:56:44 +0100
>
> > The previous versions of this patch set have been sent on September
> > 4th (v1), October 11th (v2), October 23rd (v3), October 26th (v4),
> > November 12th (v5), November 13th (v6) and now comes the v7 of the
> > driver. The number of comments over the last versions have been really
> > small, and I would really appreciate if this driver could land into
> > the 3.8 kernel release.
>
> I can't apply this patch to net-next, because for one thing there
> are missing dependencies. For example, the file:
>
> arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
>
> doesn't exist there, therefore patch #6 won't apply.
Yes, this is expected: those patches were provided to allow people to
test the driver on the OpenBlocks platform, which is much more widely
available than the Armada XP evaluation board from Marvell.
Would you mind if we take, with your Ack, the network driver through
the arm-soc tree, so that we can carry the related patches modifying
the Device Tree and so on? If not, then I'll send you a pull request
with just the drivers/net changes, and we'll integrate the ARM-related
changes through the arm-soc tree, in which we will have the necessary
dependencies. I'm fine with any of those solutions.
Thanks,
Thomas
--
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply
* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Pasi Kärkkäinen @ 2012-11-15 7:40 UTC (permalink / raw)
To: Annie Li; +Cc: netdev, xen-devel, Ian.Campbell, konrad.wilk
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
Hello,
On Thu, Nov 15, 2012 at 03:03:07PM +0800, Annie Li wrote:
> This patch implements persistent grants for xen-netfront/netback. This
> mechanism maintains page pools in netback/netfront, these page pools is used to
> save grant pages which are mapped. This way improve performance which is wasted
> when doing grant operations.
>
> Current netback/netfront does map/unmap grant operations frequently when
> transmitting/receiving packets, and grant operations costs much cpu clock. In
> this patch, netfront/netback maps grant pages when needed and then saves them
> into a page pool for future use. All these pages will be unmapped when
> removing/releasing the net device.
>
Do you have performance numbers available already, with and without persistent grants?
> In netfront, two pools are maintained for transmitting and receiving packets.
> When new grant pages are needed, the driver gets grant pages from this pool
> first. If no free grant page exists, it allocates new page, maps it and then
> saves it into the pool. The pool size for transmit/receive is exactly tx/rx
> ring size. The driver uses memcpy(not grantcopy) to copy data grant pages.
> Here, memcpy is copying the whole page size data. I tried to copy len size data
> from offset, but network does not seem work well. I am trying to find the root
> cause now.
>
> In netback, it also maintains two page pools for tx/rx. When netback gets a
> request, it does a search first to find out whether the grant reference of
> this request is already mapped into its page pool. If the grant ref is mapped,
> the address of this mapped page is gotten and memcpy is used to copy data
> between grant pages. However, if the grant ref is not mapped, a new page is
> allocated, mapped with this grant ref, and then saved into page pool for
> future use. Similarly, memcpy replaces grant copy to copy data between grant
> pages. In this implementation, two arrays(gnttab_tx_vif,gnttab_rx_vif) are
> used to save vif pointer for every request because current netback is not
> per-vif based. This would be changed after implementing 1:1 model in netback.
>
Btw, is xen-netback/xen-netfront multiqueue support something you're planning to implement as well?
Multiqueue allows a single vif to scale across multiple vcpus/cores.
Thanks,
-- Pasi
> This patch supports both persistent-grant and non persistent grant. A new
> xenstore key "feature-persistent-grants" is used to represent this feature.
>
> This patch is based on linux3.4-rc3. I hit netperf/netserver failure on
> linux latest version v3.7-rc1, v3.7-rc2 and v3.7-rc4. Not sure whether this
> netperf/netserver failure connects compound page commit in v3.7-rc1, but I did
> hit BUG_ON with debug patch from thread
> http://lists.xen.org/archives/html/xen-devel/2012-10/msg00893.html
>
>
> Annie Li (4):
> xen/netback: implements persistent grant with one page pool.
> xen/netback: Split one page pool into two(tx/rx) page pool.
> Xen/netfront: Implement persistent grant in netfront.
> fix code indent issue in xen-netfront.
>
> drivers/net/xen-netback/common.h | 24 ++-
> drivers/net/xen-netback/interface.c | 26 +++
> drivers/net/xen-netback/netback.c | 215 ++++++++++++++++++--
> drivers/net/xen-netback/xenbus.c | 14 ++-
> drivers/net/xen-netfront.c | 378 +++++++++++++++++++++++++++++------
> 5 files changed, 570 insertions(+), 87 deletions(-)
>
> --
> 1.7.3.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
^ permalink raw reply
* Re: [PATCH v7] Network driver for the Armada 370 and Armada XP ARM Marvell SoCs
From: David Miller @ 2012-11-15 7:42 UTC (permalink / raw)
To: thomas.petazzoni
Cc: romieu, kernel, netdev, linux-arm-kernel, jason, andrew,
gregory.clement, alior, dima
In-Reply-To: <20121115083319.76483256@skate>
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 15 Nov 2012 08:33:19 +0100
> Would you mind if we take, with your Ack, the network driver through
> the arm-soc tree, so that we can carry the related patches modifying
> the Device Tree and so on? If not, then I'll send you a pull request
> with just the drivers/net changes, and we'll integrate the ARM-related
> changes through the arm-soc tree, in which we will have the necessary
> dependencies. I'm fine with any of those solutions.
Sure, no problem, take it via the ARM tree:
Acked-by: David S. Miller <davem@davemloft.net>
^ permalink raw reply
* RE: Latest 3.6.6 are not compiling due tg3 network driver, hwmon_device_unregister
From: David Rientjes @ 2012-11-15 7:51 UTC (permalink / raw)
To: Nithin Sujir
Cc: Paul Gortmaker, Denys Fedoryshchenko, Michael Chan,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <DAAFBCC105999F468732FA900576CB4510069F69@IRVEXCHMB08.corp.ad.broadcom.com>
On Thu, 15 Nov 2012, Nithin Sujir wrote:
> > Would you mind submitting this for stable by following the procedure
> > described in Documentation/stable_kernel_rules.txt?
>
>
> Seems to have been taken into stable already.
> http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=commitdiff;h=aecdc33e111b2c447b622e287c6003726daa1426
>
You're looking at the wrong shortlog (that's 3.7-rc4, not a stable tree),
you'll want to backport this on top of 3.6.6 and send it to
stable@vger.kernel.org following the procedure in
Documentation/stable_kernel_rules.txt.
^ permalink raw reply
* Re: [RFC] tcp: use order-3 pages in tcp_sendmsg()
From: Yan, Zheng @ 2012-11-15 7:52 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1347868144.26523.71.camel@edumazet-glaptop>
On Mon, Sep 17, 2012 at 3:49 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> We currently use per socket page reserve for tcp_sendmsg() operations.
>
> Its done to raise the probability of coalescing small write() in to
> single segments in the skbs.
>
> But it wastes a lot of memory for applications handling a lot of mostly
> idle sockets, since each socket holds one page in sk->sk_sndmsg_page
>
> I did a small experiment to use order-3 pages and it gave me a 10% boost
> of performance, because each TSO skb can use only two frags of 32KB,
> instead of 16 frags of 4KB, so we spend less time in ndo_start_xmit() to
> setup the tx descriptor and TX completion path to unmap the frags and
> free them.
>
> We also spend less time in tcp_sendmsg(), because we call page allocator
> 8x less often.
>
> Now back to the per socket page, what about trying to factorize it ?
>
> Since we can sleep (or/and do a cpu migration) in tcp_sendmsg(), we cant
> really use a percpu page reserve as we do in __netdev_alloc_frag()
>
> We could instead use a per thread reserve, at the cost of adding a test
> in task exit handler.
>
> Recap :
>
> 1) Use a per thread page reserve instead of a per socket one
> 2) Use order-3 pages (or order-0 pages if page size is >= 32768)
>
>
Hi,
This commit makes one of our test cases on a Core 2 machine drop in performance
by about 60%. The test case runs 2048 instances of the netperf 64k stream test at
the same time. Analysis showed that using order-3 pages causes more LLC misses;
most of the new LLC misses happen when the senders copy data to the socket buffer.
If we revert to using single pages, the sender side only triggers a few LLC
misses, and most LLC misses happen on the receiver side. It means most pages
allocated by the senders are cache hot. But when using order-3 pages,
2048 * 32k = 64M, and 64M is much larger than the LLC size. Should we be worried
about this regression, or is our test case too impractical?
Regards
Yan, Zheng
^ permalink raw reply
* Re: Latest 3.6.6 are not compiling due tg3 network driver, hwmon_device_unregister
From: Mathias Krause @ 2012-11-15 7:56 UTC (permalink / raw)
To: Nithin Sujir
Cc: David Rientjes, Paul Gortmaker, Denys Fedoryshchenko,
Michael Chan, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, David S. Miller
In-Reply-To: <DAAFBCC105999F468732FA900576CB4510069F69@IRVEXCHMB08.corp.ad.broadcom.com>
On Thu, Nov 15, 2012 at 07:22:20AM +0000, Nithin Sujir wrote:
> > On Wed, 14 Nov 2012, Nithin Nayak Sujir wrote:
> >
> > > This was fixed by
> > >
> > > commit de0a41484c47d783dd4d442914815076aa2caac2
> > > Author: Paul Gortmaker <paul.gortmaker@windriver.com>
> > > Date: Mon Oct 1 11:43:49 2012 -0400
> > >
> > > tg3: unconditionally select HWMON support when tg3 is enabled.
> > >
> >
> > Would you mind submitting this for stable by following the procedure
> > described in Documentation/stable_kernel_rules.txt?
>
>
> Seems to have been taken into stable already.
> http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=commitdiff;h=aecdc33e111b2c447b622e287c6003726daa1426
No, it's not. That's the merge commit for the v3.7 merge window.
Compare this:
$ git log --oneline --grep='tg3.*HWMON' v3.6..origin/master
aecdc33 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
de0a414 tg3: unconditionally select HWMON support when tg3 is enabled.
to this:
$ git log --oneline --grep='tg3.*HWMON' v3.6..stable/linux-3.6.y
No tg3 fix in the v3.6 stable series. But Dave may have queued it already?
Mathias
^ permalink raw reply
* Re: [PATCH v7] Network driver for the Armada 370 and Armada XP ARM Marvell SoCs
From: Thomas Petazzoni @ 2012-11-15 8:02 UTC (permalink / raw)
To: David Miller
Cc: romieu, kernel, netdev, linux-arm-kernel, jason, andrew,
gregory.clement, alior, dima
In-Reply-To: <20121115.024213.1356436436054345008.davem@davemloft.net>
David,
On Thu, 15 Nov 2012 02:42:13 -0500 (EST), David Miller wrote:
> From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> Date: Thu, 15 Nov 2012 08:33:19 +0100
>
> > Would you mind if we take, with your Ack, the network driver through
> > the arm-soc tree, so that we can carry the related patches modifying
> > the Device Tree and so on? If not, then I'll send you a pull request
> > with just the drivers/net changes, and we'll integrate the ARM-related
> > changes through the arm-soc tree, in which we will have the necessary
> > dependencies. I'm fine with any of those solutions.
>
> Sure, no problem, take it via the ARM tree:
>
> Acked-by: David S. Miller <davem@davemloft.net>
Excellent, thanks!
Thomas
--
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply
* Re: [PATCH] ipv6: fix two typos in a comment in xfrm6_init()
From: Steffen Klassert @ 2012-11-15 8:21 UTC (permalink / raw)
To: David Miller; +Cc: roy.qing.li, netdev
In-Reply-To: <20121114.185528.2167305838484743723.davem@davemloft.net>
On Wed, Nov 14, 2012 at 06:55:28PM -0500, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Tue, 13 Nov 2012 10:00:06 +0100
>
> > Subject: [PATCH] xfrm: Fix the gc threshold value for ipv4
> >
> > The xfrm gc threshold value depends on ip_rt_max_size. This
> > value was set to INT_MAX with the routing cache removal patch,
> > so we start doing garbage collecting when we have INT_MAX/2
> > IPsec routes cached. Fix this by going back to the static
> > threshold of 1024 routes.
> >
> > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
>
> This looks fine to me.
I've just applied this to the ipsec tree.
I'll do the same for the ipv6 side. ipv6 does not handle the maximum
number of routes dynamically, so no need to try to handle the IPsec
gc threshold dynamically.
^ permalink raw reply
* [PATCH] sctp: fix /proc/net/sctp/ memory leak
From: Tommi Rantala @ 2012-11-15 8:23 UTC (permalink / raw)
To: netdev
Cc: Neil Horman, Vlad Yasevich, Sridhar Samudrala, David S. Miller,
linux-sctp, Dave Jones, Tommi Rantala
We are using single_open_net() and seq_open_net() in the opener
functions, so avoid leaking memory by using single_release_net() and
seq_release_net() as the struct file_operations release functions.
Discovered with Trinity (the syscall fuzzer).
Signed-off-by: Tommi Rantala <tt.rantala@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
---
net/sctp/proc.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index c3bea26..9966e7b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -102,7 +102,7 @@ static const struct file_operations sctp_snmp_seq_fops = {
.open = sctp_snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
/* Set up the proc fs entry for 'snmp' object. */
@@ -251,7 +251,7 @@ static const struct file_operations sctp_eps_seq_fops = {
.open = sctp_eps_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
/* Set up the proc fs entry for 'eps' object. */
@@ -372,7 +372,7 @@ static const struct file_operations sctp_assocs_seq_fops = {
.open = sctp_assocs_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
/* Set up the proc fs entry for 'assocs' object. */
@@ -517,7 +517,7 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.open = sctp_remaddr_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
int __net_init sctp_remaddr_proc_init(struct net *net)
--
1.7.9.5
^ permalink raw reply related
* Re: [Xen-devel] [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: ANNIE LI @ 2012-11-15 8:38 UTC (permalink / raw)
To: Pasi Kärkkäinen; +Cc: xen-devel, netdev, konrad.wilk, Ian.Campbell
In-Reply-To: <20121115074057.GO8912@reaktio.net>
On 2012-11-15 15:40, Pasi Kärkkäinen wrote:
> Hello,
>
> On Thu, Nov 15, 2012 at 03:03:07PM +0800, Annie Li wrote:
>> This patch implements persistent grants for xen-netfront/netback. This
>> mechanism maintains page pools in netback/netfront, these page pools is used to
>> save grant pages which are mapped. This way improve performance which is wasted
>> when doing grant operations.
>>
>> Current netback/netfront does map/unmap grant operations frequently when
>> transmitting/receiving packets, and grant operations costs much cpu clock. In
>> this patch, netfront/netback maps grant pages when needed and then saves them
>> into a page pool for future use. All these pages will be unmapped when
>> removing/releasing the net device.
>>
> Do you have performance numbers available already? with/without persistent grants?
I have some simple netperf/netserver test result with/without persistent
grants,
Following is result of with persistent grant patch,
Guests, Sum, Avg, Min, Max
1, 15106.4, 15106.4, 15106.36, 15106.36
2, 13052.7, 6526.34, 6261.81, 6790.86
3, 12675.1, 6337.53, 6220.24, 6454.83
4, 13194, 6596.98, 6274.70, 6919.25
Following are result of without persistent patch
Guests, Sum, Avg, Min, Max
1, 10864.1, 10864.1, 10864.10, 10864.10
2, 10898.5, 5449.24, 4862.08, 6036.40
3, 10734.5, 5367.26, 5261.43, 5473.08
4, 10924, 5461.99, 5314.84, 5609.14
>> In netfront, two pools are maintained for transmitting and receiving packets.
>> When new grant pages are needed, the driver gets grant pages from this pool
>> first. If no free grant page exists, it allocates new page, maps it and then
>> saves it into the pool. The pool size for transmit/receive is exactly tx/rx
>> ring size. The driver uses memcpy(not grantcopy) to copy data grant pages.
>> Here, memcpy is copying the whole page size data. I tried to copy len size data
>> from offset, but network does not seem work well. I am trying to find the root
>> cause now.
>>
>> In netback, it also maintains two page pools for tx/rx. When netback gets a
>> request, it does a search first to find out whether the grant reference of
>> this request is already mapped into its page pool. If the grant ref is mapped,
>> the address of this mapped page is gotten and memcpy is used to copy data
>> between grant pages. However, if the grant ref is not mapped, a new page is
>> allocated, mapped with this grant ref, and then saved into page pool for
>> future use. Similarly, memcpy replaces grant copy to copy data between grant
>> pages. In this implementation, two arrays(gnttab_tx_vif,gnttab_rx_vif) are
>> used to save vif pointer for every request because current netback is not
>> per-vif based. This would be changed after implementing 1:1 model in netback.
>>
> Btw is xen-netback/xen-netfront multiqueue support something you're planning to implement aswell?
Currently, some patches exist for implementing 1:1 model in netback, but
this should be different from what you mentioned, and they are not ready
for upstream.
These patches make netback thread per VIF, and mainly implement some
concepts from netchannel2, such as multipage rings, separate tx and rx
rings, separate tx and rx event channels, etc.
Thanks
Annie
> multiqueue allows single vif scaling to multiple vcpus/cores.
>
>
> Thanks,
>
> -- Pasi
>
>
>> This patch supports both persistent-grant and non persistent grant. A new
>> xenstore key "feature-persistent-grants" is used to represent this feature.
>>
>> This patch is based on linux3.4-rc3. I hit netperf/netserver failure on
>> linux latest version v3.7-rc1, v3.7-rc2 and v3.7-rc4. Not sure whether this
>> netperf/netserver failure connects compound page commit in v3.7-rc1, but I did
>> hit BUG_ON with debug patch from thread
>> http://lists.xen.org/archives/html/xen-devel/2012-10/msg00893.html
>>
>>
>> Annie Li (4):
>> xen/netback: implements persistent grant with one page pool.
>> xen/netback: Split one page pool into two(tx/rx) page pool.
>> Xen/netfront: Implement persistent grant in netfront.
>> fix code indent issue in xen-netfront.
>>
>> drivers/net/xen-netback/common.h | 24 ++-
>> drivers/net/xen-netback/interface.c | 26 +++
>> drivers/net/xen-netback/netback.c | 215 ++++++++++++++++++--
>> drivers/net/xen-netback/xenbus.c | 14 ++-
>> drivers/net/xen-netfront.c | 378 +++++++++++++++++++++++++++++------
>> 5 files changed, 570 insertions(+), 87 deletions(-)
>>
>> --
>> 1.7.3.4
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
^ permalink raw reply
* [PATCH net-next] tunnel: use the correct endian for some fields
From: Cong Wang @ 2012-11-15 8:51 UTC (permalink / raw)
To: netdev; +Cc: Nicolas Dichtel, David S. Miller, Cong Wang
Fengguang reported:
net/ipv6/ip6_tunnel.c:1571:33: sparse: incorrect type in assignment (different base types)
net/ipv6/ip6_tunnel.c:1571:33: expected restricted __be32 [usertype] flowinfo
net/ipv6/ip6_tunnel.c:1571:33: got unsigned int
for these fields, we need to use the correct endian wrappers.
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 64686e1..54477d8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -864,7 +864,7 @@ static void ipip_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]);
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
if (data[IFLA_IPTUN_REMOTE])
parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ab4d056..bf3a549 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1568,7 +1568,7 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
if (data[IFLA_IPTUN_FLOWINFO])
- parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]);
+ parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
if (data[IFLA_IPTUN_FLAGS])
parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7bd2a06..e137750 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1240,7 +1240,7 @@ static void ipip6_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]);
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
if (data[IFLA_IPTUN_REMOTE])
parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]);
@@ -1337,7 +1337,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
- nla_put_u16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
goto nla_put_failure;
return 0;
^ permalink raw reply related
* Re: [Xen-devel] [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Ian Campbell @ 2012-11-15 8:51 UTC (permalink / raw)
To: ANNIE LI
Cc: Pasi Kärkkäinen, xen-devel@lists.xensource.com,
netdev@vger.kernel.org, konrad.wilk@oracle.com
In-Reply-To: <50A4AA06.8080900@oracle.com>
On Thu, 2012-11-15 at 08:38 +0000, ANNIE LI wrote:
>
> On 2012-11-15 15:40, Pasi Kärkkäinen wrote:
> > Hello,
> >
> > On Thu, Nov 15, 2012 at 03:03:07PM +0800, Annie Li wrote:
> >> This patch implements persistent grants for xen-netfront/netback. This
> >> mechanism maintains page pools in netback/netfront, these page pools is used to
> >> save grant pages which are mapped. This way improve performance which is wasted
> >> when doing grant operations.
> >>
> >> Current netback/netfront does map/unmap grant operations frequently when
> >> transmitting/receiving packets, and grant operations costs much cpu clock. In
> >> this patch, netfront/netback maps grant pages when needed and then saves them
> >> into a page pool for future use. All these pages will be unmapped when
> >> removing/releasing the net device.
> >>
> > Do you have performance numbers available already? with/without persistent grants?
> I have some simple netperf/netserver test result with/without persistent
> grants,
>
> Following is result of with persistent grant patch,
> Guests, Sum, Avg, Min, Max
> 1, 15106.4, 15106.4, 15106.36, 15106.36
> 2, 13052.7, 6526.34, 6261.81, 6790.86
> 3, 12675.1, 6337.53, 6220.24, 6454.83
> 4, 13194, 6596.98, 6274.70, 6919.25
Are these pairs of guests or individual ones?
I think the really interesting cases are when you get up to larger
numbers of guests, aren't they? ISTR that for blkio things got most
interesting WRT persistent grants at the dozens of guests stage. Do you
have any numbers for those?
Have you run any tests other than netperf?
Do you have numbers for a persistent capable backend with a
non-persistent frontend and vice versa?
>
>
> Following are result of without persistent patch
>
> Guests, Sum, Avg, Min, Max
> 1, 10864.1, 10864.1, 10864.10, 10864.10
> 2, 10898.5, 5449.24, 4862.08, 6036.40
> 3, 10734.5, 5367.26, 5261.43, 5473.08
> 4, 10924, 5461.99, 5314.84, 5609.14
^ permalink raw reply
* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Ian Campbell @ 2012-11-15 8:53 UTC (permalink / raw)
To: Annie Li
Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
konrad.wilk@oracle.com
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
On Thu, 2012-11-15 at 07:03 +0000, Annie Li wrote:
>
> This patch is based on linux3.4-rc3. I hit netperf/netserver failure
> on linux latest version v3.7-rc1, v3.7-rc2 and v3.7-rc4. Not sure
> whether this netperf/netserver failure connects compound page commit in
> v3.7-rc1, but I did hit BUG_ON with debug patch from thread
> http://lists.xen.org/archives/html/xen-devel/2012-10/msg00893.html
Do you think you could cook up a netfront fix similar in principle to
the 6a8ed462f16b8455eec5ae00eb6014159a6721f0 fix for netback?
Ian.
^ permalink raw reply
* [PATCH net-next 1/3] ipip: fix sparse warnings in ipip_netlink_parms()
From: Nicolas Dichtel @ 2012-11-15 8:53 UTC (permalink / raw)
To: netdev; +Cc: davem, Nicolas Dichtel
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
net/ipv4/ipip.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 64686e1..c26c171 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -864,10 +864,10 @@ static void ipip_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]);
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
if (data[IFLA_IPTUN_REMOTE])
- parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]);
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
if (data[IFLA_IPTUN_TTL]) {
parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
--
1.7.12
^ permalink raw reply related
* [PATCH net-next 2/3] sit: fix sparse warnings
From: Nicolas Dichtel @ 2012-11-15 8:53 UTC (permalink / raw)
To: netdev; +Cc: davem, Nicolas Dichtel
In-Reply-To: <1352969616-30476-1-git-send-email-nicolas.dichtel@6wind.com>
Note that i_flags is defined as be16, but is used here like an u16. The test
in ipip6_tunnel_create() was just moved into this function.
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
net/ipv6/sit.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7bd2a06..ca6c2c8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -228,7 +228,7 @@ static int ipip6_tunnel_create(struct net_device *dev)
goto out;
ipip6_tunnel_clone_6rd(dev, sitn);
- if (t->parms.i_flags & SIT_ISATAP)
+ if ((__force u16)t->parms.i_flags & SIT_ISATAP)
dev->priv_flags |= IFF_ISATAP;
err = register_netdevice(dev);
@@ -1240,10 +1240,10 @@ static void ipip6_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]);
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
if (data[IFLA_IPTUN_REMOTE])
- parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]);
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
if (data[IFLA_IPTUN_TTL]) {
parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
@@ -1258,7 +1258,7 @@ static void ipip6_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF);
if (data[IFLA_IPTUN_FLAGS])
- parms->i_flags = nla_get_u16(data[IFLA_IPTUN_FLAGS]);
+ parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
}
static int ipip6_newlink(struct net *src_net, struct net_device *dev,
@@ -1337,7 +1337,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
- nla_put_u16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
goto nla_put_failure;
return 0;
--
1.7.12
^ permalink raw reply related
* [PATCH net-next 3/3] ip6tnl: fix sparse warnings in ip6_tnl_netlink_parms()
From: Nicolas Dichtel @ 2012-11-15 8:53 UTC (permalink / raw)
To: netdev; +Cc: davem, Nicolas Dichtel
In-Reply-To: <1352969616-30476-1-git-send-email-nicolas.dichtel@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
net/ipv6/ip6_tunnel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ab4d056..bf3a549 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1568,7 +1568,7 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
if (data[IFLA_IPTUN_FLOWINFO])
- parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]);
+ parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
if (data[IFLA_IPTUN_FLAGS])
parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
--
1.7.12
^ permalink raw reply related
* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Ian Campbell @ 2012-11-15 8:56 UTC (permalink / raw)
To: Annie Li
Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
konrad.wilk@oracle.com
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>
On Thu, 2012-11-15 at 07:03 +0000, Annie Li wrote:
>
> This patch implements persistent grants for xen-netfront/netback. This
> mechanism maintains page pools in netback/netfront, these page pools
> is used to save grant pages which are mapped. This way improve
> performance which is wasted when doing grant operations.
Please can you send a patch against xen-unstable.hg to document this new
protocol variant in xen/include/public/io/netif.h. This header is a bit
under-documented but let's not let it fall further behind (if you want to
go further and document the existing features, in the style of the
current blkif.h, then that would be awesome!).
You may also want to provide a similar patch to Linux's copy which is in
linux/include/xen/interface/io/netif.h
Ian.
^ permalink raw reply
* Re: [PATCH net-next] tunnel: use the correct endian for some fields
From: Nicolas Dichtel @ 2012-11-15 8:57 UTC (permalink / raw)
To: Cong Wang; +Cc: netdev, David S. Miller
In-Reply-To: <1352969482-29836-1-git-send-email-amwang@redhat.com>
Le 15/11/2012 09:51, Cong Wang a écrit :
> Fengguang reported:
>
> net/ipv6/ip6_tunnel.c:1571:33: sparse: incorrect type in assignment (different base types)
> net/ipv6/ip6_tunnel.c:1571:33: expected restricted __be32 [usertype] flowinfo
> net/ipv6/ip6_tunnel.c:1571:33: got unsigned int
>
> for these fields, we need to use the correct endian wrapers.
>
> Reported-by: Fengguang Wu <fengguang.wu@intel.com>
> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <amwang@redhat.com>
Just one minute before my patch.
Your patch does not fix all warnings (i_flags & SIT_ISATAP in ip6_tunnel.c).
^ permalink raw reply
* Re: [Xen-devel] [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: ANNIE LI @ 2012-11-15 9:02 UTC (permalink / raw)
To: Ian Campbell
Cc: Pasi Kärkkäinen, xen-devel@lists.xensource.com,
netdev@vger.kernel.org, konrad.wilk@oracle.com
In-Reply-To: <1352969519.3499.30.camel@zakaz.uk.xensource.com>
On 2012-11-15 16:51, Ian Campbell wrote:
> On Thu, 2012-11-15 at 08:38 +0000, ANNIE LI wrote:
>> On 2012-11-15 15:40, Pasi Kärkkäinen wrote:
>>> Hello,
>>>
>>> On Thu, Nov 15, 2012 at 03:03:07PM +0800, Annie Li wrote:
>>>> This patch implements persistent grants for xen-netfront/netback. This
>>>> mechanism maintains page pools in netback/netfront, these page pools is used to
>>>> save grant pages which are mapped. This way improve performance which is wasted
>>>> when doing grant operations.
>>>>
>>>> Current netback/netfront does map/unmap grant operations frequently when
>>>> transmitting/receiving packets, and grant operations costs much cpu clock. In
>>>> this patch, netfront/netback maps grant pages when needed and then saves them
>>>> into a page pool for future use. All these pages will be unmapped when
>>>> removing/releasing the net device.
>>>>
>>> Do you have performance numbers available already? with/without persistent grants?
>> I have some simple netperf/netserver test result with/without persistent
>> grants,
>>
>> Following is result of with persistent grant patch,
>> Guests, Sum, Avg, Min, Max
>> 1, 15106.4, 15106.4, 15106.36, 15106.36
>> 2, 13052.7, 6526.34, 6261.81, 6790.86
>> 3, 12675.1, 6337.53, 6220.24, 6454.83
>> 4, 13194, 6596.98, 6274.70, 6919.25
> Are these pairs of guests or individual ones?
They are pairs of guests.
>
> I think the really interesting cases are when you get up to larger
> numbers of guests, aren't they?
Right.
> ISTR that for blkio things got most
> interesting WRT persistent grants at the dozens of guests stage. Do you
> have any numbers for those?
No, I will run more tests with more guests.
>
> Have you run any tests other than netperf?
No, I didn't.
>
> Do you have numbers for a a persistent capable backend with a
> non-persistent frontend and vice versa?
I did it, but the test only runs with 4 guests too, will test with more
guests.
Thanks
Annie
>
>
>>
>> Following are result of without persistent patch
>>
>> Guests, Sum, Avg, Min, Max
>> 1, 10864.1, 10864.1, 10864.10, 10864.10
>> 2, 10898.5, 5449.24, 4862.08, 6036.40
>> 3, 10734.5, 5367.26, 5261.43, 5473.08
>> 4, 10924, 5461.99, 5314.84, 5609.14
>
^ permalink raw reply
* Re: [PATCH net-next] tunnel: use the correct endian for some fields
From: Cong Wang @ 2012-11-15 9:04 UTC (permalink / raw)
To: nicolas.dichtel; +Cc: netdev, David S. Miller
In-Reply-To: <50A4AE82.3040602@6wind.com>
On Thu, 2012-11-15 at 09:57 +0100, Nicolas Dichtel wrote:
> Le 15/11/2012 09:51, Cong Wang a écrit :
> > Fengguang reported:
> >
> > net/ipv6/ip6_tunnel.c:1571:33: sparse: incorrect type in assignment (different base types)
> > net/ipv6/ip6_tunnel.c:1571:33: expected restricted __be32 [usertype] flowinfo
> > net/ipv6/ip6_tunnel.c:1571:33: got unsigned int
> >
> > for these fields, we need to use the correct endian wrapers.
> >
> > Reported-by: Fengguang Wu <fengguang.wu@intel.com>
> > Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> > Cc: David S. Miller <davem@davemloft.net>
> > Signed-off-by: Cong Wang <amwang@redhat.com>
> Just one minute before my patch.
> Your patch does not fix all warnings (i_flags & SIT_ISATAP in ip6_tunnel.c).
Yeah, then ignore this patch. :)
^ permalink raw reply
* Re: [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
From: Ian Campbell @ 2012-11-15 9:10 UTC (permalink / raw)
To: Annie Li
Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
konrad.wilk@oracle.com
In-Reply-To: <1352963066-570-1-git-send-email-annie.li@oracle.com>
On Thu, 2012-11-15 at 07:04 +0000, Annie Li wrote:
> This patch implements persistent grant in netback driver. Tx and rx
> share the same page pool, this pool will be split into two parts
> in next patch.
>
> Signed-off-by: Annie Li <annie.li@oracle.com>
> ---
> drivers/net/xen-netback/common.h | 18 +++-
> drivers/net/xen-netback/interface.c | 22 ++++
> drivers/net/xen-netback/netback.c | 212 +++++++++++++++++++++++++++++++----
> drivers/net/xen-netback/xenbus.c | 14 ++-
> 4 files changed, 239 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index 94b79c3..a85cac6 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -45,8 +45,19 @@
> #include <xen/grant_table.h>
> #include <xen/xenbus.h>
>
> +#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> +#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
BLOCK?
> + (XEN_NETIF_TX_RING_SIZE + XEN_NETIF_RX_RING_SIZE)
> +
> struct xen_netbk;
>
> +struct persistent_entry {
> + grant_ref_t forgranted;
> + struct page *fpage;
> + struct gnttab_map_grant_ref map;
> +};
Isn't this duplicating a bunch of infrastructure which is also in
blkback? Can we put it into some common helpers please?
> +
> struct xenvif {
> /* Unique identifier for this interface. */
> domid_t domid;
> @@ -75,6 +86,7 @@ struct xenvif {
>
> /* Internal feature information. */
> u8 can_queue:1; /* can queue packets for receiver? */
> + u8 persistent_grant:1;
>
> /*
> * Allow xenvif_start_xmit() to peek ahead in the rx request
> @@ -98,6 +110,9 @@ struct xenvif {
> struct net_device *dev;
>
> wait_queue_head_t waiting_to_free;
> +
> + struct persistent_entry *persistent_gnt[MAXIMUM_OUTSTANDING_BLOCK_REQS];
What is the per-vif memory overhead of this array?
> +static struct persistent_entry*
> +get_per_gnt(struct persistent_entry **pers_entry,
> + unsigned int count, grant_ref_t gref)
> +{
> + int i;
> +
> + for (i = 0; i < count; i++)
> + if (gref == pers_entry[i]->forgranted)
> + return pers_entry[i];
Isn't this linear scan rather expensive? I think Roger implemented some
sort of hash lookup for blkback which I think is required here too (and
should be easy if you make that code common).
> +
> + return NULL;
> +}
> +
> @@ -1338,7 +1497,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> gop->source.domid = vif->domid;
> gop->source.offset = txreq.offset;
>
> - gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + if (!vif->persistent_grant)
> + gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + else
> + gop->dest.u.gmfn = (unsigned long)page_address(page);
page_address doesn't return any sort of frame number, does it? This is
rather confusing...
> @@ -453,7 +460,12 @@ static int connect_rings(struct backend_info *be)
> val = 0;
> vif->csum = !val;
>
> - /* Map the shared frame, irq etc. */
> + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-persistent-grants",
> + "%u", &val) < 0)
> + val = 0;
> + vif->persistent_grant = !!val;
> +
> +/* Map the shared frame, irq etc. */
Please run the patches through checkpatch.pl
> err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
> if (err) {
> xenbus_dev_fatal(dev, err,
> --
> 1.7.3.4
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox