netdev.vger.kernel.org archive mirror
* Re: cxgb3: Replace LRO with GRO
@ 2009-01-15 21:14 Divy Le Ray
  2009-01-15 23:58 ` Herbert Xu
  2009-01-20  1:03 ` David Miller
  0 siblings, 2 replies; 23+ messages in thread
From: Divy Le Ray @ 2009-01-15 21:14 UTC (permalink / raw)
  To: herbert, davem; +Cc: netdev, swise


Hi Herbert,

Your patch looks fine. Early testing shows the same performance as
seen with LRO.

I'm resubmitting the original patch with two changes:
- the cxgb3_set_flags() removal did not apply cleanly;
  commit 47fd23fe8efeea3af4593a8424419df48724eb25 had updated the routine.
- the NETIF_F_LRO feature flag is replaced with NETIF_F_GRO.
 
cxgb3: Replace LRO with GRO

This patch makes cxgb3 invoke the GRO hooks instead of LRO.  As
GRO has a compatible external interface to LRO, this is a very
straightforward replacement.

I've kept the ioctl controls for per-queue LRO switches.  However,
we should not encourage anyone to use these.

Because of that, I've also kept the skb construction code in
cxgb3.  Hopefully we can phase out those per-queue switches
and then kill this too.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Divy Le Ray <divy@chelsio.com>

---

 drivers/net/Kconfig            |    1 
 drivers/net/cxgb3/adapter.h    |   13 +---
 drivers/net/cxgb3/cxgb3_main.c |   42 +-------------
 drivers/net/cxgb3/sge.c        |  118 +++++-----------------------------------
 4 files changed, 22 insertions(+), 152 deletions(-)
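
In short, the conversion substitutes the GRO receive entry points for their
LRO counterparts in the RX path; this is just a condensed view of the hunks
below (qs is the driver's sge_qset), not additional code:

	/* skb path: was lro_receive_skb() */
	napi_gro_receive(&qs->napi, skb);

	/* VLAN-accelerated skb path: was lro_vlan_hwaccel_receive_skb() */
	vlan_gro_receive(&qs->napi, grp, ntohs(p->vlan), skb);

	/* page-fragment path: was lro_receive_frags() */
	napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);

The explicit lro_flush_all() also goes away, since the GRO layer flushes any
held packets when the NAPI poll completes.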

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9fe8cb7..9212289 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2408,7 +2408,6 @@ config CHELSIO_T3
 	tristate "Chelsio Communications T3 10Gb Ethernet support"
 	depends on CHELSIO_T3_DEPENDS
 	select FW_LOADER
-	select INET_LRO
 	help
 	  This driver supports Chelsio T3-based gigabit and 10Gb Ethernet
 	  adapters.
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index a89d8cc..f9d39c6 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -42,7 +42,6 @@
 #include <linux/cache.h>
 #include <linux/mutex.h>
 #include <linux/bitops.h>
-#include <linux/inet_lro.h>
 #include "t3cdev.h"
 #include <asm/io.h>
 
@@ -178,15 +177,11 @@ enum {				/* per port SGE statistics */
 	SGE_PSTAT_TX_CSUM,	/* # of TX checksum offloads */
 	SGE_PSTAT_VLANEX,	/* # of VLAN tag extractions */
 	SGE_PSTAT_VLANINS,	/* # of VLAN tag insertions */
-	SGE_PSTAT_LRO_AGGR,	/* # of page chunks added to LRO sessions */
-	SGE_PSTAT_LRO_FLUSHED,	/* # of flushed LRO sessions */
-	SGE_PSTAT_LRO_NO_DESC,	/* # of overflown LRO sessions */
 
 	SGE_PSTAT_MAX		/* must be last */
 };
 
-#define T3_MAX_LRO_SES 8
-#define T3_MAX_LRO_MAX_PKTS 64
+struct napi_gro_fraginfo;
 
 struct sge_qset {		/* an SGE queue set */
 	struct adapter *adap;
@@ -194,12 +189,8 @@ struct sge_qset {		/* an SGE queue set */
 	struct sge_rspq rspq;
 	struct sge_fl fl[SGE_RXQ_PER_SET];
 	struct sge_txq txq[SGE_TXQ_PER_SET];
-	struct net_lro_mgr lro_mgr;
-	struct net_lro_desc lro_desc[T3_MAX_LRO_SES];
-	struct skb_frag_struct *lro_frag_tbl;
-	int lro_nfrags;
+	struct napi_gro_fraginfo lro_frag_tbl;
 	int lro_enabled;
-	int lro_frag_len;
 	void *lro_va;
 	struct net_device *netdev;
 	struct netdev_queue *tx_q;	/* associated netdev TX queue */
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 0089746..f59b9e3 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -508,19 +508,9 @@ static void set_qset_lro(struct net_device *dev, int qset_idx, int val)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
-	int i, lro_on = 1;
 
 	adapter->params.sge.qset[qset_idx].lro = !!val;
 	adapter->sge.qs[qset_idx].lro_enabled = !!val;
-
-	/* let ethtool report LRO on only if all queues are LRO enabled */
-	for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; ++i)
-		lro_on &= adapter->params.sge.qset[i].lro;
-
-	if (lro_on)
-		dev->features |= NETIF_F_LRO;
-	else
-		dev->features &= ~NETIF_F_LRO;
 }
 
 /**
@@ -1433,9 +1423,9 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_VLANINS);
 	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM);
 	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD);
-	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_AGGR);
-	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_FLUSHED);
-	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_NO_DESC);
+	*data++ = 0;
+	*data++ = 0;
+	*data++ = 0;
 	*data++ = s->rx_cong_drops;
 
 	*data++ = s->num_toggled;
@@ -1826,28 +1816,6 @@ static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	memset(&wol->sopass, 0, sizeof(wol->sopass));
 }
 
-static int cxgb3_set_flags(struct net_device *dev, u32 data)
-{
-	struct port_info *pi = netdev_priv(dev);
-	int i;
-
-	if (data & ETH_FLAG_LRO) {
-		if (!(pi->rx_offload & T3_RX_CSUM))
-			return -EINVAL;
-
-		pi->rx_offload |= T3_LRO;
-		for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
-			set_qset_lro(dev, i, 1);
-
-	} else {
-		pi->rx_offload &= ~T3_LRO;
-		for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
-			set_qset_lro(dev, i, 0);
-	}
-
-	return 0;
-}
-
 static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_settings = get_settings,
 	.set_settings = set_settings,
@@ -1877,8 +1845,6 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_regs = get_regs,
 	.get_wol = get_wol,
 	.set_tso = ethtool_op_set_tso,
-	.get_flags = ethtool_op_get_flags,
-	.set_flags = cxgb3_set_flags,
 };
 
 static int in_range(int val, int lo, int hi)
@@ -2960,7 +2926,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 		netdev->mem_end = mmio_start + mmio_len - 1;
 		netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 		netdev->features |= NETIF_F_LLTX;
-		netdev->features |= NETIF_F_LRO;
+		netdev->features |= NETIF_F_GRO;
 		if (pci_using_dac)
 			netdev->features |= NETIF_F_HIGHDMA;
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 14f9fb3..8299fb5 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -585,8 +585,7 @@ static void t3_reset_qset(struct sge_qset *q)
 	memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 	q->txq_stopped = 0;
 	q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
-	kfree(q->lro_frag_tbl);
-	q->lro_nfrags = q->lro_frag_len = 0;
+	q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
 }
 
 
@@ -1945,10 +1944,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 		qs->port_stats[SGE_PSTAT_VLANEX]++;
 		if (likely(grp))
 			if (lro)
-				lro_vlan_hwaccel_receive_skb(&qs->lro_mgr, skb,
-							     grp,
-							     ntohs(p->vlan),
-							     p);
+				vlan_gro_receive(&qs->napi, grp,
+						 ntohs(p->vlan), skb);
 			else {
 				if (unlikely(pi->iscsi_ipv4addr &&
 				    is_arp(skb))) {
@@ -1965,7 +1962,7 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 			dev_kfree_skb_any(skb);
 	} else if (rq->polling) {
 		if (lro)
-			lro_receive_skb(&qs->lro_mgr, skb, p);
+			napi_gro_receive(&qs->napi, skb);
 		else {
 			if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
 				cxgb3_arp_process(adap, skb);
@@ -1981,59 +1978,6 @@ static inline int is_eth_tcp(u32 rss)
 }
 
 /**
- *	lro_frame_ok - check if an ingress packet is eligible for LRO
- *	@p: the CPL header of the packet
- *
- *	Returns true if a received packet is eligible for LRO.
- *	The following conditions must be true:
- *	- packet is TCP/IP Ethernet II (checked elsewhere)
- *	- not an IP fragment
- *	- no IP options
- *	- TCP/IP checksums are correct
- *	- the packet is for this host
- */
-static inline int lro_frame_ok(const struct cpl_rx_pkt *p)
-{
-	const struct ethhdr *eh = (struct ethhdr *)(p + 1);
-	const struct iphdr *ih = (struct iphdr *)(eh + 1);
-
-	return (*((u8 *)p + 1) & 0x90) == 0x10 && p->csum == htons(0xffff) &&
-		eh->h_proto == htons(ETH_P_IP) && ih->ihl == (sizeof(*ih) >> 2);
-}
-
-static int t3_get_lro_header(void **eh,  void **iph, void **tcph,
-			     u64 *hdr_flags, void *priv)
-{
-	const struct cpl_rx_pkt *cpl = priv;
-
-	if (!lro_frame_ok(cpl))
-		return -1;
-
-	*eh = (struct ethhdr *)(cpl + 1);
-	*iph = (struct iphdr *)((struct ethhdr *)*eh + 1);
-	*tcph = (struct tcphdr *)((struct iphdr *)*iph + 1);
-
-	*hdr_flags = LRO_IPV4 | LRO_TCP;
-	return 0;
-}
-
-static int t3_get_skb_header(struct sk_buff *skb,
-			      void **iph, void **tcph, u64 *hdr_flags,
-			      void *priv)
-{
-	void *eh;
-
-	return t3_get_lro_header(&eh, iph, tcph, hdr_flags, priv);
-}
-
-static int t3_get_frag_header(struct skb_frag_struct *frag, void **eh,
-			      void **iph, void **tcph, u64 *hdr_flags,
-			      void *priv)
-{
-	return t3_get_lro_header(eh, iph, tcph, hdr_flags, priv);
-}
-
-/**
  *	lro_add_page - add a page chunk to an LRO session
  *	@adap: the adapter
  *	@qs: the associated queue set
@@ -2049,8 +1993,9 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 {
 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 	struct cpl_rx_pkt *cpl;
-	struct skb_frag_struct *rx_frag = qs->lro_frag_tbl;
-	int nr_frags = qs->lro_nfrags, frag_len = qs->lro_frag_len;
+	struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
+	int nr_frags = qs->lro_frag_tbl.nr_frags;
+	int frag_len = qs->lro_frag_tbl.len;
 	int offset = 0;
 
 	if (!nr_frags) {
@@ -2069,13 +2014,13 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 	rx_frag->page_offset = sd->pg_chunk.offset + offset;
 	rx_frag->size = len;
 	frag_len += len;
-	qs->lro_nfrags++;
-	qs->lro_frag_len = frag_len;
+	qs->lro_frag_tbl.nr_frags++;
+	qs->lro_frag_tbl.len = frag_len;
 
 	if (!complete)
 		return;
 
-	qs->lro_nfrags = qs->lro_frag_len = 0;
+	qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
 	cpl = qs->lro_va;
 
 	if (unlikely(cpl->vlan_valid)) {
@@ -2084,35 +2029,15 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 		struct vlan_group *grp = pi->vlan_grp;
 
 		if (likely(grp != NULL)) {
-			lro_vlan_hwaccel_receive_frags(&qs->lro_mgr,
-						       qs->lro_frag_tbl,
-						       frag_len, frag_len,
-						       grp, ntohs(cpl->vlan),
-						       cpl, 0);
-			return;
+			vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
+				       &qs->lro_frag_tbl);
+			goto out;
 		}
 	}
-	lro_receive_frags(&qs->lro_mgr, qs->lro_frag_tbl,
-			  frag_len, frag_len, cpl, 0);
-}
+	napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
 
-/**
- *	init_lro_mgr - initialize a LRO manager object
- *	@lro_mgr: the LRO manager object
- */
-static void init_lro_mgr(struct sge_qset *qs, struct net_lro_mgr *lro_mgr)
-{
-	lro_mgr->dev = qs->netdev;
-	lro_mgr->features = LRO_F_NAPI;
-	lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;
-	lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
-	lro_mgr->max_desc = T3_MAX_LRO_SES;
-	lro_mgr->lro_arr = qs->lro_desc;
-	lro_mgr->get_frag_header = t3_get_frag_header;
-	lro_mgr->get_skb_header = t3_get_skb_header;
-	lro_mgr->max_aggr = T3_MAX_LRO_MAX_PKTS;
-	if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
-		lro_mgr->max_aggr = MAX_SKB_FRAGS;
+out:
+	qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
 }
 
 /**
@@ -2356,10 +2281,6 @@ next_fl:
 	}
 
 	deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
-	lro_flush_all(&qs->lro_mgr);
-	qs->port_stats[SGE_PSTAT_LRO_AGGR] = qs->lro_mgr.stats.aggregated;
-	qs->port_stats[SGE_PSTAT_LRO_FLUSHED] = qs->lro_mgr.stats.flushed;
-	qs->port_stats[SGE_PSTAT_LRO_NO_DESC] = qs->lro_mgr.stats.no_desc;
 
 	if (sleeping)
 		check_ring_db(adap, qs, sleeping);
@@ -2906,7 +2827,6 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 {
 	int i, avail, ret = -ENOMEM;
 	struct sge_qset *q = &adapter->sge.qs[id];
-	struct net_lro_mgr *lro_mgr = &q->lro_mgr;
 
 	init_qset_cntxt(q, id);
 	setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
@@ -2986,10 +2906,6 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	q->fl[0].order = FL0_PG_ORDER;
 	q->fl[1].order = FL1_PG_ORDER;
 
-	q->lro_frag_tbl = kcalloc(MAX_FRAME_SIZE / FL1_PG_CHUNK_SIZE + 1,
-				  sizeof(struct skb_frag_struct),
-				  GFP_KERNEL);
-	q->lro_nfrags = q->lro_frag_len = 0;
 	spin_lock_irq(&adapter->sge.reg_lock);
 
 	/* FL threshold comparison uses < */
@@ -3041,8 +2957,6 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	q->tx_q = netdevq;
 	t3_update_qset_coalesce(q, p);
 
-	init_lro_mgr(q, lro_mgr);
-
 	avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
 			  GFP_KERNEL | __GFP_COMP);
 	if (!avail) {
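
Pulling the sge.c hunks together, the end of lro_add_page() after this patch
looks roughly as follows -- a condensed sketch assembled from the "+" lines
above, not new code (pi is the port_info of qs->netdev):

	/* last chunk of the frame: hand the accumulated frags to GRO */
	qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
	cpl = qs->lro_va;

	if (unlikely(cpl->vlan_valid)) {
		struct vlan_group *grp = pi->vlan_grp;

		if (likely(grp != NULL)) {
			vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
				       &qs->lro_frag_tbl);
			goto out;
		}
	}
	napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);

out:
	/* the frag table is reused, so reset it for the next frame */
	qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;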


* Re: cxgb3: Replace LRO with GRO
@ 2009-01-20 10:14 Divy Le Ray
  2009-01-21  8:29 ` Herbert Xu
  0 siblings, 1 reply; 23+ messages in thread
From: Divy Le Ray @ 2009-01-20 10:14 UTC (permalink / raw)
  To: herbert; +Cc: netdev


Hi Herbert,

I have tried the following patch as an attempt to eliminate the memcpy
seen in the previous oprofile run. I'm now getting about 5.5 Gb/s.
After that, I went through the output of opreport -d to identify
the most expensive operations seen in the profile.
Here is the patch:

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2554,6 +2554,8 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
 	struct ethhdr *eth;
+	skb_frag_t *frag;
+	int i;

 	napi->skb = NULL;

@@ -2566,9 +2568,15 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 	}

 	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
-	skb_shinfo(skb)->nr_frags = info->nr_frags;
-	memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
+	frag = &info->frags[info->nr_frags - 1];

+	for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
+		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
+				   frag->size);
+		frag++;
+	}
+	skb_shinfo(skb)->nr_frags = info->nr_frags;
+
 	skb->data_len = info->len;
 	skb->len += info->len;
 	skb->truesize += info->len;
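
For scale, the memcpy this removes always copied the entire fixed-size frag
array rather than only the descriptors in use; a rough, back-of-the-envelope
estimate (assuming 4K pages and a 64-bit build -- not measured):

	/*
	 * MAX_SKB_FRAGS       = 65536 / PAGE_SIZE + 2 = 18 (4K pages)
	 * sizeof(skb_frag_t)  = 16 bytes (page pointer, offset, size)
	 * sizeof(info->frags) = 18 * 16 = 288 bytes memcpy'd per packet,
	 * whereas an MTU-sized frame typically occupies a single fragment,
	 * so the loop above fills just one 16-byte descriptor instead.
	 */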

Here is the summary (non-detailed) opreport output for the CPU handling
the reception of the netperf traffic:

      38.815300  copy_user_generic_unrolled          vmlinux
       6.373900  process_responses                  cxgb3.ko
       4.957800  inet_gro_receive                    vmlinux
       4.908800  put_page                            vmlinux
       4.862100  refill_fl                          cxgb3.ko
       3.774900  dev_gro_receive                     vmlinux
       3.096000  tcp_gro_receive                     vmlinux
       2.764700  napi_fraginfo_skb                   vmlinux
       2.174400  free_hot_cold_page                  vmlinux
       2.006400  skb_copy_datagram_iovec             vmlinux
       1.511800  tcp_recvmsg                         vmlinux
       1.488500  get_page_from_freelist              vmlinux
       1.455800  irq_entries_start                   vmlinux
       1.453500  skb_gro_header                      vmlinux
       0.877200  get_pageblock_flags_group           vmlinux
       0.863200  memcpy_toiovec                      vmlinux
       0.856200  _raw_spin_lock                      vmlinux
       0.720900  memcpy                              vmlinux
       0.711600  skb_gro_receive                     vmlinux
       0.683600  kfree                               vmlinux

Here is more detailed information for each GRO function listed above:
- the relative % of the most expensive instructions,
- the gdb disassemble output for those instructions,
- the gdb list output.

inet_gro_receive 4.9578 ffffffff805468c0
  ffffffff80546a49 11.1059%
    0xffffffff80546a49 <inet_gro_receive+393>:      jne    0xffffffff805469e5 <inet_gro_receive+293>
    0xffffffff80546a49 is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1285).
      1280                    if (!NAPI_GRO_CB(p)->same_flow)
      1281                            continue;
      1282
      1283                    iph2 = ip_hdr(p);
      1284
      1285                    if (iph->protocol != iph2->protocol ||
      1286                        iph->tos != iph2->tos ||
      1287                        memcmp(&iph->saddr, &iph2->saddr, 8)) {
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;

  ffffffff80546a61 10.4000%
    0xffffffff80546a61 <inet_gro_receive+417>:      je     0xffffffff80546abb <inet_gro_receive+507>
    0xffffffff80546a61 is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1293).
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;
      1290                    }
      1291
      1292                    /* All fields must match except length and checksum. */
      1293                    NAPI_GRO_CB(p)->flush |=
      1294                            memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
      1295                            (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;

  ffffffff80546a58 8.2353%
    0xffffffff80546a58 <inet_gro_receive+408>:      mov    %rdx,%rcx
    0xffffffff80546a58 is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1293).
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;
      1290                    }
      1291
      1292                    /* All fields must match except length and checksum. */
      1293                    NAPI_GRO_CB(p)->flush |=
      1294                            memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
      1295                            (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
      1296
      1297                    NAPI_GRO_CB(p)->flush |= flush;

  ffffffff80546abb 8.2353%
    0xffffffff80546abb <inet_gro_receive+507>:      movzwl 0x4(%r10),%eax
      (gdb) list *(0xffffffff80546abb)
      0xffffffff80546abb is in inet_gro_receive (/mnt/net-2.6/include/linux/swab.h:51).
      46      static inline __attribute_const__ __u16 __fswab16(__u16 val)
      47      {
      48      #ifdef __arch_swab16
      49              return __arch_swab16(val);
      50      #else
      51              return ___constant_swab16(val);
      52      #endif
      53      }
      54
      55      static inline __attribute_const__ __u32 __fswab32(__u32 val)

  ffffffff80546a4b 8.1882%
    0xffffffff80546a4b is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1293).
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;
      1290                    }
      1291
      1292                    /* All fields must match except length and checksum. */
      1293                    NAPI_GRO_CB(p)->flush |=
      1294                            memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
      1295                            (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
      1296
      1297                    NAPI_GRO_CB(p)->flush |= flush;

  ffffffff80546a47 7.5765%
    0xffffffff80546a47 <inet_gro_receive+391>:      repz cmpsb %es:(%rdi),%ds:(%rsi)
    0xffffffff80546a47 is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1285).
      1280                    if (!NAPI_GRO_CB(p)->same_flow)
      1281                            continue;
      1282
      1283                    iph2 = ip_hdr(p);
      1284
      1285                    if (iph->protocol != iph2->protocol ||
      1286                        iph->tos != iph2->tos ||
      1287                        memcmp(&iph->saddr, &iph2->saddr, 8)) {
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;

  ffffffff80546a44 7.1529%
    0xffffffff80546a44 is in inet_gro_receive (/mnt/net-2.6/net/ipv4/af_inet.c:1285).
      1280                    if (!NAPI_GRO_CB(p)->same_flow)
      1281                            continue;
      1282
      1283                    iph2 = ip_hdr(p);
      1284
      1285                    if (iph->protocol != iph2->protocol ||
      1286                        iph->tos != iph2->tos ||
      1287                        memcmp(&iph->saddr, &iph2->saddr, 8)) {
      1288                            NAPI_GRO_CB(p)->same_flow = 0;
      1289                            continue;


dev_gro_receive 3.7749 ffffffff805024b0
  ffffffff805026a2 18.7268%
    0xffffffff805026a2 <dev_gro_receive+498>:       repz cmpsb %es:(%rdi),%ds:(%rsi)
    0xffffffff805026a2 is in dev_gro_receive (/mnt/net-2.6/net/core/dev.c:2450).
      2445                            count++;
      2446
      2447                            if (!NAPI_GRO_CB(p)->same_flow)
      2448                                    continue;
      2449
      2450                            if (p->mac_len != mac_len ||
      2451                                memcmp(skb_mac_header(p), mac, mac_len))
      2452                                    NAPI_GRO_CB(p)->same_flow = 0;
      2453                    }
      2454

  ffffffff805026a4 13.4734%
    0xffffffff805026a4 <dev_gro_receive+500>:       je     0xffffffff805025c8 <dev_gro_receive+280>
      (gdb) list *(0xffffffff805026a4)
      0xffffffff805026a4 is in dev_gro_receive (/mnt/net-2.6/net/core/dev.c:2450).
      2445                            count++;
      2446
      2447                            if (!NAPI_GRO_CB(p)->same_flow)
      2448                                    continue;
      2449
      2450                            if (p->mac_len != mac_len ||
      2451                                memcmp(skb_mac_header(p), mac, mac_len))
      2452                                    NAPI_GRO_CB(p)->same_flow = 0;

  ffffffff805025c8 9.3943%
    0xffffffff805025c8 <dev_gro_receive+280>:       mov    (%r9),%r9
    0xffffffff805025c8 is in dev_gro_receive (/mnt/net-2.6/net/core/dev.c:2444).
      2439                    skb->mac_len = mac_len;
      2440                    NAPI_GRO_CB(skb)->same_flow = 0;
      2441                    NAPI_GRO_CB(skb)->flush = 0;
      2442                    NAPI_GRO_CB(skb)->free = 0;
      2443
      2444                    for (p = napi->gro_list; p; p = p->next) {
      2445                            count++;
      2446
      2447                            if (!NAPI_GRO_CB(p)->same_flow)
      2448                                    continue;

  ffffffff805025f9 7.3548%
    0xffffffff805025f9 <dev_gro_receive+329>:       je     0xffffffff80502614 <dev_gro_receive+356>
    0xffffffff805025f9 is in dev_gro_receive (/mnt/net-2.6/net/core/dev.c:2466).
      2461                    goto normal;
      2462
      2463            same_flow = NAPI_GRO_CB(skb)->same_flow;
      2464            ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
      2465
      2466            if (pp) {
      2467                    struct sk_buff *nskb = *pp;
      2468
      2469                    *pp = nskb->next;
      2470                    nskb->next = NULL;


tcp_gro_receive 3.0960 ffffffff80528df0
  ffffffff80528f2b 16.3527%
    0xffffffff80528f2b <tcp_gro_receive+315>:       repz cmpsb %es:(%rdi),%ds:(%rsi)
    0xffffffff80528f2b is in tcp_gro_receive (/mnt/net-2.6/net/ipv4/tcp.c:2521).
      2516            flush = NAPI_GRO_CB(p)->flush;
      2517            flush |= flags & TCP_FLAG_CWR;
      2518            flush |= (flags ^ tcp_flag_word(th2)) &
      2519                      ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
      2520            flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
      2521            flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
      2522
      2523            total = skb_gro_len(p);
      2524            mss = skb_shinfo(p)->gso_size;

  ffffffff80528f2d 15.9759%
    0xffffffff80528f2d <tcp_gro_receive+317>:       mov    0x60(%r8),%edi
      0xffffffff80528f2d is in tcp_gro_receive (/mnt/net-2.6/include/linux/netdevice.h:1101).
      1096            return NAPI_GRO_CB(skb)->data_offset;
      1097    }
      1098
      1099    static inline unsigned int skb_gro_len(const struct sk_buff *skb)
      1100    {
      1101            return skb->len - NAPI_GRO_CB(skb)->data_offset;
      1102    }
      1103
      1104    static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
      1105    {

  ffffffff80528f31 13.7905%
    0xffffffff80528f31 <tcp_gro_receive+321>:       setb   %al
    0xffffffff80528f31 is in tcp_gro_receive (/mnt/net-2.6/net/ipv4/tcp.c:2521).
      2516            flush = NAPI_GRO_CB(p)->flush;
      2517            flush |= flags & TCP_FLAG_CWR;
      2518            flush |= (flags ^ tcp_flag_word(th2)) &
      2519                      ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
      2520            flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
      2521            flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
      2522
      2523            total = skb_gro_len(p);
      2524            mss = skb_shinfo(p)->gso_size;

napi_fraginfo_skb 2.7647 ffffffff80501dd0
  ffffffff80501f16 65.2321%
    0xffffffff80501f16 <napi_fraginfo_skb+326>:     mov    %eax,0x6c(%rbx)
    0xffffffff80501f16 is in napi_fraginfo_skb (/mnt/net-2.6/net/core/dev.c:2606).
      2601             * special handling.  We'll fix it up properly at the end.
      2602             */
      2603            skb->protocol = eth->h_proto;
      2604
      2605            skb->ip_summed = info->ip_summed;
      2606            skb->csum = info->csum;
      2607
      2608    out:
      2609            return skb;
      2610    }

Cheers,
Divy

* [1/2] e1000e: Invoke VLAN GRO handler
@ 2009-01-13  9:26 Herbert Xu
  2009-01-15  6:59 ` cxgb3: Replace LRO with GRO Herbert Xu
  0 siblings, 1 reply; 23+ messages in thread
From: Herbert Xu @ 2009-01-13  9:26 UTC (permalink / raw)
  To: David S. Miller, Jeff Kirsher, netdev

Hi Dave:

I'm starting to convert the LRO users now.  Here's the first
patch, which is trivial.  It simply completes the test driver
that I started out with.

e1000e: Invoke VLAN GRO handler

Now that VLAN has GRO support, we can call its GRO handler as well.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 91817d0..f73faac 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -99,8 +99,8 @@ static void e1000_receive_skb(struct e1000_adapter *adapter,
 	skb->protocol = eth_type_trans(skb, netdev);
 
 	if (adapter->vlgrp && (status & E1000_RXD_STAT_VP))
-		vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-					 le16_to_cpu(vlan));
+		vlan_gro_receive(&adapter->napi, adapter->vlgrp,
+				 le16_to_cpu(vlan), skb);
 	else
 		napi_gro_receive(&adapter->napi, skb);
 }

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


end of thread, other threads:[~2009-04-13 15:24 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-15 21:14 cxgb3: Replace LRO with GRO Divy Le Ray
2009-01-15 23:58 ` Herbert Xu
2009-01-16  8:06   ` Divy Le Ray
2009-01-16  8:56     ` Herbert Xu
2009-01-16 11:12       ` Divy Le Ray
2009-01-16 23:58         ` Herbert Xu
2009-01-17  5:08         ` Herbert Xu
2009-01-17 11:11           ` Divy Le Ray
2009-01-17 13:08             ` Herbert Xu
2009-01-18 20:33               ` Divy Le Ray
2009-01-18 22:50                 ` Herbert Xu
2009-01-20  1:03 ` David Miller
2009-01-20  2:03   ` David Miller
2009-01-20  5:24     ` Herbert Xu
2009-01-20 10:04     ` Divy Le Ray
  -- strict thread matches above, loose matches on Subject: below --
2009-01-20 10:14 Divy Le Ray
2009-01-21  8:29 ` Herbert Xu
2009-01-22  9:42   ` Divy Le Ray
2009-02-16  3:36     ` Herbert Xu
2009-02-16  3:47       ` Divy Le Ray
2009-03-13  7:28       ` Divy Le Ray
2009-04-13 15:24         ` Herbert Xu
2009-01-13  9:26 [1/2] e1000e: Invoke VLAN GRO handler Herbert Xu
2009-01-15  6:59 ` cxgb3: Replace LRO with GRO Herbert Xu
