netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
@ 2010-06-03  3:48 sonic zhang
  2010-06-03  4:05 ` Eric Dumazet
  2010-06-04  3:29 ` Sonic Zhang
  0 siblings, 2 replies; 10+ messages in thread
From: sonic zhang @ 2010-06-03  3:48 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, uclinux-dist-devel

>From 40560ae9e8db42e2d2259b791ace160534c9a0f2 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 3 Jun 2010 11:44:33 +0800
Subject: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer

SKBs hold onto resources that can't be held indefinitely, such as TCP
socket references and netfilter conntrack state.  So if a packet is left
in TX ring for a long time, there might be a TCP socket that cannot be
closed and freed up.

Current blackfin EMAC driver always reclaim and free used tx skbs in future
transfers. The problem is that future transfer may not come as soon as
possible. This patch start a timer after transfer to reclaim and free skb.
There is nearly no performance drop with this patch.

TX interrupt is not enabled for 2 reasons:

1) If Blackfin EMAC TX transfer control is turned on, endless TX
interrupts are triggered no matter if TX DMA is enabled. Since DMA walks
down the ring automatically, TX transfer control can't be turned off in the
middle. The only way is to disable TX interrupt completely.

2) skb can not be freed from interrupt context. A work queue or tasklet
has to be created, which introduce more overhead than timer only solution.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
---
 drivers/net/bfin_mac.c |  114 ++++++++++++++++++++++++++++++------------------
 drivers/net/bfin_mac.h |    5 ++
 2 files changed, 77 insertions(+), 42 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index 368f333..6f0755f 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -922,61 +922,73 @@ static void bfin_mac_hwtstamp_init(struct net_device *netdev)
 # define bfin_tx_hwtstamp(dev, skb)
 #endif
 
-static void adjust_tx_list(void)
+static inline void _tx_reclaim_skb(void)
+{
+	do {
+		tx_list_head->desc_a.config &= ~DMAEN;
+		tx_list_head->status.status_word = 0;
+		if (tx_list_head->skb) {
+			dev_kfree_skb(tx_list_head->skb);
+			tx_list_head->skb = NULL;
+		}
+		tx_list_head = tx_list_head->next;
+
+	} while (tx_list_head->status.status_word != 0);
+}
+
+static void tx_reclaim_skb(struct bfin_mac_local *lp)
 {
 	int timeout_cnt = MAX_TIMEOUT_CNT;
 
-	if (tx_list_head->status.status_word != 0 &&
-	    current_tx_ptr != tx_list_head) {
-		goto adjust_head;	/* released something, just return; */
-	}
+	if (tx_list_head->status.status_word != 0)
+		_tx_reclaim_skb();
 
-	/*
-	 * if nothing released, check wait condition
-	 * current's next can not be the head,
-	 * otherwise the dma will not stop as we want
-	 */
-	if (current_tx_ptr->next->next == tx_list_head) {
+	if (current_tx_ptr->next == tx_list_head) {
 		while (tx_list_head->status.status_word == 0) {
+			/* slow down polling to avoid too many queue stop. */
 			udelay(10);
-			if (tx_list_head->status.status_word != 0 ||
-			    !(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN)) {
-				goto adjust_head;
-			}
-			if (timeout_cnt-- < 0) {
-				printk(KERN_ERR DRV_NAME
-				": wait for adjust tx list head timeout\n");
+			/* reclaim skb if DMA is not running. */
+			if (!(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN))
+				break;
+			if (timeout_cnt-- < 0)
 				break;
-			}
-		}
-		if (tx_list_head->status.status_word != 0) {
-			goto adjust_head;
 		}
+
+		if (timeout_cnt >= 0)
+			_tx_reclaim_skb();
+		else
+			netif_stop_queue(lp->ndev);
 	}
 
-	return;
+	if (current_tx_ptr->next != tx_list_head &&
+		netif_queue_stopped(lp->ndev))
+		netif_wake_queue(lp->ndev);
+
+	if (tx_list_head != current_tx_ptr) {
+		/* shorten the timer interval if tx queue is stopped */
+		if (netif_queue_stopped(lp->ndev))
+			lp->tx_reclaim_timer.expires =
+				jiffies + (TX_RECLAIM_JIFFIES >> 4);
+		else
+			lp->tx_reclaim_timer.expires =
+				jiffies + TX_RECLAIM_JIFFIES;
+
+		mod_timer(&lp->tx_reclaim_timer,
+			lp->tx_reclaim_timer.expires);
+	}
 
-adjust_head:
-	do {
-		tx_list_head->desc_a.config &= ~DMAEN;
-		tx_list_head->status.status_word = 0;
-		if (tx_list_head->skb) {
-			dev_kfree_skb(tx_list_head->skb);
-			tx_list_head->skb = NULL;
-		} else {
-			printk(KERN_ERR DRV_NAME
-			       ": no sk_buff in a transmitted frame!\n");
-		}
-		tx_list_head = tx_list_head->next;
-	} while (tx_list_head->status.status_word != 0 &&
-		 current_tx_ptr != tx_list_head);
 	return;
+}
 
+static void tx_reclaim_skb_timeout(unsigned long lp)
+{
+	tx_reclaim_skb((struct bfin_mac_local *)lp);
 }
 
 static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
+	struct bfin_mac_local *lp = netdev_priv(dev);
 	u16 *data;
 	u32 data_align = (unsigned long)(skb->data) & 0x3;
 	union skb_shared_tx *shtx = skb_tx(skb);
@@ -1009,8 +1021,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
 			skb->len);
 		current_tx_ptr->desc_a.start_addr =
 			(u32)current_tx_ptr->packet;
-		if (current_tx_ptr->status.status_word != 0)
-			current_tx_ptr->status.status_word = 0;
 		blackfin_dcache_flush_range(
 			(u32)current_tx_ptr->packet,
 			(u32)(current_tx_ptr->packet + skb->len + 2));
@@ -1022,6 +1032,9 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
 	 */
 	SSYNC();
 
+	/* always clear status buffer before start tx dma */
+	current_tx_ptr->status.status_word = 0;
+
 	/* enable this packet's dma */
 	current_tx_ptr->desc_a.config |= DMAEN;
 
@@ -1037,13 +1050,14 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
 	bfin_write_EMAC_OPMODE(bfin_read_EMAC_OPMODE() | TE);
 
 out:
-	adjust_tx_list();
-
 	bfin_tx_hwtstamp(dev, skb);
 
 	current_tx_ptr = current_tx_ptr->next;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += (skb->len);
+
+	tx_reclaim_skb(lp);
+
 	return NETDEV_TX_OK;
 }
 
@@ -1167,8 +1181,11 @@ real_rx:
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void bfin_mac_poll(struct net_device *dev)
 {
+	struct bfin_mac_local *lp = netdev_priv(dev);
+
 	disable_irq(IRQ_MAC_RX);
 	bfin_mac_interrupt(IRQ_MAC_RX, dev);
+	tx_reclaim_skb(lp);
 	enable_irq(IRQ_MAC_RX);
 }
 #endif				/* CONFIG_NET_POLL_CONTROLLER */
@@ -1232,12 +1249,20 @@ static int bfin_mac_enable(void)
 /* Our watchdog timed out. Called by the networking layer */
 static void bfin_mac_timeout(struct net_device *dev)
 {
+	struct bfin_mac_local *lp = netdev_priv(dev);
+
 	pr_debug("%s: %s\n", dev->name, __func__);
 
 	bfin_mac_disable();
 
+	if (timer_pending(&lp->tx_reclaim_timer))
+		del_timer(&(lp->tx_reclaim_timer));
+
 	/* reset tx queue */
-	tx_list_tail = tx_list_head->next;
+	current_tx_ptr = tx_list_head;
+
+	if (netif_queue_stopped(lp->ndev))
+		netif_wake_queue(lp->ndev);
 
 	bfin_mac_enable();
 
@@ -1430,6 +1455,7 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	platform_set_drvdata(pdev, ndev);
 	lp = netdev_priv(ndev);
+	lp->ndev = ndev;
 
 	/* Grab the MAC address in the MAC */
 	*(__le32 *) (&(ndev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO());
@@ -1485,6 +1511,10 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 	ndev->netdev_ops = &bfin_mac_netdev_ops;
 	ndev->ethtool_ops = &bfin_mac_ethtool_ops;
 
+	init_timer(&lp->tx_reclaim_timer);
+	lp->tx_reclaim_timer.data = (unsigned long)lp;
+	lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout;
+
 	spin_lock_init(&lp->lock);
 
 	/* now, enable interrupts */
diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index 1ae7b82..04e4050 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -13,9 +13,12 @@
 #include <linux/net_tstamp.h>
 #include <linux/clocksource.h>
 #include <linux/timecompare.h>
+#include <linux/timer.h>
 
 #define BFIN_MAC_CSUM_OFFLOAD
 
+#define TX_RECLAIM_JIFFIES (HZ / 5)
+
 struct dma_descriptor {
 	struct dma_descriptor *next_dma_desc;
 	unsigned long start_addr;
@@ -68,6 +71,8 @@ struct bfin_mac_local {
 
 	int wol;		/* Wake On Lan */
 	int irq_wake_requested;
+	struct timer_list tx_reclaim_timer;
+	struct net_device *ndev;
 
 	/* MII and PHY stuffs */
 	int old_link;          /* used by bf537_adjust_link */
-- 
1.6.0




^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-03  3:48 [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer sonic zhang
@ 2010-06-03  4:05 ` Eric Dumazet
  2010-06-03  8:57   ` Junchang Wang
  2010-06-04  3:29 ` Sonic Zhang
  1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2010-06-03  4:05 UTC (permalink / raw)
  To: sonic zhang; +Cc: David Miller, netdev, uclinux-dist-devel

Le jeudi 03 juin 2010 à 11:48 +0800, sonic zhang a écrit :

>  	bfin_tx_hwtstamp(dev, skb);
>  
>  	current_tx_ptr = current_tx_ptr->next;
>  	dev->stats.tx_packets++;
>  	dev->stats.tx_bytes += (skb->len);
> +
> +	tx_reclaim_skb(lp);
> +
>  	return NETDEV_TX_OK;
>  }
>  

Not related to your patch, but reviewing it I see this driver still do
the "dev->stats.tx_packets++; dev->stats.tx_bytes += (skb->len);"

This is not necessary and expensive, since we update txq stats in core
network stack.

	rc = ops->ndo_start_xmit(skb, dev);
	if (rc == NETDEV_TX_OK)
		txq_trans_update(txq);  << here >>

Doing these changes in a network driver dirties yet another cache line
in a hot path.

I suspect many drivers could be changed to avoid this double accounting.

A driver providing a ndo_get_stats() method can use the
dev_txq_stats_fold() helper.

driver not provinding a ndo_get_stats() method has nothing special to
do, dev_get_stats() automatically calls dev_txq_stats_fold() to update
dev->stats.{tx_packets|tx_bytes|tx_dropped} using txq stats.





^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-03  4:05 ` Eric Dumazet
@ 2010-06-03  8:57   ` Junchang Wang
  2010-06-03  9:19     ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Junchang Wang @ 2010-06-03  8:57 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: sonic zhang, David Miller, netdev, uclinux-dist-devel

Hi Eric,

On Thu, Jun 3, 2010 at 12:05 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Not related to your patch, but reviewing it I see this driver still do
> the "dev->stats.tx_packets++; dev->stats.tx_bytes += (skb->len);"
>
> This is not necessary and expensive, since we update txq stats in core
> network stack.
>
>        rc = ops->ndo_start_xmit(skb, dev);
>        if (rc == NETDEV_TX_OK)
>                txq_trans_update(txq);  << here >>
>

Good suggestion for drivers. But I wonder whether there are stats for
received packets in core network stack.

I.e., can I replace "dev->stats.rx_packets++" and "dev->stats.rx_bytes
+= (skb->len);" with something already maintained by core stack? I
failed to find them.

Thanks.


-- 
--Junchang

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-03  8:57   ` Junchang Wang
@ 2010-06-03  9:19     ` Eric Dumazet
  2010-06-03 10:54       ` Junchang Wang
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2010-06-03  9:19 UTC (permalink / raw)
  To: Junchang Wang; +Cc: sonic zhang, David Miller, netdev, uclinux-dist-devel

Le jeudi 03 juin 2010 à 16:57 +0800, Junchang Wang a écrit :
> Hi Eric,
> 
> On Thu, Jun 3, 2010 at 12:05 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Not related to your patch, but reviewing it I see this driver still do
> > the "dev->stats.tx_packets++; dev->stats.tx_bytes += (skb->len);"
> >
> > This is not necessary and expensive, since we update txq stats in core
> > network stack.
> >
> >        rc = ops->ndo_start_xmit(skb, dev);
> >        if (rc == NETDEV_TX_OK)
> >                txq_trans_update(txq);  << here >>
> >
> 
> Good suggestion for drivers. But I wonder whether there are stats for
> received packets in core network stack.
> 

No its not there.

> I.e., can I replace "dev->stats.rx_packets++" and "dev->stats.rx_bytes
> += (skb->len);" with something already maintained by core stack? I
> failed to find them.
> 

As I said, core network takes care of three counters only, because it
was 'free', as they share a cache line with a spinlock we must hold when
calling xmit function.

In receive path, we dont dirty a cache line in core network, so updating
counters would add a cost. (modern NICs handle stats in firmware)




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-03  9:19     ` Eric Dumazet
@ 2010-06-03 10:54       ` Junchang Wang
  0 siblings, 0 replies; 10+ messages in thread
From: Junchang Wang @ 2010-06-03 10:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

>
> As I said, core network takes care of three counters only, because it
> was 'free', as they share a cache line with a spinlock we must hold when
> calling xmit function.
>
> In receive path, we dont dirty a cache line in core network, so updating
> counters would add a cost. (modern NICs handle stats in firmware)
>
That's really resolve my doubts about lack of RX stats in kernel stack.

Thanks Eric.

-- 
--Junchang

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-03  3:48 [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer sonic zhang
  2010-06-03  4:05 ` Eric Dumazet
@ 2010-06-04  3:29 ` Sonic Zhang
  2010-06-04  4:05   ` Eric Dumazet
  1 sibling, 1 reply; 10+ messages in thread
From: Sonic Zhang @ 2010-06-04  3:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, uclinux-dist-devel

David,

Any comments?

Thanks

Sonic

On Thu, Jun 3, 2010 at 11:48 AM, sonic zhang <sonic.adi@gmail.com> wrote:
> >From 40560ae9e8db42e2d2259b791ace160534c9a0f2 Mon Sep 17 00:00:00 2001
> From: Sonic Zhang <sonic.zhang@analog.com>
> Date: Thu, 3 Jun 2010 11:44:33 +0800
> Subject: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
>
> SKBs hold onto resources that can't be held indefinitely, such as TCP
> socket references and netfilter conntrack state.  So if a packet is left
> in TX ring for a long time, there might be a TCP socket that cannot be
> closed and freed up.
>
> Current blackfin EMAC driver always reclaim and free used tx skbs in future
> transfers. The problem is that future transfer may not come as soon as
> possible. This patch start a timer after transfer to reclaim and free skb.
> There is nearly no performance drop with this patch.
>
> TX interrupt is not enabled for 2 reasons:
>
> 1) If Blackfin EMAC TX transfer control is turned on, endless TX
> interrupts are triggered no matter if TX DMA is enabled. Since DMA walks
> down the ring automatically, TX transfer control can't be turned off in the
> middle. The only way is to disable TX interrupt completely.
>
> 2) skb can not be freed from interrupt context. A work queue or tasklet
> has to be created, which introduce more overhead than timer only solution.
>
> Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
> ---
>  drivers/net/bfin_mac.c |  114 ++++++++++++++++++++++++++++++------------------
>  drivers/net/bfin_mac.h |    5 ++
>  2 files changed, 77 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
> index 368f333..6f0755f 100644
> --- a/drivers/net/bfin_mac.c
> +++ b/drivers/net/bfin_mac.c
> @@ -922,61 +922,73 @@ static void bfin_mac_hwtstamp_init(struct net_device *netdev)
>  # define bfin_tx_hwtstamp(dev, skb)
>  #endif
>
> -static void adjust_tx_list(void)
> +static inline void _tx_reclaim_skb(void)
> +{
> +       do {
> +               tx_list_head->desc_a.config &= ~DMAEN;
> +               tx_list_head->status.status_word = 0;
> +               if (tx_list_head->skb) {
> +                       dev_kfree_skb(tx_list_head->skb);
> +                       tx_list_head->skb = NULL;
> +               }
> +               tx_list_head = tx_list_head->next;
> +
> +       } while (tx_list_head->status.status_word != 0);
> +}
> +
> +static void tx_reclaim_skb(struct bfin_mac_local *lp)
>  {
>        int timeout_cnt = MAX_TIMEOUT_CNT;
>
> -       if (tx_list_head->status.status_word != 0 &&
> -           current_tx_ptr != tx_list_head) {
> -               goto adjust_head;       /* released something, just return; */
> -       }
> +       if (tx_list_head->status.status_word != 0)
> +               _tx_reclaim_skb();
>
> -       /*
> -        * if nothing released, check wait condition
> -        * current's next can not be the head,
> -        * otherwise the dma will not stop as we want
> -        */
> -       if (current_tx_ptr->next->next == tx_list_head) {
> +       if (current_tx_ptr->next == tx_list_head) {
>                while (tx_list_head->status.status_word == 0) {
> +                       /* slow down polling to avoid too many queue stop. */
>                        udelay(10);
> -                       if (tx_list_head->status.status_word != 0 ||
> -                           !(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN)) {
> -                               goto adjust_head;
> -                       }
> -                       if (timeout_cnt-- < 0) {
> -                               printk(KERN_ERR DRV_NAME
> -                               ": wait for adjust tx list head timeout\n");
> +                       /* reclaim skb if DMA is not running. */
> +                       if (!(bfin_read_DMA2_IRQ_STATUS() & DMA_RUN))
> +                               break;
> +                       if (timeout_cnt-- < 0)
>                                break;
> -                       }
> -               }
> -               if (tx_list_head->status.status_word != 0) {
> -                       goto adjust_head;
>                }
> +
> +               if (timeout_cnt >= 0)
> +                       _tx_reclaim_skb();
> +               else
> +                       netif_stop_queue(lp->ndev);
>        }
>
> -       return;
> +       if (current_tx_ptr->next != tx_list_head &&
> +               netif_queue_stopped(lp->ndev))
> +               netif_wake_queue(lp->ndev);
> +
> +       if (tx_list_head != current_tx_ptr) {
> +               /* shorten the timer interval if tx queue is stopped */
> +               if (netif_queue_stopped(lp->ndev))
> +                       lp->tx_reclaim_timer.expires =
> +                               jiffies + (TX_RECLAIM_JIFFIES >> 4);
> +               else
> +                       lp->tx_reclaim_timer.expires =
> +                               jiffies + TX_RECLAIM_JIFFIES;
> +
> +               mod_timer(&lp->tx_reclaim_timer,
> +                       lp->tx_reclaim_timer.expires);
> +       }
>
> -adjust_head:
> -       do {
> -               tx_list_head->desc_a.config &= ~DMAEN;
> -               tx_list_head->status.status_word = 0;
> -               if (tx_list_head->skb) {
> -                       dev_kfree_skb(tx_list_head->skb);
> -                       tx_list_head->skb = NULL;
> -               } else {
> -                       printk(KERN_ERR DRV_NAME
> -                              ": no sk_buff in a transmitted frame!\n");
> -               }
> -               tx_list_head = tx_list_head->next;
> -       } while (tx_list_head->status.status_word != 0 &&
> -                current_tx_ptr != tx_list_head);
>        return;
> +}
>
> +static void tx_reclaim_skb_timeout(unsigned long lp)
> +{
> +       tx_reclaim_skb((struct bfin_mac_local *)lp);
>  }
>
>  static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>                                struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
>        u16 *data;
>        u32 data_align = (unsigned long)(skb->data) & 0x3;
>        union skb_shared_tx *shtx = skb_tx(skb);
> @@ -1009,8 +1021,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>                        skb->len);
>                current_tx_ptr->desc_a.start_addr =
>                        (u32)current_tx_ptr->packet;
> -               if (current_tx_ptr->status.status_word != 0)
> -                       current_tx_ptr->status.status_word = 0;
>                blackfin_dcache_flush_range(
>                        (u32)current_tx_ptr->packet,
>                        (u32)(current_tx_ptr->packet + skb->len + 2));
> @@ -1022,6 +1032,9 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>         */
>        SSYNC();
>
> +       /* always clear status buffer before start tx dma */
> +       current_tx_ptr->status.status_word = 0;
> +
>        /* enable this packet's dma */
>        current_tx_ptr->desc_a.config |= DMAEN;
>
> @@ -1037,13 +1050,14 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb,
>        bfin_write_EMAC_OPMODE(bfin_read_EMAC_OPMODE() | TE);
>
>  out:
> -       adjust_tx_list();
> -
>        bfin_tx_hwtstamp(dev, skb);
>
>        current_tx_ptr = current_tx_ptr->next;
>        dev->stats.tx_packets++;
>        dev->stats.tx_bytes += (skb->len);
> +
> +       tx_reclaim_skb(lp);
> +
>        return NETDEV_TX_OK;
>  }
>
> @@ -1167,8 +1181,11 @@ real_rx:
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  static void bfin_mac_poll(struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
> +
>        disable_irq(IRQ_MAC_RX);
>        bfin_mac_interrupt(IRQ_MAC_RX, dev);
> +       tx_reclaim_skb(lp);
>        enable_irq(IRQ_MAC_RX);
>  }
>  #endif                         /* CONFIG_NET_POLL_CONTROLLER */
> @@ -1232,12 +1249,20 @@ static int bfin_mac_enable(void)
>  /* Our watchdog timed out. Called by the networking layer */
>  static void bfin_mac_timeout(struct net_device *dev)
>  {
> +       struct bfin_mac_local *lp = netdev_priv(dev);
> +
>        pr_debug("%s: %s\n", dev->name, __func__);
>
>        bfin_mac_disable();
>
> +       if (timer_pending(&lp->tx_reclaim_timer))
> +               del_timer(&(lp->tx_reclaim_timer));
> +
>        /* reset tx queue */
> -       tx_list_tail = tx_list_head->next;
> +       current_tx_ptr = tx_list_head;
> +
> +       if (netif_queue_stopped(lp->ndev))
> +               netif_wake_queue(lp->ndev);
>
>        bfin_mac_enable();
>
> @@ -1430,6 +1455,7 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
>        SET_NETDEV_DEV(ndev, &pdev->dev);
>        platform_set_drvdata(pdev, ndev);
>        lp = netdev_priv(ndev);
> +       lp->ndev = ndev;
>
>        /* Grab the MAC address in the MAC */
>        *(__le32 *) (&(ndev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO());
> @@ -1485,6 +1511,10 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
>        ndev->netdev_ops = &bfin_mac_netdev_ops;
>        ndev->ethtool_ops = &bfin_mac_ethtool_ops;
>
> +       init_timer(&lp->tx_reclaim_timer);
> +       lp->tx_reclaim_timer.data = (unsigned long)lp;
> +       lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout;
> +
>        spin_lock_init(&lp->lock);
>
>        /* now, enable interrupts */
> diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
> index 1ae7b82..04e4050 100644
> --- a/drivers/net/bfin_mac.h
> +++ b/drivers/net/bfin_mac.h
> @@ -13,9 +13,12 @@
>  #include <linux/net_tstamp.h>
>  #include <linux/clocksource.h>
>  #include <linux/timecompare.h>
> +#include <linux/timer.h>
>
>  #define BFIN_MAC_CSUM_OFFLOAD
>
> +#define TX_RECLAIM_JIFFIES (HZ / 5)
> +
>  struct dma_descriptor {
>        struct dma_descriptor *next_dma_desc;
>        unsigned long start_addr;
> @@ -68,6 +71,8 @@ struct bfin_mac_local {
>
>        int wol;                /* Wake On Lan */
>        int irq_wake_requested;
> +       struct timer_list tx_reclaim_timer;
> +       struct net_device *ndev;
>
>        /* MII and PHY stuffs */
>        int old_link;          /* used by bf537_adjust_link */
> --
> 1.6.0
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-04  3:29 ` Sonic Zhang
@ 2010-06-04  4:05   ` Eric Dumazet
  2010-06-04  4:44     ` Sonic Zhang
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2010-06-04  4:05 UTC (permalink / raw)
  To: Sonic Zhang; +Cc: David Miller, netdev, uclinux-dist-devel

Le vendredi 04 juin 2010 à 11:29 +0800, Sonic Zhang a écrit :
> David,
> 
> Any comments?
> 
> Thanks
> 
> Sonic
> 
> On Thu, Jun 3, 2010 at 11:48 AM, sonic zhang <sonic.adi@gmail.com> wrote:
> > >From 40560ae9e8db42e2d2259b791ace160534c9a0f2 Mon Sep 17 00:00:00 2001
> > From: Sonic Zhang <sonic.zhang@analog.com>
> > Date: Thu, 3 Jun 2010 11:44:33 +0800
> > Subject: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
> >
> > SKBs hold onto resources that can't be held indefinitely, such as TCP
> > socket references and netfilter conntrack state.  So if a packet is left
> > in TX ring for a long time, there might be a TCP socket that cannot be
> > closed and freed up.
> >
> > Current blackfin EMAC driver always reclaim and free used tx skbs in future
> > transfers. The problem is that future transfer may not come as soon as
> > possible. This patch start a timer after transfer to reclaim and free skb.
> > There is nearly no performance drop with this patch.
> >
> > TX interrupt is not enabled for 2 reasons:
> >
> > 1) If Blackfin EMAC TX transfer control is turned on, endless TX
> > interrupts are triggered no matter if TX DMA is enabled. Since DMA walks
> > down the ring automatically, TX transfer control can't be turned off in the
> > middle. The only way is to disable TX interrupt completely.
> >
> > 2) skb can not be freed from interrupt context. A work queue or tasklet
> > has to be created, which introduce more overhead than timer only solution.
> >

Could you elaborate on this second point ?

skb can be freed from interrupt context using appropriate API :

1) If from NAPI context, no special care is needed and use
dev_kfree_skb().

2) If from hard irq context, use dev_kfree_skb_irq() :

With recent changes, skb is probably already orphaned and can be freed
immediately.

In the unlikely case it is not yet orphaned, skb is queued in
softnet_data.completion_queue.




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-04  4:05   ` Eric Dumazet
@ 2010-06-04  4:44     ` Sonic Zhang
  2010-06-07  9:58       ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Sonic Zhang @ 2010-06-04  4:44 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, uclinux-dist-devel

On Fri, Jun 4, 2010 at 12:05 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 04 juin 2010 à 11:29 +0800, Sonic Zhang a écrit :
>> David,
>>
>> Any comments?
>>
>> Thanks
>>
>> Sonic
>>
>> On Thu, Jun 3, 2010 at 11:48 AM, sonic zhang <sonic.adi@gmail.com> wrote:
>> > >From 40560ae9e8db42e2d2259b791ace160534c9a0f2 Mon Sep 17 00:00:00 2001
>> > From: Sonic Zhang <sonic.zhang@analog.com>
>> > Date: Thu, 3 Jun 2010 11:44:33 +0800
>> > Subject: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
>> >
>> > SKBs hold onto resources that can't be held indefinitely, such as TCP
>> > socket references and netfilter conntrack state.  So if a packet is left
>> > in TX ring for a long time, there might be a TCP socket that cannot be
>> > closed and freed up.
>> >
>> > Current blackfin EMAC driver always reclaim and free used tx skbs in future
>> > transfers. The problem is that future transfer may not come as soon as
>> > possible. This patch start a timer after transfer to reclaim and free skb.
>> > There is nearly no performance drop with this patch.
>> >
>> > TX interrupt is not enabled for 2 reasons:
>> >
>> > 1) If Blackfin EMAC TX transfer control is turned on, endless TX
>> > interrupts are triggered no matter if TX DMA is enabled. Since DMA walks
>> > down the ring automatically, TX transfer control can't be turned off in the
>> > middle. The only way is to disable TX interrupt completely.
>> >
>> > 2) skb can not be freed from interrupt context. A work queue or tasklet
>> > has to be created, which introduce more overhead than timer only solution.
>> >
>
> Could you elaborate on this second point ?
>
> skb can be freed from interrupt context using appropriate API :
>
> 1) If from NAPI context, no special care is needed and use
> dev_kfree_skb().
>
> 2) If from hard irq context, use dev_kfree_skb_irq() :
>

Yes, you are right. dev_kfree_skb_irq() queues used skb to the
complete queue. But, it is actually freed in the other soft irq
NET_TX_SOFTIRQ.


> With recent changes, skb is probably already orphaned and can be freed
> immediately.
>
> In the unlikely case it is not yet orphaned, skb is queued in
> softnet_data.completion_queue.
>
>
>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-04  4:44     ` Sonic Zhang
@ 2010-06-07  9:58       ` Eric Dumazet
  2010-06-07 10:26         ` Sonic Zhang
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2010-06-07  9:58 UTC (permalink / raw)
  To: Sonic Zhang; +Cc: David Miller, netdev, uclinux-dist-devel

Le vendredi 04 juin 2010 à 12:44 +0800, Sonic Zhang a écrit :

> 
> Yes, you are right. dev_kfree_skb_irq() queues used skb to the
> complete queue. But, it is actually freed in the other soft irq
> NET_TX_SOFTIRQ.

I guess you didnt understood my mail, so I'll re-explain :

dev_kfree_skb_irq() queues skb only if packet is not already orphaned.

As most packets are now orphaned (in recent kernels where your patch
applies), dev_kfree_skb_irq() can free packet immediately, with no
NET_TX_SOFTIRQ overhead.




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer
  2010-06-07  9:58       ` Eric Dumazet
@ 2010-06-07 10:26         ` Sonic Zhang
  0 siblings, 0 replies; 10+ messages in thread
From: Sonic Zhang @ 2010-06-07 10:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, uclinux-dist-devel

On Mon, Jun 7, 2010 at 5:58 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 04 juin 2010 à 12:44 +0800, Sonic Zhang a écrit :
>
>>
>> Yes, you are right. dev_kfree_skb_irq() queues used skb to the
>> complete queue. But, it is actually freed in the other soft irq
>> NET_TX_SOFTIRQ.
>
> I guess you didnt understood my mail, so I'll re-explain :
>
> dev_kfree_skb_irq() queues skb only if packet is not already orphaned.
>
> As most packets are now orphaned (in recent kernels where your patch
> applies), dev_kfree_skb_irq() can free packet immediately, with no
> NET_TX_SOFTIRQ overhead.
>
>

OK. I didn't notice the recent change in kernel to orphan most
packets. You are right, my second reason is out of date.

Thanks

Sonic

>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2010-06-07 10:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-03  3:48 [PATCH v2] netdev:bfin_mac: reclaim and free tx skb as soon as possible after transfer sonic zhang
2010-06-03  4:05 ` Eric Dumazet
2010-06-03  8:57   ` Junchang Wang
2010-06-03  9:19     ` Eric Dumazet
2010-06-03 10:54       ` Junchang Wang
2010-06-04  3:29 ` Sonic Zhang
2010-06-04  4:05   ` Eric Dumazet
2010-06-04  4:44     ` Sonic Zhang
2010-06-07  9:58       ` Eric Dumazet
2010-06-07 10:26         ` Sonic Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).