netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] forcedeth: Stay in NAPI as long as there's work
@ 2010-04-28  6:36 Tom Herbert
  2010-04-28 17:54 ` Joe Perches
  0 siblings, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2010-04-28  6:36 UTC (permalink / raw)
  To: netdev, aabdulla, davem

Add loop in NAPI poll routine to keep processing RX and TX as long as
there is more work to do.  This is similar to what tg3 and some other
drivers do.

This modification seems improves performance (maximum pps).  Running
500 instances of netperf TCP_RR test with one byte sizes on between
two sixteen core AMD machines (RPS enabled) gives:

Before patch: 186715 tps
With patch: 400949 tps

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index a1c0e7b..1e4de7b 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 }
 
 #ifdef CONFIG_FORCEDETH_NAPI
+static inline int nv_has_work(struct fe_priv *np)
+{
+	if (nv_optimized(np)) {
+		return (
+		    ((np->get_rx.ex != np->put_rx.ex) &&
+		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
+		    ((np->get_tx.ex != np->put_tx.ex) &&
+		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
+	} else {
+		return (
+		    ((np->get_rx.orig != np->put_rx.orig) &&
+		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
+		    ((np->get_tx.orig != np->put_tx.orig) &&
+		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
+	}
+}
+
 static int nv_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct fe_priv *np = container_of(napi, struct fe_priv, napi);
@@ -3743,30 +3760,33 @@ static int nv_napi_poll(struct napi_struct *napi, int budget)
 	u8 __iomem *base = get_hwbase(dev);
 	unsigned long flags;
 	int retcode;
-	int tx_work, rx_work;
+	int tx_work = 0, rx_work = 0;
 
-	if (!nv_optimized(np)) {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+	do {
+		if (!nv_optimized(np)) {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process(dev, budget);
-		retcode = nv_alloc_rx(dev);
-	} else {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done_optimized(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+			rx_work += nv_rx_process(dev, budget);
+			retcode = nv_alloc_rx(dev);
+		} else {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process_optimized(dev, budget);
-		retcode = nv_alloc_rx_optimized(dev);
-	}
+			rx_work += nv_rx_process_optimized(dev, budget);
+			retcode = nv_alloc_rx_optimized(dev);
+		}
 
-	if (retcode) {
-		spin_lock_irqsave(&np->lock, flags);
-		if (!np->in_shutdown)
-			mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-		spin_unlock_irqrestore(&np->lock, flags);
-	}
+		if (unlikely(retcode)) {
+			spin_lock_irqsave(&np->lock, flags);
+			if (!np->in_shutdown)
+				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+			spin_unlock_irqrestore(&np->lock, flags);
+			break;
+		}
+	} while (rx_work < budget && nv_has_work(np));
 
 	nv_change_interrupt_mode(dev, tx_work + rx_work);
 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28  6:36 [PATCH] forcedeth: Stay in NAPI as long as there's work Tom Herbert
@ 2010-04-28 17:54 ` Joe Perches
  2010-04-28 18:07   ` Tom Herbert
  2010-04-28 18:13   ` Stephen Hemminger
  0 siblings, 2 replies; 9+ messages in thread
From: Joe Perches @ 2010-04-28 17:54 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev, aabdulla, davem

On Tue, 2010-04-27 at 23:36 -0700, Tom Herbert wrote:
> Add loop in NAPI poll routine to keep processing RX and TX as long as
> there is more work to do.  This is similar to what tg3 and some other
> drivers do.
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---
> diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
> index a1c0e7b..1e4de7b 100644
> --- a/drivers/net/forcedeth.c
> +++ b/drivers/net/forcedeth.c
> @@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
>  }
>  
>  #ifdef CONFIG_FORCEDETH_NAPI
> +static inline int nv_has_work(struct fe_priv *np)
> +{
> +	if (nv_optimized(np)) {
> +		return (
> +		    ((np->get_rx.ex != np->put_rx.ex) &&
> +		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
> +		    ((np->get_tx.ex != np->put_tx.ex) &&
> +		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
> +	} else {
> +		return (
> +		    ((np->get_rx.orig != np->put_rx.orig) &&
> +		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
> +		    ((np->get_tx.orig != np->put_tx.orig) &&
> +		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
> +	}
> +}

It might be better to test the comparisons using
a cpu_to_le32 of the constants.

static inline int nv_has_work(struct fe_priv *np)
{
	if (nv_optimized(np))
		return ((np->get_rx.ex != np->put_rx.ex) &&
			!(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
		       ((np->get_tx.ex != np->put_tx.ex) &&
			!(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));

	return ((np->get_rx.orig != np->put_rx.orig) &&
		!(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
	       ((np->get_tx.orig != np->put_tx.orig) &&
		!(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
}



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 17:54 ` Joe Perches
@ 2010-04-28 18:07   ` Tom Herbert
  2010-04-28 18:18     ` Tom Herbert
  2010-04-28 18:13   ` Stephen Hemminger
  1 sibling, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2010-04-28 18:07 UTC (permalink / raw)
  To: Joe Perches; +Cc: netdev, aabdulla, davem

>
> It might be better to test the comparisons using
> a cpu_to_le32 of the constants.
>
Yes.  Probably should also be changed in nv_tx_done{_optimized} and
nv_rx_process{_optimized}

> static inline int nv_has_work(struct fe_priv *np)
> {
>        if (nv_optimized(np))
>                return ((np->get_rx.ex != np->put_rx.ex) &&
>                        !(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
>                       ((np->get_tx.ex != np->put_tx.ex) &&
>                        !(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));
>
>        return ((np->get_rx.orig != np->put_rx.orig) &&
>                !(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
>               ((np->get_tx.orig != np->put_tx.orig) &&
>                !(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
> }
>
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 17:54 ` Joe Perches
  2010-04-28 18:07   ` Tom Herbert
@ 2010-04-28 18:13   ` Stephen Hemminger
  2010-04-28 18:25     ` Stephen Hemminger
  1 sibling, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2010-04-28 18:13 UTC (permalink / raw)
  To: Joe Perches; +Cc: Tom Herbert, netdev, aabdulla, davem

On Wed, 28 Apr 2010 10:54:11 -0700
Joe Perches <joe@perches.com> wrote:

> On Tue, 2010-04-27 at 23:36 -0700, Tom Herbert wrote:
> > Add loop in NAPI poll routine to keep processing RX and TX as long as
> > there is more work to do.  This is similar to what tg3 and some other
> > drivers do.
> > Signed-off-by: Tom Herbert <therbert@google.com>
> > ---
> > diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
> > index a1c0e7b..1e4de7b 100644
> > --- a/drivers/net/forcedeth.c
> > +++ b/drivers/net/forcedeth.c
> > @@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
> >  }
> >  
> >  #ifdef CONFIG_FORCEDETH_NAPI
> > +static inline int nv_has_work(struct fe_priv *np)
> > +{
> > +	if (nv_optimized(np)) {
> > +		return (
> > +		    ((np->get_rx.ex != np->put_rx.ex) &&
> > +		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
> > +		    ((np->get_tx.ex != np->put_tx.ex) &&
> > +		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
> > +	} else {
> > +		return (
> > +		    ((np->get_rx.orig != np->put_rx.orig) &&
> > +		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
> > +		    ((np->get_tx.orig != np->put_tx.orig) &&
> > +		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
> > +	}
> > +}

Why than adding another check step, why not just keep going until
rx_done returns 0?


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 18:07   ` Tom Herbert
@ 2010-04-28 18:18     ` Tom Herbert
  0 siblings, 0 replies; 9+ messages in thread
From: Tom Herbert @ 2010-04-28 18:18 UTC (permalink / raw)
  To: Joe Perches; +Cc: netdev, aabdulla, davem

On Wed, Apr 28, 2010 at 11:07 AM, Tom Herbert <therbert@google.com> wrote:
>>
>> It might be better to test the comparisons using
>> a cpu_to_le32 of the constants.
>>
> Yes.  Probably should also be changed in nv_tx_done{_optimized} and
> nv_rx_process{_optimized}
>
Scratch that.  flags are checked all over the place in those other functions.

>> static inline int nv_has_work(struct fe_priv *np)
>> {
>>        if (nv_optimized(np))
>>                return ((np->get_rx.ex != np->put_rx.ex) &&
>>                        !(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
>>                       ((np->get_tx.ex != np->put_tx.ex) &&
>>                        !(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));
>>
>>        return ((np->get_rx.orig != np->put_rx.orig) &&
>>                !(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
>>               ((np->get_tx.orig != np->put_tx.orig) &&
>>                !(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
>> }
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 18:13   ` Stephen Hemminger
@ 2010-04-28 18:25     ` Stephen Hemminger
  2010-04-28 21:25       ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2010-04-28 18:25 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Joe Perches, Tom Herbert, netdev, aabdulla, davem

The following does the same thing without the extra overhead
of testing all the registers. It also handles the out of memory
case.

Compile tested only...

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/drivers/net/forcedeth.c	2010-04-28 11:14:29.784799317 -0700
+++ b/drivers/net/forcedeth.c	2010-04-28 11:23:53.254354744 -0700
@@ -3743,23 +3743,26 @@ static int nv_napi_poll(struct napi_stru
 	u8 __iomem *base = get_hwbase(dev);
 	unsigned long flags;
 	int retcode;
-	int tx_work, rx_work;
+	int rx_count, tx_work=0, rx_work=0;
 
-	if (!nv_optimized(np)) {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+	do {
+		if (!nv_optimized(np)) {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process(dev, budget);
-		retcode = nv_alloc_rx(dev);
-	} else {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done_optimized(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+			rx_count = nv_rx_process(dev, budget);
+			retcode = nv_alloc_rx(dev);
+		} else {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process_optimized(dev, budget);
-		retcode = nv_alloc_rx_optimized(dev);
-	}
+			rx_count = nv_rx_process_optimized(dev, budget);
+			retcode = nv_alloc_rx_optimized(dev);
+		}
+	} while (retcode == 0 &&
+		 rx_count > 0 && (rx_work += rx_count) < budget);
 
 	if (retcode) {
 		spin_lock_irqsave(&np->lock, flags);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 18:25     ` Stephen Hemminger
@ 2010-04-28 21:25       ` David Miller
  2010-04-28 23:56         ` Tom Herbert
  0 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2010-04-28 21:25 UTC (permalink / raw)
  To: shemminger; +Cc: joe, therbert, netdev, aabdulla

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 28 Apr 2010 11:25:28 -0700

> The following does the same thing without the extra overhead
> of testing all the registers. It also handles the out of memory
> case.
> 
> Compile tested only...
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Tom can you test this version?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 21:25       ` David Miller
@ 2010-04-28 23:56         ` Tom Herbert
  2010-04-30 23:17           ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2010-04-28 23:56 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, joe, netdev, aabdulla

On Wed, Apr 28, 2010 at 2:25 PM, David Miller <davem@davemloft.net> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Wed, 28 Apr 2010 11:25:28 -0700
>
>> The following does the same thing without the extra overhead
>> of testing all the registers. It also handles the out of memory
>> case.
>>
>> Compile tested only...
>>
>> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
> Tom can you test this version?
>

Looks good.  406038 tps in my quick test which still is showing the
benefits.  Thanks for cleaning this up Stephen!

Tom

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
  2010-04-28 23:56         ` Tom Herbert
@ 2010-04-30 23:17           ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2010-04-30 23:17 UTC (permalink / raw)
  To: therbert; +Cc: shemminger, joe, netdev, aabdulla

From: Tom Herbert <therbert@google.com>
Date: Wed, 28 Apr 2010 16:56:30 -0700

> On Wed, Apr 28, 2010 at 2:25 PM, David Miller <davem@davemloft.net> wrote:
>> From: Stephen Hemminger <shemminger@vyatta.com>
>> Date: Wed, 28 Apr 2010 11:25:28 -0700
>>
>>> The following does the same thing without the extra overhead
>>> of testing all the registers. It also handles the out of memory
>>> case.
>>>
>>> Compile tested only...
>>>
>>> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>>
>> Tom can you test this version?
>>
> 
> Looks good.  406038 tps in my quick test which still is showing the
> benefits.  Thanks for cleaning this up Stephen!

Thanks for testing, applied, thanks everyone.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-04-30 23:17 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-28  6:36 [PATCH] forcedeth: Stay in NAPI as long as there's work Tom Herbert
2010-04-28 17:54 ` Joe Perches
2010-04-28 18:07   ` Tom Herbert
2010-04-28 18:18     ` Tom Herbert
2010-04-28 18:13   ` Stephen Hemminger
2010-04-28 18:25     ` Stephen Hemminger
2010-04-28 21:25       ` David Miller
2010-04-28 23:56         ` Tom Herbert
2010-04-30 23:17           ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).