netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] make qdisc_restart more readable
@ 2007-05-11  0:13 jamal
  2007-05-11 15:56 ` Waskiewicz Jr, Peter P
  2007-05-11 17:01 ` [RFC] make " Thomas Graf
  0 siblings, 2 replies; 26+ messages in thread
From: jamal @ 2007-05-11  0:13 UTC (permalink / raw)
  To: Herbert Xu, David Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 328 bytes --]


This compiles and passes some basic tests - no serious testing.
Against net-2.6.

The patch is ugly looking, so i have at the end the re-written qdisc;
you can easily tell the rest from the patch.

Please flush out any fluff - I would like to submit this (almost lost
it, thanks to an early posting, found it).


cheers,
jamal

[-- Attachment #2: rfc-qrestart --]
[-- Type: text/x-patch, Size: 6687 bytes --]

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f671cd2..718d6fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -83,6 +83,9 @@ struct wireless_dev;
 #define NETDEV_TX_OK 0		/* driver took care of packet */
 #define NETDEV_TX_BUSY 1	/* driver tx path was busy*/
 #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
+#define NETDEV_TX_DROP -2	/* request caller to drop packet */
+#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */
+
 
 /*
  *	Compute the worst case header length according to the protocols
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..b821040 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
-/*
+static inline int qqlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (dev->xmit_lock_owner == smp_processor_id()) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return NETDEV_TX_DROP;
+	}
+	/* XXX: This maintains original code, but i think we should
+	 * update cpu_collision always
+	*/
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return NETDEV_TX_QUEUE;
+}
+
+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(q == &noop_qdisc))
+		kfree_skb(skb);
+
+	if (skb->next)
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}
+
+static inline int
+try_get_tx_pkt(struct sk_buff **skb, struct net_device *dev, struct Qdisc *q)
+{
+	if (((*skb = dev->gso_skb)) || ((*skb = q->dequeue(q)))) {
+
+		dev->gso_skb = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int
+handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == NETDEV_TX_DROP) {
+		kfree_skb(skb);
+		return qqlen(q);
+	}
+
+	return handle_dev_requeue(skb, dev, q);
+}
+
+
+/* 
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,114 +139,63 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer, so that
+   we do not check dev->tbusy flag here.
 
+   Returns to the caller:  
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
-
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
-static inline int qdisc_restart(struct net_device *dev)
+int 
+qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
+	struct sk_buff *skb = NULL;
+	int ret;
+
+	ret = try_get_tx_pkt(&skb, dev, q);
+	if (ret == 0)
+		return qqlen(q);
+
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return handle_tx_locked(skb, dev, q);
+	}
+		
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
 
-		dev->gso_skb = NULL;
+	/* churn baby churn .. */
+	ret = dev_hard_start_xmit(skb, dev);
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
+	if (!lockless)  
+		netif_tx_unlock(dev);
 
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
+	spin_lock(&dev->queue_lock);
 
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qqlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless) {
+		return handle_tx_locked(skb, dev, q);
+	}
 
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-		return 0;
+	if (unlikely (ret != NETDEV_TX_BUSY)) { 
+		/* XXX: Do we need a ratelimit? or put a 
+		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
+		 **/
+		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
 	}
 
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	return handle_dev_requeue(skb, dev, q);
 }
 
 void __qdisc_run(struct net_device *dev)

[-- Attachment #3: sch_generic.c --]
[-- Type: text/x-csrc, Size: 1037 bytes --]

int 
qdisc_restart(struct net_device *dev)
{
	struct Qdisc *q = dev->qdisc;
	unsigned lockless = (dev->features & NETIF_F_LLTX);
	struct sk_buff *skb = NULL;
	int ret;

	ret = try_get_tx_pkt(&skb, dev, q);
	if (ret == 0)
		return qqlen(q);

	/* we have a packet to send */
	if (!lockless) {
		if (!netif_tx_trylock(dev))
			return handle_tx_locked(skb, dev, q);
	}
		
	/* all clear .. */
	spin_unlock(&dev->queue_lock);

	/* churn baby churn .. */
	ret = dev_hard_start_xmit(skb, dev);

	if (!lockless)  
		netif_tx_unlock(dev);

	spin_lock(&dev->queue_lock);

	q = dev->qdisc;
	/* most likely result, packet went ok */
	if (ret == NETDEV_TX_OK)
		return qqlen(q);
	/* only for lockless drivers .. */
	if (ret == NETDEV_TX_LOCKED && lockless) {
		return handle_tx_locked(skb, dev, q);
	}

	if (unlikely (ret != NETDEV_TX_BUSY)) { 
		/* XXX: Do we need a ratelimit? or put a 
		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
		 **/
		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
	}

	return handle_dev_requeue(skb, dev, q);
}


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [RFC] make qdisc_restart more readable
  2007-05-11  0:13 [RFC] make qdisc_restart more readable jamal
@ 2007-05-11 15:56 ` Waskiewicz Jr, Peter P
  2007-05-11 18:04   ` jamal
  2007-05-11 17:01 ` [RFC] make " Thomas Graf
  1 sibling, 1 reply; 26+ messages in thread
From: Waskiewicz Jr, Peter P @ 2007-05-11 15:56 UTC (permalink / raw)
  To: hadi, Herbert Xu, David Miller; +Cc: netdev

> This compiles and passes some basic tests - no serious testing.
> Against net-2.6.
> 
> The patch is ugly looking, so i have at the end the 
> re-written qdisc; you can easily tell the rest from the patch.
> 
> Please flush out any fluff - I would like to submit this 
> (almost lost it, thanks to an early posting, found it).
> 
> 
> cheers,
> jamal

In qdisc_restart(), you removed any check for if
(!netif_queue_stopped(dev)) before calling dev_hard_start_xmit().  If
the underlying queue is stopped and you send the skb, you'll generate a
requeue.  Is there a reason it was removed?

Thx,

-PJ Waskiewicz 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] make qdisc_restart more readable
  2007-05-11  0:13 [RFC] make qdisc_restart more readable jamal
  2007-05-11 15:56 ` Waskiewicz Jr, Peter P
@ 2007-05-11 17:01 ` Thomas Graf
  2007-05-11 18:11   ` jamal
  1 sibling, 1 reply; 26+ messages in thread
From: Thomas Graf @ 2007-05-11 17:01 UTC (permalink / raw)
  To: hadi; +Cc: Herbert Xu, David Miller, netdev

* jamal <j.hadi123@gmail.com> 2007-05-10 20:13
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index f671cd2..718d6fd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -83,6 +83,9 @@ struct wireless_dev;
>  #define NETDEV_TX_OK 0		/* driver took care of packet */
>  #define NETDEV_TX_BUSY 1	/* driver tx path was busy*/
>  #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
> +#define NETDEV_TX_DROP -2	/* request caller to drop packet */
> +#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */
> +
>  
>  /*
>   *	Compute the worst case header length according to the protocols
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index f28bb2d..b821040 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
>  	spin_unlock_bh(&dev->queue_lock);
>  }
>  
> -/*
> +static inline int qqlen(struct Qdisc *q)
> +{
> +	BUG_ON((int) q->q.qlen < 0);
> +	return q->q.qlen;
> +}

Nitpicking but qdisc_qlen() sounds a lot better to me.

> +
> +static inline int handle_dev_cpu_collision(struct net_device *dev)
> +{
> +	if (dev->xmit_lock_owner == smp_processor_id()) {
> +		if (net_ratelimit())
> +			printk(KERN_DEBUG
> +			       "Dead loop on netdevice %s, fix it urgently!\n",
> +			       dev->name);
> +		return NETDEV_TX_DROP;
> +	}
> +	/* XXX: This maintains original code, but i think we should
> +	 * update cpu_collision always
> +	*/
> +	__get_cpu_var(netdev_rx_stat).cpu_collision++;
> +	return NETDEV_TX_QUEUE;
> +}
> +
> +static inline int
> +handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> +{
> +
> +	if (unlikely(q == &noop_qdisc))
> +		kfree_skb(skb);
> +
> +	if (skb->next)

Oops, missing else here, this will crash.

> +		dev->gso_skb = skb;
> +	else
> +		q->ops->requeue(skb, q);
> +	/* XXX: Could netif_schedule fail? Or is that fact we are
> +	 * requeueing imply the hardware path is closed
> +	 * and even if we fail, some interupt will wake us
> +	*/
> +	netif_schedule(dev);
> +	return 0;
> +}
> +
> +static inline int
> +try_get_tx_pkt(struct sk_buff **skb, struct net_device *dev, struct Qdisc *q)
> +{
> +	if (((*skb = dev->gso_skb)) || ((*skb = q->dequeue(q)))) {
> +
> +		dev->gso_skb = NULL;
> +		return -1;
> +	}
> +
> +	return 0;
> +}

Seems simpler to just return the skb or NULL instead

> +static inline int
> +handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> +{
> +	int ret = handle_dev_cpu_collision(dev);
> +
> +	if (ret == NETDEV_TX_DROP) {
> +		kfree_skb(skb);
> +		return qqlen(q);
> +	}
> +
> +	return handle_dev_requeue(skb, dev, q);
> +}
> +
> +
> +/* 
> +   NOTE: Called under dev->queue_lock with locally disabled BH.
> +
> +   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
> +   can enter this region at a time.
> +
>     dev->queue_lock serializes queue accesses for this device
>     AND dev->qdisc pointer itself.
>  
> @@ -67,114 +139,63 @@ void qdisc_unlock_tree(struct net_device *dev)
>  
>     dev->queue_lock and netif_tx_lock are mutually exclusive,
>     if one is grabbed, another must be free.
> - */
>  
> +   Multiple CPUs may contend for the two locks.
>  
> -/* Kick device.
> +   Note, that this procedure can be called by a watchdog timer, so that
> +   we do not check dev->tbusy flag here.
>  
> +   Returns to the caller:  
>     Returns:  0  - queue is empty or throttled.
>  	    >0  - queue is not empty.
> -
> -   NOTE: Called under dev->queue_lock with locally disabled BH.
>  */
>  
> -static inline int qdisc_restart(struct net_device *dev)
> +int 
> +qdisc_restart(struct net_device *dev)
>  {
>  	struct Qdisc *q = dev->qdisc;
> -	struct sk_buff *skb;
> -
> -	/* Dequeue packet */
> -	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
> -		unsigned nolock = (dev->features & NETIF_F_LLTX);
> +	unsigned lockless = (dev->features & NETIF_F_LLTX);
> +	struct sk_buff *skb = NULL;
> +	int ret;
> +
> +	ret = try_get_tx_pkt(&skb, dev, q);
> +	if (ret == 0)
> +		return qqlen(q);
> +
> +	/* we have a packet to send */
> +	if (!lockless) {
> +		if (!netif_tx_trylock(dev))
> +			return handle_tx_locked(skb, dev, q);
> +	}
> +		
> +	/* all clear .. */
> +	spin_unlock(&dev->queue_lock);
>  
> -		dev->gso_skb = NULL;
> +	/* churn baby churn .. */
> +	ret = dev_hard_start_xmit(skb, dev);

Check whether queue is stopped got lost here

> -		/*
> -		 * When the driver has LLTX set it does its own locking
> -		 * in start_xmit. No need to add additional overhead by
> -		 * locking again. These checks are worth it because
> -		 * even uncongested locks can be quite expensive.
> -		 * The driver can do trylock like here too, in case
> -		 * of lock congestion it should return -1 and the packet
> -		 * will be requeued.
> -		 */
> -		if (!nolock) {
> -			if (!netif_tx_trylock(dev)) {
> -			collision:
> -				/* So, someone grabbed the driver. */
> -
> -				/* It may be transient configuration error,
> -				   when hard_start_xmit() recurses. We detect
> -				   it by checking xmit owner and drop the
> -				   packet when deadloop is detected.
> -				*/
> -				if (dev->xmit_lock_owner == smp_processor_id()) {
> -					kfree_skb(skb);
> -					if (net_ratelimit())
> -						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
> -					goto out;
> -				}
> -				__get_cpu_var(netdev_rx_stat).cpu_collision++;
> -				goto requeue;
> -			}
> -		}
> +	if (!lockless)  
> +		netif_tx_unlock(dev);
>  
> -		{
> -			/* And release queue */
> -			spin_unlock(&dev->queue_lock);
> -
> -			if (!netif_queue_stopped(dev)) {
> -				int ret;
> -
> -				ret = dev_hard_start_xmit(skb, dev);
> -				if (ret == NETDEV_TX_OK) {
> -					if (!nolock) {
> -						netif_tx_unlock(dev);
> -					}
> -					spin_lock(&dev->queue_lock);
> -					q = dev->qdisc;
> -					goto out;
> -				}
> -				if (ret == NETDEV_TX_LOCKED && nolock) {
> -					spin_lock(&dev->queue_lock);
> -					q = dev->qdisc;
> -					goto collision;
> -				}
> -			}
> +	spin_lock(&dev->queue_lock);
>  
> -			/* NETDEV_TX_BUSY - we need to requeue */
> -			/* Release the driver */
> -			if (!nolock) {
> -				netif_tx_unlock(dev);
> -			}
> -			spin_lock(&dev->queue_lock);
> -			q = dev->qdisc;
> -		}
> +	q = dev->qdisc;

Maybe comment why q needs to be refreshed so nobody removes
it by accident.

> +	/* most likely result, packet went ok */
> +	if (ret == NETDEV_TX_OK)
> +		return qqlen(q);
> +	/* only for lockless drivers .. */
> +	if (ret == NETDEV_TX_LOCKED && lockless) {
> +		return handle_tx_locked(skb, dev, q);
> +	}

Your style of placing parentheses seems to vary randomly :-)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [RFC] make qdisc_restart more readable
  2007-05-11 15:56 ` Waskiewicz Jr, Peter P
@ 2007-05-11 18:04   ` jamal
  2007-05-11 18:13     ` Waskiewicz Jr, Peter P
  0 siblings, 1 reply; 26+ messages in thread
From: jamal @ 2007-05-11 18:04 UTC (permalink / raw)
  To: Waskiewicz Jr, Peter P; +Cc: Herbert Xu, David Miller, netdev

On Fri, 2007-11-05 at 08:56 -0700, Waskiewicz Jr, Peter P wrote:

> In qdisc_restart(), you removed any check for if
> (!netif_queue_stopped(dev)) before calling dev_hard_start_xmit().  If
> the underlying queue is stopped and you send the skb, you'll generate a
> requeue.  Is there a reason it was removed?
> 

No - its a bug ;-> 
Thanks for staring at this Peter. I will fix it.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] make qdisc_restart more readable
  2007-05-11 17:01 ` [RFC] make " Thomas Graf
@ 2007-05-11 18:11   ` jamal
  0 siblings, 0 replies; 26+ messages in thread
From: jamal @ 2007-05-11 18:11 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Herbert Xu, David Miller, netdev

On Fri, 2007-11-05 at 19:01 +0200, Thomas Graf wrote:
> * jamal <j.hadi123@gmail.com> 2007-05-10 20:13

> >   *	Compute the worst case header length according to the protocols
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index f28bb2d..b821040 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
> >  	spin_unlock_bh(&dev->queue_lock);
> >  }
> >  
> > -/*
> > +static inline int qqlen(struct Qdisc *q)
> > +{
> > +	BUG_ON((int) q->q.qlen < 0);
> > +	return q->q.qlen;
> > +}
> 
> Nitpicking but qdisc_qlen() sounds a lot better to me.
> 

I thought nitpicking was my department ;->
qdisc_qlen is more descriptive; i will change it.


> > +static inline int
> > +handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> > +{
> > +
> > +	if (unlikely(q == &noop_qdisc))
> > +		kfree_skb(skb);
> > +
> > +	if (skb->next)
> 
> Oops, missing else here, this will crash.
> 

tabarnaq. I will fix - thanks.

> Seems simpler to just return the skb or NULL instead
> 

sure.

> > +	/* churn baby churn .. */
> > +	ret = dev_hard_start_xmit(skb, dev);
> 
> Check whether queue is stopped got lost here
> 

Also caught by Peter - will fix.




> > +	q = dev->qdisc;
> 
> Maybe comment why q needs to be refreshed so nobody removes
> it by accident.

Makes sense ..

> > +	/* most likely result, packet went ok */
> > +	if (ret == NETDEV_TX_OK)
> > +		return qqlen(q);
> > +	/* only for lockless drivers .. */
> > +	if (ret == NETDEV_TX_LOCKED && lockless) {
> > +		return handle_tx_locked(skb, dev, q);
> > +	}
> 
> Your style of placing parentheses seems to vary randomly :-)

There may have been a debug in there that i removed. Frankly
i dont mind having braces even for a one liner, but i know that is not
popular with many folks.
Any other spot you noticed?

Thanks a lot Thomas.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [RFC] make qdisc_restart more readable
  2007-05-11 18:04   ` jamal
@ 2007-05-11 18:13     ` Waskiewicz Jr, Peter P
  2007-05-11 18:35       ` jamal
  0 siblings, 1 reply; 26+ messages in thread
From: Waskiewicz Jr, Peter P @ 2007-05-11 18:13 UTC (permalink / raw)
  To: hadi; +Cc: Herbert Xu, David Miller, netdev

> On Fri, 2007-11-05 at 08:56 -0700, Waskiewicz Jr, Peter P wrote:
> 
> > In qdisc_restart(), you removed any check for if
> > (!netif_queue_stopped(dev)) before calling 
> dev_hard_start_xmit().  If 
> > the underlying queue is stopped and you send the skb, 
> you'll generate 
> > a requeue.  Is there a reason it was removed?
> > 
> 
> No - its a bug ;->
> Thanks for staring at this Peter. I will fix it.
> 
> cheers,
> jamal

After thinking about this a bit more: even if the queue is stopped,
you'd end up requeueing anyways.  Plus, you'd need to re-acquire
dev->queue_lock (which is what happens today).  I think the best way
overall would be to check the queue state before you physically dequeue
(whether it's in qdisc_restart() or in the qdisc's ->dequeue()).  That
way you still hold dev->queue_lock in case the queue is stopped, and
haven't yet pulled an skb causing a requeue event.

Just $0.02 on Friday.

Thx,

-PJ Waskiewicz

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [RFC] make qdisc_restart more readable
  2007-05-11 18:13     ` Waskiewicz Jr, Peter P
@ 2007-05-11 18:35       ` jamal
  2007-05-11 18:46         ` Waskiewicz Jr, Peter P
  0 siblings, 1 reply; 26+ messages in thread
From: jamal @ 2007-05-11 18:35 UTC (permalink / raw)
  To: Waskiewicz Jr, Peter P; +Cc: Herbert Xu, David Miller, netdev

On Fri, 2007-11-05 at 11:13 -0700, Waskiewicz Jr, Peter P wrote:

> After thinking about this a bit more: even if the queue is stopped,
> you'd end up requeueing anyways.  Plus, you'd need to re-acquire
> dev->queue_lock (which is what happens today).  I think the best way
> overall would be to check the queue state before you physically dequeue
> (whether it's in qdisc_restart() or in the qdisc's ->dequeue()).

For lockless drivers, I think it could be moved up; only thing is
nothing is stopping it from changing again at transmit time.
For drivers that are not lockless, i am wondering if its even useful
to have that check given we have already grabbed the tx lock.

>   That
> way you still hold dev->queue_lock in case the queue is stopped, and
> haven't yet pulled an skb causing a requeue event.

It certainly sounds like a useful optimization. Defer that thought - for
now i just want to make sure its doing what the previous code did.

I will repost the fixed patch later today after some testing; if you
have time i will appreciate some eyeballing. That piece of code is
hairy. Hopefully we can make it bald.

cheers,
jamal



^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [RFC] make qdisc_restart more readable
  2007-05-11 18:35       ` jamal
@ 2007-05-11 18:46         ` Waskiewicz Jr, Peter P
  2007-05-11 19:29           ` take 2 WAS (RE: " jamal
  0 siblings, 1 reply; 26+ messages in thread
From: Waskiewicz Jr, Peter P @ 2007-05-11 18:46 UTC (permalink / raw)
  To: hadi; +Cc: Herbert Xu, David Miller, netdev

> For lockless drivers, I think it could be moved up; only 
> thing is nothing is stopping it from changing again at transmit time.
> For drivers that are not lockless, i am wondering if its even 
> useful to have that check given we have already grabbed the tx lock.

Good point.  I live in an LLTX world, so I don't think tx_lock mode very
often.

> 
> >   That
> > way you still hold dev->queue_lock in case the queue is 
> stopped, and 
> > haven't yet pulled an skb causing a requeue event.
> 
> It certainly sounds like a useful optimization. Defer that 
> thought - for now i just want to make sure its doing what the 
> previous code did.

Agreed.

> 
> I will repost the fixed patch later today after some testing; 
> if you have time i will appreciate some eyeballing. That 
> piece of code is hairy. Hopefully we can make it bald.

Clippers are standing by.  :)

-PJ

^ permalink raw reply	[flat|nested] 26+ messages in thread

* take 2 WAS (RE: [RFC] make qdisc_restart more readable
  2007-05-11 18:46         ` Waskiewicz Jr, Peter P
@ 2007-05-11 19:29           ` jamal
  2007-05-11 22:01             ` Waskiewicz Jr, Peter P
                               ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: jamal @ 2007-05-11 19:29 UTC (permalink / raw)
  To: Waskiewicz Jr, Peter P, Thomas Graf; +Cc: Herbert Xu, David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 392 bytes --]

On Fri, 2007-11-05 at 11:46 -0700, Waskiewicz Jr, Peter P wrote:

> Clippers are standing by.  :)

Nothing as nice as time off to code away;->

Ok, booting fine to me; 
if you receive this email, it must have worked. OTOH, if you didnt
receive this it probably failed ;->
(as the presenter said "if you cant hear me in the far corner please
raise your hand").

clip away ...

cheers,
jamal



[-- Attachment #2: rfc-qrestart-take2 --]
[-- Type: text/x-patch, Size: 6882 bytes --]

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f671cd2..718d6fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -83,6 +83,9 @@ struct wireless_dev;
 #define NETDEV_TX_OK 0		/* driver took care of packet */
 #define NETDEV_TX_BUSY 1	/* driver tx path was busy*/
 #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
+#define NETDEV_TX_DROP -2	/* request caller to drop packet */
+#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */
+
 
 /*
  *	Compute the worst case header length according to the protocols
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..9898523 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
-/*
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (dev->xmit_lock_owner == smp_processor_id()) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return NETDEV_TX_DROP;
+	}
+	/* XXX: This maintains original code, but i think we should
+	 * update cpu_collision always
+	*/
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return NETDEV_TX_QUEUE;
+}
+
+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(q == &noop_qdisc))
+		kfree_skb(skb);
+	else if (skb->next)
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}
+
+static struct sk_buff *
+try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+{
+	struct sk_buff *skb = NULL;
+
+	if (skb = dev->gso_skb) 
+		dev->gso_skb = NULL;
+	else 
+		skb = q->dequeue(q);
+
+	return skb;
+}
+
+static inline int
+handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == NETDEV_TX_DROP) {
+		kfree_skb(skb);
+		return qdisc_qlen(q);
+	}
+
+	return handle_dev_requeue(skb, dev, q);
+}
+
+
+/* 
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,114 +139,68 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer, so that
+   we do not check dev->tbusy flag here.
 
+   Returns to the caller:  
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
-
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
-static inline int qdisc_restart(struct net_device *dev)
+int 
+qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	int ret;
 
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
-
-		dev->gso_skb = NULL;
+	skb = try_get_tx_pkt(dev, q);
+	if (skb == NULL)
+		return qdisc_qlen(q);
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
-
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
-
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
-
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-		return 0;
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return handle_tx_locked(skb, dev, q);
+	}
+		
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
+
+	ret = NETDEV_TX_BUSY;
+	if (!netif_queue_stopped(dev))
+		/* churn baby churn .. */
+		ret = dev_hard_start_xmit(skb, dev);
+
+	if (!lockless)  
+		netif_tx_unlock(dev);
+
+	spin_lock(&dev->queue_lock);
+
+	/* we need to refresh q because it may be invalid since we droped 
+	 * dev->queue_lock earlier ...
+	 * So dont try to be clever grasshopper
+	*/
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qdisc_qlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless) 
+		return handle_tx_locked(skb, dev, q);
+
+	if (unlikely (ret != NETDEV_TX_BUSY)) { 
+		/* XXX: Do we need a ratelimit? or put a 
+		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
+		 **/
+		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
 	}
 
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	return handle_dev_requeue(skb, dev, q);
 }
 
 void __qdisc_run(struct net_device *dev)

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: take 2 WAS (RE: [RFC] make qdisc_restart more readable
  2007-05-11 19:29           ` take 2 WAS (RE: " jamal
@ 2007-05-11 22:01             ` Waskiewicz Jr, Peter P
  2007-05-11 22:43               ` jamal
  2007-05-12  9:46             ` Thomas Graf
  2007-05-12 12:18             ` take 3 " jamal
  2 siblings, 1 reply; 26+ messages in thread
From: Waskiewicz Jr, Peter P @ 2007-05-11 22:01 UTC (permalink / raw)
  To: hadi, Thomas Graf; +Cc: Herbert Xu, David Miller, netdev

> Ok, booting fine to me;
> if you receive this email, it must have worked. OTOH, if you
> didnt receive this it probably failed ;-> (as the presenter
> said "if you cant hear me in the far corner please raise your hand").
>
> clip away ...

+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct
Qdisc *q)
+{
+
+	if (unlikely(q == &noop_qdisc))
+		kfree_skb(skb);
+	else if (skb->next)
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}

The current requeue calls in qdisc_restart() returns a value of 1 to the
upper layer.  If you return 0, the __qdisc_run() won't break from the
while() it seems if you have to successfully requeue a packet.


-static inline int qdisc_restart(struct net_device *dev)
+int
+qdisc_restart(struct net_device *dev)
 {

Why is this no longer defined static?


+	/* we need to refresh q because it may be invalid since we
droped 

<nitpick>
droped = dropped
</nitpick>

That's all I have for now.  I'll try applying it for kicks and see what
happens.

Thanks,

-PJ Waskiewicz

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: take 2 WAS (RE: [RFC] make qdisc_restart more readable
  2007-05-11 22:01             ` Waskiewicz Jr, Peter P
@ 2007-05-11 22:43               ` jamal
  0 siblings, 0 replies; 26+ messages in thread
From: jamal @ 2007-05-11 22:43 UTC (permalink / raw)
  To: Waskiewicz Jr, Peter P; +Cc: Thomas Graf, Herbert Xu, David Miller, netdev

On Fri, 2007-11-05 at 15:01 -0700, Waskiewicz Jr, Peter P wrote:

> The current requeue calls in qdisc_restart() returns a value of 1 to the
> upper layer. 

Which was not meaningful ;->
i.e qdisc run would break as a result.
Note, this is still in conformance with net-2.6 based on a change in the
last few days made by Herbert - I was following his lead.
I put that comment there just as something to remember and
maybe revisit later.

>  If you return 0, the __qdisc_run() won't break from the
> while() it seems if you have to successfully requeue a packet.
> 

It wont; the check is for < 0

> 
> -static inline int qdisc_restart(struct net_device *dev)
> +int
> +qdisc_restart(struct net_device *dev)
>  {
> 
> Why is this no longer defined static?
> 

Good catch - i will restore it to what it was originally.

> 
> +	/* we need to refresh q because it may be invalid since we
> droped 
> 
> <nitpick>
> droped = dropped
> </nitpick>
> 

Ok, better to fix it now before the spelling police show up ;->

> That's all I have for now.  I'll try applying it for kicks and see what
> happens.

Just make those two fixes in your version; also note, you need Davems
net-2.6 tree.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: take 2 WAS (RE: [RFC] make qdisc_restart more readable
  2007-05-11 19:29           ` take 2 WAS (RE: " jamal
  2007-05-11 22:01             ` Waskiewicz Jr, Peter P
@ 2007-05-12  9:46             ` Thomas Graf
  2007-05-12 11:58               ` jamal
  2007-05-12 12:18             ` take 3 " jamal
  2 siblings, 1 reply; 26+ messages in thread
From: Thomas Graf @ 2007-05-12  9:46 UTC (permalink / raw)
  To: jamal; +Cc: Waskiewicz Jr, Peter P, Herbert Xu, David Miller, netdev

* jamal <hadi@cyberus.ca> 2007-05-11 15:29
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> +	netif_schedule(dev);
> +	return 0;
> +}
> +
> +static struct sk_buff *
> +try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
> +{
> +	struct sk_buff *skb = NULL;
> +
> +	if (skb = dev->gso_skb) 

This must have caused a warning and you probably want to inline
this function.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: take 2 WAS (RE: [RFC] make qdisc_restart more readable
  2007-05-12  9:46             ` Thomas Graf
@ 2007-05-12 11:58               ` jamal
  0 siblings, 0 replies; 26+ messages in thread
From: jamal @ 2007-05-12 11:58 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Waskiewicz Jr, Peter P, Herbert Xu, David Miller, netdev

On Sat, 2007-12-05 at 11:46 +0200, Thomas Graf wrote:

> > +	struct sk_buff *skb = NULL;
> > +
> > +	if (skb = dev->gso_skb) 
> 
> This must have caused a warning 

Possibly (dont recall) - i will move the assignment to the first line.

> and you probably want to inline this function.

will do.

Thanks Thomas.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* take 3 [RFC] make qdisc_restart more readable
  2007-05-11 19:29           ` take 2 WAS (RE: " jamal
  2007-05-11 22:01             ` Waskiewicz Jr, Peter P
  2007-05-12  9:46             ` Thomas Graf
@ 2007-05-12 12:18             ` jamal
  2007-05-12 17:02               ` Patrick McHardy
  2 siblings, 1 reply; 26+ messages in thread
From: jamal @ 2007-05-12 12:18 UTC (permalink / raw)
  To: Waskiewicz Jr, Peter P, Thomas Graf; +Cc: Herbert Xu, David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 344 bytes --]


Ok, heres the next iteration based on feedback from Thomas and Peter.
It seems this is good form in matching what the original code does.
I have a few doubts in labelled as XXX in there. Thomas, if you have
time, do you mind looking at those?

The usual testing happened i.e "if you can see this email, it must
have worked";->

cheers,
jamal


[-- Attachment #2: rfc-qrestart-take3 --]
[-- Type: text/x-patch, Size: 6777 bytes --]

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f671cd2..718d6fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -83,6 +83,9 @@ struct wireless_dev;
 #define NETDEV_TX_OK 0		/* driver took care of packet */
 #define NETDEV_TX_BUSY 1	/* driver tx path was busy*/
 #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
+#define NETDEV_TX_DROP -2	/* request caller to drop packet */
+#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */
+
 
 /*
  *	Compute the worst case header length according to the protocols
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..91cfcd5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (dev->xmit_lock_owner == smp_processor_id()) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return NETDEV_TX_DROP;
+	}
+	/* XXX: This maintains original code, but i think we should
+	 * update cpu_collision always
+	*/
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return NETDEV_TX_QUEUE;
+}
+
+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(q == &noop_qdisc))
+		kfree_skb(skb);
+	else if (skb->next)
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}
+
+static inline struct sk_buff *
+try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+{
+	struct sk_buff *skb = dev->gso_skb;
+
+	if (skb)
+		dev->gso_skb = NULL;
+	else
+		skb = q->dequeue(q);
+
+	return skb;
+}
+
+static inline int
+handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == NETDEV_TX_DROP) {
+		kfree_skb(skb);
+		return qdisc_qlen(q);
+	}
+
+	return handle_dev_requeue(skb, dev, q);
+}
+
+
 /*
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,114 +139,66 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer
 
+   Returns to the caller: 
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
-
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
 static inline int qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	int ret;
 
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
-
-		dev->gso_skb = NULL;
+	skb = try_get_tx_pkt(dev, q);
+	if (skb == NULL)
+		return qdisc_qlen(q);
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
-
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
-
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
-
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-		return 0;
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return handle_tx_locked(skb, dev, q);
+	}
+		
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
+
+	ret = NETDEV_TX_BUSY;
+	if (!netif_queue_stopped(dev))
+		/* churn baby churn .. */
+		ret = dev_hard_start_xmit(skb, dev);
+
+	if (!lockless)  
+		netif_tx_unlock(dev);
+
+	spin_lock(&dev->queue_lock);
+
+	/* we need to refresh q because it may be invalid since we 
+	 * dropped  dev->queue_lock earlier ...
+	 * So dont try to be clever grasshopper
+	*/
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qdisc_qlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless)
+		return handle_tx_locked(skb, dev, q);
+
+	if (unlikely (ret != NETDEV_TX_BUSY)) {
+		/* XXX: Do we need a ratelimit? or put a
+		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
+		 **/
+		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
 	}
 
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	return handle_dev_requeue(skb, dev, q);
 }
 
 void __qdisc_run(struct net_device *dev)

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: take 3 [RFC] make qdisc_restart more readable
  2007-05-12 12:18             ` take 3 " jamal
@ 2007-05-12 17:02               ` Patrick McHardy
  2007-05-12 19:56                 ` jamal
  0 siblings, 1 reply; 26+ messages in thread
From: Patrick McHardy @ 2007-05-12 17:02 UTC (permalink / raw)
  To: hadi; +Cc: Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu, David Miller,
	netdev

jamal wrote:
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index f671cd2..718d6fd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -83,6 +83,9 @@ struct wireless_dev;
>  #define NETDEV_TX_OK 0		/* driver took care of packet */
>  #define NETDEV_TX_BUSY 1	/* driver tx path was busy*/
>  #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
> +#define NETDEV_TX_DROP -2	/* request caller to drop packet */

This shouldn't be a NETDEV_TX code since its only handled correctly
internally for handle_dev_cpu_collision().

> +#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */

How will this be used? The driver can simply stop the queue if
it doesn't want to receive more packets.

> +static inline int handle_dev_cpu_collision(struct net_device *dev)
> +{
> +	if (dev->xmit_lock_owner == smp_processor_id()) {

unlikely would make sense here.

> +static inline int
> +handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> +{
> +
> +	if (unlikely(q == &noop_qdisc))
> +		kfree_skb(skb);

Why is this special-casing needed? __qdisc_run already checks for
noop_qdisc and noop has a proper requeue function.

> +	else if (skb->next)
> +		dev->gso_skb = skb;

The gso_skb cases could probably also be marked unlikely.

> +	else
> +		q->ops->requeue(skb, q);
> +	/* XXX: Could netif_schedule fail? Or is that fact we are
> +	 * requeueing imply the hardware path is closed
> +	 * and even if we fail, some interupt will wake us
> +	*/
> +	netif_schedule(dev);
> +	return 0;
> +}
> +
> +static inline struct sk_buff *
> +try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
> +{
> +	struct sk_buff *skb = dev->gso_skb;
> +
> +	if (skb)
> +		dev->gso_skb = NULL;
> +	else
> +		skb = q->dequeue(q);
> +
> +	return skb;
> +}
> +
> +static inline int
> +handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)

handle_.* sounds a bit like "I couldn't think of a better name" :)
How about dev_tx_locked()?

> +{
> +	int ret =  (dev);
> +
> +	if (ret == NETDEV_TX_DROP) {
> +		kfree_skb(skb);
> +		return qdisc_qlen(q);
> +	}
> +
> +	return handle_dev_requeue(skb, dev, q);
> +}

> +	if (unlikely (ret != NETDEV_TX_BUSY)) {
> +		/* XXX: Do we need a ratelimit? or put a
> +		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
> +		 **/
> +		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);

BUG_ON sounds a bit extreme, net_ratelimit() makes sense.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: take 3 [RFC] make qdisc_restart more readable
  2007-05-12 17:02               ` Patrick McHardy
@ 2007-05-12 19:56                 ` jamal
  2007-05-13 14:28                   ` [LAST CALL] [PATCH] [NET_SCHED]make " jamal
  0 siblings, 1 reply; 26+ messages in thread
From: jamal @ 2007-05-12 19:56 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu, David Miller,
	netdev

On Sat, 2007-12-05 at 19:02 +0200, Patrick McHardy wrote:
> jamal wrote:
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

> >  #define NETDEV_TX_LOCKED -1	/* driver tx lock was already taken */
> > +#define NETDEV_TX_DROP -2	/* request caller to drop packet */
> 
> This shouldn't be a NETDEV_TX code since its only handled correctly
> internally for handle_dev_cpu_collision().
> 
> > +#define NETDEV_TX_QUEUE -3	/* request caller to requeue packet */
> 
> How will this be used? The driver can simply stop the queue if
> it doesn't want to receive more packets.
> 

I can make those part of a different namespace; 
put defines in sch_generic sound reasonable?

> > +static inline int handle_dev_cpu_collision(struct net_device *dev)
> > +{
> > +	if (dev->xmit_lock_owner == smp_processor_id()) {
> 
> unlikely would make sense here.
> 

sure.

> > +static inline int
> > +handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> > +{
> > +
> > +	if (unlikely(q == &noop_qdisc))
> > +		kfree_skb(skb);
> 
> Why is this special-casing needed? __qdisc_run already checks for
> noop_qdisc and noop has a proper requeue function.
> 

Good catch: That was a patch from Herbert after Thomas' "q refresh"
patch. So i was just replicating. I will fix it. Note: I think that
change may have made it to -stable?

> > +	else if (skb->next)
> > +		dev->gso_skb = skb;
> 
> The gso_skb cases could probably also be marked unlikely.
> 

will do.

> > +
> > +static inline int
> > +handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
> 
> handle_.* sounds a bit like "I couldn't think of a better name" :)
> How about dev_tx_locked()?
> 

dev_tx_islocked()?

> > +	if (unlikely (ret != NETDEV_TX_BUSY)) {
> > +		/* XXX: Do we need a ratelimit? or put a
> > +		 * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
> > +		 **/
> > +		printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
> 
> BUG_ON sounds a bit extreme, net_ratelimit() makes sense.

Ok. 

Thanks a lot Patrick.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [LAST CALL] [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-12 19:56                 ` jamal
@ 2007-05-13 14:28                   ` jamal
  2007-05-14 10:40                     ` Patrick McHardy
  2007-05-14 13:30                     ` jamal
  0 siblings, 2 replies; 26+ messages in thread
From: jamal @ 2007-05-13 14:28 UTC (permalink / raw)
  To: David Miller
  Cc: Patrick McHardy, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

[-- Attachment #1: Type: text/plain, Size: 312 bytes --]


Ok, I am upgrading this to last call after taking in Patricks feedback.
Speak now - or send patches against it later. 
Dave, just let it simmer down for a day or two, then if no complaints,
go ahead and apply it. It is against net-2.6
Many thanks to Thomas, Peter and Patrick for their reviews.

cheers,
jamal


[-- Attachment #2: lastcall-qrs --]
[-- Type: text/plain, Size: 6897 bytes --]

[NET_SCHED]: Cleanup readability of qdisc restart

Over the years this code has gotten hairier. Resulting in many long
discussions and patches that get it wrong.
This patch helps tame that code so normal people will understand it.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>

---
commit d5088c3eb28211ac8c4137d6e2e69edf6061a57d
tree 7c683e412072a992ba49bccc34f3e1ab3c42efa3
parent 15cfc8c4efe964593b0246df53e3aa30502d9550
author Jamal Hadi Salim <hadi@cyberus.ca> Sun, 13 May 2007 10:19:52 -0400
committer Jamal Hadi Salim <hadi@cyberus.ca> Sun, 13 May 2007 10:19:52 -0400

 net/sched/sch_generic.c |  205 +++++++++++++++++++++++++----------------------
 1 files changed, 111 insertions(+), 94 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..0a000a9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,6 +34,9 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
+#define SCHED_TX_DROP -2
+#define SCHED_TX_QUEUE -3
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return SCHED_TX_DROP;
+	}
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return SCHED_TX_QUEUE;
+}
+
+static inline int
+do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(skb->next))
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}
+
+static inline struct sk_buff *
+try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+{
+	struct sk_buff *skb = dev->gso_skb;
+
+	if (skb)
+		dev->gso_skb = NULL;
+	else
+		skb = q->dequeue(q);
+
+	return skb;
+}
+
+static inline int
+tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == SCHED_TX_DROP) {
+		kfree_skb(skb);
+		return qdisc_qlen(q);
+	}
+
+	return do_dev_requeue(skb, dev, q);
+}
+
+
 /*
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,114 +137,61 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer
 
+   Returns to the caller:
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
-
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
 static inline int qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	int ret;
 
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
-
-		dev->gso_skb = NULL;
+	skb = try_get_tx_pkt(dev, q);
+	if (skb == NULL)
+		return qdisc_qlen(q);
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
-
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
-
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
-
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-		return 0;
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return tx_islocked(skb, dev, q);
 	}
-
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
+
+	ret = NETDEV_TX_BUSY;
+	if (!netif_queue_stopped(dev))
+		/* churn baby churn .. */
+		ret = dev_hard_start_xmit(skb, dev);
+
+	if (!lockless)
+		netif_tx_unlock(dev);
+
+	spin_lock(&dev->queue_lock);
+
+	/* we need to refresh q because it may be invalid since
+	 * we dropped  dev->queue_lock earlier ...
+	 * So dont try to be clever grasshopper
+	*/
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qdisc_qlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless)
+		return tx_islocked(skb, dev, q);
+
+	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+		printk(KERN_DEBUG " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
+
+	return do_dev_requeue(skb, dev, q);
 }
 
 void __qdisc_run(struct net_device *dev)

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [LAST CALL] [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-13 14:28                   ` [LAST CALL] [PATCH] [NET_SCHED]make " jamal
@ 2007-05-14 10:40                     ` Patrick McHardy
  2007-05-14 12:41                       ` jamal
  2007-05-14 13:30                     ` jamal
  1 sibling, 1 reply; 26+ messages in thread
From: Patrick McHardy @ 2007-05-14 10:40 UTC (permalink / raw)
  To: hadi; +Cc: David Miller, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

jamal wrote:
> Ok, I am upgrading this to last call after taking in Patricks feedback.
> Speak now - or send patches against it later. 
> Dave, just let it simmer down for a day or two, then if no complaints,
> go ahead and apply it. It is against net-2.6
> Many thanks to Thomas, Peter and Patrick for their reviews.


Two final suggestions, sorry for not mentioning this earlier.

> +			printk(KERN_DEBUG
> +			       "Dead loop on netdevice %s, fix it urgently!\n",
> +			       dev->name);

> +	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
> +		printk(KERN_DEBUG " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);


For both of these KERN_WARNING or above seems to make sense so
the messages don't end up in some debug log. Please also fix
whitespace and line length for the second printk if you're
going to change this :)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [LAST CALL] [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-14 10:40                     ` Patrick McHardy
@ 2007-05-14 12:41                       ` jamal
  2007-05-14 12:43                         ` Patrick McHardy
  0 siblings, 1 reply; 26+ messages in thread
From: jamal @ 2007-05-14 12:41 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: David Miller, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

On Mon, 2007-14-05 at 12:40 +0200, Patrick McHardy wrote:

> Two final suggestions, sorry for not mentioning this earlier.

np

> > +			printk(KERN_DEBUG
> > +			       "Dead loop on netdevice %s, fix it urgently!\n",
> > +			       dev->name);
> 
> > +	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
> > +		printk(KERN_DEBUG " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
> 
> 
> For both of these KERN_WARNING or above 

I will make them KERN_WARNING ..

> seems to make sense so
> the messages don't end up in some debug log. Please also fix
> whitespace 

What whitespace issue?

> and line length for the second printk if you're
> going to change this :)

I will fix the length of the second one.

Thanks Patrick.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [LAST CALL] [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-14 12:41                       ` jamal
@ 2007-05-14 12:43                         ` Patrick McHardy
  0 siblings, 0 replies; 26+ messages in thread
From: Patrick McHardy @ 2007-05-14 12:43 UTC (permalink / raw)
  To: hadi; +Cc: David Miller, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

jamal wrote:
> On Mon, 2007-14-05 at 12:40 +0200, Patrick McHardy wrote:

>>>+	if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
>>>+		printk(KERN_DEBUG " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);

>
> What whitespace issue?

Space between unlikely and '(', missing space after ',' and dev->name.

>>and line length for the second printk if you're
>>going to change this :)
> 
> 
> I will fix the length of the second one.

Thanks.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-13 14:28                   ` [LAST CALL] [PATCH] [NET_SCHED]make " jamal
  2007-05-14 10:40                     ` Patrick McHardy
@ 2007-05-14 13:30                     ` jamal
  2007-05-14 20:09                       ` Waskiewicz Jr, Peter P
  2007-05-16 22:55                       ` jamal
  1 sibling, 2 replies; 26+ messages in thread
From: jamal @ 2007-05-14 13:30 UTC (permalink / raw)
  To: David Miller
  Cc: Patrick McHardy, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

[-- Attachment #1: Type: text/plain, Size: 133 bytes --]


Incoporating last comments from Patrick.
Dave, I think this is ready to apply - against net-2.6 from this
morning. 

cheers,
jamal


[-- Attachment #2: rqs --]
[-- Type: text/plain, Size: 7028 bytes --]

[NET_SCHED]: Cleanup readability of qdisc restart

Over the years this code has gotten hairier. Resulting in many long
discussions over long summer days and patches that get it wrong.
This patch helps tame that code so normal people will understand it.

Thanks to Thomas Graf, Peter J. waskiewicz Jr, and Patrick McHardy
for their valuable reviews.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>

---
commit 4ad20f6340ed13bdfcc5082a2f538af0323c3330
tree 63050b1effc9d30e1c124f52605ce3f543a748e9
parent d831666e98b4f1e19ebdd2349735f47bf37cd293
author Jamal Hadi Salim <hadi@cyberus.ca> Mon, 14 May 2007 09:28:45 -0400
committer Jamal Hadi Salim <hadi@cyberus.ca> Mon, 14 May 2007 09:28:45 -0400

 net/sched/sch_generic.c |  206 ++++++++++++++++++++++++++---------------------
 1 files changed, 112 insertions(+), 94 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..ed80054 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,6 +34,9 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
+#define SCHED_TX_DROP -2
+#define SCHED_TX_QUEUE -3
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev)
 	spin_unlock_bh(&dev->queue_lock);
 }
 
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+	BUG_ON((int) q->q.qlen < 0);
+	return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "Dead loop on netdevice %s, fix it urgently!\n",
+			       dev->name);
+		return SCHED_TX_DROP;
+	}
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return SCHED_TX_QUEUE;
+}
+
+static inline int
+do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+	if (unlikely(skb->next))
+		dev->gso_skb = skb;
+	else
+		q->ops->requeue(skb, q);
+	/* XXX: Could netif_schedule fail? Or is that fact we are
+	 * requeueing imply the hardware path is closed
+	 * and even if we fail, some interupt will wake us
+	*/
+	netif_schedule(dev);
+	return 0;
+}
+
+static inline struct sk_buff *
+try_get_tx_pkt(struct net_device *dev, struct Qdisc *q)
+{
+	struct sk_buff *skb = dev->gso_skb;
+
+	if (skb)
+		dev->gso_skb = NULL;
+	else
+		skb = q->dequeue(q);
+
+	return skb;
+}
+
+static inline int
+tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+	int ret = handle_dev_cpu_collision(dev);
+
+	if (ret == SCHED_TX_DROP) {
+		kfree_skb(skb);
+		return qdisc_qlen(q);
+	}
+
+	return do_dev_requeue(skb, dev, q);
+}
+
+
 /*
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+
+   __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+   can enter this region at a time.
+
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
@@ -67,114 +137,62 @@ void qdisc_unlock_tree(struct net_device *dev)
 
    dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
- */
 
+   Multiple CPUs may contend for the two locks.
 
-/* Kick device.
+   Note, that this procedure can be called by a watchdog timer
 
+   Returns to the caller:
    Returns:  0  - queue is empty or throttled.
 	    >0  - queue is not empty.
-
-   NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
 static inline int qdisc_restart(struct net_device *dev)
 {
 	struct Qdisc *q = dev->qdisc;
+	unsigned lockless = (dev->features & NETIF_F_LLTX);
 	struct sk_buff *skb;
+	int ret;
 
-	/* Dequeue packet */
-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
-		unsigned nolock = (dev->features & NETIF_F_LLTX);
-
-		dev->gso_skb = NULL;
+	skb = try_get_tx_pkt(dev, q);
+	if (skb == NULL)
+		return qdisc_qlen(q);
 
-		/*
-		 * When the driver has LLTX set it does its own locking
-		 * in start_xmit. No need to add additional overhead by
-		 * locking again. These checks are worth it because
-		 * even uncongested locks can be quite expensive.
-		 * The driver can do trylock like here too, in case
-		 * of lock congestion it should return -1 and the packet
-		 * will be requeued.
-		 */
-		if (!nolock) {
-			if (!netif_tx_trylock(dev)) {
-			collision:
-				/* So, someone grabbed the driver. */
-
-				/* It may be transient configuration error,
-				   when hard_start_xmit() recurses. We detect
-				   it by checking xmit owner and drop the
-				   packet when deadloop is detected.
-				*/
-				if (dev->xmit_lock_owner == smp_processor_id()) {
-					kfree_skb(skb);
-					if (net_ratelimit())
-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-					goto out;
-				}
-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
-				goto requeue;
-			}
-		}
-
-		{
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				int ret;
-
-				ret = dev_hard_start_xmit(skb, dev);
-				if (ret == NETDEV_TX_OK) {
-					if (!nolock) {
-						netif_tx_unlock(dev);
-					}
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto out;
-				}
-				if (ret == NETDEV_TX_LOCKED && nolock) {
-					spin_lock(&dev->queue_lock);
-					q = dev->qdisc;
-					goto collision;
-				}
-			}
-
-			/* NETDEV_TX_BUSY - we need to requeue */
-			/* Release the driver */
-			if (!nolock) {
-				netif_tx_unlock(dev);
-			}
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		}
-
-		/* Device kicked us out :(
-		   This is possible in three cases:
-
-		   0. driver is locked
-		   1. fastroute is enabled
-		   2. device cannot determine busy state
-		      before start of transmission (f.e. dialout)
-		   3. device is buggy (ppp)
-		 */
-
-requeue:
-		if (unlikely(q == &noop_qdisc))
-			kfree_skb(skb);
-		else if (skb->next)
-			dev->gso_skb = skb;
-		else
-			q->ops->requeue(skb, q);
-		netif_schedule(dev);
-		return 0;
+	/* we have a packet to send */
+	if (!lockless) {
+		if (!netif_tx_trylock(dev))
+			return tx_islocked(skb, dev, q);
 	}
-
-out:
-	BUG_ON((int) q->q.qlen < 0);
-	return q->q.qlen;
+	/* all clear .. */
+	spin_unlock(&dev->queue_lock);
+
+	ret = NETDEV_TX_BUSY;
+	if (!netif_queue_stopped(dev))
+		/* churn baby churn .. */
+		ret = dev_hard_start_xmit(skb, dev);
+
+	if (!lockless)
+		netif_tx_unlock(dev);
+
+	spin_lock(&dev->queue_lock);
+
+	/* we need to refresh q because it may be invalid since
+	 * we dropped dev->queue_lock earlier ...
+	 * So dont try to be clever grasshopper
+	*/
+	q = dev->qdisc;
+	/* most likely result, packet went ok */
+	if (ret == NETDEV_TX_OK)
+		return qdisc_qlen(q);
+	/* only for lockless drivers .. */
+	if (ret == NETDEV_TX_LOCKED && lockless)
+		return tx_islocked(skb, dev, q);
+
+	if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
+		printk(KERN_WARNING " BUG %s code %d qlen %d\n",
+		       dev->name, ret, q->q.qlen);
+
+	return do_dev_requeue(skb, dev, q);
 }
 
 void __qdisc_run(struct net_device *dev)

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-14 13:30                     ` jamal
@ 2007-05-14 20:09                       ` Waskiewicz Jr, Peter P
  2007-05-16 22:55                       ` jamal
  1 sibling, 0 replies; 26+ messages in thread
From: Waskiewicz Jr, Peter P @ 2007-05-14 20:09 UTC (permalink / raw)
  To: hadi, David Miller; +Cc: Patrick McHardy, Thomas Graf, Herbert Xu, netdev

> Incoporating last comments from Patrick.
> Dave, I think this is ready to apply - against net-2.6 from 
> this morning. 

Aside from the whitespace changes, I've run this patch over the weekend
with no issue under moderate stress.  Looks fine to me.

Thx,

-PJ Waskiewicz

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-14 13:30                     ` jamal
  2007-05-14 20:09                       ` Waskiewicz Jr, Peter P
@ 2007-05-16 22:55                       ` jamal
  2007-05-16 23:00                         ` David Miller
  2007-05-16 23:15                         ` Sridhar Samudrala
  1 sibling, 2 replies; 26+ messages in thread
From: jamal @ 2007-05-16 22:55 UTC (permalink / raw)
  To: David Miller
  Cc: Patrick McHardy, Waskiewicz Jr, Peter P, Thomas Graf, Herbert Xu,
	netdev

On Mon, 2007-14-05 at 09:30 -0400, jamal wrote:
> Incoporating last comments from Patrick.
> Dave, I think this is ready to apply - against net-2.6 from this
> morning. 

Dave - hold onto this patch - Sridhar Samudrala <sri@us.ibm.com> caught
a bug i need to fix (GSO wont work).
The game starting in a few holds priority for now. I will send a patch
either tonight or tommorow morning.
Otoh, if you have applied it - it is a simple incremental patch.

cheers,
jamal



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-16 22:55                       ` jamal
@ 2007-05-16 23:00                         ` David Miller
  2007-05-16 23:15                         ` Sridhar Samudrala
  1 sibling, 0 replies; 26+ messages in thread
From: David Miller @ 2007-05-16 23:00 UTC (permalink / raw)
  To: hadi; +Cc: kaber, peter.p.waskiewicz.jr, tgraf, herbert, netdev

From: jamal <hadi@cyberus.ca>
Date: Wed, 16 May 2007 18:55:04 -0400

> On Mon, 2007-14-05 at 09:30 -0400, jamal wrote:
> > Incoporating last comments from Patrick.
> > Dave, I think this is ready to apply - against net-2.6 from this
> > morning. 
> 
> Dave - hold onto this patch - Sridhar Samudrala <sri@us.ibm.com> caught
> a bug i need to fix (GSO wont work).

No problems, I'm busy with something else anyways.

> The game starting in a few holds priority for now.

And I can't say whether or not it is related to that :-)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-16 22:55                       ` jamal
  2007-05-16 23:00                         ` David Miller
@ 2007-05-16 23:15                         ` Sridhar Samudrala
  2007-05-17  2:12                           ` jamal
  1 sibling, 1 reply; 26+ messages in thread
From: Sridhar Samudrala @ 2007-05-16 23:15 UTC (permalink / raw)
  To: hadi
  Cc: David Miller, Patrick McHardy, Waskiewicz Jr, Peter P,
	Thomas Graf, Herbert Xu, netdev

On Wed, 2007-05-16 at 18:55 -0400, jamal wrote:
> On Mon, 2007-14-05 at 09:30 -0400, jamal wrote:
> > Incoporating last comments from Patrick.
> > Dave, I think this is ready to apply - against net-2.6 from this
> > morning. 
> 
> Dave - hold onto this patch - Sridhar Samudrala <sri@us.ibm.com> caught
> a bug i need to fix (GSO wont work).

I think your qdisc_restart() cleanup patch is OK. There you are
still calling dev_hard_start_xmit() and GSO should work fine.

But the batch xmit patch has some issues.

Thanks
Sridhar

> The game starting in a few holds priority for now. I will send a patch
> either tonight or tommorow morning.
> Otoh, if you have applied it - it is a simple incremental patch.
> 
> cheers,
> jamal



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [NET_SCHED]make qdisc_restart more readable
  2007-05-16 23:15                         ` Sridhar Samudrala
@ 2007-05-17  2:12                           ` jamal
  0 siblings, 0 replies; 26+ messages in thread
From: jamal @ 2007-05-17  2:12 UTC (permalink / raw)
  To: Sridhar Samudrala
  Cc: David Miller, Patrick McHardy, Waskiewicz Jr, Peter P,
	Thomas Graf, Herbert Xu, netdev

On Wed, 2007-16-05 at 16:15 -0700, Sridhar Samudrala wrote:

> 
> I think your qdisc_restart() cleanup patch is OK. There you are
> still calling dev_hard_start_xmit() and GSO should work fine.


Sorry, you are right - I thought i cutnpasted from it.
So Dave, false alarm ;-> That patch is clean.

> But the batch xmit patch has some issues.

I will resubmit that. I nheed to think about it a little.
It should work fine as is if no GSO or nit.

cheers,
jamal


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2007-05-17  2:12 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-11  0:13 [RFC] make qdisc_restart more readable jamal
2007-05-11 15:56 ` Waskiewicz Jr, Peter P
2007-05-11 18:04   ` jamal
2007-05-11 18:13     ` Waskiewicz Jr, Peter P
2007-05-11 18:35       ` jamal
2007-05-11 18:46         ` Waskiewicz Jr, Peter P
2007-05-11 19:29           ` take 2 WAS (RE: " jamal
2007-05-11 22:01             ` Waskiewicz Jr, Peter P
2007-05-11 22:43               ` jamal
2007-05-12  9:46             ` Thomas Graf
2007-05-12 11:58               ` jamal
2007-05-12 12:18             ` take 3 " jamal
2007-05-12 17:02               ` Patrick McHardy
2007-05-12 19:56                 ` jamal
2007-05-13 14:28                   ` [LAST CALL] [PATCH] [NET_SCHED]make " jamal
2007-05-14 10:40                     ` Patrick McHardy
2007-05-14 12:41                       ` jamal
2007-05-14 12:43                         ` Patrick McHardy
2007-05-14 13:30                     ` jamal
2007-05-14 20:09                       ` Waskiewicz Jr, Peter P
2007-05-16 22:55                       ` jamal
2007-05-16 23:00                         ` David Miller
2007-05-16 23:15                         ` Sridhar Samudrala
2007-05-17  2:12                           ` jamal
2007-05-11 17:01 ` [RFC] make " Thomas Graf
2007-05-11 18:11   ` jamal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).