From: jamal <j.hadi123@gmail.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
David Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Subject: [RFC] make qdisc_restart more readable
Date: Thu, 10 May 2007 20:13:20 -0400 [thread overview]
Message-ID: <1178842400.5008.17.camel@localhost> (raw)
[-- Attachment #1: Type: text/plain, Size: 328 bytes --]
This compiles and passes some basic tests - no serious testing.
Against net-2.6.
The patch is ugly looking, so i have at the end the re-written qdisc;
you can easily tell the rest from the patch.
Please flush out any fluff - I would like to submit this (almost lost
it, thanks to an early posting, found it).
cheers,
jamal
[-- Attachment #2: rfc-qrestart --]
[-- Type: text/x-patch, Size: 6687 bytes --]
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f671cd2..718d6fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -83,6 +83,9 @@ struct wireless_dev;
#define NETDEV_TX_OK 0 /* driver took care of packet */
#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/
#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */
+#define NETDEV_TX_DROP -2 /* request caller to drop packet */
+#define NETDEV_TX_QUEUE -3 /* request caller to requeue packet */
+
/*
* Compute the worst case header length according to the protocols
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f28bb2d..b821040 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -59,7 +59,79 @@ void qdisc_unlock_tree(struct net_device *dev)
spin_unlock_bh(&dev->queue_lock);
}
-/*
+static inline int qqlen(struct Qdisc *q)
+{
+ BUG_ON((int) q->q.qlen < 0);
+ return q->q.qlen;
+}
+
+static inline int handle_dev_cpu_collision(struct net_device *dev)
+{
+ if (dev->xmit_lock_owner == smp_processor_id()) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG
+ "Dead loop on netdevice %s, fix it urgently!\n",
+ dev->name);
+ return NETDEV_TX_DROP;
+ }
+ /* XXX: This maintains original code, but i think we should
+ * update cpu_collision always
+ */
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+ return NETDEV_TX_QUEUE;
+}
+
+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+
+ if (unlikely(q == &noop_qdisc))
+ kfree_skb(skb);
+
+ if (skb->next)
+ dev->gso_skb = skb;
+ else
+ q->ops->requeue(skb, q);
+ /* XXX: Could netif_schedule fail? Or is that fact we are
+ * requeueing imply the hardware path is closed
+ * and even if we fail, some interupt will wake us
+ */
+ netif_schedule(dev);
+ return 0;
+}
+
+static inline int
+try_get_tx_pkt(struct sk_buff **skb, struct net_device *dev, struct Qdisc *q)
+{
+ if (((*skb = dev->gso_skb)) || ((*skb = q->dequeue(q)))) {
+
+ dev->gso_skb = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int
+handle_tx_locked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q)
+{
+ int ret = handle_dev_cpu_collision(dev);
+
+ if (ret == NETDEV_TX_DROP) {
+ kfree_skb(skb);
+ return qqlen(q);
+ }
+
+ return handle_dev_requeue(skb, dev, q);
+}
+
+
+/*
+ NOTE: Called under dev->queue_lock with locally disabled BH.
+
+ __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+ can enter this region at a time.
+
dev->queue_lock serializes queue accesses for this device
AND dev->qdisc pointer itself.
@@ -67,114 +139,63 @@ void qdisc_unlock_tree(struct net_device *dev)
dev->queue_lock and netif_tx_lock are mutually exclusive,
if one is grabbed, another must be free.
- */
+ Multiple CPUs may contend for the two locks.
-/* Kick device.
+ Note, that this procedure can be called by a watchdog timer, so that
+ we do not check dev->tbusy flag here.
+ Returns to the caller:
Returns: 0 - queue is empty or throttled.
>0 - queue is not empty.
-
- NOTE: Called under dev->queue_lock with locally disabled BH.
*/
-static inline int qdisc_restart(struct net_device *dev)
+int
+qdisc_restart(struct net_device *dev)
{
struct Qdisc *q = dev->qdisc;
- struct sk_buff *skb;
-
- /* Dequeue packet */
- if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
- unsigned nolock = (dev->features & NETIF_F_LLTX);
+ unsigned lockless = (dev->features & NETIF_F_LLTX);
+ struct sk_buff *skb = NULL;
+ int ret;
+
+ ret = try_get_tx_pkt(&skb, dev, q);
+ if (ret == 0)
+ return qqlen(q);
+
+ /* we have a packet to send */
+ if (!lockless) {
+ if (!netif_tx_trylock(dev))
+ return handle_tx_locked(skb, dev, q);
+ }
+
+ /* all clear .. */
+ spin_unlock(&dev->queue_lock);
- dev->gso_skb = NULL;
+ /* churn baby churn .. */
+ ret = dev_hard_start_xmit(skb, dev);
- /*
- * When the driver has LLTX set it does its own locking
- * in start_xmit. No need to add additional overhead by
- * locking again. These checks are worth it because
- * even uncongested locks can be quite expensive.
- * The driver can do trylock like here too, in case
- * of lock congestion it should return -1 and the packet
- * will be requeued.
- */
- if (!nolock) {
- if (!netif_tx_trylock(dev)) {
- collision:
- /* So, someone grabbed the driver. */
-
- /* It may be transient configuration error,
- when hard_start_xmit() recurses. We detect
- it by checking xmit owner and drop the
- packet when deadloop is detected.
- */
- if (dev->xmit_lock_owner == smp_processor_id()) {
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
- goto out;
- }
- __get_cpu_var(netdev_rx_stat).cpu_collision++;
- goto requeue;
- }
- }
+ if (!lockless)
+ netif_tx_unlock(dev);
- {
- /* And release queue */
- spin_unlock(&dev->queue_lock);
-
- if (!netif_queue_stopped(dev)) {
- int ret;
-
- ret = dev_hard_start_xmit(skb, dev);
- if (ret == NETDEV_TX_OK) {
- if (!nolock) {
- netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- goto out;
- }
- if (ret == NETDEV_TX_LOCKED && nolock) {
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- goto collision;
- }
- }
+ spin_lock(&dev->queue_lock);
- /* NETDEV_TX_BUSY - we need to requeue */
- /* Release the driver */
- if (!nolock) {
- netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- }
+ q = dev->qdisc;
+ /* most likely result, packet went ok */
+ if (ret == NETDEV_TX_OK)
+ return qqlen(q);
+ /* only for lockless drivers .. */
+ if (ret == NETDEV_TX_LOCKED && lockless) {
+ return handle_tx_locked(skb, dev, q);
+ }
- /* Device kicked us out :(
- This is possible in three cases:
-
- 0. driver is locked
- 1. fastroute is enabled
- 2. device cannot determine busy state
- before start of transmission (f.e. dialout)
- 3. device is buggy (ppp)
- */
-
-requeue:
- if (unlikely(q == &noop_qdisc))
- kfree_skb(skb);
- else if (skb->next)
- dev->gso_skb = skb;
- else
- q->ops->requeue(skb, q);
- netif_schedule(dev);
- return 0;
+ if (unlikely (ret != NETDEV_TX_BUSY)) {
+ /* XXX: Do we need a ratelimit? or put a
+ * BUG_ON((int) ret != NETDEV_TX_BUSY) ?
+ **/
+ printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
}
-out:
- BUG_ON((int) q->q.qlen < 0);
- return q->q.qlen;
+ return handle_dev_requeue(skb, dev, q);
}
void __qdisc_run(struct net_device *dev)
[-- Attachment #3: sch_generic.c --]
[-- Type: text/x-csrc, Size: 1037 bytes --]
int
qdisc_restart(struct net_device *dev)
{
struct Qdisc *q = dev->qdisc;
unsigned lockless = (dev->features & NETIF_F_LLTX);
struct sk_buff *skb = NULL;
int ret;
ret = try_get_tx_pkt(&skb, dev, q);
if (ret == 0)
return qqlen(q);
/* we have a packet to send */
if (!lockless) {
if (!netif_tx_trylock(dev))
return handle_tx_locked(skb, dev, q);
}
/* all clear .. */
spin_unlock(&dev->queue_lock);
/* churn baby churn .. */
ret = dev_hard_start_xmit(skb, dev);
if (!lockless)
netif_tx_unlock(dev);
spin_lock(&dev->queue_lock);
q = dev->qdisc;
/* most likely result, packet went ok */
if (ret == NETDEV_TX_OK)
return qqlen(q);
/* only for lockless drivers .. */
if (ret == NETDEV_TX_LOCKED && lockless) {
return handle_tx_locked(skb, dev, q);
}
if (unlikely (ret != NETDEV_TX_BUSY)) {
/* XXX: Do we need a ratelimit? or put a
* BUG_ON((int) ret != NETDEV_TX_BUSY) ?
**/
printk("BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen);
}
return handle_dev_requeue(skb, dev, q);
}
next reply other threads:[~2007-05-11 0:13 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-11 0:13 jamal [this message]
2007-05-11 15:56 ` [RFC] make qdisc_restart more readable Waskiewicz Jr, Peter P
2007-05-11 18:04 ` jamal
2007-05-11 18:13 ` Waskiewicz Jr, Peter P
2007-05-11 18:35 ` jamal
2007-05-11 18:46 ` Waskiewicz Jr, Peter P
2007-05-11 19:29 ` take 2 WAS (RE: " jamal
2007-05-11 22:01 ` Waskiewicz Jr, Peter P
2007-05-11 22:43 ` jamal
2007-05-12 9:46 ` Thomas Graf
2007-05-12 11:58 ` jamal
2007-05-12 12:18 ` take 3 " jamal
2007-05-12 17:02 ` Patrick McHardy
2007-05-12 19:56 ` jamal
2007-05-13 14:28 ` [LAST CALL] [PATCH] [NET_SCHED]make " jamal
2007-05-14 10:40 ` Patrick McHardy
2007-05-14 12:41 ` jamal
2007-05-14 12:43 ` Patrick McHardy
2007-05-14 13:30 ` jamal
2007-05-14 20:09 ` Waskiewicz Jr, Peter P
2007-05-16 22:55 ` jamal
2007-05-16 23:00 ` David Miller
2007-05-16 23:15 ` Sridhar Samudrala
2007-05-17 2:12 ` jamal
2007-05-11 17:01 ` [RFC] make " Thomas Graf
2007-05-11 18:11 ` jamal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1178842400.5008.17.camel@localhost \
--to=j.hadi123@gmail.com \
--cc=davem@davemloft.net \
--cc=hadi@cyberus.ca \
--cc=herbert@gondor.apana.org.au \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.