netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: Patrick McHardy <kaber@trash.net>
Cc: hadi@cyberus.ca, Jarek Poplawski <jarkao2@o2.pl>,
	netdev@vger.kernel.org, Dave Jones <davej@redhat.com>,
	davem@davemloft.net
Subject: Re: tc related lockdep warning.
Date: Tue, 26 Sep 2006 18:15:21 +0200	[thread overview]
Message-ID: <45195219.7050105@trash.net> (raw)
In-Reply-To: <4517D9A6.70307@trash.net>

[-- Attachment #1: Type: text/plain, Size: 1168 bytes --]

Patrick McHardy wrote:
> jamal wrote:
> 
>>Yes, that looks plausible. Can you try making those changes and see if
>>the warning is gone?
> 
>
> I think this points to a bigger brokeness caused by the move of
> dev->qdisc to RCU. It means destruction of filters and actions doesn't
> necessarily happens in user-context and thus not protected by the rtnl
> anymore.

I looked into this and we indeed still have lots of problems from that
broken RCU patch. Basically all locking (qdiscs, classifiers, actions,
estimators) assumes that updates are only done in process context and
thus read_lock doesn't need bottem half protection. Quite a few things
also assume that updates only happen under the RTNL and don't need
any further protection if not used during packet processing.

Instead of "fixing" all this I suggest something like this (untested)
patch instead. Since only the dev->qdisc pointer is protected by RCU,
but enqueue and the qdisc tree are still protected by dev->qdisc_lock,
we can perform destruction of the tree immediately and only do the
final free in the rcu callback, as long as we make sure not to enqueue
anything to a half-way destroyed qdisc.


[-- Attachment #2: x --]
[-- Type: text/plain, Size: 3479 bytes --]

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b0e9108..278d5e6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -31,6 +31,7 @@ struct Qdisc
 #define TCQ_F_BUILTIN	1
 #define TCQ_F_THROTTLED	2
 #define TCQ_F_INGRESS	4
+#define TCQ_F_DYING	8
 	int			padded;
 	struct Qdisc_ops	*ops;
 	u32			handle;
diff --git a/net/core/dev.c b/net/core/dev.c
index 14de297..c9c65c2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1480,14 +1480,15 @@ #endif
 	if (q->enqueue) {
 		/* Grab device queue */
 		spin_lock(&dev->queue_lock);
+		if (likely(!(q->flags & TCQ_F_DYING))) {
+			rc = q->enqueue(skb, q);
+			qdisc_run(dev);
+			spin_unlock(&dev->queue_lock);
 
-		rc = q->enqueue(skb, q);
-
-		qdisc_run(dev);
-
+			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+			goto out;
+		}
 		spin_unlock(&dev->queue_lock);
-		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-		goto out;
 	}
 
 	/* The device has no queue. Common case for software devices:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6f91518..93d52e0 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -47,9 +47,8 @@ #include <net/pkt_sched.h>
      spinlock dev->queue_lock.
    - tree walking is protected by read_lock_bh(qdisc_tree_lock)
      and this lock is used only in process context.
-   - updates to tree are made under rtnl semaphore or
-     from softirq context (__qdisc_destroy rcu-callback)
-     hence this lock needs local bh disabling.
+   - updates to tree are made only under rtnl semaphore,
+     hence this lock may be made without local bh disabling.
 
    qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
  */
@@ -483,20 +482,6 @@ void qdisc_reset(struct Qdisc *qdisc)
 static void __qdisc_destroy(struct rcu_head *head)
 {
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
-	struct Qdisc_ops  *ops = qdisc->ops;
-
-#ifdef CONFIG_NET_ESTIMATOR
-	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
-#endif
-	write_lock(&qdisc_tree_lock);
-	if (ops->reset)
-		ops->reset(qdisc);
-	if (ops->destroy)
-		ops->destroy(qdisc);
-	write_unlock(&qdisc_tree_lock);
-	module_put(ops->owner);
-
-	dev_put(qdisc->dev);
 	kfree((char *) qdisc - qdisc->padded);
 }
 
@@ -504,32 +489,24 @@ #endif
 
 void qdisc_destroy(struct Qdisc *qdisc)
 {
-	struct list_head cql = LIST_HEAD_INIT(cql);
-	struct Qdisc *cq, *q, *n;
+	struct Qdisc_ops  *ops = qdisc->ops;
 
 	if (qdisc->flags & TCQ_F_BUILTIN ||
-		!atomic_dec_and_test(&qdisc->refcnt))
+	    !atomic_dec_and_test(&qdisc->refcnt))
 		return;
+	qdisc->flags |= TCQ_F_DYING;
 
-	if (!list_empty(&qdisc->list)) {
-		if (qdisc->ops->cl_ops == NULL)
-			list_del(&qdisc->list);
-		else
-			list_move(&qdisc->list, &cql);
-	}
-
-	/* unlink inner qdiscs from dev->qdisc_list immediately */
-	list_for_each_entry(cq, &cql, list)
-		list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list)
-			if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) {
-				if (q->ops->cl_ops == NULL)
-					list_del_init(&q->list);
-				else
-					list_move_tail(&q->list, &cql);
-			}
-	list_for_each_entry_safe(cq, n, &cql, list)
-		list_del_init(&cq->list);
+	list_del(&qdisc->list);
+#ifdef CONFIG_NET_ESTIMATOR
+	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+#endif
+	if (ops->reset)
+		ops->reset(qdisc);
+	if (ops->destroy)
+		ops->destroy(qdisc);
 
+	module_put(ops->owner);
+	dev_put(qdisc->dev);
 	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
 }
 

  reply	other threads:[~2006-09-26 16:18 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-09-24 21:29 tc related lockdep warning Dave Jones
2006-09-25 12:43 ` Jarek Poplawski
2006-09-25 12:47   ` jamal
2006-09-25 13:05     ` Jarek Poplawski
2006-09-25 13:29     ` Patrick McHardy
2006-09-26 16:15       ` Patrick McHardy [this message]
2006-09-26 21:20         ` Dave Jones
2006-09-27  8:54           ` Jarek Poplawski
2006-09-27  9:57             ` Patrick McHardy
2006-09-28 12:17             ` Patrick McHardy
2006-09-28 13:13               ` Jarek Poplawski
2006-09-28 14:20                 ` Stephen Hemminger
2006-09-29  6:28                   ` Jarek Poplawski
2006-09-27 10:14           ` Patrick McHardy
2006-09-27 14:41             ` Ismail Donmez
2006-09-27 12:07           ` Patrick McHardy
2006-09-27 17:26             ` Ismail Donmez
2006-09-27 17:33             ` Dave Jones
2006-09-27 23:53             ` David Miller
2006-09-28  9:07               ` Jarek Poplawski
2006-09-28  8:17             ` Jarek Poplawski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45195219.7050105@trash.net \
    --to=kaber@trash.net \
    --cc=davej@redhat.com \
    --cc=davem@davemloft.net \
    --cc=hadi@cyberus.ca \
    --cc=jarkao2@o2.pl \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).