public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Willy Tarreau <w@1wt.eu>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Ranko Zivojnovic <ranko@spidernet.net>,
	"David S. Miller" <davem@davemloft.net>,
	Greg Kroah-Hartman <gregkh@suse.de>, Willy Tarreau <w@1wt.eu>
Subject: [2.6.20.17 review 05/58] gen estimator deadlock fix
Date: Wed, 22 Aug 2007 11:38:49 +0200	[thread overview]
Message-ID: <20070822083943.%N@1wt.eu> (raw)
In-Reply-To: 20070822083844.%N@1wt.eu

[-- Attachment #1: 0005-gen-estimator-deadlock-fix.patch --]
[-- Type: text/plain, Size: 5993 bytes --]

[NET]: gen_estimator deadlock fix

-Fixes ABBA deadlock noted by Patrick McHardy <kaber@trash.net>:

> There is at least one ABBA deadlock, est_timer() does:
> read_lock(&est_lock)
> spin_lock(e->stats_lock) (which is dev->queue_lock)
>
> and qdisc_destroy calls htb_destroy under dev->queue_lock, which
> calls htb_destroy_class, then gen_kill_estimator and this
> write_locks est_lock.

To fix the ABBA deadlock the rate estimators are now kept on an rcu list.

-The use of est_lock changes from protecting the list to protecting
the update of the 'bstats' pointer, in order to avoid a NULL dereference.

-The 'interval' member of the gen_estimator structure is removed as it is
not needed.

Signed-off-by: Ranko Zivojnovic <ranko@spidernet.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 net/core/gen_estimator.c |   81 ++++++++++++++++++++++++++++------------------
 1 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 33d82bf..acc1ee0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -79,27 +79,27 @@
 
 struct gen_estimator
 {
-	struct gen_estimator	*next;
+	struct list_head	list;
 	struct gnet_stats_basic	*bstats;
 	struct gnet_stats_rate_est	*rate_est;
 	spinlock_t		*stats_lock;
-	unsigned		interval;
 	int			ewma_log;
 	u64			last_bytes;
 	u32			last_packets;
 	u32			avpps;
 	u32			avbps;
+	struct rcu_head		e_rcu;
 };
 
 struct gen_estimator_head
 {
 	struct timer_list	timer;
-	struct gen_estimator	*list;
+	struct list_head	list;
 };
 
 static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 
-/* Estimator array lock */
+/* Protects against NULL dereference */
 static DEFINE_RWLOCK(est_lock);
 
 static void est_timer(unsigned long arg)
@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg)
 	int idx = (int)arg;
 	struct gen_estimator *e;
 
-	read_lock(&est_lock);
-	for (e = elist[idx].list; e; e = e->next) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(e, &elist[idx].list, list) {
 		u64 nbytes;
 		u32 npackets;
 		u32 rate;
 
 		spin_lock(e->stats_lock);
+		read_lock(&est_lock);
+		if (e->bstats == NULL)
+			goto skip;
+
 		nbytes = e->bstats->bytes;
 		npackets = e->bstats->packets;
 		rate = (nbytes - e->last_bytes)<<(7 - idx);
@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg)
 		e->last_packets = npackets;
 		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
 		e->rate_est->pps = (e->avpps+0x1FF)>>10;
+skip:
+		read_unlock(&est_lock);
 		spin_unlock(e->stats_lock);
 	}
 
-	if (elist[idx].list != NULL)
+	if (!list_empty(&elist[idx].list))
 		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
-	read_unlock(&est_lock);
+	rcu_read_unlock();
 }
 
 /**
@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg)
  * &rate_est with the statistics lock grabed during this period.
  * 
  * Returns 0 on success or a negative error code.
+ *
+ * NOTE: Called under rtnl_mutex
  */
 int gen_new_estimator(struct gnet_stats_basic *bstats,
-	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
+		      struct gnet_stats_rate_est *rate_est,
+		      spinlock_t *stats_lock,
+		      struct rtattr *opt)
 {
 	struct gen_estimator *est;
 	struct gnet_estimator *parm = RTA_DATA(opt);
+	int idx;
 
 	if (RTA_PAYLOAD(opt) < sizeof(*parm))
 		return -EINVAL;
@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	if (est == NULL)
 		return -ENOBUFS;
 
-	est->interval = parm->interval + 2;
+	idx = parm->interval + 2;
 	est->bstats = bstats;
 	est->rate_est = rate_est;
 	est->stats_lock = stats_lock;
@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
-	est->next = elist[est->interval].list;
-	if (est->next == NULL) {
-		init_timer(&elist[est->interval].timer);
-		elist[est->interval].timer.data = est->interval;
-		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
-		elist[est->interval].timer.function = est_timer;
-		add_timer(&elist[est->interval].timer);
+	if (!elist[idx].timer.function) {
+		INIT_LIST_HEAD(&elist[idx].list);
+		setup_timer(&elist[idx].timer, est_timer, idx);
 	}
-	write_lock_bh(&est_lock);
-	elist[est->interval].list = est;
-	write_unlock_bh(&est_lock);
+
+	if (list_empty(&elist[idx].list))
+		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+
+	list_add_rcu(&est->list, &elist[idx].list);
 	return 0;
 }
 
+static void __gen_kill_estimator(struct rcu_head *head)
+{
+	struct gen_estimator *e = container_of(head,
+					struct gen_estimator, e_rcu);
+	kfree(e);
+}
+
 /**
  * gen_kill_estimator - remove a rate estimator
  * @bstats: basic statistics
@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
  *
  * Removes the rate estimator specified by &bstats and &rate_est
  * and deletes the timer.
+ *
+ * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic *bstats,
 	struct gnet_stats_rate_est *rate_est)
 {
 	int idx;
-	struct gen_estimator *est, **pest;
+	struct gen_estimator *e, *n;
 
 	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-		int killed = 0;
-		pest = &elist[idx].list;
-		while ((est=*pest) != NULL) {
-			if (est->rate_est != rate_est || est->bstats != bstats) {
-				pest = &est->next;
+
+		/* Skip non initialized indexes */
+		if (!elist[idx].timer.function)
+			continue;
+
+		list_for_each_entry_safe(e, n, &elist[idx].list, list) {
+			if (e->rate_est != rate_est || e->bstats != bstats)
 				continue;
-			}
 
 			write_lock_bh(&est_lock);
-			*pest = est->next;
+			e->bstats = NULL;
 			write_unlock_bh(&est_lock);
 
-			kfree(est);
-			killed++;
+			list_del_rcu(&e->list);
+			call_rcu(&e->e_rcu, __gen_kill_estimator);
 		}
-		if (killed && elist[idx].list == NULL)
-			del_timer(&elist[idx].timer);
 	}
 }
 
-- 
1.5.2.5

-- 

  parent reply	other threads:[~2007-08-22  8:56 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-22  8:38 [2.6.20.17 review 00/58] 2.6.20.17 -stable review Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 01/58] Fix IPCOMP crashes Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 03/58] SCTP scope_id handling fix Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 04/58] gen estimator timer unload race Willy Tarreau
2007-08-22  9:38 ` Willy Tarreau [this message]
2007-08-22  9:38 ` [2.6.20.17 review 07/58] Fix ipv6 link down handling Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 08/58] Netpoll leak Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 09/58] Sparc64 bootup assembler bug Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 11/58] Fix sparc32 udelay() rounding errors Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 13/58] KVM: SVM: Reliably detect if SVM was disabled by BIOS Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 14/58] USB: fix warning caused by autosuspend counter going negative Willy Tarreau
2007-08-22  9:38 ` [2.6.20.17 review 15/58] aacraid: fix security hole Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 16/58] V4L: Add check for valid control ID to v4l2_ctrl_next Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 17/58] V4L: wm8775/wm8739: Fix memory leak when unloading module Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 18/58] splice: fix double page unlock Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 19/58] drm/i915: Fix i965 secured batchbuffer usage (CVE-2007-3851) Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 20/58] Fix leak on /proc/lockdep_stats Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 21/58] CPU online file permission Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 22/58] Fix user struct leakage with locked IPC shem segment Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 23/58] md: handle writes to broken raid10 arrays gracefully Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 24/58] md: raid10: fix use-after-free of bio Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 25/58] pcmcia: give socket time to power down Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 26/58] "ext4_ext_put_in_cache" uses __u32 to receive physical block number Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 27/58] Include serial_reg.h with userspace headers Willy Tarreau
2007-08-22  9:23   ` Russell King
2007-08-22 12:22     ` Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 28/58] dm io: fix panic on large request Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 29/58] softmac: Fix ESSID problem Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 30/58] nfsd: fix possible read-ahead cache and export table corruption Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 31/58] readahead: MIN_RA_PAGES/MAX_RA_PAGES macros Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 32/58] fs: 9p/conv.c error path fix Willy Tarreau
2007-08-22 15:53   ` Eric Van Hensbergen
2007-08-22 16:06     ` Willy Tarreau
2007-08-22 18:59     ` [stable] " Greg KH
2007-08-22 20:31       ` Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 33/58] forcedeth bug fix: cicada phy Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 34/58] forcedeth bug fix: vitesse phy Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 35/58] forcedeth bug fix: realtek phy Willy Tarreau
2007-08-22 15:56   ` Chuck Ebbert
2007-08-22 16:10     ` Willy Tarreau
2007-08-22 18:15   ` Prakash Punnoor
2007-08-22 20:42     ` Willy Tarreau
2007-08-22 23:05       ` Greg KH
2007-08-23 15:50         ` Prakash Punnoor
2007-08-23 16:55           ` [stable] " Greg KH
2007-08-23 19:27             ` [PATCH] fix realtek phy id in forcedeth Willy Tarreau
2007-08-25  4:01               ` Jeff Garzik
2007-08-22  9:39 ` [2.6.20.17 review 36/58] acpi-cpufreq: Proper ReadModifyWrite of PERF_CTL MSR Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 38/58] jbd2 commit: fix transaction dropping Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 39/58] hugetlb: fix race in alloc_fresh_huge_page() Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 40/58] do not limit locked memory when RLIMIT_MEMLOCK is RLIM_INFINITY Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 41/58] drivers/video/macmodes.c:mac_find_mode() mustnt be __devinit Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 42/58] nfsd: fix possible oops on re-insertion of rpcsec_gss modules Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 43/58] dm snapshot: permit invalid activation Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 45/58] Hangup TTY before releasing rfcomm_dev Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 46/58] Keep rfcomm_dev on the list until it is freed Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 47/58] IPV6: /proc/net/anycast6 unbalanced inet6_dev refcnt Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 48/58] sx: switch subven and subid values Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 49/58] UML: exports for hostfs Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 50/58] random: fix bound check ordering (CVE-2007-3105) Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 51/58] softmac: Fix deadlock of wx_set_essid with assoc work Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 52/58] ata_piix: update map 10b for ich8m Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 53/58] direct-io: fix error-path crashes Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 54/58] stifb: detect cards in double buffer mode more reliably Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 55/58] pata_atiixp: add SB700 PCI ID Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 57/58] CPUFREQ: ondemand: add a check to avoid negative load calculation Willy Tarreau
2007-08-22  9:39 ` [2.6.20.17 review 58/58] Reset current->pdeath_signal on SUID binary execution (CVE-2007-3848) Willy Tarreau
2007-08-22 11:10 ` [2.6.20.17 review 00/58] 2.6.20.17 -stable review Michal Piotrowski
2007-08-22 12:10   ` Willy Tarreau
2007-08-22 13:23   ` James Morris
2007-08-22 13:36     ` Stephen Smalley
2007-08-22 13:42       ` Stephen Smalley
2007-08-22 14:08         ` James Morris
2007-08-22 14:29           ` Michal Piotrowski
2007-08-22 14:32             ` Stephen Smalley
2007-08-22 16:33             ` James Morris
2007-08-22 16:46               ` Michal Piotrowski
2007-08-22 17:38                 ` James Morris
2007-08-22 18:08                   ` Michal Piotrowski
2007-08-22 17:50                 ` Michal Piotrowski
2007-08-22 19:15                   ` Stephen Smalley
2007-08-22 20:16                     ` Willy Tarreau
2007-08-23 11:13                       ` Michal Piotrowski
2007-08-23 14:08                         ` Willy Tarreau
2007-08-22 13:38     ` James Morris

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070822083943.%N@1wt.eu \
    --to=w@1wt.eu \
    --cc=davem@davemloft.net \
    --cc=gregkh@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ranko@spidernet.net \
    --cc=stable@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox