All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] linux-2.6.18: netback: take net_schedule_list_lock when removing entry from net_schedule_list
@ 2010-11-02  8:13 Jan Beulich
  2010-11-04 11:09 ` Laszlo Ersek
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2010-11-02  8:13 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com; +Cc: tomasz.wroblewski, Ian Campbell

[-- Attachment #1: Type: text/plain, Size: 4277 bytes --]

From: Ian Campbell <ian.campbell@citrix.com>

There is a race in net_tx_build_mops between checking if
net_schedule_list is empty and actually dequeuing the first entry on
the list. If another thread dequeues the only entry on the list during
this window we crash because list_first_entry expects a non-empty
list, like so:

[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
[ 0.133147] Oops: 0002 1 SMP
[ 0.133150] last sysfs file:
[ 0.133152] Modules linked in:
[ 0.133154]
[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
[ 0.133170] Stack:
[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
[ 0.133193] Call Trace:
[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
[ 0.133212] [<c1057444>] kthread+0x74/0x80
[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
                  58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
             58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
[ 0.133265] CR2: 0000000000000008
[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--

Therefore after the initial lock free check for an empty list check
again with the lock held before dequeueing the entry.

Based on a patch by Tomasz Wroblewski.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -784,15 +784,28 @@ static int __on_net_schedule_list(netif_
 	return netif->list.next != NULL;
 }
 
+/* Must be called with net_schedule_list_lock held. */
 static void remove_from_net_schedule_list(netif_t *netif)
 {
-	spin_lock_irq(&net_schedule_list_lock);
 	if (likely(__on_net_schedule_list(netif))) {
 		list_del(&netif->list);
 		netif->list.next = NULL;
 		netif_put(netif);
 	}
+}
+
+static netif_t *poll_net_schedule_list(void)
+{
+	netif_t *netif = NULL;
+
+	spin_lock_irq(&net_schedule_list_lock);
+	if (!list_empty(&net_schedule_list)) {
+		netif = list_first_entry(&net_schedule_list, netif_t, list);
+		netif_get(netif);
+		remove_from_net_schedule_list(netif);
+	}
 	spin_unlock_irq(&net_schedule_list_lock);
+	return netif;
 }
 
 static void add_to_net_schedule_list_tail(netif_t *netif)
@@ -837,7 +850,9 @@ void netif_schedule_work(netif_t *netif)
 
 void netif_deschedule_work(netif_t *netif)
 {
+	spin_lock_irq(&net_schedule_list_lock);
 	remove_from_net_schedule_list(netif);
+	spin_unlock_irq(&net_schedule_list_lock);
 }
 
 
@@ -1224,7 +1239,6 @@ static int netbk_set_skb_gso(struct sk_b
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
-	struct list_head *ent;
 	struct sk_buff *skb;
 	netif_t *netif;
 	netif_tx_request_t txreq;
@@ -1242,10 +1256,9 @@ static void net_tx_action(unsigned long 
 	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
 		!list_empty(&net_schedule_list)) {
 		/* Get a netif from the list with work to do. */
-		ent = net_schedule_list.next;
-		netif = list_entry(ent, netif_t, list);
-		netif_get(netif);
-		remove_from_net_schedule_list(netif);
+		netif = poll_net_schedule_list();
+		if (!netif)
+			continue;
 
 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
 		if (!work_to_do) {



[-- Attachment #2: xen-netback-sched-list-remove.patch --]
[-- Type: text/plain, Size: 4368 bytes --]

From: Ian Campbell <ian.campbell@citrix.com>
Subject: xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list

There is a race in net_tx_build_mops between checking if
net_schedule_list is empty and actually dequeuing the first entry on
the list. If another thread dequeues the only entry on the list during
this window we crash because list_first_entry expects a non-empty
list, like so:

[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
[ 0.133147] Oops: 0002 1 SMP
[ 0.133150] last sysfs file:
[ 0.133152] Modules linked in:
[ 0.133154]
[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
[ 0.133170] Stack:
[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
[ 0.133193] Call Trace:
[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
[ 0.133212] [<c1057444>] kthread+0x74/0x80
[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
                  58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
             58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
[ 0.133265] CR2: 0000000000000008
[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--

Therefore after the initial lock free check for an empty list check
again with the lock held before dequeueing the entry.

Based on a patch by Tomasz Wroblewski.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -784,15 +784,28 @@ static int __on_net_schedule_list(netif_
 	return netif->list.next != NULL;
 }
 
+/* Must be called with net_schedule_list_lock held. */
 static void remove_from_net_schedule_list(netif_t *netif)
 {
-	spin_lock_irq(&net_schedule_list_lock);
 	if (likely(__on_net_schedule_list(netif))) {
 		list_del(&netif->list);
 		netif->list.next = NULL;
 		netif_put(netif);
 	}
+}
+
+static netif_t *poll_net_schedule_list(void)
+{
+	netif_t *netif = NULL;
+
+	spin_lock_irq(&net_schedule_list_lock);
+	if (!list_empty(&net_schedule_list)) {
+		netif = list_first_entry(&net_schedule_list, netif_t, list);
+		netif_get(netif);
+		remove_from_net_schedule_list(netif);
+	}
 	spin_unlock_irq(&net_schedule_list_lock);
+	return netif;
 }
 
 static void add_to_net_schedule_list_tail(netif_t *netif)
@@ -837,7 +850,9 @@ void netif_schedule_work(netif_t *netif)
 
 void netif_deschedule_work(netif_t *netif)
 {
+	spin_lock_irq(&net_schedule_list_lock);
 	remove_from_net_schedule_list(netif);
+	spin_unlock_irq(&net_schedule_list_lock);
 }
 
 
@@ -1224,7 +1239,6 @@ static int netbk_set_skb_gso(struct sk_b
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
-	struct list_head *ent;
 	struct sk_buff *skb;
 	netif_t *netif;
 	netif_tx_request_t txreq;
@@ -1242,10 +1256,9 @@ static void net_tx_action(unsigned long 
 	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
 		!list_empty(&net_schedule_list)) {
 		/* Get a netif from the list with work to do. */
-		ent = net_schedule_list.next;
-		netif = list_entry(ent, netif_t, list);
-		netif_get(netif);
-		remove_from_net_schedule_list(netif);
+		netif = poll_net_schedule_list();
+		if (!netif)
+			continue;
 
 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
 		if (!work_to_do) {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-11-04 11:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-02  8:13 [PATCH] linux-2.6.18: netback: take net_schedule_list_lock when removing entry from net_schedule_list Jan Beulich
2010-11-04 11:09 ` Laszlo Ersek
2010-11-04 11:15   ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.