netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH iwl-net] igc: Fix deadlock on module removal
@ 2024-04-14  7:44 Kurt Kanzenbach
  2024-04-14  9:02 ` Lukas Wunner
  0 siblings, 1 reply; 7+ messages in thread
From: Kurt Kanzenbach @ 2024-04-14  7:44 UTC (permalink / raw)
  To: Jesse Brandeburg, Tony Nguyen, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Andrew Lunn
  Cc: Lukas Wunner, Sasha Neftin, intel-wired-lan, netdev,
	Kurt Kanzenbach

The removal of the igc module leads to a deadlock:

|[Mon Apr  8 17:38:55 2024]  __mutex_lock.constprop.0+0x3e5/0x7a0
|[Mon Apr  8 17:38:55 2024]  ? preempt_count_add+0x85/0xd0
|[Mon Apr  8 17:38:55 2024]  __mutex_lock_slowpath+0x13/0x20
|[Mon Apr  8 17:38:55 2024]  mutex_lock+0x3b/0x50
|[Mon Apr  8 17:38:55 2024]  rtnl_lock+0x19/0x20
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_notifier+0x2a/0xc0
|[Mon Apr  8 17:38:55 2024]  netdev_trig_deactivate+0x25/0x70
|[Mon Apr  8 17:38:55 2024]  led_trigger_set+0xe2/0x2d0
|[Mon Apr  8 17:38:55 2024]  led_classdev_unregister+0x4f/0x100
|[Mon Apr  8 17:38:55 2024]  devm_led_classdev_release+0x15/0x20
|[Mon Apr  8 17:38:55 2024]  release_nodes+0x47/0xc0
|[Mon Apr  8 17:38:55 2024]  devres_release_all+0x9f/0xe0
|[Mon Apr  8 17:38:55 2024]  device_del+0x272/0x3c0
|[Mon Apr  8 17:38:55 2024]  netdev_unregister_kobject+0x8c/0xa0
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_many_notify+0x530/0x7c0
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_queue+0xad/0xf0
|[Mon Apr  8 17:38:55 2024]  unregister_netdev+0x21/0x30
|[Mon Apr  8 17:38:55 2024]  igc_remove+0xfb/0x1f0 [igc]
|[Mon Apr  8 17:38:55 2024]  pci_device_remove+0x42/0xb0
|[Mon Apr  8 17:38:55 2024]  device_remove+0x43/0x70

unregister_netdev() acquires the RNTL lock and releases the LEDs bound
to that netdevice. However, netdev_trig_deactivate() and later
unregister_netdevice_notifier() try to acquire the RTNL lock again.

Avoid this situation by not using the device-managed LED class
functions.

Suggested-by: Lukas Wunner <lukas@wunner.de>
Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
 drivers/net/ethernet/intel/igc/igc.h      |  4 ++++
 drivers/net/ethernet/intel/igc/igc_leds.c | 37 ++++++++++++++++++++++++-------
 drivers/net/ethernet/intel/igc/igc_main.c |  3 +++
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 90316dc58630..9f352cbe5d56 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -164,6 +164,8 @@ struct igc_ring {
 	struct xsk_buff_pool *xsk_pool;
 } ____cacheline_internodealigned_in_smp;
 
+struct igc_led_classdev;
+
 /* Board specific private data structure */
 struct igc_adapter {
 	struct net_device *netdev;
@@ -298,6 +300,7 @@ struct igc_adapter {
 
 	/* LEDs */
 	struct mutex led_mutex;
+	struct igc_led_classdev *leds;
 };
 
 void igc_up(struct igc_adapter *adapter);
@@ -723,6 +726,7 @@ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
 void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
 
 int igc_led_setup(struct igc_adapter *adapter);
+void igc_led_free(struct igc_adapter *adapter);
 
 #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
 
diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c
index bf240c5daf86..eee550cdb1d5 100644
--- a/drivers/net/ethernet/intel/igc/igc_leds.c
+++ b/drivers/net/ethernet/intel/igc/igc_leds.c
@@ -236,8 +236,8 @@ static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf,
 		 pci_dev_id(adapter->pdev), index);
 }
 
-static void igc_setup_ldev(struct igc_led_classdev *ldev,
-			   struct net_device *netdev, int index)
+static int igc_setup_ldev(struct igc_led_classdev *ldev,
+			  struct net_device *netdev, int index)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct led_classdev *led_cdev = &ldev->led;
@@ -257,24 +257,45 @@ static void igc_setup_ldev(struct igc_led_classdev *ldev,
 	led_cdev->hw_control_get = igc_led_hw_control_get;
 	led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
 
-	devm_led_classdev_register(&netdev->dev, led_cdev);
+	return led_classdev_register(&netdev->dev, led_cdev);
 }
 
 int igc_led_setup(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct device *dev = &netdev->dev;
 	struct igc_led_classdev *leds;
-	int i;
+	int i, err;
 
 	mutex_init(&adapter->led_mutex);
 
-	leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+	leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
 	if (!leds)
 		return -ENOMEM;
 
-	for (i = 0; i < IGC_NUM_LEDS; i++)
-		igc_setup_ldev(leds + i, netdev, i);
+	for (i = 0; i < IGC_NUM_LEDS; i++) {
+		err = igc_setup_ldev(leds + i, netdev, i);
+		if (err)
+			goto err;
+	}
+
+	adapter->leds = leds;
 
 	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		led_classdev_unregister(&((leds + i)->led));
+
+	return err;
+}
+
+void igc_led_free(struct igc_adapter *adapter)
+{
+	struct igc_led_classdev *leds = adapter->leds;
+	int i;
+
+	for (i = 0; i < IGC_NUM_LEDS; i++)
+		led_classdev_unregister(&((leds + i)->led));
+
+	kfree(leds);
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 35ad40a803cb..4d975d620a8e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7021,6 +7021,9 @@ static void igc_remove(struct pci_dev *pdev)
 	cancel_work_sync(&adapter->watchdog_task);
 	hrtimer_cancel(&adapter->hrtimer);
 
+	if (IS_ENABLED(CONFIG_IGC_LEDS))
+		igc_led_free(adapter);
+
 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
 	 * would have already happened in close and is redundant.
 	 */

---
base-commit: 7efd0a74039fb6b584be2cb91c1d0ef0bd796ee1
change-id: 20240411-igc_led_deadlock-7abd85954f5e

Best regards,
-- 
Kurt Kanzenbach <kurt@linutronix.de>


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-14  7:44 [PATCH iwl-net] igc: Fix deadlock on module removal Kurt Kanzenbach
@ 2024-04-14  9:02 ` Lukas Wunner
  2024-04-14  9:15   ` Kurt Kanzenbach
  0 siblings, 1 reply; 7+ messages in thread
From: Lukas Wunner @ 2024-04-14  9:02 UTC (permalink / raw)
  To: Kurt Kanzenbach, Roman Lozko
  Cc: Jesse Brandeburg, Tony Nguyen, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Andrew Lunn, Sasha Neftin,
	intel-wired-lan, netdev

[cc += Roman Lozko who originally reported the issue]

On Sun, Apr 14, 2024 at 09:44:10AM +0200, Kurt Kanzenbach wrote:
> unregister_netdev() acquires the RNTL lock and releases the LEDs bound
> to that netdevice. However, netdev_trig_deactivate() and later
> unregister_netdevice_notifier() try to acquire the RTNL lock again.
> 
> Avoid this situation by not using the device-managed LED class
> functions.
> 
> Suggested-by: Lukas Wunner <lukas@wunner.de>
> Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>

This patch is almost a 1:1 copy of the patch I submitted on April 5:

https://lore.kernel.org/all/ZhBN9p1yOyciXkzw@wunner.de/

I think it is mandatory that you include a Signed-off-by with my name
in that case.  Arguably the commit author ("From:") should also be me.

Moreover this is missing a Reported-by tag with Roman Lozko's name.

AFAICS the only changes that you made are:
- rename igc_led_teardown() to igc_led_free()
- rename ret to err
- replace devm_kcalloc() with kcalloc()
  (and you introduced a memory leak while doing so, see below)

Honestly I don't see how those small changes justify omitting a
Signed-off-by or assuming authorship.

I would have been happy to submit a patch myself, I was waiting
for a Tested-by from Roman or you.


> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -164,6 +164,8 @@ struct igc_ring {
>  	struct xsk_buff_pool *xsk_pool;
>  } ____cacheline_internodealigned_in_smp;
>  
> +struct igc_led_classdev;

Unnecessary forward declaration, this compiled fine for me without it.


>  int igc_led_setup(struct igc_adapter *adapter)
>  {
>  	struct net_device *netdev = adapter->netdev;
> -	struct device *dev = &netdev->dev;
>  	struct igc_led_classdev *leds;
> -	int i;
> +	int i, err;
>  
>  	mutex_init(&adapter->led_mutex);
>  
> -	leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
> +	leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
>  	if (!leds)
>  		return -ENOMEM;
>  
> -	for (i = 0; i < IGC_NUM_LEDS; i++)
> -		igc_setup_ldev(leds + i, netdev, i);
> +	for (i = 0; i < IGC_NUM_LEDS; i++) {
> +		err = igc_setup_ldev(leds + i, netdev, i);
> +		if (err)
> +			goto err;
> +	}
> +
> +	adapter->leds = leds;
>  
>  	return 0;
> +
> +err:
> +	for (i--; i >= 0; i--)
> +		led_classdev_unregister(&((leds + i)->led));
> +
> +	return err;
> +}

"leds" allocation is leaked in the error path.

This memory leak was not present in my original patch.  Not good!

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-14  9:02 ` Lukas Wunner
@ 2024-04-14  9:15   ` Kurt Kanzenbach
  2024-04-15 11:02     ` Kurt Kanzenbach
  2024-04-15 13:51     ` Lukas Wunner
  0 siblings, 2 replies; 7+ messages in thread
From: Kurt Kanzenbach @ 2024-04-14  9:15 UTC (permalink / raw)
  To: Lukas Wunner, Roman Lozko
  Cc: Jesse Brandeburg, Tony Nguyen, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Andrew Lunn, Sasha Neftin,
	intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 1700 bytes --]

Hi Lukas,

On Sun Apr 14 2024, Lukas Wunner wrote:
> [cc += Roman Lozko who originally reported the issue]
>
> On Sun, Apr 14, 2024 at 09:44:10AM +0200, Kurt Kanzenbach wrote:
>> unregister_netdev() acquires the RNTL lock and releases the LEDs bound
>> to that netdevice. However, netdev_trig_deactivate() and later
>> unregister_netdevice_notifier() try to acquire the RTNL lock again.
>> 
>> Avoid this situation by not using the device-managed LED class
>> functions.
>> 
>> Suggested-by: Lukas Wunner <lukas@wunner.de>
>> Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
>> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
>
> This patch is almost a 1:1 copy of the patch I submitted on April 5:
>
> https://lore.kernel.org/all/ZhBN9p1yOyciXkzw@wunner.de/
>
> I think it is mandatory that you include a Signed-off-by with my name
> in that case.  Arguably the commit author ("From:") should also be me.

I was a bit unsure how to proceed with that. See below.

>
> Moreover this is missing a Reported-by tag with Roman Lozko's name.
>
> AFAICS the only changes that you made are:
> - rename igc_led_teardown() to igc_led_free()
> - rename ret to err
> - replace devm_kcalloc() with kcalloc()
>   (and you introduced a memory leak while doing so, see below)
>
> Honestly I don't see how those small changes justify omitting a
> Signed-off-by or assuming authorship.
>
> I would have been happy to submit a patch myself, I was waiting
> for a Tested-by from Roman or you.

Perfect. I was wondering why you are not submitting the patch
yourself. Then, please go ahead and submit the patch. Feel free to add
my Tested-by.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-14  9:15   ` Kurt Kanzenbach
@ 2024-04-15 11:02     ` Kurt Kanzenbach
  2024-04-15 13:54       ` Lukas Wunner
  2024-04-15 13:51     ` Lukas Wunner
  1 sibling, 1 reply; 7+ messages in thread
From: Kurt Kanzenbach @ 2024-04-15 11:02 UTC (permalink / raw)
  To: Lukas Wunner, Roman Lozko
  Cc: Jesse Brandeburg, Tony Nguyen, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Andrew Lunn, Sasha Neftin,
	intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 476 bytes --]

Lukas,

>> I would have been happy to submit a patch myself, I was waiting
>> for a Tested-by from Roman or you.
>
> Perfect. I was wondering why you are not submitting the patch
> yourself. Then, please go ahead and submit the patch. Feel free to add
> my Tested-by.

Scratch that. I've sent v2 with your SoB. PTAL, because your original
code snippet didn't have a SoB.

https://lore.kernel.org/netdev/20240411-igc_led_deadlock-v2-1-b758c0c88b2b@linutronix.de/

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-14  9:15   ` Kurt Kanzenbach
  2024-04-15 11:02     ` Kurt Kanzenbach
@ 2024-04-15 13:51     ` Lukas Wunner
  1 sibling, 0 replies; 7+ messages in thread
From: Lukas Wunner @ 2024-04-15 13:51 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Roman Lozko, Jesse Brandeburg, Tony Nguyen, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
	Sasha Neftin, intel-wired-lan, netdev

On Sun, Apr 14, 2024 at 11:15:26AM +0200, Kurt Kanzenbach wrote:
> Perfect. I was wondering why you are not submitting the patch
> yourself. Then, please go ahead and submit the patch.

Here you go:

https://lore.kernel.org/netdev/2f1be6b1cf2b3346929b0049f2ac7d7d79acb5c9.1713188539.git.lukas@wunner.de/

You may want to double-check that it fixes the issue and doesn't
cause any new ones.

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-15 11:02     ` Kurt Kanzenbach
@ 2024-04-15 13:54       ` Lukas Wunner
  2024-04-15 14:00         ` Kurt Kanzenbach
  0 siblings, 1 reply; 7+ messages in thread
From: Lukas Wunner @ 2024-04-15 13:54 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Roman Lozko, Jesse Brandeburg, Tony Nguyen, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
	Sasha Neftin, intel-wired-lan, netdev

On Mon, Apr 15, 2024 at 01:02:14PM +0200, Kurt Kanzenbach wrote:
> > > I would have been happy to submit a patch myself, I was waiting
> > > for a Tested-by from Roman or you.
> >
> > Perfect. I was wondering why you are not submitting the patch
> > yourself. Then, please go ahead and submit the patch. Feel free to add
> > my Tested-by.
> 
> Scratch that. I've sent v2 with your SoB. PTAL, because your original
> code snippet didn't have a SoB.
> 
> https://lore.kernel.org/netdev/20240411-igc_led_deadlock-v2-1-b758c0c88b2b@linutronix.de/

I created a patch yesterday, as you've requested, then waited for 0-day
to crunch through it and report success.  Which it just did, so here's
my proposal and I guess maintainers now have more than enough options
to choose from:

https://lore.kernel.org/netdev/2f1be6b1cf2b3346929b0049f2ac7d7d79acb5c9.1713188539.git.lukas@wunner.de/

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH iwl-net] igc: Fix deadlock on module removal
  2024-04-15 13:54       ` Lukas Wunner
@ 2024-04-15 14:00         ` Kurt Kanzenbach
  0 siblings, 0 replies; 7+ messages in thread
From: Kurt Kanzenbach @ 2024-04-15 14:00 UTC (permalink / raw)
  To: Lukas Wunner
  Cc: Roman Lozko, Jesse Brandeburg, Tony Nguyen, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
	Sasha Neftin, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 1253 bytes --]

Hi Lukas,

On Mon Apr 15 2024, Lukas Wunner wrote:
> On Mon, Apr 15, 2024 at 01:02:14PM +0200, Kurt Kanzenbach wrote:
>> > > I would have been happy to submit a patch myself, I was waiting
>> > > for a Tested-by from Roman or you.
>> >
>> > Perfect. I was wondering why you are not submitting the patch
>> > yourself. Then, please go ahead and submit the patch. Feel free to add
>> > my Tested-by.
>> 
>> Scratch that. I've sent v2 with your SoB. PTAL, because your original
>> code snippet didn't have a SoB.
>> 
>> https://lore.kernel.org/netdev/20240411-igc_led_deadlock-v2-1-b758c0c88b2b@linutronix.de/
>
> I created a patch yesterday, as you've requested, then waited for 0-day
> to crunch through it and report success.  Which it just did, so here's
> my proposal and I guess maintainers now have more than enough options
> to choose from:
>
> https://lore.kernel.org/netdev/2f1be6b1cf2b3346929b0049f2ac7d7d79acb5c9.1713188539.git.lukas@wunner.de/

Yes. And sorry for being a bit unresponsive, but i was out-of-office for
the last couple of weeks. Anyway, thank you for your help in debugging
this!

Regarding testing, it worked on my test machines and the Intel
maintainers will validate it as well.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-15 14:00 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-14  7:44 [PATCH iwl-net] igc: Fix deadlock on module removal Kurt Kanzenbach
2024-04-14  9:02 ` Lukas Wunner
2024-04-14  9:15   ` Kurt Kanzenbach
2024-04-15 11:02     ` Kurt Kanzenbach
2024-04-15 13:54       ` Lukas Wunner
2024-04-15 14:00         ` Kurt Kanzenbach
2024-04-15 13:51     ` Lukas Wunner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).