Linux PCI subsystem development
 help / color / mirror / Atom feed
* [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
@ 2025-04-05 14:54 Hans Zhang
  2025-04-05 15:28 ` Bjorn Helgaas
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 14:54 UTC (permalink / raw)
  To: lpieralisi
  Cc: manivannan.sadhasivam, thierry.reding, kw, robh, bhelgaas,
	jonathanh, linux-pci, linux-kernel, linux-tegra, Hans Zhang

When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
debugfs_remove_recursive(), leading to potential NULL pointer operations.

Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
stubbed for !CONFIG_PCIEASPM. Use this function during removal/shutdown to
ensure debugfs cleanup only occurs when entries were initialized.

This prevents kernel warnings and instability when ASPM support is
disabled.

Signed-off-by: Hans Zhang <18255117159@163.com>
---
 drivers/pci/controller/dwc/pcie-tegra194.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index 5103995cd6c7..d762e733c2d8 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -716,11 +716,20 @@ static void init_debugfs(struct tegra_pcie_dw *pcie)
 	debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt", pcie->debugfs,
 				    aspm_state_cnt);
 }
+
+static void deinit_debugfs(struct tegra_pcie_dw *pcie)
+{
+	if (!pcie->debugfs)
+		return;
+
+	debugfs_remove_recursive(pcie->debugfs);
+}
 #else
 static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
 static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
 static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
 static inline void init_debugfs(struct tegra_pcie_dw *pcie) { return; }
+static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
 #endif
 
 static void tegra_pcie_enable_system_interrupts(struct dw_pcie_rp *pp)
@@ -2289,7 +2298,7 @@ static void tegra_pcie_dw_remove(struct platform_device *pdev)
 		if (!pcie->link_state)
 			return;
 
-		debugfs_remove_recursive(pcie->debugfs);
+		deinit_debugfs(pcie->debugfs);
 		tegra_pcie_deinit_controller(pcie);
 		pm_runtime_put_sync(pcie->dev);
 	} else {
@@ -2408,7 +2417,7 @@ static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
 		if (!pcie->link_state)
 			return;
 
-		debugfs_remove_recursive(pcie->debugfs);
+		deinit_debugfs(pcie->debugfs);
 		tegra_pcie_downstream_dev_to_D0(pcie);
 
 		disable_irq(pcie->pci.pp.irq);

base-commit: a8662bcd2ff152bfbc751cab20f33053d74d0963
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 14:54 [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM Hans Zhang
@ 2025-04-05 15:28 ` Bjorn Helgaas
  2025-04-05 15:49   ` Hans Zhang
  2025-04-05 16:14 ` Christophe JAILLET
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Bjorn Helgaas @ 2025-04-05 15:28 UTC (permalink / raw)
  To: Hans Zhang
  Cc: lpieralisi, manivannan.sadhasivam, thierry.reding, kw, robh,
	bhelgaas, jonathanh, linux-pci, linux-kernel, linux-tegra

Follow subject line capitalization convention.

On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
> debugfs_remove_recursive(), leading to potential NULL pointer operations.
> 
> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
> stubbed for !CONFIG_PCIEASPM. Use this function during removal/shutdown to
> ensure debugfs cleanup only occurs when entries were initialized.
> 
> This prevents kernel warnings and instability when ASPM support is
> disabled.

This looks like there should be a Fixes: tag to connect this to the
commit that introduced the problem.

If this is something that broke with the v6.15 merge window, we should
include this in v6.15 via pci/for-linus.  If this broke earlier, we
would have to decide whether pci/for-linus is still appropriate or a
stable tag.

We did merge some debugfs things for v6.15, but I don't see anything
specific to pcie-tegra194.c, so I'm confused about why this fix would
be in pcie-tegra194.c instead of some more generic place.

> Signed-off-by: Hans Zhang <18255117159@163.com>
> ---
>  drivers/pci/controller/dwc/pcie-tegra194.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
> index 5103995cd6c7..d762e733c2d8 100644
> --- a/drivers/pci/controller/dwc/pcie-tegra194.c
> +++ b/drivers/pci/controller/dwc/pcie-tegra194.c
> @@ -716,11 +716,20 @@ static void init_debugfs(struct tegra_pcie_dw *pcie)
>  	debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt", pcie->debugfs,
>  				    aspm_state_cnt);
>  }
> +
> +static void deinit_debugfs(struct tegra_pcie_dw *pcie)
> +{
> +	if (!pcie->debugfs)
> +		return;
> +
> +	debugfs_remove_recursive(pcie->debugfs);
> +}
>  #else
>  static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
>  static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
>  static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
>  static inline void init_debugfs(struct tegra_pcie_dw *pcie) { return; }
> +static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
>  #endif
>  
>  static void tegra_pcie_enable_system_interrupts(struct dw_pcie_rp *pp)
> @@ -2289,7 +2298,7 @@ static void tegra_pcie_dw_remove(struct platform_device *pdev)
>  		if (!pcie->link_state)
>  			return;
>  
> -		debugfs_remove_recursive(pcie->debugfs);
> +		deinit_debugfs(pcie->debugfs);
>  		tegra_pcie_deinit_controller(pcie);
>  		pm_runtime_put_sync(pcie->dev);
>  	} else {
> @@ -2408,7 +2417,7 @@ static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
>  		if (!pcie->link_state)
>  			return;
>  
> -		debugfs_remove_recursive(pcie->debugfs);
> +		deinit_debugfs(pcie->debugfs);
>  		tegra_pcie_downstream_dev_to_D0(pcie);
>  
>  		disable_irq(pcie->pci.pp.irq);
> 
> base-commit: a8662bcd2ff152bfbc751cab20f33053d74d0963
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 15:28 ` Bjorn Helgaas
@ 2025-04-05 15:49   ` Hans Zhang
  2025-04-05 16:17     ` Christophe JAILLET
  0 siblings, 1 reply; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 15:49 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: lpieralisi, manivannan.sadhasivam, thierry.reding, kw, robh,
	bhelgaas, jonathanh, linux-pci, linux-kernel, linux-tegra



On 2025/4/5 23:28, Bjorn Helgaas wrote:
> Follow subject line capitalization convention.
> 
> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
>> debugfs_remove_recursive(), leading to potential NULL pointer operations.
>>
>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/shutdown to
>> ensure debugfs cleanup only occurs when entries were initialized.
>>
>> This prevents kernel warnings and instability when ASPM support is
>> disabled.
> 
> This looks like there should be a Fixes: tag to connect this to the
> commit that introduced the problem.

Hi Bjorn,

Thanks your for reply. Will add.

Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for Endpoint 
mode)

> 
> If this is something that broke with the v6.15 merge window, we should
> include this in v6.15 via pci/for-linus.  If this broke earlier, we
> would have to decide whether pci/for-linus is still appropriate or a
> stable tag.
> 

The original code that introduced the unconditional 
`debugfs_remove_recursive()` calls was actually merged in an earlier cycle.

> We did merge some debugfs things for v6.15, but I don't see anything
> specific to pcie-tegra194.c, so I'm confused about why this fix would
> be in pcie-tegra194.c instead of some more generic place.
> 

The Tegra194 driver conditionally initializes pcie->debugfs based on 
CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
uninitialized, but tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() 
unconditionally call debugfs_remove_recursive(), leading to a NULL 
pointer dereference. This is specific to the Tegra194 implementation, as 
other drivers or core PCI code may already guard debugfs cleanup against 
uninitialized states through different mechanisms.

Best regards,
Hans


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 14:54 [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM Hans Zhang
  2025-04-05 15:28 ` Bjorn Helgaas
@ 2025-04-05 16:14 ` Christophe JAILLET
  2025-04-05 16:41   ` Hans Zhang
  2025-04-06  0:48 ` kernel test robot
  2025-04-06  2:10 ` kernel test robot
  3 siblings, 1 reply; 12+ messages in thread
From: Christophe JAILLET @ 2025-04-05 16:14 UTC (permalink / raw)
  To: 18255117159
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding

Le 05/04/2025 à 16:54, Hans Zhang a écrit :
> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
> debugfs_remove_recursive(), leading to potential NULL pointer operations.
> 
> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
> stubbed for !CONFIG_PCIEASPM. Use this function during removal/shutdown to
> ensure debugfs cleanup only occurs when entries were initialized.
> 
> This prevents kernel warnings and instability when ASPM support is
> disabled.
> 

Could you elaborate?


debugfs_remove_recursive() ends either to:

static inline void debugfs_remove(struct dentry *dentry)
{ }
if CONFIG_DEBUG_FS is not set,

or
to a function which starts with:
	if (IS_ERR_OR_NULL(dentry))
		return;
if it is set.


So what does this new deinit_debugfs() add?


Which NULL pointer are you seeing?
Did you actually manage to trigger it?

CJ



> Signed-off-by: Hans Zhang <18255117159-9Onoh4P/yGk@public.gmane.org>
> ---
>   drivers/pci/controller/dwc/pcie-tegra194.c | 13 +++++++++++--
>   1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
> index 5103995cd6c7..d762e733c2d8 100644
> --- a/drivers/pci/controller/dwc/pcie-tegra194.c
> +++ b/drivers/pci/controller/dwc/pcie-tegra194.c
> @@ -716,11 +716,20 @@ static void init_debugfs(struct tegra_pcie_dw *pcie)
>   	debugfs_create_devm_seqfile(pcie->dev, "aspm_state_cnt", pcie->debugfs,
>   				    aspm_state_cnt);
>   }
> +
> +static void deinit_debugfs(struct tegra_pcie_dw *pcie)
> +{
> +	if (!pcie->debugfs)
> +		return;
> +
> +	debugfs_remove_recursive(pcie->debugfs);
> +}
>   #else
>   static inline void disable_aspm_l12(struct tegra_pcie_dw *pcie) { return; }
>   static inline void disable_aspm_l11(struct tegra_pcie_dw *pcie) { return; }
>   static inline void init_host_aspm(struct tegra_pcie_dw *pcie) { return; }
>   static inline void init_debugfs(struct tegra_pcie_dw *pcie) { return; }
> +static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
>   #endif
>   
>   static void tegra_pcie_enable_system_interrupts(struct dw_pcie_rp *pp)
> @@ -2289,7 +2298,7 @@ static void tegra_pcie_dw_remove(struct platform_device *pdev)
>   		if (!pcie->link_state)
>   			return;
>   
> -		debugfs_remove_recursive(pcie->debugfs);
> +		deinit_debugfs(pcie->debugfs);
>   		tegra_pcie_deinit_controller(pcie);
>   		pm_runtime_put_sync(pcie->dev);
>   	} else {
> @@ -2408,7 +2417,7 @@ static void tegra_pcie_dw_shutdown(struct platform_device *pdev)
>   		if (!pcie->link_state)
>   			return;
>   
> -		debugfs_remove_recursive(pcie->debugfs);
> +		deinit_debugfs(pcie->debugfs);
>   		tegra_pcie_downstream_dev_to_D0(pcie);
>   
>   		disable_irq(pcie->pci.pp.irq);
> 
> base-commit: a8662bcd2ff152bfbc751cab20f33053d74d0963


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 15:49   ` Hans Zhang
@ 2025-04-05 16:17     ` Christophe JAILLET
  2025-04-05 16:35       ` Hans Zhang
  0 siblings, 1 reply; 12+ messages in thread
From: Christophe JAILLET @ 2025-04-05 16:17 UTC (permalink / raw)
  To: 18255117159
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding

Le 05/04/2025 à 17:49, Hans Zhang a écrit :
> 
> 
> On 2025/4/5 23:28, Bjorn Helgaas wrote:
>> Follow subject line capitalization convention.
>>
>> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
>>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
>>> debugfs_remove_recursive(), leading to potential NULL pointer 
>>> operations.
>>>
>>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
>>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/ 
>>> shutdown to
>>> ensure debugfs cleanup only occurs when entries were initialized.
>>>
>>> This prevents kernel warnings and instability when ASPM support is
>>> disabled.
>>
>> This looks like there should be a Fixes: tag to connect this to the
>> commit that introduced the problem.
> 
> Hi Bjorn,
> 
> Thanks your for reply. Will add.
> 
> Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for Endpoint 
> mode)
> 
>>
>> If this is something that broke with the v6.15 merge window, we should
>> include this in v6.15 via pci/for-linus.  If this broke earlier, we
>> would have to decide whether pci/for-linus is still appropriate or a
>> stable tag.
>>
> 
> The original code that introduced the unconditional 
> `debugfs_remove_recursive()` calls was actually merged in an earlier cycle.
> 
>> We did merge some debugfs things for v6.15, but I don't see anything
>> specific to pcie-tegra194.c, so I'm confused about why this fix would
>> be in pcie-tegra194.c instead of some more generic place.
>>
> 
> The Tegra194 driver conditionally initializes pcie->debugfs based on 
> CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
> uninitialized, but tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() 
> unconditionally call debugfs_remove_recursive(), leading to a NULL 

debugfs IS initialized, because it is in a structure allocated with 
devm_kzalloc().

And debugfs functions handle such cases.

CJ

> pointer dereference. This is specific to the Tegra194 implementation, as 
> other drivers or core PCI code may already guard debugfs cleanup against 
> uninitialized states through different mechanisms.
> 
> Best regards,
> Hans
> 
> 
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 16:17     ` Christophe JAILLET
@ 2025-04-05 16:35       ` Hans Zhang
  2025-04-05 16:47         ` Hans Zhang
  0 siblings, 1 reply; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 16:35 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding



On 2025/4/6 00:17, Christophe JAILLET wrote:
> Le 05/04/2025 à 17:49, Hans Zhang a écrit :
>>
>>
>> On 2025/4/5 23:28, Bjorn Helgaas wrote:
>>> Follow subject line capitalization convention.
>>>
>>> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>>>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
>>>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally 
>>>> call
>>>> debugfs_remove_recursive(), leading to potential NULL pointer 
>>>> operations.
>>>>
>>>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
>>>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/ 
>>>> shutdown to
>>>> ensure debugfs cleanup only occurs when entries were initialized.
>>>>
>>>> This prevents kernel warnings and instability when ASPM support is
>>>> disabled.
>>>
>>> This looks like there should be a Fixes: tag to connect this to the
>>> commit that introduced the problem.
>>
>> Hi Bjorn,
>>
>> Thanks your for reply. Will add.
>>
>> Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for 
>> Endpoint mode)
>>
>>>
>>> If this is something that broke with the v6.15 merge window, we should
>>> include this in v6.15 via pci/for-linus.  If this broke earlier, we
>>> would have to decide whether pci/for-linus is still appropriate or a
>>> stable tag.
>>>
>>
>> The original code that introduced the unconditional 
>> `debugfs_remove_recursive()` calls was actually merged in an earlier 
>> cycle.
>>
>>> We did merge some debugfs things for v6.15, but I don't see anything
>>> specific to pcie-tegra194.c, so I'm confused about why this fix would
>>> be in pcie-tegra194.c instead of some more generic place.
>>>
>>
>> The Tegra194 driver conditionally initializes pcie->debugfs based on 
>> CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
>> uninitialized, but tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() 
>> unconditionally call debugfs_remove_recursive(), leading to a NULL 
> 
> debugfs IS initialized, because it is in a structure allocated with 
> devm_kzalloc().
> 
> And debugfs functions handle such cases.
> 

Oh, my mind went wrong and I didn't pay attention to devm, and I'm 
really sorry about that.

Another problem I noticed here is that currently, no matter what, 
pcie->debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
defined(CONFIG_PCIEASPM) is valid, then pcie->debugfs = 
debugfs_create_dir(name, NULL); Is it superfluous?

Best regards,
Hans


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 16:14 ` Christophe JAILLET
@ 2025-04-05 16:41   ` Hans Zhang
  0 siblings, 0 replies; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 16:41 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding



On 2025/4/6 00:14, Christophe JAILLET wrote:
> Le 05/04/2025 à 16:54, Hans Zhang a écrit :
>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally call
>> debugfs_remove_recursive(), leading to potential NULL pointer operations.
>>
>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), which is
>> stubbed for !CONFIG_PCIEASPM. Use this function during 
>> removal/shutdown to
>> ensure debugfs cleanup only occurs when entries were initialized.
>>
>> This prevents kernel warnings and instability when ASPM support is
>> disabled.
>>
> 
> Could you elaborate?
> 
> 
> debugfs_remove_recursive() ends either to:
> 
> static inline void debugfs_remove(struct dentry *dentry)
> { }
> if CONFIG_DEBUG_FS is not set,
> 
> or
> to a function which starts with:
>      if (IS_ERR_OR_NULL(dentry))
>          return;
> if it is set.
> 
> 
> So what does this new deinit_debugfs() add?
> 
> 
> Which NULL pointer are you seeing?
> Did you actually manage to trigger it?
> 


Hi Christophe,

You're right, and I'm sorry about that.

The following line of code only makes sense if the #if 
defined(CONFIG_PCIEASPM) condition holds. Do we need to optimize this?

pcie->debugfs = debugfs_create_dir(name, NULL);

Best regards,
Hans


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 16:35       ` Hans Zhang
@ 2025-04-05 16:47         ` Hans Zhang
  2025-04-05 17:04           ` Christophe JAILLET
  0 siblings, 1 reply; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 16:47 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding



On 2025/4/6 00:35, Hans Zhang wrote:
> 
> 
> On 2025/4/6 00:17, Christophe JAILLET wrote:
>> Le 05/04/2025 à 17:49, Hans Zhang a écrit :
>>>
>>>
>>> On 2025/4/5 23:28, Bjorn Helgaas wrote:
>>>> Follow subject line capitalization convention.
>>>>
>>>> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>>>>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, but
>>>>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() unconditionally 
>>>>> call
>>>>> debugfs_remove_recursive(), leading to potential NULL pointer 
>>>>> operations.
>>>>>
>>>>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), 
>>>>> which is
>>>>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/ 
>>>>> shutdown to
>>>>> ensure debugfs cleanup only occurs when entries were initialized.
>>>>>
>>>>> This prevents kernel warnings and instability when ASPM support is
>>>>> disabled.
>>>>
>>>> This looks like there should be a Fixes: tag to connect this to the
>>>> commit that introduced the problem.
>>>
>>> Hi Bjorn,
>>>
>>> Thanks your for reply. Will add.
>>>
>>> Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for 
>>> Endpoint mode)
>>>
>>>>
>>>> If this is something that broke with the v6.15 merge window, we should
>>>> include this in v6.15 via pci/for-linus.  If this broke earlier, we
>>>> would have to decide whether pci/for-linus is still appropriate or a
>>>> stable tag.
>>>>
>>>
>>> The original code that introduced the unconditional 
>>> `debugfs_remove_recursive()` calls was actually merged in an earlier 
>>> cycle.
>>>
>>>> We did merge some debugfs things for v6.15, but I don't see anything
>>>> specific to pcie-tegra194.c, so I'm confused about why this fix would
>>>> be in pcie-tegra194.c instead of some more generic place.
>>>>
>>>
>>> The Tegra194 driver conditionally initializes pcie->debugfs based on 
>>> CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
>>> uninitialized, but tegra_pcie_dw_remove() and 
>>> tegra_pcie_dw_shutdown() unconditionally call 
>>> debugfs_remove_recursive(), leading to a NULL 
>>
>> debugfs IS initialized, because it is in a structure allocated with 
>> devm_kzalloc().
>>
>> And debugfs functions handle such cases.
>>
> 
> Oh, my mind went wrong and I didn't pay attention to devm, and I'm 
> really sorry about that.
> 
> Another problem I noticed here is that currently, no matter what, 
> pcie->debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
> defined(CONFIG_PCIEASPM) is valid, then pcie->debugfs = 
> debugfs_create_dir(name, NULL); Is it superfluous?

Sorry, let me reply again:
Another problem I noticed here is that currently, no matter what, 
pcie->debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
defined(CONFIG_PCIEASPM) is invalid, then pcie->debugfs = 
debugfs_create_dir(name, NULL); Is it superfluous?

Best regards,
Hans


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 16:47         ` Hans Zhang
@ 2025-04-05 17:04           ` Christophe JAILLET
  2025-04-05 17:10             ` Hans Zhang
  0 siblings, 1 reply; 12+ messages in thread
From: Christophe JAILLET @ 2025-04-05 17:04 UTC (permalink / raw)
  To: 18255117159
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding

Le 05/04/2025 à 18:47, Hans Zhang a écrit :
> 
> 
> On 2025/4/6 00:35, Hans Zhang wrote:
>>
>>
>> On 2025/4/6 00:17, Christophe JAILLET wrote:
>>> Le 05/04/2025 à 17:49, Hans Zhang a écrit :
>>>>
>>>>
>>>> On 2025/4/5 23:28, Bjorn Helgaas wrote:
>>>>> Follow subject line capitalization convention.
>>>>>
>>>>> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>>>>>> When CONFIG_PCIEASPM is disabled, debugfs entries are not created, 
>>>>>> but
>>>>>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() 
>>>>>> unconditionally call
>>>>>> debugfs_remove_recursive(), leading to potential NULL pointer 
>>>>>> operations.
>>>>>>
>>>>>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), 
>>>>>> which is
>>>>>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/ 
>>>>>> shutdown to
>>>>>> ensure debugfs cleanup only occurs when entries were initialized.
>>>>>>
>>>>>> This prevents kernel warnings and instability when ASPM support is
>>>>>> disabled.
>>>>>
>>>>> This looks like there should be a Fixes: tag to connect this to the
>>>>> commit that introduced the problem.
>>>>
>>>> Hi Bjorn,
>>>>
>>>> Thanks your for reply. Will add.
>>>>
>>>> Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for 
>>>> Endpoint mode)
>>>>
>>>>>
>>>>> If this is something that broke with the v6.15 merge window, we should
>>>>> include this in v6.15 via pci/for-linus.  If this broke earlier, we
>>>>> would have to decide whether pci/for-linus is still appropriate or a
>>>>> stable tag.
>>>>>
>>>>
>>>> The original code that introduced the unconditional 
>>>> `debugfs_remove_recursive()` calls was actually merged in an earlier 
>>>> cycle.
>>>>
>>>>> We did merge some debugfs things for v6.15, but I don't see anything
>>>>> specific to pcie-tegra194.c, so I'm confused about why this fix would
>>>>> be in pcie-tegra194.c instead of some more generic place.
>>>>>
>>>>
>>>> The Tegra194 driver conditionally initializes pcie->debugfs based on 
>>>> CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
>>>> uninitialized, but tegra_pcie_dw_remove() and 
>>>> tegra_pcie_dw_shutdown() unconditionally call 
>>>> debugfs_remove_recursive(), leading to a NULL 
>>>
>>> debugfs IS initialized, because it is in a structure allocated with 
>>> devm_kzalloc().
>>>
>>> And debugfs functions handle such cases.
>>>
>>
>> Oh, my mind went wrong and I didn't pay attention to devm, and I'm 

Here, what is relevant in devm_kzalloc() is not devm but the kzalloc 
part. the "z" is for zeroing the allocated memory.

>> really sorry about that.
>>
>> Another problem I noticed here is that currently, no matter what, 
>> pcie->debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
>> defined(CONFIG_PCIEASPM) is valid, then pcie->debugfs = 
>> debugfs_create_dir(name, NULL); Is it superfluous?
> 
> Sorry, let me reply again:
> Another problem I noticed here is that currently, no matter what, pcie- 
>  >debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
> defined(CONFIG_PCIEASPM) is invalid, then pcie->debugfs = 
> debugfs_create_dir(name, NULL); Is it superfluous?

AFAICT, it looks useless in this case.

I guess that moving debugfs_create_dir() and the
name = devm_kasprintf()...
above it, into init_debugfs() would do the trick.

CJ

> 
> Best regards,
> Hans
> 
> 
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 17:04           ` Christophe JAILLET
@ 2025-04-05 17:10             ` Hans Zhang
  0 siblings, 0 replies; 12+ messages in thread
From: Hans Zhang @ 2025-04-05 17:10 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: bhelgaas, jonathanh, kw, linux-kernel, linux-pci, linux-tegra,
	lpieralisi, manivannan.sadhasivam, robh, thierry.reding



On 2025/4/6 01:04, Christophe JAILLET wrote:
> Le 05/04/2025 à 18:47, Hans Zhang a écrit :
>>
>>
>> On 2025/4/6 00:35, Hans Zhang wrote:
>>>
>>>
>>> On 2025/4/6 00:17, Christophe JAILLET wrote:
>>>> Le 05/04/2025 à 17:49, Hans Zhang a écrit :
>>>>>
>>>>>
>>>>> On 2025/4/5 23:28, Bjorn Helgaas wrote:
>>>>>> Follow subject line capitalization convention.
>>>>>>
>>>>>> On Sat, Apr 05, 2025 at 10:54:59PM +0800, Hans Zhang wrote:
>>>>>>> When CONFIG_PCIEASPM is disabled, debugfs entries are not 
>>>>>>> created, but
>>>>>>> tegra_pcie_dw_remove() and tegra_pcie_dw_shutdown() 
>>>>>>> unconditionally call
>>>>>>> debugfs_remove_recursive(), leading to potential NULL pointer 
>>>>>>> operations.
>>>>>>>
>>>>>>> Introduce deinit_debugfs() to wrap debugfs_remove_recursive(), 
>>>>>>> which is
>>>>>>> stubbed for !CONFIG_PCIEASPM. Use this function during removal/ 
>>>>>>> shutdown to
>>>>>>> ensure debugfs cleanup only occurs when entries were initialized.
>>>>>>>
>>>>>>> This prevents kernel warnings and instability when ASPM support is
>>>>>>> disabled.
>>>>>>
>>>>>> This looks like there should be a Fixes: tag to connect this to the
>>>>>> commit that introduced the problem.
>>>>>
>>>>> Hi Bjorn,
>>>>>
>>>>> Thanks your for reply. Will add.
>>>>>
>>>>> Fixes: bb617cbd8151 (PCI: tegra194: Clean up the exit path for 
>>>>> Endpoint mode)
>>>>>
>>>>>>
>>>>>> If this is something that broke with the v6.15 merge window, we 
>>>>>> should
>>>>>> include this in v6.15 via pci/for-linus.  If this broke earlier, we
>>>>>> would have to decide whether pci/for-linus is still appropriate or a
>>>>>> stable tag.
>>>>>>
>>>>>
>>>>> The original code that introduced the unconditional 
>>>>> `debugfs_remove_recursive()` calls was actually merged in an 
>>>>> earlier cycle.
>>>>>
>>>>>> We did merge some debugfs things for v6.15, but I don't see anything
>>>>>> specific to pcie-tegra194.c, so I'm confused about why this fix would
>>>>>> be in pcie-tegra194.c instead of some more generic place.
>>>>>>
>>>>>
>>>>> The Tegra194 driver conditionally initializes pcie->debugfs based 
>>>>> on CONFIG_PCIEASPM. When ASPM is disabled, pcie->debugfs remains 
>>>>> uninitialized, but tegra_pcie_dw_remove() and 
>>>>> tegra_pcie_dw_shutdown() unconditionally call 
>>>>> debugfs_remove_recursive(), leading to a NULL 
>>>>
>>>> debugfs IS initialized, because it is in a structure allocated with 
>>>> devm_kzalloc().
>>>>
>>>> And debugfs functions handle such cases.
>>>>
>>>
>>> Oh, my mind went wrong and I didn't pay attention to devm, and I'm 
> 
> Here, what is relevant in devm_kzalloc() is not devm but the kzalloc 
> part. the "z" is for zeroing the allocated memory.
> 

Hi Christophe,

Thanks your for reply. I understand.

>>> really sorry about that.
>>>
>>> Another problem I noticed here is that currently, no matter what, 
>>> pcie->debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
>>> defined(CONFIG_PCIEASPM) is valid, then pcie->debugfs = 
>>> debugfs_create_dir(name, NULL); Is it superfluous?
>>
>> Sorry, let me reply again:
>> Another problem I noticed here is that currently, no matter what, 
>> pcie-  >debugfs = debugfs_create_dir(name, NULL) is executed; if #if 
>> defined(CONFIG_PCIEASPM) is invalid, then pcie->debugfs = 
>> debugfs_create_dir(name, NULL); Is it superfluous?
> 
> AFAICT, it looks useless in this case.
> 
> I guess that moving debugfs_create_dir() and the
> name = devm_kasprintf()...
> above it, into init_debugfs() would do the trick.

That's what I was thinking. I will submit the version again in the future.

Best regards,
Hans


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 14:54 [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM Hans Zhang
  2025-04-05 15:28 ` Bjorn Helgaas
  2025-04-05 16:14 ` Christophe JAILLET
@ 2025-04-06  0:48 ` kernel test robot
  2025-04-06  2:10 ` kernel test robot
  3 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2025-04-06  0:48 UTC (permalink / raw)
  To: Hans Zhang, lpieralisi
  Cc: oe-kbuild-all, manivannan.sadhasivam, thierry.reding, kw, robh,
	bhelgaas, jonathanh, linux-pci, linux-kernel, linux-tegra,
	Hans Zhang

Hi Hans,

kernel test robot noticed the following build errors:

[auto build test ERROR on a8662bcd2ff152bfbc751cab20f33053d74d0963]

url:    https://github.com/intel-lab-lkp/linux/commits/Hans-Zhang/pci-tegra194-Fix-debugfs-cleanup-for-CONFIG_PCIEASPM/20250405-230047
base:   a8662bcd2ff152bfbc751cab20f33053d74d0963
patch link:    https://lore.kernel.org/r/20250405145459.26800-1-18255117159%40163.com
patch subject: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
config: arm-randconfig-002-20250406 (https://download.01.org/0day-ci/archive/20250406/202504060814.4HRuwajf-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250406/202504060814.4HRuwajf-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202504060814.4HRuwajf-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/pci/controller/dwc/pcie-tegra194.c: In function 'tegra_pcie_dw_remove':
>> drivers/pci/controller/dwc/pcie-tegra194.c:2301:18: error: passing argument 1 of 'deinit_debugfs' from incompatible pointer type [-Werror=incompatible-pointer-types]
      deinit_debugfs(pcie->debugfs);
                     ^~~~
   drivers/pci/controller/dwc/pcie-tegra194.c:732:20: note: expected 'struct tegra_pcie_dw *' but argument is of type 'struct dentry *'
    static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
                       ^~~~~~~~~~~~~~
   drivers/pci/controller/dwc/pcie-tegra194.c: In function 'tegra_pcie_dw_shutdown':
   drivers/pci/controller/dwc/pcie-tegra194.c:2420:18: error: passing argument 1 of 'deinit_debugfs' from incompatible pointer type [-Werror=incompatible-pointer-types]
      deinit_debugfs(pcie->debugfs);
                     ^~~~
   drivers/pci/controller/dwc/pcie-tegra194.c:732:20: note: expected 'struct tegra_pcie_dw *' but argument is of type 'struct dentry *'
    static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
                       ^~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/deinit_debugfs +2301 drivers/pci/controller/dwc/pcie-tegra194.c

  2292	
  2293	static void tegra_pcie_dw_remove(struct platform_device *pdev)
  2294	{
  2295		struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
  2296	
  2297		if (pcie->of_data->mode == DW_PCIE_RC_TYPE) {
  2298			if (!pcie->link_state)
  2299				return;
  2300	
> 2301			deinit_debugfs(pcie->debugfs);
  2302			tegra_pcie_deinit_controller(pcie);
  2303			pm_runtime_put_sync(pcie->dev);
  2304		} else {
  2305			disable_irq(pcie->pex_rst_irq);
  2306			pex_ep_event_pex_rst_assert(pcie);
  2307		}
  2308	
  2309		pm_runtime_disable(pcie->dev);
  2310		tegra_bpmp_put(pcie->bpmp);
  2311		if (pcie->pex_refclk_sel_gpiod)
  2312			gpiod_set_value(pcie->pex_refclk_sel_gpiod, 0);
  2313	}
  2314	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
  2025-04-05 14:54 [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM Hans Zhang
                   ` (2 preceding siblings ...)
  2025-04-06  0:48 ` kernel test robot
@ 2025-04-06  2:10 ` kernel test robot
  3 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2025-04-06  2:10 UTC (permalink / raw)
  To: Hans Zhang, lpieralisi
  Cc: llvm, oe-kbuild-all, manivannan.sadhasivam, thierry.reding, kw,
	robh, bhelgaas, jonathanh, linux-pci, linux-kernel, linux-tegra,
	Hans Zhang

Hi Hans,

kernel test robot noticed the following build errors:

[auto build test ERROR on a8662bcd2ff152bfbc751cab20f33053d74d0963]

url:    https://github.com/intel-lab-lkp/linux/commits/Hans-Zhang/pci-tegra194-Fix-debugfs-cleanup-for-CONFIG_PCIEASPM/20250405-230047
base:   a8662bcd2ff152bfbc751cab20f33053d74d0963
patch link:    https://lore.kernel.org/r/20250405145459.26800-1-18255117159%40163.com
patch subject: [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM
config: arm-randconfig-001-20250406 (https://download.01.org/0day-ci/archive/20250406/202504060938.xa7VrE6O-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 92c93f5286b9ff33f27ff694d2dc33da1c07afdd)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250406/202504060938.xa7VrE6O-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202504060938.xa7VrE6O-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/pci/controller/dwc/pcie-tegra194.c:2301:18: error: incompatible pointer types passing 'struct dentry *' to parameter of type 'struct tegra_pcie_dw *' [-Werror,-Wincompatible-pointer-types]
    2301 |                 deinit_debugfs(pcie->debugfs);
         |                                ^~~~~~~~~~~~~
   drivers/pci/controller/dwc/pcie-tegra194.c:732:57: note: passing argument to parameter 'pcie' here
     732 | static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
         |                                                         ^
   drivers/pci/controller/dwc/pcie-tegra194.c:2420:18: error: incompatible pointer types passing 'struct dentry *' to parameter of type 'struct tegra_pcie_dw *' [-Werror,-Wincompatible-pointer-types]
    2420 |                 deinit_debugfs(pcie->debugfs);
         |                                ^~~~~~~~~~~~~
   drivers/pci/controller/dwc/pcie-tegra194.c:732:57: note: passing argument to parameter 'pcie' here
     732 | static inline void deinit_debugfs(struct tegra_pcie_dw *pcie) { return; }
         |                                                         ^
   2 errors generated.


vim +2301 drivers/pci/controller/dwc/pcie-tegra194.c

  2292	
  2293	static void tegra_pcie_dw_remove(struct platform_device *pdev)
  2294	{
  2295		struct tegra_pcie_dw *pcie = platform_get_drvdata(pdev);
  2296	
  2297		if (pcie->of_data->mode == DW_PCIE_RC_TYPE) {
  2298			if (!pcie->link_state)
  2299				return;
  2300	
> 2301			deinit_debugfs(pcie->debugfs);
  2302			tegra_pcie_deinit_controller(pcie);
  2303			pm_runtime_put_sync(pcie->dev);
  2304		} else {
  2305			disable_irq(pcie->pex_rst_irq);
  2306			pex_ep_event_pex_rst_assert(pcie);
  2307		}
  2308	
  2309		pm_runtime_disable(pcie->dev);
  2310		tegra_bpmp_put(pcie->bpmp);
  2311		if (pcie->pex_refclk_sel_gpiod)
  2312			gpiod_set_value(pcie->pex_refclk_sel_gpiod, 0);
  2313	}
  2314	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-04-06  2:10 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-05 14:54 [PATCH] pci: tegra194: Fix debugfs cleanup for !CONFIG_PCIEASPM Hans Zhang
2025-04-05 15:28 ` Bjorn Helgaas
2025-04-05 15:49   ` Hans Zhang
2025-04-05 16:17     ` Christophe JAILLET
2025-04-05 16:35       ` Hans Zhang
2025-04-05 16:47         ` Hans Zhang
2025-04-05 17:04           ` Christophe JAILLET
2025-04-05 17:10             ` Hans Zhang
2025-04-05 16:14 ` Christophe JAILLET
2025-04-05 16:41   ` Hans Zhang
2025-04-06  0:48 ` kernel test robot
2025-04-06  2:10 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox