linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] r8169: hardening and stability improvements
@ 2025-08-30  7:30 Mohammad Amin Hosseini
  2025-08-30  8:29 ` Heiner Kallweit
  2025-08-30 14:44 ` Andrew Lunn
  0 siblings, 2 replies; 3+ messages in thread
From: Mohammad Amin Hosseini @ 2025-08-30  7:30 UTC (permalink / raw)
  To: hkallweit1, nic_swsd
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, netdev,
	linux-kernel, mohammad amin hosseini

From: mohammad amin hosseini <moahmmad.hosseinii@gmail.com>

This patch improves robustness and reliability of the r8169 driver. The
changes cover buffer management, interrupt handling, parameter validation,
and resource cleanup.

While the updates touch multiple areas, they are interdependent parts of a
cohesive hardening effort. Splitting them would leave intermediate states
with incomplete validation.

Key changes:
- Buffer handling: add packet length checks, NUMA-aware fallback allocation,
  descriptor zero-initialization, and memory barriers.
- Interrupt handling: fix return codes, selective NAPI scheduling, and
  improved SYSErr handling for RTL_GIGA_MAC_VER_52.
- Parameter validation: stricter RX/TX bounds checking and consistent
  error codes.
- Resource management: safer workqueue shutdown, proper clock lifecycle,
  WARN_ON for unexpected device states.
- Logging: use severity-appropriate levels, add rate limiting, and extend
  statistics tracking.

Testing:
- Kernel builds and module loads without warnings.
- Runtime tested in QEMU (rtl8139 emulation).
- Hardware validation requested from community due to lack of local device.

v2:
 - Resent using msmtp (fix whitespace damage)

Signed-off-by: Mohammad Amin Hosseini <moahmmad.hosseinii@gmail.com>
---
 drivers/net/ethernet/realtek/r8169_main.c | 150 ++++++++++++++++++----
 1 file changed, 123 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9c601f271c02..66d7dcd8bf7b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 	int node = dev_to_node(d);
 	dma_addr_t mapping;
 	struct page *data;
+	gfp_t gfp_flags = GFP_KERNEL;
 
-	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
-	if (!data)
-		return NULL;
+	/* Use atomic allocation in interrupt/atomic context */
+	if (in_atomic() || irqs_disabled())
+		gfp_flags = GFP_ATOMIC;
+
+	data = alloc_pages_node(node, gfp_flags, get_order(R8169_RX_BUF_SIZE));
+	if (unlikely(!data)) {
+		/* Try fallback allocation on any node if local node fails */
+		data = alloc_pages(gfp_flags | __GFP_NOWARN, get_order(R8169_RX_BUF_SIZE));
+		if (unlikely(!data)) {
+			if (net_ratelimit())
+				netdev_err(tp->dev, "Failed to allocate RX buffer\n");
+			return NULL;
+		}
+		
+		if (net_ratelimit())
+			netdev_warn(tp->dev, "Fallback allocation used for RX buffer\n");
+	}
 
 	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(d, mapping))) {
-		netdev_err(tp->dev, "Failed to map RX DMA!\n");
+		if (net_ratelimit())
+			netdev_err(tp->dev, "Failed to map RX DMA page\n");
 		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
 		return NULL;
 	}
 
 	desc->addr = cpu_to_le64(mapping);
+	desc->opts2 = 0;
+	
+	/* Ensure writes complete before marking descriptor ready */
+	wmb();
 	rtl8169_mark_to_asic(desc);
 
 	return data;
@@ -4150,11 +4170,30 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
 	u32 opts1;
 	int ret;
 
+	/* Validate parameters before DMA mapping */
+	if (unlikely(!addr)) {
+		if (net_ratelimit())
+			netdev_err(tp->dev, "TX mapping with NULL address\n");
+		return -EINVAL;
+	}
+	
+	if (unlikely(!len)) {
+		if (net_ratelimit())
+			netdev_err(tp->dev, "TX mapping with zero length\n");
+		return -EINVAL;
+	}
+	
+	if (unlikely(len > RTL_GSO_MAX_SIZE_V2)) {
+		if (net_ratelimit())
+			netdev_err(tp->dev, "TX length too large: %u\n", len);
+		return -EINVAL;
+	}
+
 	mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE);
 	ret = dma_mapping_error(d, mapping);
 	if (unlikely(ret)) {
 		if (net_ratelimit())
-			netdev_err(tp->dev, "Failed to map TX data!\n");
+			netdev_err(tp->dev, "Failed to map TX DMA: len=%u\n", len);
 		return ret;
 	}
 
@@ -4601,9 +4640,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		if (status & DescOwn)
 			break;
 
-		/* This barrier is needed to keep us from reading
-		 * any other fields out of the Rx descriptor until
-		 * we know the status of DescOwn
+		/* Ensure descriptor ownership check completes before accessing
+		 * other fields to prevent hardware race conditions
 		 */
 		dma_rmb();
 
@@ -4624,8 +4662,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		}
 
 		pkt_size = status & GENMASK(13, 0);
-		if (likely(!(dev->features & NETIF_F_RXFCS)))
-			pkt_size -= ETH_FCS_LEN;
+		
+		/* Validate packet size to prevent buffer overflows */
+		if (unlikely(pkt_size > R8169_RX_BUF_SIZE)) {
+			if (net_ratelimit())
+				netdev_warn(dev, "Oversized packet: %u bytes (status=0x%08x)\n",
+					    pkt_size, status);
+			dev->stats.rx_length_errors++;
+			goto release_descriptor;
+		}
+		
+		if (likely(!(dev->features & NETIF_F_RXFCS))) {
+			if (pkt_size >= ETH_FCS_LEN) {
+				pkt_size -= ETH_FCS_LEN;
+			} else {
+				if (net_ratelimit())
+					netdev_warn(dev, "Packet smaller than FCS: %u bytes (status=0x%08x)\n",
+						    pkt_size, status);
+				dev->stats.rx_length_errors++;
+				goto release_descriptor;
+			}
+		}
 
 		/* The driver does not support incoming fragmented frames.
 		 * They are seen as a symptom of over-mtu sized frames.
@@ -4674,26 +4731,44 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 {
 	struct rtl8169_private *tp = dev_instance;
 	u32 status = rtl_get_events(tp);
+	bool handled = false;
 
+	/* Check for invalid hardware state or no relevant interrupts */
 	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
 		return IRQ_NONE;
 
-	/* At least RTL8168fp may unexpectedly set the SYSErr bit */
-	if (unlikely(status & SYSErr &&
-	    tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
-		rtl8169_pcierr_interrupt(tp->dev);
+	/* Handle system errors based on chip version capabilities */
+	if (unlikely(status & SYSErr)) {
+		/* SYSErr handling for older chips and specific newer models
+		 * based on vendor documentation and observed behavior
+		 */
+		if (tp->mac_version <= RTL_GIGA_MAC_VER_06 || 
+		    tp->mac_version == RTL_GIGA_MAC_VER_52) {
+			rtl8169_pcierr_interrupt(tp->dev);
+		} else {
+			/* Log for diagnostic purposes on newer chips */
+			if (net_ratelimit())
+				netdev_warn(tp->dev, "SYSErr on newer chip: status=0x%08x\n", status);
+		}
+		handled = true;
 		goto out;
 	}
 
-	if (status & LinkChg)
+	if (status & LinkChg) {
 		phy_mac_interrupt(tp->phydev);
+		handled = true;
+	}
+
+	if (status & (RxOK | RxErr | TxOK | TxErr)) {
+		rtl_irq_disable(tp);
+		napi_schedule(&tp->napi);
+		handled = true;
+	}
 
-	rtl_irq_disable(tp);
-	napi_schedule(&tp->napi);
 out:
 	rtl_ack_events(tp, status);
 
-	return IRQ_HANDLED;
+	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
 static void rtl_task(struct work_struct *work)
@@ -4783,8 +4858,11 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
 
 static void rtl8169_down(struct rtl8169_private *tp)
 {
-	disable_work_sync(&tp->wk.work);
-	/* Clear all task flags */
+	/* Synchronize with pending work to prevent races during shutdown.
+	 * This is necessary because work items may access hardware registers
+	 * that we're about to reset.
+	 */
+	cancel_work_sync(&tp->wk.work);
 	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
 
 	phy_stop(tp->phydev);
@@ -4798,6 +4876,10 @@ static void rtl8169_down(struct rtl8169_private *tp)
 	rtl_disable_exit_l1(tp);
 	rtl_prepare_power_down(tp);
 
+	/* Disable clock if it was enabled during resume */
+	if (tp->clk)
+		clk_disable_unprepare(tp->clk);
+
 	if (tp->dash_type != RTL_DASH_NONE)
 		rtl8168_driver_stop(tp);
 }
@@ -4812,7 +4894,6 @@ static void rtl8169_up(struct rtl8169_private *tp)
 	phy_resume(tp->phydev);
 	rtl8169_init_phy(tp);
 	napi_enable(&tp->napi);
-	enable_work(&tp->wk.work);
 	rtl_reset_work(tp);
 
 	phy_start(tp->phydev);
@@ -4962,12 +5043,27 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
 static int rtl8169_runtime_resume(struct device *dev)
 {
 	struct rtl8169_private *tp = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (WARN_ON(!tp || !tp->dev)) {
+		dev_err(dev, "Critical: Invalid device state during resume\n");
+		return -ENODEV;
+	}
 
 	rtl_rar_set(tp, tp->dev->dev_addr);
 	__rtl8169_set_wol(tp, tp->saved_wolopts);
 
-	if (tp->TxDescArray)
+	if (tp->TxDescArray) {
+		/* Enable clock if available */
+		if (tp->clk) {
+			ret = clk_prepare_enable(tp->clk);
+			if (ret) {
+				dev_err(dev, "Failed to enable clock: %d\n", ret);
+				return ret;
+			}
+		}
 		rtl8169_up(tp);
+	}
 
 	netif_device_attach(tp->dev);
 
@@ -4980,7 +5076,7 @@ static int rtl8169_suspend(struct device *device)
 
 	rtnl_lock();
 	rtl8169_net_suspend(tp);
-	if (!device_may_wakeup(tp_to_dev(tp)))
+	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
 		clk_disable_unprepare(tp->clk);
 	rtnl_unlock();
 
@@ -4991,7 +5087,7 @@ static int rtl8169_resume(struct device *device)
 {
 	struct rtl8169_private *tp = dev_get_drvdata(device);
 
-	if (!device_may_wakeup(tp_to_dev(tp)))
+	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
 		clk_prepare_enable(tp->clk);
 
 	/* Reportedly at least Asus X453MA truncates packets otherwise */
@@ -5059,7 +5155,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_get_noresume(&pdev->dev);
 
-	disable_work_sync(&tp->wk.work);
+	/* Ensure all work is completed before device removal */
+	cancel_work_sync(&tp->wk.work);
 
 	if (IS_ENABLED(CONFIG_R8169_LEDS))
 		r8169_remove_leds(tp->leds);
@@ -5471,7 +5568,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->irq = pci_irq_vector(pdev, 0);
 
 	INIT_WORK(&tp->wk.work, rtl_task);
-	disable_work(&tp->wk.work);
 
 	rtl_init_mac_address(tp);
 
@@ -5593,4 +5689,4 @@ static struct pci_driver rtl8169_pci_driver = {
 	.driver.pm	= pm_ptr(&rtl8169_pm_ops),
 };
 
-module_pci_driver(rtl8169_pci_driver);
+module_pci_driver(rtl8169_pci_driver);
\ No newline at end of file
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] r8169: hardening and stability improvements
  2025-08-30  7:30 [PATCH v2] r8169: hardening and stability improvements Mohammad Amin Hosseini
@ 2025-08-30  8:29 ` Heiner Kallweit
  2025-08-30 14:44 ` Andrew Lunn
  1 sibling, 0 replies; 3+ messages in thread
From: Heiner Kallweit @ 2025-08-30  8:29 UTC (permalink / raw)
  To: Mohammad Amin Hosseini, nic_swsd
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, netdev,
	linux-kernel

On 8/30/2025 9:30 AM, Mohammad Amin Hosseini wrote:
> From: mohammad amin hosseini <moahmmad.hosseinii@gmail.com>
> 
> This patch improves robustness and reliability of the r8169 driver. The
> changes cover buffer management, interrupt handling, parameter validation,
> and resource cleanup.
> 
Are there any known issues which require these changes?
Then changes which actually fix something should come with a Fixes tag.

Several changes IMO don't make sense because they try to handle error
conditions which can't occur.

Please split this wild mix of changes and explain per patch:
- What is the problematic scenario and can it actually occur?

> While the updates touch multiple areas, they are interdependent parts of a
> cohesive hardening effort. Splitting them would leave intermediate states
> with incomplete validation.
> 
> Key changes:
> - Buffer handling: add packet length checks, NUMA-aware fallback allocation,
>   descriptor zero-initialization, and memory barriers.
> - Interrupt handling: fix return codes, selective NAPI scheduling, and
>   improved SYSErr handling for RTL_GIGA_MAC_VER_52.
> - Parameter validation: stricter RX/TX bounds checking and consistent
>   error codes.
> - Resource management: safer workqueue shutdown, proper clock lifecycle,
>   WARN_ON for unexpected device states.
> - Logging: use severity-appropriate levels, add rate limiting, and extend
>   statistics tracking.
> 
> Testing:
> - Kernel builds and module loads without warnings.
> - Runtime tested in QEMU (rtl8139 emulation).
> - Hardware validation requested from community due to lack of local device.
> 
> v2:
>  - Resent using msmtp (fix whitespace damage)
> 
> Signed-off-by: Mohammad Amin Hosseini <moahmmad.hosseinii@gmail.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 150 ++++++++++++++++++----
>  1 file changed, 123 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 9c601f271c02..66d7dcd8bf7b 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	int node = dev_to_node(d);
>  	dma_addr_t mapping;
>  	struct page *data;
> +	gfp_t gfp_flags = GFP_KERNEL;
>  
> -	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> -	if (!data)
> -		return NULL;
> +	/* Use atomic allocation in interrupt/atomic context */

Is there any scenario where rtl8169_alloc_rx_data() would be called in atomic context?

> +	if (in_atomic() || irqs_disabled())
> +		gfp_flags = GFP_ATOMIC;
> +
> +	data = alloc_pages_node(node, gfp_flags, get_order(R8169_RX_BUF_SIZE));
> +	if (unlikely(!data)) {
> +		/* Try fallback allocation on any node if local node fails */
> +		data = alloc_pages(gfp_flags | __GFP_NOWARN, get_order(R8169_RX_BUF_SIZE));
> +		if (unlikely(!data)) {
> +			if (net_ratelimit())
> +				netdev_err(tp->dev, "Failed to allocate RX buffer\n");
> +			return NULL;
> +		}
> +		
> +		if (net_ratelimit())
> +			netdev_warn(tp->dev, "Fallback allocation used for RX buffer\n");
> +	}
>  
>  	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
>  	if (unlikely(dma_mapping_error(d, mapping))) {
> -		netdev_err(tp->dev, "Failed to map RX DMA!\n");
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "Failed to map RX DMA page\n");
>  		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
>  		return NULL;
>  	}
>  
>  	desc->addr = cpu_to_le64(mapping);
> +	desc->opts2 = 0;
> +	
> +	/* Ensure writes complete before marking descriptor ready */
> +	wmb();

Changes messing with memory barriers need a thorough explanation.
What is the problematic scenario? Any known issues?

>  	rtl8169_mark_to_asic(desc);
>  
>  	return data;
> @@ -4150,11 +4170,30 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
>  	u32 opts1;
>  	int ret;
>  
> +	/* Validate parameters before DMA mapping */
> +	if (unlikely(!addr)) {

Any scenario where addr would be NULL?

> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX mapping with NULL address\n");
> +		return -EINVAL;
> +	}
> +	
> +	if (unlikely(!len)) {
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX mapping with zero length\n");
> +		return -EINVAL;
> +	}
> +	
> +	if (unlikely(len > RTL_GSO_MAX_SIZE_V2)) {
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX length too large: %u\n", len);
> +		return -EINVAL;
> +	}
> +
>  	mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE);
>  	ret = dma_mapping_error(d, mapping);
>  	if (unlikely(ret)) {
>  		if (net_ratelimit())
> -			netdev_err(tp->dev, "Failed to map TX data!\n");
> +			netdev_err(tp->dev, "Failed to map TX DMA: len=%u\n", len);
>  		return ret;
>  	}
>  
> @@ -4601,9 +4640,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (status & DescOwn)
>  			break;
>  
> -		/* This barrier is needed to keep us from reading
> -		 * any other fields out of the Rx descriptor until
> -		 * we know the status of DescOwn
> +		/* Ensure descriptor ownership check completes before accessing
> +		 * other fields to prevent hardware race conditions
>  		 */
>  		dma_rmb();
>  
> @@ -4624,8 +4662,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		}
>  
>  		pkt_size = status & GENMASK(13, 0);
> -		if (likely(!(dev->features & NETIF_F_RXFCS)))
> -			pkt_size -= ETH_FCS_LEN;
> +		
> +		/* Validate packet size to prevent buffer overflows */
> +		if (unlikely(pkt_size > R8169_RX_BUF_SIZE)) {
> +			if (net_ratelimit())
> +				netdev_warn(dev, "Oversized packet: %u bytes (status=0x%08x)\n",
> +					    pkt_size, status);
> +			dev->stats.rx_length_errors++;
> +			goto release_descriptor;
> +		}
> +		
> +		if (likely(!(dev->features & NETIF_F_RXFCS))) {
> +			if (pkt_size >= ETH_FCS_LEN) {
> +				pkt_size -= ETH_FCS_LEN;
> +			} else {
> +				if (net_ratelimit())
> +					netdev_warn(dev, "Packet smaller than FCS: %u bytes (status=0x%08x)\n",
> +						    pkt_size, status);
> +				dev->stats.rx_length_errors++;
> +				goto release_descriptor;
> +			}
> +		}
>  
>  		/* The driver does not support incoming fragmented frames.
>  		 * They are seen as a symptom of over-mtu sized frames.
> @@ -4674,26 +4731,44 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
>  	struct rtl8169_private *tp = dev_instance;
>  	u32 status = rtl_get_events(tp);
> +	bool handled = false;
>  
> +	/* Check for invalid hardware state or no relevant interrupts */
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
>  		return IRQ_NONE;
>  
> -	/* At least RTL8168fp may unexpectedly set the SYSErr bit */
> -	if (unlikely(status & SYSErr &&
> -	    tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
> -		rtl8169_pcierr_interrupt(tp->dev);
> +	/* Handle system errors based on chip version capabilities */
> +	if (unlikely(status & SYSErr)) {
> +		/* SYSErr handling for older chips and specific newer models
> +		 * based on vendor documentation and observed behavior
> +		 */
> +		if (tp->mac_version <= RTL_GIGA_MAC_VER_06 || 
> +		    tp->mac_version == RTL_GIGA_MAC_VER_52) {
> +			rtl8169_pcierr_interrupt(tp->dev);
> +		} else {
> +			/* Log for diagnostic purposes on newer chips */
> +			if (net_ratelimit())
> +				netdev_warn(tp->dev, "SYSErr on newer chip: status=0x%08x\n", status);
> +		}
> +		handled = true;
>  		goto out;
>  	}
>  
> -	if (status & LinkChg)
> +	if (status & LinkChg) {
>  		phy_mac_interrupt(tp->phydev);
> +		handled = true;
> +	}
> +
> +	if (status & (RxOK | RxErr | TxOK | TxErr)) {
> +		rtl_irq_disable(tp);
> +		napi_schedule(&tp->napi);
> +		handled = true;
> +	}
>  
> -	rtl_irq_disable(tp);
> -	napi_schedule(&tp->napi);
>  out:
>  	rtl_ack_events(tp, status);
>  
> -	return IRQ_HANDLED;
> +	return handled ? IRQ_HANDLED : IRQ_NONE;
>  }
>  
>  static void rtl_task(struct work_struct *work)
> @@ -4783,8 +4858,11 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
>  
>  static void rtl8169_down(struct rtl8169_private *tp)
>  {
> -	disable_work_sync(&tp->wk.work);
> -	/* Clear all task flags */
> +	/* Synchronize with pending work to prevent races during shutdown.
> +	 * This is necessary because work items may access hardware registers
> +	 * that we're about to reset.
> +	 */
> +	cancel_work_sync(&tp->wk.work);
>  	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
>  
>  	phy_stop(tp->phydev);
> @@ -4798,6 +4876,10 @@ static void rtl8169_down(struct rtl8169_private *tp)
>  	rtl_disable_exit_l1(tp);
>  	rtl_prepare_power_down(tp);
>  
> +	/* Disable clock if it was enabled during resume */
> +	if (tp->clk)
> +		clk_disable_unprepare(tp->clk);

The clk functions can deal with NULL arguments.

> +
>  	if (tp->dash_type != RTL_DASH_NONE)
>  		rtl8168_driver_stop(tp);
>  }
> @@ -4812,7 +4894,6 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	phy_resume(tp->phydev);
>  	rtl8169_init_phy(tp);
>  	napi_enable(&tp->napi);
> -	enable_work(&tp->wk.work);
>  	rtl_reset_work(tp);
>  
>  	phy_start(tp->phydev);
> @@ -4962,12 +5043,27 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
>  static int rtl8169_runtime_resume(struct device *dev)
>  {
>  	struct rtl8169_private *tp = dev_get_drvdata(dev);
> +	int ret = 0;
> +
> +	if (WARN_ON(!tp || !tp->dev)) {
> +		dev_err(dev, "Critical: Invalid device state during resume\n");
> +		return -ENODEV;
> +	}
>  
>  	rtl_rar_set(tp, tp->dev->dev_addr);
>  	__rtl8169_set_wol(tp, tp->saved_wolopts);
>  
> -	if (tp->TxDescArray)
> +	if (tp->TxDescArray) {
> +		/* Enable clock if available */
> +		if (tp->clk) {
> +			ret = clk_prepare_enable(tp->clk);
> +			if (ret) {
> +				dev_err(dev, "Failed to enable clock: %d\n", ret);
> +				return ret;
> +			}
> +		}
>  		rtl8169_up(tp);
> +	}
>  
>  	netif_device_attach(tp->dev);
>  
> @@ -4980,7 +5076,7 @@ static int rtl8169_suspend(struct device *device)
>  
>  	rtnl_lock();
>  	rtl8169_net_suspend(tp);
> -	if (!device_may_wakeup(tp_to_dev(tp)))
> +	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
>  		clk_disable_unprepare(tp->clk);
>  	rtnl_unlock();
>  
> @@ -4991,7 +5087,7 @@ static int rtl8169_resume(struct device *device)
>  {
>  	struct rtl8169_private *tp = dev_get_drvdata(device);
>  
> -	if (!device_may_wakeup(tp_to_dev(tp)))
> +	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
>  		clk_prepare_enable(tp->clk);
>  
>  	/* Reportedly at least Asus X453MA truncates packets otherwise */
> @@ -5059,7 +5155,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
>  	if (pci_dev_run_wake(pdev))
>  		pm_runtime_get_noresume(&pdev->dev);
>  
> -	disable_work_sync(&tp->wk.work);
> +	/* Ensure all work is completed before device removal */
> +	cancel_work_sync(&tp->wk.work);
>  
>  	if (IS_ENABLED(CONFIG_R8169_LEDS))
>  		r8169_remove_leds(tp->leds);
> @@ -5471,7 +5568,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	tp->irq = pci_irq_vector(pdev, 0);
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
> -	disable_work(&tp->wk.work);
>  
>  	rtl_init_mac_address(tp);
>  
> @@ -5593,4 +5689,4 @@ static struct pci_driver rtl8169_pci_driver = {
>  	.driver.pm	= pm_ptr(&rtl8169_pm_ops),
>  };
>  
> -module_pci_driver(rtl8169_pci_driver);
> +module_pci_driver(rtl8169_pci_driver);
> \ No newline at end of file


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] r8169: hardening and stability improvements
  2025-08-30  7:30 [PATCH v2] r8169: hardening and stability improvements Mohammad Amin Hosseini
  2025-08-30  8:29 ` Heiner Kallweit
@ 2025-08-30 14:44 ` Andrew Lunn
  1 sibling, 0 replies; 3+ messages in thread
From: Andrew Lunn @ 2025-08-30 14:44 UTC (permalink / raw)
  To: Mohammad Amin Hosseini
  Cc: hkallweit1, nic_swsd, andrew+netdev, davem, edumazet, kuba,
	pabeni, netdev, linux-kernel

> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 9c601f271c02..66d7dcd8bf7b 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	int node = dev_to_node(d);
>  	dma_addr_t mapping;
>  	struct page *data;
> +	gfp_t gfp_flags = GFP_KERNEL;
>  
> -	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> -	if (!data)
> -		return NULL;
> +	/* Use atomic allocation in interrupt/atomic context */
> +	if (in_atomic() || irqs_disabled())
> +		gfp_flags = GFP_ATOMIC;

/*
 * Are we running in atomic context?  WARNING: this macro cannot
 * always detect atomic context; in particular, it cannot know about
 * held spinlocks in non-preemptible kernels.  Thus it should not be
 * used in the general case to determine whether sleeping is possible.
 * Do not use in_atomic() in driver code.
 */
#define in_atomic()	(preempt_count() != 0)

You need to explain why you ignored this last line, and why this is
safe in this context. For a change like this, which breaks the normal
rules, you want it in a patch of its own, so the explanation can be in
the commit message and it is then very clear why you are ignoring the
rules.

Looking at the rest of the patch, a lot of the changes are
questionable. Splitting it up will help you describe why you are
making each change, why it is needed.

    Andrew

---
pw-bot: cr

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-08-30 14:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-30  7:30 [PATCH v2] r8169: hardening and stability improvements Mohammad Amin Hosseini
2025-08-30  8:29 ` Heiner Kallweit
2025-08-30 14:44 ` Andrew Lunn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).