netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
@ 2025-08-22  7:28 Kurt Kanzenbach
  2025-08-22  7:52 ` Sebastian Andrzej Siewior
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-22  7:28 UTC (permalink / raw)
  To: Tony Nguyen, Przemek Kitszel
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Sebastian Andrzej Siewior, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, Jacob Keller, intel-wired-lan, netdev,
	Kurt Kanzenbach

The current implementation uses schedule_work() which is executed by the
system work queue to retrieve Tx timestamps. This increases latency and can
lead to timeouts in case of heavy system load.

Therefore, switch to the PTP aux worker which can be prioritized and pinned
according to use case. Tested on Intel i210.

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
Changes in v2:
- Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
- Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
---
 drivers/net/ethernet/intel/igb/igb.h      |  1 -
 drivers/net/ethernet/intel/igb/igb_main.c |  6 +++---
 drivers/net/ethernet/intel/igb/igb_ptp.c  | 28 +++++++++++++++-------------
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index c3f4f7cd264e9b2ff70f03b580f95b15b528028c..f285def61f7a778f66944d6c52bb31f11ff803cf 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -624,7 +624,6 @@ struct igb_adapter {
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_caps;
 	struct delayed_work ptp_overflow_work;
-	struct work_struct ptp_tx_work;
 	struct sk_buff *ptp_tx_skb;
 	struct kernel_hwtstamp_config tstamp_config;
 	unsigned long ptp_tx_start;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a9a7a94ae61e93aa737b0103e00580e73601d62b..76467f0e53305188fcbbff27e21e478e764ca552 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6576,7 +6576,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 			adapter->ptp_tx_skb = skb_get(skb);
 			adapter->ptp_tx_start = jiffies;
 			if (adapter->hw.mac.type == e1000_82576)
-				schedule_work(&adapter->ptp_tx_work);
+				ptp_schedule_worker(adapter->ptp_clock, 0);
 		} else {
 			adapter->tx_hwtstamp_skipped++;
 		}
@@ -6612,7 +6612,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 		dev_kfree_skb_any(adapter->ptp_tx_skb);
 		adapter->ptp_tx_skb = NULL;
 		if (adapter->hw.mac.type == e1000_82576)
-			cancel_work_sync(&adapter->ptp_tx_work);
+			ptp_cancel_worker_sync(adapter->ptp_clock);
 		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
 	}
 
@@ -7080,7 +7080,7 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter)
 
 	if (tsicr & E1000_TSICR_TXTS) {
 		/* retrieve hardware timestamp */
-		schedule_work(&adapter->ptp_tx_work);
+		ptp_schedule_worker(adapter->ptp_clock, 0);
 	}
 
 	if (tsicr & TSINTR_TT0)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index a7876882aeaf2b2a7fb9ec6ff5c83d8a1b06008a..8dabde01d09dcacc13e19fa4ce7ad0327077190a 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -798,20 +798,20 @@ static int igb_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
 
 /**
  * igb_ptp_tx_work
- * @work: pointer to work struct
+ * @ptp: pointer to ptp clock information
  *
  * This work function polls the TSYNCTXCTL valid bit to determine when a
  * timestamp has been taken for the current stored skb.
  **/
-static void igb_ptp_tx_work(struct work_struct *work)
+static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
 {
-	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
-						   ptp_tx_work);
+	struct igb_adapter *adapter = container_of(ptp, struct igb_adapter,
+						   ptp_caps);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 tsynctxctl;
 
 	if (!adapter->ptp_tx_skb)
-		return;
+		return -1;
 
 	if (time_is_before_jiffies(adapter->ptp_tx_start +
 				   IGB_PTP_TX_TIMEOUT)) {
@@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct *work)
 		 */
 		rd32(E1000_TXSTMPH);
 		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
-		return;
+		return -1;
 	}
 
 	tsynctxctl = rd32(E1000_TSYNCTXCTL);
-	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
+	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
 		igb_ptp_tx_hwtstamp(adapter);
-	else
-		/* reschedule to check later */
-		schedule_work(&adapter->ptp_tx_work);
+		return -1;
+	}
+
+	/* reschedule to check later */
+	return 1;
 }
 
 static void igb_ptp_overflow_check(struct work_struct *work)
@@ -915,7 +917,7 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter)
 	 * timestamp bit when this occurs.
 	 */
 	if (timeout) {
-		cancel_work_sync(&adapter->ptp_tx_work);
+		ptp_cancel_worker_sync(adapter->ptp_clock);
 		dev_kfree_skb_any(adapter->ptp_tx_skb);
 		adapter->ptp_tx_skb = NULL;
 		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
@@ -1381,6 +1383,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		return;
 	}
 
+	adapter->ptp_caps.do_aux_work = igb_ptp_tx_work;
 	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
 						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
@@ -1392,7 +1395,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->ptp_flags |= IGB_PTP_ENABLED;
 
 		spin_lock_init(&adapter->tmreg_lock);
-		INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
 
 		if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
 			INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
@@ -1437,7 +1439,7 @@ void igb_ptp_suspend(struct igb_adapter *adapter)
 	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
 		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
 
-	cancel_work_sync(&adapter->ptp_tx_work);
+	ptp_cancel_worker_sync(adapter->ptp_clock);
 	if (adapter->ptp_tx_skb) {
 		dev_kfree_skb_any(adapter->ptp_tx_skb);
 		adapter->ptp_tx_skb = NULL;

---
base-commit: a7bd72158063740212344fad5d99dcef45bc70d6
change-id: 20250813-igb_irq_ts-1aa77cc7b4cb

Best regards,
-- 
Kurt Kanzenbach <kurt@linutronix.de>


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22  7:28 [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker Kurt Kanzenbach
@ 2025-08-22  7:52 ` Sebastian Andrzej Siewior
  2025-08-22 23:55   ` Jacob Keller
  2025-08-23  7:29   ` Kurt Kanzenbach
  2025-08-22 16:27 ` Vadim Fedorenko
  2025-08-25 10:58 ` [Intel-wired-lan] " Loktionov, Aleksandr
  2 siblings, 2 replies; 24+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-08-22  7:52 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, Jacob Keller, intel-wired-lan, netdev

On 2025-08-22 09:28:10 [+0200], Kurt Kanzenbach wrote:
> The current implementation uses schedule_work() which is executed by the
> system work queue to retrieve Tx timestamps. This increases latency and can
> lead to timeouts in case of heavy system load.
> 
> Therefore, switch to the PTP aux worker which can be prioritized and pinned
> according to use case. Tested on Intel i210.
> 
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> ---
> Changes in v2:
> - Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de

For the i210 it makes sense to read it directly from IRQ avoiding the
context switch and the delay resulting for it. For the e1000_82576 it
makes sense to avoid the system workqueue and use a dedicated thread
which is not CPU bound and could prioritized/ isolated further if
needed.
I don't understand *why* reading the TS in IRQ is causing this packet
loss. This is also what the igc does and the performance improved
	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")

and here it causes the opposite?

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22  7:28 [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker Kurt Kanzenbach
  2025-08-22  7:52 ` Sebastian Andrzej Siewior
@ 2025-08-22 16:27 ` Vadim Fedorenko
  2025-08-23  7:44   ` Kurt Kanzenbach
  2025-08-25 10:58 ` [Intel-wired-lan] " Loktionov, Aleksandr
  2 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2025-08-22 16:27 UTC (permalink / raw)
  To: Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Sebastian Andrzej Siewior, Paul Menzel, Miroslav Lichvar,
	Jacob Keller, intel-wired-lan, netdev

On 22/08/2025 08:28, Kurt Kanzenbach wrote:
> The current implementation uses schedule_work() which is executed by the
> system work queue to retrieve Tx timestamps. This increases latency and can
> lead to timeouts in case of heavy system load.
> 
> Therefore, switch to the PTP aux worker which can be prioritized and pinned
> according to use case. Tested on Intel i210.
> 
>    * igb_ptp_tx_work
> - * @work: pointer to work struct
> + * @ptp: pointer to ptp clock information
>    *
>    * This work function polls the TSYNCTXCTL valid bit to determine when a
>    * timestamp has been taken for the current stored skb.
>    **/
> -static void igb_ptp_tx_work(struct work_struct *work)
> +static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
>   {
> -	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
> -						   ptp_tx_work);
> +	struct igb_adapter *adapter = container_of(ptp, struct igb_adapter,
> +						   ptp_caps);
>   	struct e1000_hw *hw = &adapter->hw;
>   	u32 tsynctxctl;
>   
>   	if (!adapter->ptp_tx_skb)
> -		return;
> +		return -1;
>   
>   	if (time_is_before_jiffies(adapter->ptp_tx_start +
>   				   IGB_PTP_TX_TIMEOUT)) {
> @@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct *work)
>   		 */
>   		rd32(E1000_TXSTMPH);
>   		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
> -		return;
> +		return -1;
>   	}
>   
>   	tsynctxctl = rd32(E1000_TSYNCTXCTL);
> -	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
> +	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
>   		igb_ptp_tx_hwtstamp(adapter);
> -	else
> -		/* reschedule to check later */
> -		schedule_work(&adapter->ptp_tx_work);
> +		return -1;
> +	}
> +
> +	/* reschedule to check later */
> +	return 1;

do_aux_work is expected to return delay in jiffies to re-schedule the
work. it would be cleaner to use msec_to_jiffies macros to tell how much
time the driver has to wait before the next try of retrieving the
timestamp. AFAIU, the timestamp may come ASAP in this case, so it's
actually reasonable to request immediate re-schedule of the worker by
returning 0.

>   }

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22  7:52 ` Sebastian Andrzej Siewior
@ 2025-08-22 23:55   ` Jacob Keller
  2025-08-23  7:29   ` Kurt Kanzenbach
  1 sibling, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2025-08-22 23:55 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Kurt Kanzenbach
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 1799 bytes --]



On 8/22/2025 12:52 AM, Sebastian Andrzej Siewior wrote:
> On 2025-08-22 09:28:10 [+0200], Kurt Kanzenbach wrote:
>> The current implementation uses schedule_work() which is executed by the
>> system work queue to retrieve Tx timestamps. This increases latency and can
>> lead to timeouts in case of heavy system load.
>>
>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>> according to use case. Tested on Intel i210.
>>
>> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
>> ---
>> Changes in v2:
>> - Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
>> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
> 
> For the i210 it makes sense to read it directly from IRQ avoiding the
> context switch and the delay resulting for it. For the e1000_82576 it
> makes sense to avoid the system workqueue and use a dedicated thread
> which is not CPU bound and could prioritized/ isolated further if
> needed.
> I don't understand *why* reading the TS in IRQ is causing this packet
> loss. This is also what the igc does and the performance improved
> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
> 
> and here it causes the opposite?
> 
> Sebastian

Miroslav reported a test using ntpperf which showed a pretty significant
impact for higher rates. It was indeed better for low rates, but was
much worse for high rates.

See the following:

https://lore.kernel.org/all/aKMbekefL4mJ23kW@localhost/

In all cases the use of the thread was better, so this improves the
behavior without regressing in the case of many more packets.

It is unclear exactly why the ntpperf test fails so badly in that case
with the higher rate for i210.

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22  7:52 ` Sebastian Andrzej Siewior
  2025-08-22 23:55   ` Jacob Keller
@ 2025-08-23  7:29   ` Kurt Kanzenbach
  2025-08-25  7:53     ` Miroslav Lichvar
  2025-08-25 23:28     ` Jacob Keller
  1 sibling, 2 replies; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-23  7:29 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, Jacob Keller, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 1918 bytes --]

On Fri Aug 22 2025, Sebastian Andrzej Siewior wrote:
> On 2025-08-22 09:28:10 [+0200], Kurt Kanzenbach wrote:
>> The current implementation uses schedule_work() which is executed by the
>> system work queue to retrieve Tx timestamps. This increases latency and can
>> lead to timeouts in case of heavy system load.
>> 
>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>> according to use case. Tested on Intel i210.
>> 
>> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
>> ---
>> Changes in v2:
>> - Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
>> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
>
> For the i210 it makes sense to read it directly from IRQ avoiding the
> context switch and the delay resulting for it. For the e1000_82576 it
> makes sense to avoid the system workqueue and use a dedicated thread
> which is not CPU bound and could prioritized/ isolated further if
> needed.
> I don't understand *why* reading the TS in IRQ is causing this packet
> loss.

Me neither. I thought it could be the irqoff time. On my test systems
the TS IRQ takes about ~16us with reading the timestamp. In the
kworker/ptp aux thread scenario it takes about ~6us IRQ time + ~10us run
time for the threads. All of that looks reasonable to me.

Also I couldn't really see a performance degradation with ntpperf. In my
tests the IRQ variant reached an equal or higher rate. But sometimes I
get 'Could not send requests at rate X'. No idea what that means.

Anyway, this patch is basically a compromise. It works for Miroslav and
my use case.

> This is also what the igc does and the performance improved
> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>
> and here it causes the opposite?

As said above, I'm out of ideas here.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22 16:27 ` Vadim Fedorenko
@ 2025-08-23  7:44   ` Kurt Kanzenbach
  2025-08-25 13:18     ` Vadim Fedorenko
  0 siblings, 1 reply; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-23  7:44 UTC (permalink / raw)
  To: Vadim Fedorenko, Tony Nguyen, Przemek Kitszel
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Sebastian Andrzej Siewior, Paul Menzel, Miroslav Lichvar,
	Jacob Keller, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 2655 bytes --]

On Fri Aug 22 2025, Vadim Fedorenko wrote:
> On 22/08/2025 08:28, Kurt Kanzenbach wrote:
>> The current implementation uses schedule_work() which is executed by the
>> system work queue to retrieve Tx timestamps. This increases latency and can
>> lead to timeouts in case of heavy system load.
>> 
>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>> according to use case. Tested on Intel i210.
>> 
>>    * igb_ptp_tx_work
>> - * @work: pointer to work struct
>> + * @ptp: pointer to ptp clock information
>>    *
>>    * This work function polls the TSYNCTXCTL valid bit to determine when a
>>    * timestamp has been taken for the current stored skb.
>>    **/
>> -static void igb_ptp_tx_work(struct work_struct *work)
>> +static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
>>   {
>> -	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
>> -						   ptp_tx_work);
>> +	struct igb_adapter *adapter = container_of(ptp, struct igb_adapter,
>> +						   ptp_caps);
>>   	struct e1000_hw *hw = &adapter->hw;
>>   	u32 tsynctxctl;
>>   
>>   	if (!adapter->ptp_tx_skb)
>> -		return;
>> +		return -1;
>>   
>>   	if (time_is_before_jiffies(adapter->ptp_tx_start +
>>   				   IGB_PTP_TX_TIMEOUT)) {
>> @@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct *work)
>>   		 */
>>   		rd32(E1000_TXSTMPH);
>>   		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
>> -		return;
>> +		return -1;
>>   	}
>>   
>>   	tsynctxctl = rd32(E1000_TSYNCTXCTL);
>> -	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
>> +	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
>>   		igb_ptp_tx_hwtstamp(adapter);
>> -	else
>> -		/* reschedule to check later */
>> -		schedule_work(&adapter->ptp_tx_work);
>> +		return -1;
>> +	}
>> +
>> +	/* reschedule to check later */
>> +	return 1;
>
> do_aux_work is expected to return delay in jiffies to re-schedule the
> work. it would be cleaner to use msec_to_jiffies macros to tell how much
> time the driver has to wait before the next try of retrieving the
> timestamp. AFAIU, the timestamp may come ASAP in this case, so it's
> actually reasonable to request immediate re-schedule of the worker by
> returning 0.

I don't think so. The 'return 1' is only executed for 82576. For all
other NICs the TS is already available. For 82576 the packet is queued
to Tx ring and the worker is scheduled immediately. For example, in case
of other Tx traffic there's a chance that the TS is not available
yet. So we comeback one jiffy later, which is 10ms at worst. That looked
reasonable to me.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-23  7:29   ` Kurt Kanzenbach
@ 2025-08-25  7:53     ` Miroslav Lichvar
  2025-08-25  9:22       ` Kurt Kanzenbach
  2025-08-25 23:28     ` Jacob Keller
  1 sibling, 1 reply; 24+ messages in thread
From: Miroslav Lichvar @ 2025-08-25  7:53 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Sebastian Andrzej Siewior, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, Jacob Keller, intel-wired-lan, netdev

On Sat, Aug 23, 2025 at 09:29:36AM +0200, Kurt Kanzenbach wrote:
> Also I couldn't really see a performance degradation with ntpperf.

I was testing with an I350, not I210. Could that make a difference?

> In my
> tests the IRQ variant reached an equal or higher rate. But sometimes I
> get 'Could not send requests at rate X'. No idea what that means.

That's ntpperf giving up as the HW is too slow to send requests at
that rate (with a single process calling sendmmsg() in a loop). You
can add the -l option to force ntpperf to continue, but the printed
rate values will no longer be accurate, you would need to measure it
by some other way, e.g. by monitoring the interface packet counters.

-- 
Miroslav Lichvar


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-25  7:53     ` Miroslav Lichvar
@ 2025-08-25  9:22       ` Kurt Kanzenbach
  2025-08-25 23:23         ` [Intel-wired-lan] " Jacob Keller
  0 siblings, 1 reply; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-25  9:22 UTC (permalink / raw)
  To: Miroslav Lichvar
  Cc: Sebastian Andrzej Siewior, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, Jacob Keller, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 817 bytes --]

On Mon Aug 25 2025, Miroslav Lichvar wrote:
> On Sat, Aug 23, 2025 at 09:29:36AM +0200, Kurt Kanzenbach wrote:
>> Also I couldn't really see a performance degradation with ntpperf.
>
> I was testing with an I350, not I210. Could that make a difference?

Jup, it could make a difference.

>
>> In my
>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>> get 'Could not send requests at rate X'. No idea what that means.
>
> That's ntpperf giving up as the HW is too slow to send requests at
> that rate (with a single process calling sendmmsg() in a loop). You
> can add the -l option to force ntpperf to continue, but the printed
> rate values will no longer be accurate, you would need to measure it
> by some other way, e.g. by monitoring the interface packet counters.

I see.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [Intel-wired-lan] [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-22  7:28 [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker Kurt Kanzenbach
  2025-08-22  7:52 ` Sebastian Andrzej Siewior
  2025-08-22 16:27 ` Vadim Fedorenko
@ 2025-08-25 10:58 ` Loktionov, Aleksandr
  2 siblings, 0 replies; 24+ messages in thread
From: Loktionov, Aleksandr @ 2025-08-25 10:58 UTC (permalink / raw)
  To: Kurt Kanzenbach, Nguyen, Anthony L, Kitszel, Przemyslaw
  Cc: Paul Menzel, Vadim Fedorenko, Gomes, Vinicius,
	netdev@vger.kernel.org, Richard Cochran, Andrew Lunn,
	Eric Dumazet, intel-wired-lan@lists.osuosl.org, Keller, Jacob E,
	Jakub Kicinski, Paolo Abeni, David S. Miller,
	Sebastian Andrzej Siewior



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of Kurt Kanzenbach
> Sent: Friday, August 22, 2025 9:28 AM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>
> Cc: Paul Menzel <pmenzel@molgen.mpg.de>; Vadim Fedorenko
> <vadim.fedorenko@linux.dev>; Gomes, Vinicius
> <vinicius.gomes@intel.com>; netdev@vger.kernel.org; Richard Cochran
> <richardcochran@gmail.com>; Kurt Kanzenbach <kurt@linutronix.de>;
> Andrew Lunn <andrew+netdev@lunn.ch>; Eric Dumazet
> <edumazet@google.com>; intel-wired-lan@lists.osuosl.org; Keller, Jacob
> E <jacob.e.keller@intel.com>; Jakub Kicinski <kuba@kernel.org>; Paolo
> Abeni <pabeni@redhat.com>; David S. Miller <davem@davemloft.net>;
> Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Subject: [Intel-wired-lan] [PATCH iwl-next v2] igb: Convert Tx
> timestamping to PTP aux worker
> 
> The current implementation uses schedule_work() which is executed by
> the system work queue to retrieve Tx timestamps. This increases
> latency and can lead to timeouts in case of heavy system load.
> 
> Therefore, switch to the PTP aux worker which can be prioritized and
> pinned according to use case. Tested on Intel i210.
> 
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>


> ---
> Changes in v2:
> - Switch from IRQ to PTP aux worker due to NTP performance regression
> (Miroslav)
> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-
> 8c6fc0353422@linutronix.de
> ---
>  drivers/net/ethernet/intel/igb/igb.h      |  1 -
>  drivers/net/ethernet/intel/igb/igb_main.c |  6 +++---
> drivers/net/ethernet/intel/igb/igb_ptp.c  | 28 +++++++++++++++--------
> -----
>  3 files changed, 18 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igb/igb.h
> b/drivers/net/ethernet/intel/igb/igb.h
> index
> c3f4f7cd264e9b2ff70f03b580f95b15b528028c..f285def61f7a778f66944d6c52bb
> 31f11ff803cf 100644
> --- a/drivers/net/ethernet/intel/igb/igb.h
> +++ b/drivers/net/ethernet/intel/igb/igb.h
> @@ -624,7 +624,6 @@ struct igb_adapter {
>  	struct ptp_clock *ptp_clock;
>  	struct ptp_clock_info ptp_caps;
>  	struct delayed_work ptp_overflow_work;
> -	struct work_struct ptp_tx_work;
>  	struct sk_buff *ptp_tx_skb;
>  	struct kernel_hwtstamp_config tstamp_config;
>  	unsigned long ptp_tx_start;
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c
> b/drivers/net/ethernet/intel/igb/igb_main.c
> index
> a9a7a94ae61e93aa737b0103e00580e73601d62b..76467f0e53305188fcbbff27e21e
> 478e764ca552 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -6576,7 +6576,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff
> *skb,
>  			adapter->ptp_tx_skb = skb_get(skb);
>  			adapter->ptp_tx_start = jiffies;
>  			if (adapter->hw.mac.type == e1000_82576)
> -				schedule_work(&adapter->ptp_tx_work);
> +				ptp_schedule_worker(adapter->ptp_clock, 0);
>  		} else {
>  			adapter->tx_hwtstamp_skipped++;
>  		}
> @@ -6612,7 +6612,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff
> *skb,
>  		dev_kfree_skb_any(adapter->ptp_tx_skb);
>  		adapter->ptp_tx_skb = NULL;
>  		if (adapter->hw.mac.type == e1000_82576)
> -			cancel_work_sync(&adapter->ptp_tx_work);
> +			ptp_cancel_worker_sync(adapter->ptp_clock);
>  		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter-
> >state);
>  	}
> 
> @@ -7080,7 +7080,7 @@ static void igb_tsync_interrupt(struct
> igb_adapter *adapter)
> 
>  	if (tsicr & E1000_TSICR_TXTS) {
>  		/* retrieve hardware timestamp */
> -		schedule_work(&adapter->ptp_tx_work);
> +		ptp_schedule_worker(adapter->ptp_clock, 0);
>  	}
> 
>  	if (tsicr & TSINTR_TT0)
> diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c
> b/drivers/net/ethernet/intel/igb/igb_ptp.c
> index
> a7876882aeaf2b2a7fb9ec6ff5c83d8a1b06008a..8dabde01d09dcacc13e19fa4ce7a
> d0327077190a 100644
> --- a/drivers/net/ethernet/intel/igb/igb_ptp.c
> +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
> @@ -798,20 +798,20 @@ static int igb_ptp_verify_pin(struct
> ptp_clock_info *ptp, unsigned int pin,
> 
>  /**
>   * igb_ptp_tx_work
> - * @work: pointer to work struct
> + * @ptp: pointer to ptp clock information
>   *
>   * This work function polls the TSYNCTXCTL valid bit to determine
> when a
>   * timestamp has been taken for the current stored skb.
>   **/
> -static void igb_ptp_tx_work(struct work_struct *work)
> +static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
>  {
> -	struct igb_adapter *adapter = container_of(work, struct
> igb_adapter,
> -						   ptp_tx_work);
> +	struct igb_adapter *adapter = container_of(ptp, struct
> igb_adapter,
> +						   ptp_caps);
>  	struct e1000_hw *hw = &adapter->hw;
>  	u32 tsynctxctl;
> 
>  	if (!adapter->ptp_tx_skb)
> -		return;
> +		return -1;
> 
>  	if (time_is_before_jiffies(adapter->ptp_tx_start +
>  				   IGB_PTP_TX_TIMEOUT)) {
> @@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct
> *work)
>  		 */
>  		rd32(E1000_TXSTMPH);
>  		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp
> hang\n");
> -		return;
> +		return -1;
>  	}
> 
>  	tsynctxctl = rd32(E1000_TSYNCTXCTL);
> -	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
> +	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
>  		igb_ptp_tx_hwtstamp(adapter);
> -	else
> -		/* reschedule to check later */
> -		schedule_work(&adapter->ptp_tx_work);
> +		return -1;
> +	}
> +
> +	/* reschedule to check later */
> +	return 1;
>  }
> 
>  static void igb_ptp_overflow_check(struct work_struct *work) @@ -
> 915,7 +917,7 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter)
>  	 * timestamp bit when this occurs.
>  	 */
>  	if (timeout) {
> -		cancel_work_sync(&adapter->ptp_tx_work);
> +		ptp_cancel_worker_sync(adapter->ptp_clock);
>  		dev_kfree_skb_any(adapter->ptp_tx_skb);
>  		adapter->ptp_tx_skb = NULL;
>  		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter-
> >state); @@ -1381,6 +1383,7 @@ void igb_ptp_init(struct igb_adapter
> *adapter)
>  		return;
>  	}
> 
> +	adapter->ptp_caps.do_aux_work = igb_ptp_tx_work;
>  	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
>  						&adapter->pdev->dev);
>  	if (IS_ERR(adapter->ptp_clock)) {
> @@ -1392,7 +1395,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
>  		adapter->ptp_flags |= IGB_PTP_ENABLED;
> 
>  		spin_lock_init(&adapter->tmreg_lock);
> -		INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
> 
>  		if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
>  			INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
> @@ -1437,7 +1439,7 @@ void igb_ptp_suspend(struct igb_adapter
> *adapter)
>  	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
>  		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
> 
> -	cancel_work_sync(&adapter->ptp_tx_work);
> +	ptp_cancel_worker_sync(adapter->ptp_clock);
>  	if (adapter->ptp_tx_skb) {
>  		dev_kfree_skb_any(adapter->ptp_tx_skb);
>  		adapter->ptp_tx_skb = NULL;
> 
> ---
> base-commit: a7bd72158063740212344fad5d99dcef45bc70d6
> change-id: 20250813-igb_irq_ts-1aa77cc7b4cb
> 
> Best regards,
> --
> Kurt Kanzenbach <kurt@linutronix.de>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-23  7:44   ` Kurt Kanzenbach
@ 2025-08-25 13:18     ` Vadim Fedorenko
  2025-08-25 23:24       ` Jacob Keller
  0 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2025-08-25 13:18 UTC (permalink / raw)
  To: Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Sebastian Andrzej Siewior, Paul Menzel, Miroslav Lichvar,
	Jacob Keller, intel-wired-lan, netdev

On 23/08/2025 08:44, Kurt Kanzenbach wrote:
> On Fri Aug 22 2025, Vadim Fedorenko wrote:
>> On 22/08/2025 08:28, Kurt Kanzenbach wrote:
>>> The current implementation uses schedule_work() which is executed by the
>>> system work queue to retrieve Tx timestamps. This increases latency and can
>>> lead to timeouts in case of heavy system load.
>>>
>>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>>> according to use case. Tested on Intel i210.
>>>
>>>     * igb_ptp_tx_work
>>> - * @work: pointer to work struct
>>> + * @ptp: pointer to ptp clock information
>>>     *
>>>     * This work function polls the TSYNCTXCTL valid bit to determine when a
>>>     * timestamp has been taken for the current stored skb.
>>>     **/
>>> -static void igb_ptp_tx_work(struct work_struct *work)
>>> +static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
>>>    {
>>> -	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
>>> -						   ptp_tx_work);
>>> +	struct igb_adapter *adapter = container_of(ptp, struct igb_adapter,
>>> +						   ptp_caps);
>>>    	struct e1000_hw *hw = &adapter->hw;
>>>    	u32 tsynctxctl;
>>>    
>>>    	if (!adapter->ptp_tx_skb)
>>> -		return;
>>> +		return -1;
>>>    
>>>    	if (time_is_before_jiffies(adapter->ptp_tx_start +
>>>    				   IGB_PTP_TX_TIMEOUT)) {
>>> @@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct *work)
>>>    		 */
>>>    		rd32(E1000_TXSTMPH);
>>>    		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
>>> -		return;
>>> +		return -1;
>>>    	}
>>>    
>>>    	tsynctxctl = rd32(E1000_TSYNCTXCTL);
>>> -	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
>>> +	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
>>>    		igb_ptp_tx_hwtstamp(adapter);
>>> -	else
>>> -		/* reschedule to check later */
>>> -		schedule_work(&adapter->ptp_tx_work);
>>> +		return -1;
>>> +	}
>>> +
>>> +	/* reschedule to check later */
>>> +	return 1;
>>
>> do_aux_work is expected to return delay in jiffies to re-schedule the
>> work. it would be cleaner to use msec_to_jiffies macros to tell how much
>> time the driver has to wait before the next try of retrieving the
>> timestamp. AFAIU, the timestamp may come ASAP in this case, so it's
>> actually reasonable to request immediate re-schedule of the worker by
>> returning 0.
> 
> I don't think so. The 'return 1' is only executed for 82576. For all
> other NICs the TS is already available. For 82576 the packet is queued
> to Tx ring and the worker is scheduled immediately. For example, in case
> of other Tx traffic there's a chance that the TS is not available
> yet. So we comeback one jiffy later, which is 10ms at worst. That looked
> reasonable to me.

Ok, LGTM
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Intel-wired-lan] [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-25  9:22       ` Kurt Kanzenbach
@ 2025-08-25 23:23         ` Jacob Keller
  0 siblings, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2025-08-25 23:23 UTC (permalink / raw)
  To: Kurt Kanzenbach, Miroslav Lichvar
  Cc: Sebastian Andrzej Siewior, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 1592 bytes --]



On 8/25/2025 2:22 AM, Kurt Kanzenbach wrote:
> On Mon Aug 25 2025, Miroslav Lichvar wrote:
>> On Sat, Aug 23, 2025 at 09:29:36AM +0200, Kurt Kanzenbach wrote:
>>> Also I couldn't really see a performance degradation with ntpperf.
>>
>> I was testing with an I350, not I210. Could that make a difference?
> 
> Jup, it could make a difference.
> 

Yes, that could make a significant difference. I350 is a different MAC
architecture. According to its datasheet, I can see that it uses a
similar TXTSTMP registers, and it appears to have an interrupt cause,
TIME_SYNC bit set which an additional cause register TSICR indicates
which specific event occurred.

It does look like the logic in the driver checks TSICR and doesn't just
randomly call this function each time, as it relies on the presence of
the TSICR register.. Hm.

At first I thought it might be because I350 also has an interrupt
trigger for receive timestamps, but this cause is masked in the TSIM
register, so it shouldn't be a concern.
>>
>>> In my
>>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>>> get 'Could not send requests at rate X'. No idea what that means.
>>
>> That's ntpperf giving up as the HW is too slow to send requests at
>> that rate (with a single process calling sendmmsg() in a loop). You
>> can add the -l option to force ntpperf to continue, but the printed
>> rate values will no longer be accurate, you would need to measure it
>> by some other way, e.g. by monitoring the interface packet counters.
> 
> I see.
> 
> Thanks,
> Kurt


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-25 13:18     ` Vadim Fedorenko
@ 2025-08-25 23:24       ` Jacob Keller
  0 siblings, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2025-08-25 23:24 UTC (permalink / raw)
  To: Vadim Fedorenko, Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Sebastian Andrzej Siewior, Paul Menzel, Miroslav Lichvar,
	intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 3039 bytes --]



On 8/25/2025 6:18 AM, Vadim Fedorenko wrote:
> On 23/08/2025 08:44, Kurt Kanzenbach wrote:
>> On Fri Aug 22 2025, Vadim Fedorenko wrote:
>>> On 22/08/2025 08:28, Kurt Kanzenbach wrote:
>>>> The current implementation uses schedule_work() which is executed by the
>>>> system work queue to retrieve Tx timestamps. This increases latency and can
>>>> lead to timeouts in case of heavy system load.
>>>>
>>>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>>>> according to use case. Tested on Intel i210.
>>>>
>>>>     * igb_ptp_tx_work
>>>> - * @work: pointer to work struct
>>>> + * @ptp: pointer to ptp clock information
>>>>     *
>>>>     * This work function polls the TSYNCTXCTL valid bit to determine when a
>>>>     * timestamp has been taken for the current stored skb.
>>>>     **/
>>>> -static void igb_ptp_tx_work(struct work_struct *work)
>>>> +static long igb_ptp_tx_work(struct ptp_clock_info *ptp)
>>>>    {
>>>> -	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
>>>> -						   ptp_tx_work);
>>>> +	struct igb_adapter *adapter = container_of(ptp, struct igb_adapter,
>>>> +						   ptp_caps);
>>>>    	struct e1000_hw *hw = &adapter->hw;
>>>>    	u32 tsynctxctl;
>>>>    
>>>>    	if (!adapter->ptp_tx_skb)
>>>> -		return;
>>>> +		return -1;
>>>>    
>>>>    	if (time_is_before_jiffies(adapter->ptp_tx_start +
>>>>    				   IGB_PTP_TX_TIMEOUT)) {
>>>> @@ -824,15 +824,17 @@ static void igb_ptp_tx_work(struct work_struct *work)
>>>>    		 */
>>>>    		rd32(E1000_TXSTMPH);
>>>>    		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
>>>> -		return;
>>>> +		return -1;
>>>>    	}
>>>>    
>>>>    	tsynctxctl = rd32(E1000_TSYNCTXCTL);
>>>> -	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
>>>> +	if (tsynctxctl & E1000_TSYNCTXCTL_VALID) {
>>>>    		igb_ptp_tx_hwtstamp(adapter);
>>>> -	else
>>>> -		/* reschedule to check later */
>>>> -		schedule_work(&adapter->ptp_tx_work);
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	/* reschedule to check later */
>>>> +	return 1;
>>>
>>> do_aux_work is expected to return delay in jiffies to re-schedule the
>>> work. it would be cleaner to use msec_to_jiffies macros to tell how much
>>> time the driver has to wait before the next try of retrieving the
>>> timestamp. AFAIU, the timestamp may come ASAP in this case, so it's
>>> actually reasonable to request immediate re-schedule of the worker by
>>> returning 0.
>>
>> I don't think so. The 'return 1' is only executed for 82576. For all
>> other NICs the TS is already available. For 82576 the packet is queued
>> to Tx ring and the worker is scheduled immediately. For example, in case
>> of other Tx traffic there's a chance that the TS is not available
>> yet. So we comeback one jiffy later, which is 10ms at worst. That looked
>> reasonable to me.
> 
> Ok, LGTM
> Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
> 

Agreed.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-23  7:29   ` Kurt Kanzenbach
  2025-08-25  7:53     ` Miroslav Lichvar
@ 2025-08-25 23:28     ` Jacob Keller
  2025-08-26 12:59       ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2025-08-25 23:28 UTC (permalink / raw)
  To: Kurt Kanzenbach, Sebastian Andrzej Siewior
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 2906 bytes --]



On 8/23/2025 12:29 AM, Kurt Kanzenbach wrote:
> On Fri Aug 22 2025, Sebastian Andrzej Siewior wrote:
>> On 2025-08-22 09:28:10 [+0200], Kurt Kanzenbach wrote:
>>> The current implementation uses schedule_work() which is executed by the
>>> system work queue to retrieve Tx timestamps. This increases latency and can
>>> lead to timeouts in case of heavy system load.
>>>
>>> Therefore, switch to the PTP aux worker which can be prioritized and pinned
>>> according to use case. Tested on Intel i210.
>>>
>>> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
>>> ---
>>> Changes in v2:
>>> - Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
>>> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
>>
>> For the i210 it makes sense to read it directly from IRQ avoiding the
>> context switch and the delay resulting for it. For the e1000_82576 it
>> makes sense to avoid the system workqueue and use a dedicated thread
>> which is not CPU bound and could prioritized/ isolated further if
>> needed.
>> I don't understand *why* reading the TS in IRQ is causing this packet
>> loss.
> 
> Me neither. I thought it could be the irqoff time. On my test systems
> the TS IRQ takes about ~16us with reading the timestamp. In the
> kworker/ptp aux thread scenario it takes about ~6us IRQ time + ~10us run
> time for the threads. All of that looks reasonable to me.
> 

Ya, I don't think we fully understand either. Miroslav said he tested on
I350 which is a different MAC from the I210, so it could be something
there. Theoretically we could handle just I210 directly in the interrupt
and leave the other variants to the kworker.. but I don't know how much
benefit we get from that. The data sheet for the I350 appears to have
more or less the same logic for Tx timestamps. It is significantly
different for Rx timestamps though.

> Also I couldn't really see a performance degradation with ntpperf. In my
> tests the IRQ variant reached an equal or higher rate. But sometimes I
> get 'Could not send requests at rate X'. No idea what that means.
> 
> Anyway, this patch is basically a compromise. It works for Miroslav and
> my use case.
> 
>> This is also what the igc does and the performance improved
>> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>>

igc supports several hardware variations which are all a lot similar to
i210 than i350 is to i210 in igb. I could see this working fine for i210
if it works fine in igb.. I honestly am at a loss currently why i350 is
much worse.

>> and here it causes the opposite?
> 
> As said above, I'm out of ideas here.
> 

Same. It may be one of those things where the effort to dig up precisely
what has gone wrong is so large that it becomes not feasible relative to
the gain :(

> Thanks,
> Kurt


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-25 23:28     ` Jacob Keller
@ 2025-08-26 12:59       ` Sebastian Andrzej Siewior
  2025-08-26 18:23         ` Jacob Keller
  2025-08-27 13:57         ` Miroslav Lichvar
  0 siblings, 2 replies; 24+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-08-26 12:59 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, Miroslav Lichvar, intel-wired-lan, netdev

On 2025-08-25 16:28:38 [-0700], Jacob Keller wrote:
> Ya, I don't think we fully understand either. Miroslav said he tested on
> I350 which is a different MAC from the I210, so it could be something
> there. Theoretically we could handle just I210 directly in the interrupt
> and leave the other variants to the kworker.. but I don't know how much
> benefit we get from that. The data sheet for the I350 appears to have
> more or less the same logic for Tx timestamps. It is significantly
> different for Rx timestamps though.

From logical point of view it makes sense to retrieve the HW timestamp
immediately when it becomes available and feed it to the stack. I can't
imagine how delaying it to yet another thread improves the situation.
The benchmark is about > 1k packets/ second while in reality you have
less than 20 packets a second. With multiple applications you usually
need a "second timestamp register" or you may lose packets.

Delaying it to the AUX worker makes sense for hardware which can't fire
an interrupt and polling is the only option left. This is sane in this
case but I don't like this solution as some kind compromise for
everyone. Simply because it adds overhead and requires additional
configuration.

> > Also I couldn't really see a performance degradation with ntpperf. In my
> > tests the IRQ variant reached an equal or higher rate. But sometimes I
> > get 'Could not send requests at rate X'. No idea what that means.
> > 
> > Anyway, this patch is basically a compromise. It works for Miroslav and
> > my use case.
> > 
> >> This is also what the igc does and the performance improved
> >> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
> >>
> 
> igc supports several hardware variations which are all a lot similar to
> i210 than i350 is to i210 in igb. I could see this working fine for i210
> if it works fine in igb.. I honestly am at a loss currently why i350 is
> much worse.
>
> >> and here it causes the opposite?
> > 
> > As said above, I'm out of ideas here.
> > 
> 
> Same. It may be one of those things where the effort to dig up precisely
> what has gone wrong is so large that it becomes not feasible relative to
> the gain :(

Could we please use the direct retrieval/ submission for HW which
supports it and fallback to the AUX worker (instead of the kworker) for
HW which does not have an interrupt for it?

> > Thanks,
> > Kurt

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-26 12:59       ` Sebastian Andrzej Siewior
@ 2025-08-26 18:23         ` Jacob Keller
  2025-08-27 12:57           ` Kurt Kanzenbach
  2025-08-27 13:57         ` Miroslav Lichvar
  1 sibling, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2025-08-26 18:23 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, Miroslav Lichvar, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 2956 bytes --]



On 8/26/2025 5:59 AM, Sebastian Andrzej Siewior wrote:
> On 2025-08-25 16:28:38 [-0700], Jacob Keller wrote:
>> Ya, I don't think we fully understand either. Miroslav said he tested on
>> I350 which is a different MAC from the I210, so it could be something
>> there. Theoretically we could handle just I210 directly in the interrupt
>> and leave the other variants to the kworker.. but I don't know how much
>> benefit we get from that. The data sheet for the I350 appears to have
>> more or less the same logic for Tx timestamps. It is significantly
>> different for Rx timestamps though.
> 
> From logical point of view it makes sense to retrieve the HW timestamp
> immediately when it becomes available and feed it to the stack. I can't
> imagine how delaying it to yet another thread improves the situation.
> The benchmark is about > 1k packets/ second while in reality you have
> less than 20 packets a second. With multiple applications you usually
> need a "second timestamp register" or you may lose packets.
> 
> Delaying it to the AUX worker makes sense for hardware which can't fire
> an interrupt and polling is the only option left. This is sane in this
> case but I don't like this solution as some kind compromise for
> everyone. Simply because it adds overhead and requires additional
> configuration.
> 

I agree. Its just frustrating that doing so appears to cause a
regression in at least one test setup on hardware which uses this method.

>>> Also I couldn't really see a performance degradation with ntpperf. In my
>>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>>> get 'Could not send requests at rate X'. No idea what that means.
>>>
>>> Anyway, this patch is basically a compromise. It works for Miroslav and
>>> my use case.
>>>
>>>> This is also what the igc does and the performance improved
>>>> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>>>>
>>
>> igc supports several hardware variations which are all a lot similar to
>> i210 than i350 is to i210 in igb. I could see this working fine for i210
>> if it works fine in igb.. I honestly am at a loss currently why i350 is
>> much worse.
>>
>>>> and here it causes the opposite?
>>>
>>> As said above, I'm out of ideas here.
>>>
>>
>> Same. It may be one of those things where the effort to dig up precisely
>> what has gone wrong is so large that it becomes not feasible relative to
>> the gain :(
> 
> Could we please use the direct retrieval/ submission for HW which
> supports it and fallback to the AUX worker (instead of the kworker) for
> HW which does not have an interrupt for it?
> 

I have no objection. Perhaps we could assume the high end of the ntpperf
benchmark is not reflective of normal use case? We *are* limited to only
one timestamp register, which the igb driver does protect by bitlock.

>>> Thanks,
>>> Kurt
> 
> Sebastian


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-26 18:23         ` Jacob Keller
@ 2025-08-27 12:57           ` Kurt Kanzenbach
  2025-08-27 13:39             ` Paul Menzel
  0 siblings, 1 reply; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-27 12:57 UTC (permalink / raw)
  To: Jacob Keller, Sebastian Andrzej Siewior
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Paul Menzel, Vadim Fedorenko,
	Miroslav Lichvar, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 3126 bytes --]

On Tue Aug 26 2025, Jacob Keller wrote:
> On 8/26/2025 5:59 AM, Sebastian Andrzej Siewior wrote:
>> On 2025-08-25 16:28:38 [-0700], Jacob Keller wrote:
>>> Ya, I don't think we fully understand either. Miroslav said he tested on
>>> I350 which is a different MAC from the I210, so it could be something
>>> there. Theoretically we could handle just I210 directly in the interrupt
>>> and leave the other variants to the kworker.. but I don't know how much
>>> benefit we get from that. The data sheet for the I350 appears to have
>>> more or less the same logic for Tx timestamps. It is significantly
>>> different for Rx timestamps though.
>> 
>> From logical point of view it makes sense to retrieve the HW timestamp
>> immediately when it becomes available and feed it to the stack. I can't
>> imagine how delaying it to yet another thread improves the situation.
>> The benchmark is about > 1k packets/ second while in reality you have
>> less than 20 packets a second. With multiple applications you usually
>> need a "second timestamp register" or you may lose packets.
>> 
>> Delaying it to the AUX worker makes sense for hardware which can't fire
>> an interrupt and polling is the only option left. This is sane in this
>> case but I don't like this solution as some kind compromise for
>> everyone. Simply because it adds overhead and requires additional
>> configuration.
>> 
>
> I agree. Its just frustrating that doing so appears to cause a
> regression in at least one test setup on hardware which uses this method.
>
>>>> Also I couldn't really see a performance degradation with ntpperf. In my
>>>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>>>> get 'Could not send requests at rate X'. No idea what that means.
>>>>
>>>> Anyway, this patch is basically a compromise. It works for Miroslav and
>>>> my use case.
>>>>
>>>>> This is also what the igc does and the performance improved
>>>>> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>>>>>
>>>
>>> igc supports several hardware variations which are all a lot similar to
>>> i210 than i350 is to i210 in igb. I could see this working fine for i210
>>> if it works fine in igb.. I honestly am at a loss currently why i350 is
>>> much worse.
>>>
>>>>> and here it causes the opposite?
>>>>
>>>> As said above, I'm out of ideas here.
>>>>
>>>
>>> Same. It may be one of those things where the effort to dig up precisely
>>> what has gone wrong is so large that it becomes not feasible relative to
>>> the gain :(
>> 
>> Could we please use the direct retrieval/ submission for HW which
>> supports it and fallback to the AUX worker (instead of the kworker) for
>> HW which does not have an interrupt for it?
>> 
>
> I have no objection. Perhaps we could assume the high end of the ntpperf
> benchmark is not reflective of normal use case? We *are* limited to only
> one timestamp register, which the igb driver does protect by bitlock.

Does that mean we're going back to v1 + the AUX worker for 82576? Let me
prepare v3 then.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 12:57           ` Kurt Kanzenbach
@ 2025-08-27 13:39             ` Paul Menzel
  2025-08-27 16:22               ` Jacob Keller
  0 siblings, 1 reply; 24+ messages in thread
From: Paul Menzel @ 2025-08-27 13:39 UTC (permalink / raw)
  To: Kurt Kanzenbach
  Cc: Jacob Keller, Sebastian Andrzej Siewior, Tony Nguyen,
	Przemek Kitszel, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Vinicius Costa Gomes, Vadim Fedorenko, Miroslav Lichvar,
	intel-wired-lan, netdev

Dear Linux folks,


A very interesting issue.

Am 27.08.25 um 14:57 schrieb Kurt Kanzenbach:
> On Tue Aug 26 2025, Jacob Keller wrote:
>> On 8/26/2025 5:59 AM, Sebastian Andrzej Siewior wrote:
>>> On 2025-08-25 16:28:38 [-0700], Jacob Keller wrote:
>>>> Ya, I don't think we fully understand either. Miroslav said he tested on
>>>> I350 which is a different MAC from the I210, so it could be something
>>>> there. Theoretically we could handle just I210 directly in the interrupt
>>>> and leave the other variants to the kworker.. but I don't know how much
>>>> benefit we get from that. The data sheet for the I350 appears to have
>>>> more or less the same logic for Tx timestamps. It is significantly
>>>> different for Rx timestamps though.
>>>
>>> From logical point of view it makes sense to retrieve the HW timestamp
>>> immediately when it becomes available and feed it to the stack. I can't
>>> imagine how delaying it to yet another thread improves the situation.
>>> The benchmark is about > 1k packets/ second while in reality you have
>>> less than 20 packets a second. With multiple applications you usually
>>> need a "second timestamp register" or you may lose packets.
>>>
>>> Delaying it to the AUX worker makes sense for hardware which can't fire
>>> an interrupt and polling is the only option left. This is sane in this
>>> case but I don't like this solution as some kind compromise for
>>> everyone. Simply because it adds overhead and requires additional
>>> configuration.
>>
>> I agree. Its just frustrating that doing so appears to cause a
>> regression in at least one test setup on hardware which uses this method.
>>
>>>>> Also I couldn't really see a performance degradation with ntpperf. In my
>>>>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>>>>> get 'Could not send requests at rate X'. No idea what that means.
>>>>>
>>>>> Anyway, this patch is basically a compromise. It works for Miroslav and
>>>>> my use case.
>>>>>
>>>>>> This is also what the igc does and the performance improved
>>>>>> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>>>>
>>>> igc supports several hardware variations which are all a lot similar to
>>>> i210 than i350 is to i210 in igb. I could see this working fine for i210
>>>> if it works fine in igb.. I honestly am at a loss currently why i350 is
>>>> much worse.
>>>>
>>>>>> and here it causes the opposite?
>>>>>
>>>>> As said above, I'm out of ideas here.
>>>>
>>>> Same. It may be one of those things where the effort to dig up precisely
>>>> what has gone wrong is so large that it becomes not feasible relative to
>>>> the gain :(
>>>
>>> Could we please use the direct retrieval/ submission for HW which
>>> supports it and fallback to the AUX worker (instead of the kworker) for
>>> HW which does not have an interrupt for it?
>>
>> I have no objection. Perhaps we could assume the high end of the ntpperf
>> benchmark is not reflective of normal use case? We *are* limited to only
>> one timestamp register, which the igb driver does protect by bitlock.
> 
> Does that mean we're going back to v1 + the AUX worker for 82576? Let me
> prepare v3 then.

Good question. Personally, I’d interpret Linux’ no-regression-policy 
that, if a possible regression is known, even for a synthetic benchmark, 
it must not be introduced unrelated how upsetting this is. So the 
current approach needs to be taken.


Kind regards,

Paul

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-26 12:59       ` Sebastian Andrzej Siewior
  2025-08-26 18:23         ` Jacob Keller
@ 2025-08-27 13:57         ` Miroslav Lichvar
  2025-08-27 14:05           ` Kurt Kanzenbach
  2025-08-27 14:10           ` Sebastian Andrzej Siewior
  1 sibling, 2 replies; 24+ messages in thread
From: Miroslav Lichvar @ 2025-08-27 13:57 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Jacob Keller, Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev

On Tue, Aug 26, 2025 at 02:59:12PM +0200, Sebastian Andrzej Siewior wrote:
> The benchmark is about > 1k packets/ second while in reality you have
> less than 20 packets a second.

I don't want to argue about which use case is more important, but it's
normal for NTP servers to receive requests at much higher rates than
that. In some countries, public servers get hundreds of thousands of
packets per second. A server in a local network may have clients
polling 128 times per second each.

Anyway, if anyone is still interested in finding out the cause of
the regression, there is a thing I forgot to mention for the
reproducer using ntpperf. chronyd needs to be configured with a larger
clientloglimit (e.g. clientloglimit 100000000), otherwise it won't be
able to respond to the large number of clients in interleaved mode
with a HW TX timestamp. The chronyc serverstats report would show
that. It should look like the outputs I posted here before.

-- 
Miroslav Lichvar


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 13:57         ` Miroslav Lichvar
@ 2025-08-27 14:05           ` Kurt Kanzenbach
  2025-08-27 14:10           ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 24+ messages in thread
From: Kurt Kanzenbach @ 2025-08-27 14:05 UTC (permalink / raw)
  To: Miroslav Lichvar, Sebastian Andrzej Siewior
  Cc: Jacob Keller, Tony Nguyen, Przemek Kitszel, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev

[-- Attachment #1: Type: text/plain, Size: 1023 bytes --]

On Wed Aug 27 2025, Miroslav Lichvar wrote:
> On Tue, Aug 26, 2025 at 02:59:12PM +0200, Sebastian Andrzej Siewior wrote:
>> The benchmark is about > 1k packets/ second while in reality you have
>> less than 20 packets a second.
>
> I don't want to argue about which use case is more important, but it's
> normal for NTP servers to receive requests at much higher rates than
> that. In some countries, public servers get hundreds of thousands of
> packets per second. A server in a local network may have clients
> polling 128 times per second each.
>
> Anyway, if anyone is still interested in finding out the cause of
> the regression, there is a thing I forgot to mention for the
> reproducer using ntpperf. chronyd needs to be configured with a larger
> clientloglimit (e.g. clientloglimit 100000000), otherwise it won't be
> able to respond to the large number of clients in interleaved mode
> with a HW TX timestamp.

Yeah, I realized that myself while testing :). The default
clientloglimit is too low.

Thanks,
Kurt

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 861 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 13:57         ` Miroslav Lichvar
  2025-08-27 14:05           ` Kurt Kanzenbach
@ 2025-08-27 14:10           ` Sebastian Andrzej Siewior
  2025-08-27 14:41             ` Miroslav Lichvar
  1 sibling, 1 reply; 24+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-08-27 14:10 UTC (permalink / raw)
  To: Miroslav Lichvar
  Cc: Jacob Keller, Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev

On 2025-08-27 15:57:01 [+0200], Miroslav Lichvar wrote:
> On Tue, Aug 26, 2025 at 02:59:12PM +0200, Sebastian Andrzej Siewior wrote:
> > The benchmark is about > 1k packets/ second while in reality you have
> > less than 20 packets a second.
> 
> I don't want to argue about which use case is more important, but it's
> normal for NTP servers to receive requests at much higher rates than
> that. In some countries, public servers get hundreds of thousands of
> packets per second. A server in a local network may have clients
> polling 128 times per second each.

There might be a misunderstanding here. You can't receive 1k packets a
second and each one with a HW timestamp for PTP. This does not work.
SW timestamps more likely.

> Anyway, if anyone is still interested in finding out the cause of
> the regression, there is a thing I forgot to mention for the
> reproducer using ntpperf. chronyd needs to be configured with a larger
> clientloglimit (e.g. clientloglimit 100000000), otherwise it won't be
> able to respond to the large number of clients in interleaved mode
> with a HW TX timestamp. The chronyc serverstats report would show
> that. It should look like the outputs I posted here before.

How does this work with HW timestamps vs SW? I can't believe that 1k
packets are sent and all of them receive a HW timestamp.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 14:10           ` Sebastian Andrzej Siewior
@ 2025-08-27 14:41             ` Miroslav Lichvar
  2025-08-27 14:52               ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 24+ messages in thread
From: Miroslav Lichvar @ 2025-08-27 14:41 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Jacob Keller, Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev

On Wed, Aug 27, 2025 at 04:10:47PM +0200, Sebastian Andrzej Siewior wrote:
> On 2025-08-27 15:57:01 [+0200], Miroslav Lichvar wrote:
> > Anyway, if anyone is still interested in finding out the cause of
> > the regression, there is a thing I forgot to mention for the
> > reproducer using ntpperf. chronyd needs to be configured with a larger
> > clientloglimit (e.g. clientloglimit 100000000), otherwise it won't be
> > able to respond to the large number of clients in interleaved mode
> > with a HW TX timestamp. The chronyc serverstats report would show
> > that. It should look like the outputs I posted here before.
> 
> How does this work with HW timestamps vs SW? I can't believe that 1k
> packets are sent and all of them receive a HW timestamp.

From the results I posted before, the machine (CPU Intel E3-1220) with
the I350 NIC can provide about 59k HW TX timestamps per second without
any of the patches, about 41k with the original patch, and about 52k
with this patch and pinned aux worker.

The difference between ptp4l and chronyd is that chronyd is
asynchronous. It saves the timestamps as they come from the error
queue and provides the best timestamp to the clients later in their
subsequent response (NTP interleaved mode). At higher rates it's
random, only some of the packets get a HW timestamp. But the clients
can see less accurate (SW) timestamps as an increase in the measured
delay and they can filter them out if their clock is sufficiently
stable and the interval between HW timestamps is not too long. 

-- 
Miroslav Lichvar


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 14:41             ` Miroslav Lichvar
@ 2025-08-27 14:52               ` Sebastian Andrzej Siewior
  2025-08-27 16:21                 ` Jacob Keller
  0 siblings, 1 reply; 24+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-08-27 14:52 UTC (permalink / raw)
  To: Miroslav Lichvar
  Cc: Jacob Keller, Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev

On 2025-08-27 16:41:09 [+0200], Miroslav Lichvar wrote:
> From the results I posted before, the machine (CPU Intel E3-1220) with
> the I350 NIC can provide about 59k HW TX timestamps per second without
> any of the patches, about 41k with the original patch, and about 52k
> with this patch and pinned aux worker.

I might have similar hardware with a i350 to give it a try.
The old worker approach and the pinned AUX worker are identical from
software design (or: I am not aware of any significant differences worth
to mention here). Therefore I don't understand why the one had 59k and
the other 52k.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 14:52               ` Sebastian Andrzej Siewior
@ 2025-08-27 16:21                 ` Jacob Keller
  0 siblings, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2025-08-27 16:21 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Miroslav Lichvar
  Cc: Kurt Kanzenbach, Tony Nguyen, Przemek Kitszel, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Richard Cochran, Vinicius Costa Gomes, Paul Menzel,
	Vadim Fedorenko, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 805 bytes --]



On 8/27/2025 7:52 AM, Sebastian Andrzej Siewior wrote:
> On 2025-08-27 16:41:09 [+0200], Miroslav Lichvar wrote:
>> From the results I posted before, the machine (CPU Intel E3-1220) with
>> the I350 NIC can provide about 59k HW TX timestamps per second without
>> any of the patches, about 41k with the original patch, and about 52k
>> with this patch and pinned aux worker.
> 
> I might have similar hardware with a i350 to give it a try.
> The old worker approach and the pinned AUX worker are identical from
> software design (or: I am not aware of any significant differences worth
> to mention here). Therefore I don't understand why the one had 59k and
> the other 52k.
> 
> Sebastian

Right. If we can get further reproduction of this setup that would be good.

Thanks,
Jake

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker
  2025-08-27 13:39             ` Paul Menzel
@ 2025-08-27 16:22               ` Jacob Keller
  0 siblings, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2025-08-27 16:22 UTC (permalink / raw)
  To: Paul Menzel, Kurt Kanzenbach
  Cc: Sebastian Andrzej Siewior, Tony Nguyen, Przemek Kitszel,
	Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Richard Cochran, Vinicius Costa Gomes,
	Vadim Fedorenko, Miroslav Lichvar, intel-wired-lan, netdev


[-- Attachment #1.1: Type: text/plain, Size: 3898 bytes --]



On 8/27/2025 6:39 AM, Paul Menzel wrote:
> Dear Linux folks,
> 
> 
> A very interesting issue.
> 
> Am 27.08.25 um 14:57 schrieb Kurt Kanzenbach:
>> On Tue Aug 26 2025, Jacob Keller wrote:
>>> On 8/26/2025 5:59 AM, Sebastian Andrzej Siewior wrote:
>>>> On 2025-08-25 16:28:38 [-0700], Jacob Keller wrote:
>>>>> Ya, I don't think we fully understand either. Miroslav said he tested on
>>>>> I350 which is a different MAC from the I210, so it could be something
>>>>> there. Theoretically we could handle just I210 directly in the interrupt
>>>>> and leave the other variants to the kworker.. but I don't know how much
>>>>> benefit we get from that. The data sheet for the I350 appears to have
>>>>> more or less the same logic for Tx timestamps. It is significantly
>>>>> different for Rx timestamps though.
>>>>
>>>> From logical point of view it makes sense to retrieve the HW timestamp
>>>> immediately when it becomes available and feed it to the stack. I can't
>>>> imagine how delaying it to yet another thread improves the situation.
>>>> The benchmark is about > 1k packets/ second while in reality you have
>>>> less than 20 packets a second. With multiple applications you usually
>>>> need a "second timestamp register" or you may lose packets.
>>>>
>>>> Delaying it to the AUX worker makes sense for hardware which can't fire
>>>> an interrupt and polling is the only option left. This is sane in this
>>>> case but I don't like this solution as some kind compromise for
>>>> everyone. Simply because it adds overhead and requires additional
>>>> configuration.
>>>
>>> I agree. Its just frustrating that doing so appears to cause a
>>> regression in at least one test setup on hardware which uses this method.
>>>
>>>>>> Also I couldn't really see a performance degradation with ntpperf. In my
>>>>>> tests the IRQ variant reached an equal or higher rate. But sometimes I
>>>>>> get 'Could not send requests at rate X'. No idea what that means.
>>>>>>
>>>>>> Anyway, this patch is basically a compromise. It works for Miroslav and
>>>>>> my use case.
>>>>>>
>>>>>>> This is also what the igc does and the performance improved
>>>>>>> 	afa141583d827 ("igc: Retrieve TX timestamp during interrupt handling")
>>>>>
>>>>> igc supports several hardware variations which are all a lot similar to
>>>>> i210 than i350 is to i210 in igb. I could see this working fine for i210
>>>>> if it works fine in igb.. I honestly am at a loss currently why i350 is
>>>>> much worse.
>>>>>
>>>>>>> and here it causes the opposite?
>>>>>>
>>>>>> As said above, I'm out of ideas here.
>>>>>
>>>>> Same. It may be one of those things where the effort to dig up precisely
>>>>> what has gone wrong is so large that it becomes not feasible relative to
>>>>> the gain :(
>>>>
>>>> Could we please use the direct retrieval/ submission for HW which
>>>> supports it and fallback to the AUX worker (instead of the kworker) for
>>>> HW which does not have an interrupt for it?
>>>
>>> I have no objection. Perhaps we could assume the high end of the ntpperf
>>> benchmark is not reflective of normal use case? We *are* limited to only
>>> one timestamp register, which the igb driver does protect by bitlock.
>>
>> Does that mean we're going back to v1 + the AUX worker for 82576? Let me
>> prepare v3 then.
> 
> Good question. Personally, I’d interpret Linux’ no-regression-policy 
> that, if a possible regression is known, even for a synthetic benchmark, 
> it must not be introduced unrelated how upsetting this is. So the 
> current approach needs to be taken.
> 
> 
> Kind regards,
> 
> Paul

Another option is a v3 with AUX worker for 82575 and i350, but direct
in-interrupt for i210 since so far that doesn't seem to have a regression.

Or perhaps Sebastian can figure out something further about the
reproduction.

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2025-08-27 16:22 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-22  7:28 [PATCH iwl-next v2] igb: Convert Tx timestamping to PTP aux worker Kurt Kanzenbach
2025-08-22  7:52 ` Sebastian Andrzej Siewior
2025-08-22 23:55   ` Jacob Keller
2025-08-23  7:29   ` Kurt Kanzenbach
2025-08-25  7:53     ` Miroslav Lichvar
2025-08-25  9:22       ` Kurt Kanzenbach
2025-08-25 23:23         ` [Intel-wired-lan] " Jacob Keller
2025-08-25 23:28     ` Jacob Keller
2025-08-26 12:59       ` Sebastian Andrzej Siewior
2025-08-26 18:23         ` Jacob Keller
2025-08-27 12:57           ` Kurt Kanzenbach
2025-08-27 13:39             ` Paul Menzel
2025-08-27 16:22               ` Jacob Keller
2025-08-27 13:57         ` Miroslav Lichvar
2025-08-27 14:05           ` Kurt Kanzenbach
2025-08-27 14:10           ` Sebastian Andrzej Siewior
2025-08-27 14:41             ` Miroslav Lichvar
2025-08-27 14:52               ` Sebastian Andrzej Siewior
2025-08-27 16:21                 ` Jacob Keller
2025-08-22 16:27 ` Vadim Fedorenko
2025-08-23  7:44   ` Kurt Kanzenbach
2025-08-25 13:18     ` Vadim Fedorenko
2025-08-25 23:24       ` Jacob Keller
2025-08-25 10:58 ` [Intel-wired-lan] " Loktionov, Aleksandr

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).