* [PATCH iwl-net v3 1/6] igc: Ensure the PTM cycle is reliably triggered
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts Christopher S M Hall
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen,
Michal Swiatkowski, Mor Bar-Gabay, Avigail Dahan
Writing to clear the PTM status 'valid' bit while the PTM cycle is
triggered results in unreliable PTM operation. To fix this, clear the
PTM 'trigger' and status after each PTM transaction.
The issue can be reproduced with the following:
$ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
quickly reproduce the issue.
PHC2SYS exits with:
"ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
fails
Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()")
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc_defines.h | 1 +
drivers/net/ethernet/intel/igc/igc_ptp.c | 70 ++++++++++++--------
2 files changed, 42 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 8e449904aa7d..2ff292f5f63b 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -593,6 +593,7 @@
#define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */
#define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */
#define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */
+#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */
/* PCIe PTM Cycle Control */
#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 946edbad4302..c640e346342b 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -974,13 +974,40 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
}
}
+static void igc_ptm_trigger(struct igc_hw *hw)
+{
+ u32 ctrl;
+
+ /* To "manually" start the PTM cycle we need to set the
+ * trigger (TRIG) bit
+ */
+ ctrl = rd32(IGC_PTM_CTRL);
+ ctrl |= IGC_PTM_CTRL_TRIG;
+ wr32(IGC_PTM_CTRL, ctrl);
+ /* Perform flush after write to CTRL register otherwise
+ * transaction may not start
+ */
+ wrfl();
+}
+
+static void igc_ptm_reset(struct igc_hw *hw)
+{
+ u32 ctrl;
+
+ ctrl = rd32(IGC_PTM_CTRL);
+ ctrl &= ~IGC_PTM_CTRL_TRIG;
+ wr32(IGC_PTM_CTRL, ctrl);
+ /* Write to clear all status */
+ wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL);
+}
+
static int igc_phc_get_syncdevicetime(ktime_t *device,
struct system_counterval_t *system,
void *ctx)
{
- u32 stat, t2_curr_h, t2_curr_l, ctrl;
struct igc_adapter *adapter = ctx;
struct igc_hw *hw = &adapter->hw;
+ u32 stat, t2_curr_h, t2_curr_l;
int err, count = 100;
ktime_t t1, t2_curr;
@@ -994,25 +1021,13 @@ static int igc_phc_get_syncdevicetime(ktime_t *device,
* are transitory. Repeating the process returns valid
* data eventually.
*/
-
- /* To "manually" start the PTM cycle we need to clear and
- * then set again the TRIG bit.
- */
- ctrl = rd32(IGC_PTM_CTRL);
- ctrl &= ~IGC_PTM_CTRL_TRIG;
- wr32(IGC_PTM_CTRL, ctrl);
- ctrl |= IGC_PTM_CTRL_TRIG;
- wr32(IGC_PTM_CTRL, ctrl);
-
- /* The cycle only starts "for real" when software notifies
- * that it has read the registers, this is done by setting
- * VALID bit.
- */
- wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+ igc_ptm_trigger(hw);
err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
stat, IGC_PTM_STAT_SLEEP,
IGC_PTM_STAT_TIMEOUT);
+ igc_ptm_reset(hw);
+
if (err < 0) {
netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
return err;
@@ -1021,15 +1036,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device,
if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID)
break;
- if (stat & ~IGC_PTM_STAT_VALID) {
- /* An error occurred, log it. */
- igc_ptm_log_error(adapter, stat);
- /* The STAT register is write-1-to-clear (W1C),
- * so write the previous error status to clear it.
- */
- wr32(IGC_PTM_STAT, stat);
- continue;
- }
+ igc_ptm_log_error(adapter, stat);
} while (--count);
if (!count) {
@@ -1255,7 +1262,7 @@ void igc_ptp_stop(struct igc_adapter *adapter)
void igc_ptp_reset(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
- u32 cycle_ctrl, ctrl;
+ u32 cycle_ctrl, ctrl, stat;
unsigned long flags;
u32 timadj;
@@ -1290,14 +1297,19 @@ void igc_ptp_reset(struct igc_adapter *adapter)
ctrl = IGC_PTM_CTRL_EN |
IGC_PTM_CTRL_START_NOW |
IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) |
- IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) |
- IGC_PTM_CTRL_TRIG;
+ IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT);
wr32(IGC_PTM_CTRL, ctrl);
/* Force the first cycle to run. */
- wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+ igc_ptm_trigger(hw);
+
+ if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat,
+ stat, IGC_PTM_STAT_SLEEP,
+ IGC_PTM_STAT_TIMEOUT))
+ netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
+ igc_ptm_reset(hw);
break;
default:
/* No work to do. */
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 1/6] igc: Ensure the PTM cycle is reliably triggered Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-06 23:13 ` [Intel-wired-lan] " Paul Menzel
2024-11-06 18:47 ` [PATCH iwl-net v3 3/6] igc: Move ktime snapshot into PTM retry loop Christopher S M Hall
` (5 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen,
Michal Swiatkowski, Mor Bar-Gabay, Avigail Dahan
Lengthen the hardware retry timer to four microseconds.
The i225/i226 hardware retries if it receives an inappropriate response
from the upstream device. If the device retries too quickly, the root
port does not respond.
The issue can be reproduced with the following:
$ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
quickly reproduce the issue.
PHC2SYS exits with:
"ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
fails
Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us")
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc_defines.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 2ff292f5f63b..84521a4c35b4 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -574,7 +574,7 @@
#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2)
#define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
-#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */
+#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */
#define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */
#define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
2024-11-06 18:47 ` [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts Christopher S M Hall
@ 2024-11-06 23:13 ` Paul Menzel
2024-11-06 23:53 ` Hall, Christopher S
0 siblings, 1 reply; 13+ messages in thread
From: Paul Menzel @ 2024-11-06 23:13 UTC (permalink / raw)
To: Christopher S M Hall
Cc: intel-wired-lan, david.zage, vinicius.gomes, netdev,
rodrigo.cadore, vinschen, Michal Swiatkowski, Mor Bar-Gabay,
Avigail Dahan
Dear Christopher,
Thank you for the patch.
I’d use the more specific summary/title below:
igc: Lengthen hardware retry time to 4 μs to prevent timeouts
Am 06.11.24 um 19:47 schrieb Christopher S M Hall:
> Lengthen the hardware retry timer to four microseconds.
>
> The i225/i226 hardware retries if it receives an inappropriate response
> from the upstream device. If the device retries too quickly, the root
> port does not respond.
Any idea why? Is it documented somewhere?
> The issue can be reproduced with the following:
>
> $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
>
> Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
> quickly reproduce the issue.
>
> PHC2SYS exits with:
>
> "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
> fails
Why four microseconds, and not some other value?
> Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us")
> Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
> Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
> Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
> Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
> ---
> drivers/net/ethernet/intel/igc/igc_defines.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
> index 2ff292f5f63b..84521a4c35b4 100644
> --- a/drivers/net/ethernet/intel/igc/igc_defines.h
> +++ b/drivers/net/ethernet/intel/igc/igc_defines.h
> @@ -574,7 +574,7 @@
> #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2)
> #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
>
> -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */
> +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */
> #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */
> #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */
Kind regards,
Paul
^ permalink raw reply [flat|nested] 13+ messages in thread* RE: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
2024-11-06 23:13 ` [Intel-wired-lan] " Paul Menzel
@ 2024-11-06 23:53 ` Hall, Christopher S
2024-11-07 5:48 ` Paul Menzel
0 siblings, 1 reply; 13+ messages in thread
From: Hall, Christopher S @ 2024-11-06 23:53 UTC (permalink / raw)
To: Paul Menzel
Cc: intel-wired-lan@lists.osuosl.org, Zage, David, Gomes, Vinicius,
netdev@vger.kernel.org, Cadore Cataldo, Rodrigo,
Vinschen, Corinna, Michal Swiatkowski, Bar Gabay, MorX,
Dahan, AvigailX
Hi Paul,
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Wednesday, November 06, 2024 3:14 PM
> Subject: Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the
> hardware retry time to prevent timeouts
>
> Dear Christopher,
>
>
> Thank you for the patch.
>
> I’d use the more specific summary/title below:
Will do.
>
> igc: Lengthen hardware retry time to 4 μs to prevent timeouts
>
> Am 06.11.24 um 19:47 schrieb Christopher S M Hall:
> > Lengthen the hardware retry timer to four microseconds.
> >
> > The i225/i226 hardware retries if it receives an inappropriate response
> > from the upstream device. If the device retries too quickly, the root
> > port does not respond.
>
> Any idea why? Is it documented somewhere?
I do not. Theoretically, 1 us should work, but it does not. It could be a root
port problem or an issue with i225/i226 NIC. I am not able to directly observe
the state of either. 4 us has worked in all my testing I am comfortable with
that value. 2 us also works, but given the limited hardware at my disposal
I doubled the value to 4 us to be safe. PTM is not time critical. Typically,
software initiates a transaction between 8 and 32 times per second. There
is no performance impact for PTM or any other function of the card. The
timeout occurs rarely, but if the retry time is too short the PTM state
machine does not recover.
> > The issue can be reproduced with the following:
> >
> > $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
> >
> > Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
> > quickly reproduce the issue.
> >
> > PHC2SYS exits with:
> >
> > "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM
> transaction
> > fails
>
> Why four microseconds, and not some other value?
See above.
> > Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us")
> >
> > -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle
> interval */
> > +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle
> interval */
> Kind regards,
>
> Paul
Thanks,
Chris
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
2024-11-06 23:53 ` Hall, Christopher S
@ 2024-11-07 5:48 ` Paul Menzel
2024-11-07 5:56 ` Paul Menzel
0 siblings, 1 reply; 13+ messages in thread
From: Paul Menzel @ 2024-11-07 5:48 UTC (permalink / raw)
To: Christopher S Hall
Cc: intel-wired-lan, David Zage, Vinicius Gomes, netdev,
Cadore Cataldo, Rodrigo, Corinna Vinschen, Michal Swiatkowski,
Mor Bar Gabay, Avigail Dahan, Sasha Neftin
[Cc: +Sasha]
Dear Christopher,
Am 07.11.24 um 00:53 schrieb Hall, Christopher S:
>> From: Paul Menzel <pmenzel@molgen.mpg.de>
>> Sent: Wednesday, November 06, 2024 3:14 PM
>
>> Subject: Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the
>> hardware retry time to prevent timeouts
>> I’d use the more specific summary/title below:
>
> Will do.
>
>> igc: Lengthen hardware retry time to 4 μs to prevent timeouts
>>
>> Am 06.11.24 um 19:47 schrieb Christopher S M Hall:
>>> Lengthen the hardware retry timer to four microseconds.
>>>
>>> The i225/i226 hardware retries if it receives an inappropriate response
>>> from the upstream device. If the device retries too quickly, the root
>>> port does not respond.
>>
>> Any idea why? Is it documented somewhere?
>
> I do not. Theoretically, 1 us should work, but it does not. It could be a root
> port problem or an issue with i225/i226 NIC. I am not able to directly observe
> the state of either. 4 us has worked in all my testing I am comfortable with
> that value. 2 us also works, but given the limited hardware at my disposal
> I doubled the value to 4 us to be safe. PTM is not time critical. Typically,
> software initiates a transaction between 8 and 32 times per second. There
> is no performance impact for PTM or any other function of the card. The
> timeout occurs rarely, but if the retry time is too short the PTM state
> machine does not recover.
Thank you for clearing this up. If it’s not time critical, why not
revert the original patch and go back to 10 μs.
The referenced commit 6b8aa753a9f9 (igc: Decrease PTM short interval
from 10 us to 1 us) also says, that 1 μs was suggested by the hardware
team. Were you able to talk to them?
>>> The issue can be reproduced with the following:
>>>
>>> $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
>>>
>>> Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
>>> quickly reproduce the issue.
>>>
>>> PHC2SYS exits with:
>>>
>>> "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
>>> fails
>>
>> Why four microseconds, and not some other value?
>
> See above.
It’d be great, if you extended the commit message.
>>> Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us")
>>>
>>> -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */
>>> +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */
Maybe also add a comment, that 1 μs should work, but does not.
Kind regards,
Paul
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
2024-11-07 5:48 ` Paul Menzel
@ 2024-11-07 5:56 ` Paul Menzel
0 siblings, 0 replies; 13+ messages in thread
From: Paul Menzel @ 2024-11-07 5:56 UTC (permalink / raw)
To: Christopher S Hall
Cc: intel-wired-lan, David Zage, Vinicius Gomes, netdev,
Cadore Cataldo, Rodrigo, Corinna Vinschen, Michal Swiatkowski,
Mor Bar Gabay, Avigail Dahan
[Cc: -Sasha, 550 #5.1.0 Address rejected.]
Am 07.11.24 um 06:48 schrieb Paul Menzel:
> [Cc: +Sasha]
>
> Dear Christopher,
>
>
> Am 07.11.24 um 00:53 schrieb Hall, Christopher S:
>
>>> From: Paul Menzel <pmenzel@molgen.mpg.de>
>>> Sent: Wednesday, November 06, 2024 3:14 PM
>>
>>> Subject: Re: [Intel-wired-lan] [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts
>
>>> I’d use the more specific summary/title below:
>>
>> Will do.
>>
>>> igc: Lengthen hardware retry time to 4 μs to prevent timeouts
>>>
>>> Am 06.11.24 um 19:47 schrieb Christopher S M Hall:
>>>> Lengthen the hardware retry timer to four microseconds.
>>>>
>>>> The i225/i226 hardware retries if it receives an inappropriate response
>>>> from the upstream device. If the device retries too quickly, the root
>>>> port does not respond.
>>>
>>> Any idea why? Is it documented somewhere?
>>
>> I do not. Theoretically, 1 us should work, but it does not. It could be a root
>> port problem or an issue with i225/i226 NIC. I am not able to directly observe
>> the state of either. 4 us has worked in all my testing I am comfortable with
>> that value. 2 us also works, but given the limited hardware at my disposal
>> I doubled the value to 4 us to be safe. PTM is not time critical. Typically,
>> software initiates a transaction between 8 and 32 times per second. There
>> is no performance impact for PTM or any other function of the card. The
>> timeout occurs rarely, but if the retry time is too short the PTM state
>> machine does not recover.
>
> Thank you for clearing this up. If it’s not time critical, why not
> revert the original patch and go back to 10 μs.
>
> The referenced commit 6b8aa753a9f9 (igc: Decrease PTM short interval
> from 10 us to 1 us) also says, that 1 μs was suggested by the hardware
> team. Were you able to talk to them?
>
>>>> The issue can be reproduced with the following:
>>>>
>>>> $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
>>>>
>>>> Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
>>>> quickly reproduce the issue.
>>>>
>>>> PHC2SYS exits with:
>>>>
>>>> "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
>>>> fails
>>>
>>> Why four microseconds, and not some other value?
>>
>> See above.
>
> It’d be great, if you extended the commit message.
>
>>>> Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to
>>>> 1 us")
>>>>
>>>> -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */
>>>> +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */
>
> Maybe also add a comment, that 1 μs should work, but does not.
>
>
> Kind regards,
>
> Paul
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH iwl-net v3 3/6] igc: Move ktime snapshot into PTM retry loop
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 1/6] igc: Ensure the PTM cycle is reliably triggered Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 2/6] igc: Lengthen the hardware retry time to prevent timeouts Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 4/6] igc: Handle the IGC_PTP_ENABLED flag correctly Christopher S M Hall
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen,
Michal Swiatkowski, Mor Bar-Gabay, Avigail Dahan
Move ktime_get_snapshot() into the loop. If a retry does occur, a more
recent snapshot will result in a more accurate cross-timestamp.
Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()")
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Mor Bar-Gabay <morx.bar.gabay@intel.com>
Tested-by: Avigail Dahan <avigailx.dahan@intel.com>
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc_ptp.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index c640e346342b..516abe7405de 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -1011,16 +1011,16 @@ static int igc_phc_get_syncdevicetime(ktime_t *device,
int err, count = 100;
ktime_t t1, t2_curr;
- /* Get a snapshot of system clocks to use as historic value. */
- ktime_get_snapshot(&adapter->snapshot);
-
+ /* Doing this in a loop because in the event of a
+ * badly timed (ha!) system clock adjustment, we may
+ * get PTM errors from the PCI root, but these errors
+ * are transitory. Repeating the process returns valid
+ * data eventually.
+ */
do {
- /* Doing this in a loop because in the event of a
- * badly timed (ha!) system clock adjustment, we may
- * get PTM errors from the PCI root, but these errors
- * are transitory. Repeating the process returns valid
- * data eventually.
- */
+ /* Get a snapshot of system clocks to use as historic value. */
+ ktime_get_snapshot(&adapter->snapshot);
+
igc_ptm_trigger(hw);
err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH iwl-net v3 4/6] igc: Handle the IGC_PTP_ENABLED flag correctly
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
` (2 preceding siblings ...)
2024-11-06 18:47 ` [PATCH iwl-net v3 3/6] igc: Move ktime snapshot into PTM retry loop Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 5/6] igc: Cleanup PTP module if probe fails Christopher S M Hall
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen
All functions in igc_ptp.c called from igc_main.c should check the
IGC_PTP_ENABLED flag. Adding check for this flag to stop and reset
functions.
Fixes: 5f2958052c58 ("igc: Add basic skeleton for PTP")
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc_ptp.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 516abe7405de..343205bffc35 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -1244,8 +1244,12 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
**/
void igc_ptp_stop(struct igc_adapter *adapter)
{
+ if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
+ return;
+
igc_ptp_suspend(adapter);
+ adapter->ptp_flags &= ~IGC_PTP_ENABLED;
if (adapter->ptp_clock) {
ptp_clock_unregister(adapter->ptp_clock);
netdev_info(adapter->netdev, "PHC removed\n");
@@ -1266,6 +1270,9 @@ void igc_ptp_reset(struct igc_adapter *adapter)
unsigned long flags;
u32 timadj;
+ if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
+ return;
+
/* reset the tstamp_config */
igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH iwl-net v3 5/6] igc: Cleanup PTP module if probe fails
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
` (3 preceding siblings ...)
2024-11-06 18:47 ` [PATCH iwl-net v3 4/6] igc: Handle the IGC_PTP_ENABLED flag correctly Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-06 18:47 ` [PATCH iwl-net v3 6/6] igc: Add lock preventing multiple simultaneous PTM transactions Christopher S M Hall
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen
Make sure that the PTP module is cleaned up if the igc_probe() fails by
calling igc_ptp_stop() on exit.
Fixes: d89f88419f99 ("igc: Add skeletal frame for Intel(R) 2.5G Ethernet Controller support")
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc_main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 6e70bca15db1..cc89b72c85af 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7169,6 +7169,7 @@ static int igc_probe(struct pci_dev *pdev,
err_register:
igc_release_hw_control(adapter);
+ igc_ptp_stop(adapter);
err_eeprom:
if (!igc_check_reset_block(hw))
igc_reset_phy(hw);
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH iwl-net v3 6/6] igc: Add lock preventing multiple simultaneous PTM transactions
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
` (4 preceding siblings ...)
2024-11-06 18:47 ` [PATCH iwl-net v3 5/6] igc: Cleanup PTP module if probe fails Christopher S M Hall
@ 2024-11-06 18:47 ` Christopher S M Hall
2024-11-12 15:39 ` [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Corinna Vinschen
2025-01-08 11:10 ` Corinna Vinschen
7 siblings, 0 replies; 13+ messages in thread
From: Christopher S M Hall @ 2024-11-06 18:47 UTC (permalink / raw)
To: intel-wired-lan
Cc: david.zage, vinicius.gomes, netdev, rodrigo.cadore, vinschen
Add a mutex around the PTM transaction to prevent multiple transactors
Multiple processes try to initiate a PTM transaction, one or all may
fail. This can be reproduced by running two instances of the
following:
$ sudo phc2sys -O 0 -i tsn0 -m
PHC2SYS exits with:
"ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
fails
Note: Normally two instance of PHC2SYS will not run, but one process
should not break another.
Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()")
Signed-off-by: Christopher S M Hall <christopher.s.hall@intel.com>
---
drivers/net/ethernet/intel/igc/igc.h | 1 +
drivers/net/ethernet/intel/igc/igc_ptp.c | 20 ++++++++++++++++++--
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index eac0f966e0e4..323db1e2be38 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -319,6 +319,7 @@ struct igc_adapter {
struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
ktime_t ptp_reset_start; /* Reset time in clock mono */
struct system_time_snapshot snapshot;
+ struct mutex ptm_lock; /* Only allow one PTM transaction at a time */
char fw_version[32];
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 343205bffc35..612ed26a29c5 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -974,6 +974,7 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
}
}
+/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */
static void igc_ptm_trigger(struct igc_hw *hw)
{
u32 ctrl;
@@ -990,6 +991,7 @@ static void igc_ptm_trigger(struct igc_hw *hw)
wrfl();
}
+/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */
static void igc_ptm_reset(struct igc_hw *hw)
{
u32 ctrl;
@@ -1068,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
{
struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
ptp_caps);
+ int ret;
- return get_device_system_crosststamp(igc_phc_get_syncdevicetime,
- adapter, &adapter->snapshot, cts);
+ /* This blocks until any in progress PTM transactions complete */
+ mutex_lock(&adapter->ptm_lock);
+
+ ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime,
+ adapter, &adapter->snapshot, cts);
+ mutex_unlock(&adapter->ptm_lock);
+
+ return ret;
}
static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp,
@@ -1169,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
spin_lock_init(&adapter->ptp_tx_lock);
spin_lock_init(&adapter->free_timer_lock);
spin_lock_init(&adapter->tmreg_lock);
+ mutex_init(&adapter->ptm_lock);
adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
@@ -1181,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
if (IS_ERR(adapter->ptp_clock)) {
adapter->ptp_clock = NULL;
netdev_err(netdev, "ptp_clock_register failed\n");
+ mutex_destroy(&adapter->ptm_lock);
} else if (adapter->ptp_clock) {
netdev_info(netdev, "PHC added\n");
adapter->ptp_flags |= IGC_PTP_ENABLED;
@@ -1210,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter)
struct igc_hw *hw = &adapter->hw;
u32 ctrl;
+ mutex_lock(&adapter->ptm_lock);
ctrl = rd32(IGC_PTM_CTRL);
ctrl &= ~IGC_PTM_CTRL_EN;
wr32(IGC_PTM_CTRL, ctrl);
+ mutex_unlock(&adapter->ptm_lock);
}
/**
@@ -1255,6 +1268,7 @@ void igc_ptp_stop(struct igc_adapter *adapter)
netdev_info(adapter->netdev, "PHC removed\n");
adapter->ptp_flags &= ~IGC_PTP_ENABLED;
}
+ mutex_destroy(&adapter->ptm_lock);
}
/**
@@ -1294,6 +1308,7 @@ void igc_ptp_reset(struct igc_adapter *adapter)
if (!igc_is_crosststamp_supported(adapter))
break;
+ mutex_lock(&adapter->ptm_lock);
wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT);
wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT);
@@ -1317,6 +1332,7 @@ void igc_ptp_reset(struct igc_adapter *adapter)
netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
igc_ptm_reset(hw);
+ mutex_unlock(&adapter->ptm_lock);
break;
default:
/* No work to do. */
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH iwl-net v3 0/6] igc: Fix PTM timeout
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
` (5 preceding siblings ...)
2024-11-06 18:47 ` [PATCH iwl-net v3 6/6] igc: Add lock preventing multiple simultaneous PTM transactions Christopher S M Hall
@ 2024-11-12 15:39 ` Corinna Vinschen
2025-01-08 11:10 ` Corinna Vinschen
7 siblings, 0 replies; 13+ messages in thread
From: Corinna Vinschen @ 2024-11-12 15:39 UTC (permalink / raw)
To: Christopher S M Hall
Cc: intel-wired-lan, david.zage, vinicius.gomes, netdev,
rodrigo.cadore
Hi Chris,
On Nov 6 18:47, Christopher S M Hall wrote:
> There have been sporadic reports of PTM timeouts using i225/i226 devices
>
> These timeouts have been root caused to:
>
> 1) Manipulating the PTM status register while PTM is enabled and triggered
> 2) The hardware retrying too quickly when an inappropriate response is
> received from the upstream device
>
> The issue can be reproduced with the following:
>
> $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
>
> Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
> quickly reproduce the issue.
>
> PHC2SYS exits with:
>
> "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
> fails
>
> Additional problem description tested by:
> Corinna Vinschen <vinschen@redhat.com>
>
> This patch also fixes a hang in igc_probe() when loading the igc
> driver in the kdump kernel on systems supporting PTM.
>
> The igc driver running in the base kernel enables PTM trigger in
> igc_probe(). Therefore the driver is always in PTM trigger mode,
> except in brief periods when manually triggering a PTM cycle.
>
> When a crash occurs, the NIC is reset while PTM trigger is enabled.
> Due to a hardware problem, the NIC is subsequently in a bad busmaster
> state and doesn't handle register reads/writes. When running
> igc_probe() in the kdump kernel, the first register access to a NIC
> register hangs driver probing and ultimately breaks kdump.
>
> With this patch, igc has PTM trigger disabled most of the time,
> and the trigger is only enabled for very brief (10 - 100 us) periods
> when manually triggering a PTM cycle. Chances that a crash occurs
> during a PTM trigger are not zero, but extremly reduced.
The patchset looks good to me, but I don't see that this description
will make it into the commit message of patch 1. Was that intended?
Other than that...
Reviewed-by: Corinna Vinschen <vinschen@redhat.com>
Thanks,
Corinna
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH iwl-net v3 0/6] igc: Fix PTM timeout
2024-11-06 18:47 [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Christopher S M Hall
` (6 preceding siblings ...)
2024-11-12 15:39 ` [PATCH iwl-net v3 0/6] igc: Fix PTM timeout Corinna Vinschen
@ 2025-01-08 11:10 ` Corinna Vinschen
7 siblings, 0 replies; 13+ messages in thread
From: Corinna Vinschen @ 2025-01-08 11:10 UTC (permalink / raw)
To: Christopher S M Hall
Cc: intel-wired-lan, david.zage, vinicius.gomes, netdev,
rodrigo.cadore
Hi Christopher,
is there any new development in terms of this issue?
Thanks,
Corinna
On Nov 6 18:47, Christopher S M Hall wrote:
> There have been sporadic reports of PTM timeouts using i225/i226 devices
>
> These timeouts have been root caused to:
>
> 1) Manipulating the PTM status register while PTM is enabled and triggered
> 2) The hardware retrying too quickly when an inappropriate response is
> received from the upstream device
>
> The issue can be reproduced with the following:
>
> $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m
>
> Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
> quickly reproduce the issue.
>
> PHC2SYS exits with:
>
> "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
> fails
>
> Additional problem description tested by:
> Corinna Vinschen <vinschen@redhat.com>
>
> This patch also fixes a hang in igc_probe() when loading the igc
> driver in the kdump kernel on systems supporting PTM.
>
> The igc driver running in the base kernel enables PTM trigger in
> igc_probe(). Therefore the driver is always in PTM trigger mode,
> except in brief periods when manually triggering a PTM cycle.
>
> When a crash occurs, the NIC is reset while PTM trigger is enabled.
> Due to a hardware problem, the NIC is subsequently in a bad busmaster
> state and doesn't handle register reads/writes. When running
> igc_probe() in the kdump kernel, the first register access to a NIC
> register hangs driver probing and ultimately breaks kdump.
>
> With this patch, igc has PTM trigger disabled most of the time,
> and the trigger is only enabled for very brief (10 - 100 us) periods
> when manually triggering a PTM cycle. Chances that a crash occurs
> during a PTM trigger are not zero, but extremly reduced.
>
>
> Changelog:
>
> v1 -> v2: -Removed patch modifying PTM retry loop count
> -Moved PTM mutex initialization from igc_reset() to igc_ptp_init()
> called once in igc_probe()
> v2 -> v3: -Added mutex_destroy() to clean up PTM lock
> -Added missing checks for PTP enabled flag called from igc_main.c
> -Cleanup PTP module if probe fails
> -Wrap all access to PTM registers with PTM lock/unlock
>
> Christopher S M Hall (6):
> igc: Ensure the PTM cycle is reliably triggered
> igc: Lengthen the hardware retry time to prevent timeouts
> igc: Move ktime snapshot into PTM retry loop
> igc: Handle the IGC_PTP_ENABLED flag correctly
> igc: Cleanup PTP module if probe fails
> igc: Add lock preventing multiple simultaneous PTM transactions
>
> drivers/net/ethernet/intel/igc/igc.h | 1 +
> drivers/net/ethernet/intel/igc/igc_defines.h | 3 +-
> drivers/net/ethernet/intel/igc/igc_main.c | 1 +
> drivers/net/ethernet/intel/igc/igc_ptp.c | 113 ++++++++++++-------
> 4 files changed, 78 insertions(+), 40 deletions(-)
>
> --
> 2.34.1
^ permalink raw reply [flat|nested] 13+ messages in thread