* [Intel-wired-lan] [net v2 1/5] igb: introduce ptp_flags variable and use it to replace IGB_FLAG_PTP
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
@ 2016-05-11 23:18 ` Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 2/5] igb: introduce IGB_PTP_OVERFLOW_CHECK flag Jacob Keller
` (4 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-05-11 23:18 UTC (permalink / raw)
To: intel-wired-lan
Upcoming patches will introduce new PTP specific flags. To avoid
cluttering the normal flags variable, introduce PTP specific "ptp_flags"
variable for this purpose, and move IGB_FLAG_PTP to become
IGB_PTP_ENABLED.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
drivers/net/ethernet/intel/igb/igb.h | 5 ++++-
drivers/net/ethernet/intel/igb/igb_ptp.c | 8 ++++----
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index b9609afa5ca3..1e18a9eb16e0 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -445,6 +445,7 @@ struct igb_adapter {
unsigned long ptp_tx_start;
unsigned long last_rx_ptp_check;
unsigned long last_rx_timestamp;
+ unsigned int ptp_flags;
spinlock_t tmreg_lock;
struct cyclecounter cc;
struct timecounter tc;
@@ -474,12 +475,14 @@ struct igb_adapter {
u16 eee_advert;
};
+/* flags controlling PTP/1588 function */
+#define IGB_PTP_ENABLED BIT(0)
+
#define IGB_FLAG_HAS_MSI BIT(0)
#define IGB_FLAG_DCA_ENABLED BIT(1)
#define IGB_FLAG_QUAD_PORT_A BIT(2)
#define IGB_FLAG_QUEUE_PAIRS BIT(3)
#define IGB_FLAG_DMAC BIT(4)
-#define IGB_FLAG_PTP BIT(5)
#define IGB_FLAG_RSS_FIELD_IPV4_UDP BIT(6)
#define IGB_FLAG_RSS_FIELD_IPV6_UDP BIT(7)
#define IGB_FLAG_WOL_SUPPORTED BIT(8)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index f097c5a8ab93..88e3b40415a1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -684,7 +684,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter)
u32 tsyncrxctl = rd32(E1000_TSYNCRXCTL);
unsigned long rx_event;
- if (hw->mac.type != e1000_82576)
+ if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
return;
/* If we don't have a valid timestamp in the registers, just update the
@@ -1156,7 +1156,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
} else {
dev_info(&adapter->pdev->dev, "added PHC on %s\n",
adapter->netdev->name);
- adapter->flags |= IGB_FLAG_PTP;
+ adapter->ptp_flags |= IGB_PTP_ENABLED;
}
}
@@ -1194,7 +1194,7 @@ void igb_ptp_stop(struct igb_adapter *adapter)
ptp_clock_unregister(adapter->ptp_clock);
dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
adapter->netdev->name);
- adapter->flags &= ~IGB_FLAG_PTP;
+ adapter->ptp_flags &= ~IGB_PTP_ENABLED;
}
}
@@ -1209,7 +1209,7 @@ void igb_ptp_reset(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
unsigned long flags;
- if (!(adapter->flags & IGB_FLAG_PTP))
+ if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
return;
/* reset the tstamp_config */
--
2.8.1.493.g7b22612
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 2/5] igb: introduce IGB_PTP_OVERFLOW_CHECK flag
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 1/5] igb: introduce ptp_flags variable and use it to replace IGB_FLAG_PTP Jacob Keller
@ 2016-05-11 23:18 ` Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 3/5] igb: introduce igb_ptp_resume function Jacob Keller
` (3 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-05-11 23:18 UTC (permalink / raw)
To: intel-wired-lan
Don't continue to use complex MAC type checks for handling various cases
where we have overflow check code. Make this code more obvious by
introducing a flag which is enabled for hardware that needs these
checks.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
drivers/net/ethernet/intel/igb/igb.h | 1 +
drivers/net/ethernet/intel/igb/igb_ptp.c | 21 ++++++++-------------
2 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1e18a9eb16e0..38daaaba0cdb 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -477,6 +477,7 @@ struct igb_adapter {
/* flags controlling PTP/1588 function */
#define IGB_PTP_ENABLED BIT(0)
+#define IGB_PTP_OVERFLOW_CHECK BIT(1)
#define IGB_FLAG_HAS_MSI BIT(0)
#define IGB_FLAG_DCA_ENABLED BIT(1)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 88e3b40415a1..e2c494ad1be0 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1066,6 +1066,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
/* Dial the nominal frequency. */
wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+ adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
break;
case e1000_82580:
case e1000_i354:
@@ -1086,6 +1087,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->cc.shift = 0;
/* Enable the timer functions by clearing bit 31. */
wr32(E1000_TSAUXC, 0x0);
+ adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
break;
case e1000_i210:
case e1000_i211:
@@ -1131,7 +1133,9 @@ void igb_ptp_init(struct igb_adapter *adapter)
} else {
timecounter_init(&adapter->tc, &adapter->cc,
ktime_to_ns(ktime_get_real()));
+ }
+ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) {
INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
igb_ptp_overflow_check);
@@ -1168,20 +1172,11 @@ void igb_ptp_init(struct igb_adapter *adapter)
**/
void igb_ptp_stop(struct igb_adapter *adapter)
{
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- case e1000_82580:
- case e1000_i354:
- case e1000_i350:
- cancel_delayed_work_sync(&adapter->ptp_overflow_work);
- break;
- case e1000_i210:
- case e1000_i211:
- /* No delayed work to cancel. */
- break;
- default:
+ if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
return;
- }
+
+ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+ cancel_delayed_work_sync(&adapter->ptp_overflow_work);
cancel_work_sync(&adapter->ptp_tx_work);
if (adapter->ptp_tx_skb) {
--
2.8.1.493.g7b22612
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 3/5] igb: introduce igb_ptp_resume function
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 1/5] igb: introduce ptp_flags variable and use it to replace IGB_FLAG_PTP Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 2/5] igb: introduce IGB_PTP_OVERFLOW_CHECK flag Jacob Keller
@ 2016-05-11 23:18 ` Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 4/5] igb: implement igb_ptp_suspend Jacob Keller
` (2 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-05-11 23:18 UTC (permalink / raw)
To: intel-wired-lan
Modify igb_ptp_init to take advantage of igb_ptp_resume, and remove
duplicated work that was occurring in both igb_ptp_reset and
igb_ptp_init.
In total, resetting the TSAUXC register, and resetting the system time
both happen in igb_ptp_reset already, which the new igb_ptp_resume
calls. igb_ptp_resume also takes care of starting the delayed work item
for overflow checks, as well.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
drivers/net/ethernet/intel/igb/igb.h | 1 +
drivers/net/ethernet/intel/igb/igb_ptp.c | 57 ++++++++++++++++----------------
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 38daaaba0cdb..fcba5ebba5ab 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -550,6 +550,7 @@ void igb_set_fw_version(struct igb_adapter *);
void igb_ptp_init(struct igb_adapter *adapter);
void igb_ptp_stop(struct igb_adapter *adapter);
void igb_ptp_reset(struct igb_adapter *adapter);
+void igb_ptp_resume(struct igb_adapter *adapter);
void igb_ptp_rx_hang(struct igb_adapter *adapter);
void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va,
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e2c494ad1be0..6c7d05d35894 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1042,6 +1042,29 @@ int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
-EFAULT : 0;
}
+/**
+ * igb_ptp_resume - Restart PTP and associated work items
+ * @adapter: Board private structure
+ *
+ * This function restarts the overflow check if required, and calls
+ * igb_ptp_reset to restore proper functionality after the machine resumes.
+ */
+void igb_ptp_resume(struct igb_adapter *adapter)
+{
+ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+ schedule_delayed_work(&adapter->ptp_overflow_work,
+ IGB_SYSTIM_OVERFLOW_PERIOD);
+
+ igb_ptp_reset(adapter);
+}
+
+/**
+ * igb_ptp_init - Initialize PTP functionality
+ * @adapter: Board private structure
+ *
+ * This function is called at device probe to initialize the PTP
+ * functionality.
+ */
void igb_ptp_init(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
@@ -1064,8 +1087,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->cc.mask = CYCLECOUNTER_MASK(64);
adapter->cc.mult = 1;
adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
- /* Dial the nominal frequency. */
- wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
break;
case e1000_82580:
@@ -1085,8 +1106,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->cc.mask = CYCLECOUNTER_MASK(IGB_NBITS_82580);
adapter->cc.mult = 1;
adapter->cc.shift = 0;
- /* Enable the timer functions by clearing bit 31. */
- wr32(E1000_TSAUXC, 0x0);
adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
break;
case e1000_i210:
@@ -1112,46 +1131,24 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
adapter->ptp_caps.enable = igb_ptp_feature_enable_i210;
adapter->ptp_caps.verify = igb_ptp_verify_pin;
- /* Enable the timer functions by clearing bit 31. */
- wr32(E1000_TSAUXC, 0x0);
break;
default:
adapter->ptp_clock = NULL;
return;
}
- wrfl();
-
spin_lock_init(&adapter->tmreg_lock);
INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
- /* Initialize the clock and overflow work for devices that need it. */
- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
- struct timespec64 ts = ktime_to_timespec64(ktime_get_real());
-
- igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
- } else {
- timecounter_init(&adapter->tc, &adapter->cc,
- ktime_to_ns(ktime_get_real()));
- }
-
- if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) {
+ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
igb_ptp_overflow_check);
- schedule_delayed_work(&adapter->ptp_overflow_work,
- IGB_SYSTIM_OVERFLOW_PERIOD);
- }
-
- /* Initialize the time sync interrupts for devices that support it. */
- if (hw->mac.type >= e1000_82580) {
- wr32(E1000_TSIM, TSYNC_INTERRUPTS);
- wr32(E1000_IMS, E1000_IMS_TS);
- }
-
adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+ igb_ptp_resume(adapter);
+
adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
&adapter->pdev->dev);
if (IS_ERR(adapter->ptp_clock)) {
@@ -1243,4 +1240,6 @@ void igb_ptp_reset(struct igb_adapter *adapter)
}
out:
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ wrfl();
}
--
2.8.1.493.g7b22612
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 4/5] igb: implement igb_ptp_suspend
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
` (2 preceding siblings ...)
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 3/5] igb: introduce igb_ptp_resume function Jacob Keller
@ 2016-05-11 23:18 ` Jacob Keller
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 5/5] igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume cycle Jacob Keller
2016-05-17 1:57 ` [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Brown, Aaron F
5 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-05-11 23:18 UTC (permalink / raw)
To: intel-wired-lan
Make igb_ptp_stop take advantage of this new function to reduce code
duplication.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
drivers/net/ethernet/intel/igb/igb.h | 1 +
drivers/net/ethernet/intel/igb/igb_ptp.c | 22 +++++++++++++++++-----
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index fcba5ebba5ab..8f3e99cef9c2 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -550,6 +550,7 @@ void igb_set_fw_version(struct igb_adapter *);
void igb_ptp_init(struct igb_adapter *adapter);
void igb_ptp_stop(struct igb_adapter *adapter);
void igb_ptp_reset(struct igb_adapter *adapter);
+void igb_ptp_suspend(struct igb_adapter *adapter);
void igb_ptp_resume(struct igb_adapter *adapter);
void igb_ptp_rx_hang(struct igb_adapter *adapter);
void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 6c7d05d35894..a54c512deb45 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1162,12 +1162,13 @@ void igb_ptp_init(struct igb_adapter *adapter)
}
/**
- * igb_ptp_stop - Disable PTP device and stop the overflow check.
- * @adapter: Board private structure.
+ * igb_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
*
- * This function stops the PTP support and cancels the delayed work.
- **/
-void igb_ptp_stop(struct igb_adapter *adapter)
+ * This function stops the overflow check work and PTP Tx timestamp work, and
+ * will prepare the device for OS suspend.
+ */
+void igb_ptp_suspend(struct igb_adapter *adapter)
{
if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
return;
@@ -1181,6 +1182,17 @@ void igb_ptp_stop(struct igb_adapter *adapter)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
}
+}
+
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
+{
+ igb_ptp_suspend(adapter);
if (adapter->ptp_clock) {
ptp_clock_unregister(adapter->ptp_clock);
--
2.8.1.493.g7b22612
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 5/5] igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume cycle
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
` (3 preceding siblings ...)
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 4/5] igb: implement igb_ptp_suspend Jacob Keller
@ 2016-05-11 23:18 ` Jacob Keller
2016-05-17 1:57 ` [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Brown, Aaron F
5 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-05-11 23:18 UTC (permalink / raw)
To: intel-wired-lan
Properly stop the extra workqueue items and ensure that we resume
cleanly. This is better than using igb_ptp_init and igb_ptp_stop since
these functions destroy the PHC device, which will cause other problems
if we do so.
Reported-By: Vidya Sagar <sagar.tv@gmail.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
drivers/net/ethernet/intel/igb/igb_main.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 21727692bef6..16b9eb9b94b5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -7527,6 +7527,8 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
if (netif_running(netdev))
__igb_close(netdev, true);
+ igb_ptp_suspend(adapter);
+
igb_clear_interrupt_scheme(adapter);
#ifdef CONFIG_PM
@@ -7637,7 +7639,7 @@ static int igb_resume(struct device *dev)
return -ENOMEM;
}
- igb_reset(adapter);
+ igb_ptp_resume(adapter);
/* let the f/w know that the h/w is now under the control of the
* driver.
--
2.8.1.493.g7b22612
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-11 23:18 [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Jacob Keller
` (4 preceding siblings ...)
2016-05-11 23:18 ` [Intel-wired-lan] [net v2 5/5] igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume cycle Jacob Keller
@ 2016-05-17 1:57 ` Brown, Aaron F
2016-05-17 2:29 ` Jeff Kirsher
5 siblings, 1 reply; 13+ messages in thread
From: Brown, Aaron F @ 2016-05-17 1:57 UTC (permalink / raw)
To: intel-wired-lan
> From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.osuosl.org] On
> Behalf Of Jacob Keller
> Sent: Wednesday, May 11, 2016 4:18 PM
> To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> Cc: Vidya Sagar <sagar.tv@gmail.com>
> Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
>
> This patch series (properly) fixes the issue with igb's workqueue item
> for overflow check from causing a surprise remove event. To do this,
> properly suspend the workqueue items in suspend and then resume them
> again during the resume flow.
>
> The patch series has a few extra steps to reduce code duplication and
> implement suspend and resume properly, which makes the overall fix a bit
> more complicated, and thus review is welcome.
>
> A smaller fix would be to implement suspend and resume irrespective of
> the current igb_ptp_stop and igb_ptp_init but this seems more likely to
> introduce bugs especially if either function ever changes in the future.
>
> In addition, the ptp_flags variable is added mostly to simplify the work
> of writing several complex MAC type checks in the ptp code while doing
> this.
>
> Jacob Keller (5):
> igb: introduce ptp_flags variable and use it to replace IGB_FLAG_PTP
> igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> igb: introduce igb_ptp_resume function
> igb: implement igb_ptp_suspend
> igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume cycle
>
> drivers/net/ethernet/intel/igb/igb.h | 8 ++-
> drivers/net/ethernet/intel/igb/igb_main.c | 4 +-
> drivers/net/ethernet/intel/igb/igb_ptp.c | 110 ++++++++++++++++-------------
> -
> 3 files changed, 68 insertions(+), 54 deletions(-)
I have not isolated it to the exact patch yet, but one of the patches in this series is causing my systems to lock up with a call trace. I am currently unable to capture the trace in any form other than a bitmap (which I'll send to Jacob but am not attaching here.) The trace is really several splats a few minutes apart. The exact text / procedure calls of the first one seems to vary, but it seems to be in a wakeup routine with "do_page_fault", "? _raw_spin_lock_irq", "? timecounter_read", "? _raw_spin_lock_irqsave", "igb_ptp_gettime_82576" and "igb_ptp_overflow_check" showing up prominently in at least a few instances. Usually it moves to the next trace before I can get a snapshot. The follow on trace is where it usually stops with a RIP:, bunch of hex, stack info and a Call Trace saying "arch_cpu_idle", "default_idle_call", "cpu_startup_entry" and "start_secondary" called out.
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 1:57 ` [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue Brown, Aaron F
@ 2016-05-17 2:29 ` Jeff Kirsher
2016-05-17 20:46 ` Keller, Jacob E
0 siblings, 1 reply; 13+ messages in thread
From: Jeff Kirsher @ 2016-05-17 2:29 UTC (permalink / raw)
To: intel-wired-lan
On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.osuosl.org]
> On
> > Behalf Of Jacob Keller
> > Sent: Wednesday, May 11, 2016 4:18 PM
> > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume
> issue
> >
> > This patch series (properly) fixes the issue with igb's workqueue item
> > for overflow check from causing a surprise remove event. To do this,
> > properly suspend the workqueue items in suspend and then resume them
> > again during the resume flow.
> >
> > The patch series has a few extra steps to reduce code duplication and
> > implement suspend and resume properly, which makes the overall fix a
> bit
> > more complicated, and thus review is welcome.
> >
> > A smaller fix would be to implement suspend and resume irrespective of
> > the current igb_ptp_stop and igb_ptp_init but this seems more likely to
> > introduce bugs especially if either function ever changes in the
> future.
> >
> > In addition, the ptp_flags variable is added mostly to simplify the
> work
> > of writing several complex MAC type checks in the ptp code while doing
> > this.
> >
> > Jacob Keller (5):
> >   igb: introduce ptp_flags variable and use it to replace IGB_FLAG_PTP
> >   igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> >   igb: introduce igb_ptp_resume function
> >   igb: implement igb_ptp_suspend
> >   igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume cycle
> >
> >  drivers/net/ethernet/intel/igb/igb.h      |   8 ++-
> >  drivers/net/ethernet/intel/igb/igb_main.c |   4 +-
> >  drivers/net/ethernet/intel/igb/igb_ptp.c  | 110 ++++++++++++++++----
> ---------
> > -
> >  3 files changed, 68 insertions(+), 54 deletions(-)
>
> I have not isolated it to the exact patch yet, but one of the patches in
> this series is causing my systems to lock up with a call trace.  I am
> currently unable to capture the trace in any form other than a bitmap
> (which I'll send to Jacob but am not attaching here.)  The trace is
> really several splats a few minutes apart.  The exact text / procedure
> calls of the first one seems to vary, but it seems to be in a wakeup
> routine with "do_page_fault", "? _raw_spin_lock_irq", "?
> timecounter_read", "? _raw_spin_lock_irqsave", "igb_ptp_gettime_82576"
> and "igb_ptp_overflow_check" showing up prominently in at least a few
> instances.  Usually it moves to the next trace before I can get a
> snapshot.  The follow on trace is where it usually stops with a RIP:,
> bunch of hex, stack info and a Call Trace saying "arch_cpu_idle",
> "default_idle_call", "cpu_startup_entry" and "start_secondary" called
> out.
Andrew thought it was with patch 3 in the series, at least that is what his
initial git bisect was telling him.
I am going to go ahead and drop the entire series for now, so that we can
work offline to resolve the issue.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.osuosl.org/pipermail/intel-wired-lan/attachments/20160516/17129f83/attachment.asc>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 2:29 ` Jeff Kirsher
@ 2016-05-17 20:46 ` Keller, Jacob E
2016-05-17 21:05 ` Brown, Aaron F
0 siblings, 1 reply; 13+ messages in thread
From: Keller, Jacob E @ 2016-05-17 20:46 UTC (permalink / raw)
To: intel-wired-lan
On Mon, 2016-05-16 at 19:29 -0700, Jeff Kirsher wrote:
> On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> >
> > >
> > > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.osuos
> > > l.org]
> > On
> > >
> > > Behalf Of Jacob Keller
> > > Sent: Wednesday, May 11, 2016 4:18 PM
> > > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > suspend/resume
> > issue
> > >
> > >
> > > This patch series (properly) fixes the issue with igb's workqueue
> > > item
> > > for overflow check from causing a surprise remove event. To do
> > > this,
> > > properly suspend the workqueue items in suspend and then resume
> > > them
> > > again during the resume flow.
> > >
> > > The patch series has a few extra steps to reduce code duplication
> > > and
> > > implement suspend and resume properly, which makes the overall
> > > fix a
> > bit
> > >
> > > more complicated, and thus review is welcome.
> > >
> > > A smaller fix would be to implement suspend and resume
> > > irrespective of
> > > the current igb_ptp_stop and igb_ptp_init but this seems more
> > > likely to
> > > introduce bugs especially if either function ever changes in the
> > future.
> > >
> > >
> > > In addition, the ptp_flags variable is added mostly to simplify
> > > the
> > work
> > >
> > > of writing several complex MAC type checks in the ptp code while
> > > doing
> > > this.
> > >
> > > Jacob Keller (5):
> > >    igb: introduce ptp_flags variable and use it to replace
> > > IGB_FLAG_PTP
> > >    igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> > >    igb: introduce igb_ptp_resume function
> > >    igb: implement igb_ptp_suspend
> > >    igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume
> > > cycle
> > >
> > >   drivers/net/ethernet/intel/igb/igb.h      |   8 ++-
> > >   drivers/net/ethernet/intel/igb/igb_main.c |   4 +-
> > >   drivers/net/ethernet/intel/igb/igb_ptp.c  | 110
> > > ++++++++++++++++----
> > ---------
> > >
> > > -
> > >   3 files changed, 68 insertions(+), 54 deletions(-)
> > I have not isolated it to the exact patch yet, but one of the
> > patches in
> > this series is causing my systems to lock up with a call trace.  I
> > am
> > currently unable to capture the trace in any form other than a
> > bitmap
> > (which I'll send to Jacob but am not attaching here.)  The trace is
> > really several splats a few minutes apart.  The exact text /
> > procedure
> > calls of the first one seems to vary, but it seems to be in a
> > wakeup
> > routine with "do_page_fault", "? _raw_spin_lock_irq", "?
> > timecounter_read", "? _raw_spin_lock_irqsave",
> > "igb_ptp_gettime_82576"
> > and "igb_ptp_overflow_check" showing up prominently in at least a
> > few
> > instances.  Usually it moves to the next trace before I can get a
> > snapshot.  The follow on trace is where it usually stops with a
> > RIP:,
> > bunch of hex, stack info and a Call Trace saying "arch_cpu_idle",
> > "default_idle_call", "cpu_startup_entry" and "start_secondary"
> > called
> > out.
> Andrew thought it was with patch 3 in the series, at least that is
> what his
> initial git bisect was telling him.
>
> I am going to go ahead and drop the entire series for now, so that we
> can
> work offline to resolve the issue.
Yep. I'm investigating the traces. I'll focus my initial efforts at
patch 3.
Thanks,
Jake
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 20:46 ` Keller, Jacob E
@ 2016-05-17 21:05 ` Brown, Aaron F
2016-05-17 21:54 ` Keller, Jacob E
0 siblings, 1 reply; 13+ messages in thread
From: Brown, Aaron F @ 2016-05-17 21:05 UTC (permalink / raw)
To: intel-wired-lan
> From: Keller, Jacob E
> Sent: Tuesday, May 17, 2016 1:47 PM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown, Aaron F
> <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> Cc: sagar.tv at gmail.com
> Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
>
> On Mon, 2016-05-16 at 19:29 -0700, Jeff Kirsher wrote:
> > On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> > >
> > > >
> > > > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.osuos
> > > > l.org]
> > > On
> > > >
> > > > Behalf Of Jacob Keller
> > > > Sent: Wednesday, May 11, 2016 4:18 PM
> > > > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > > > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > > > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > > suspend/resume
> > > issue
> > > >
> > > >
> > > > This patch series (properly) fixes the issue with igb's workqueue
> > > > item
> > > > for overflow check from causing a surprise remove event. To do
> > > > this,
> > > > properly suspend the workqueue items in suspend and then resume
> > > > them
> > > > again during the resume flow.
> > > >
> > > > The patch series has a few extra steps to reduce code duplication
> > > > and
> > > > implement suspend and resume properly, which makes the overall
> > > > fix a
> > > bit
> > > >
> > > > more complicated, and thus review is welcome.
> > > >
> > > > A smaller fix would be to implement suspend and resume
> > > > irrespective of
> > > > the current igb_ptp_stop and igb_ptp_init but this seems more
> > > > likely to
> > > > introduce bugs especially if either function ever changes in the
> > > future.
> > > >
> > > >
> > > > In addition, the ptp_flags variable is added mostly to simplify
> > > > the
> > > work
> > > >
> > > > of writing several complex MAC type checks in the ptp code while
> > > > doing
> > > > this.
> > > >
> > > > > Jacob Keller (5):
> > > > >    igb: introduce ptp_flags variable and use it to replace
> > > > > IGB_FLAG_PTP
> > > > >    igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> > > > >    igb: introduce igb_ptp_resume function
> > > > >    igb: implement igb_ptp_suspend
> > > > >    igb: call igb_ptp_suspend/igb_ptp_resume during suspend/resume
> > > > > cycle
> > > > >
> > > > >   drivers/net/ethernet/intel/igb/igb.h      |   8 ++-
> > > > >   drivers/net/ethernet/intel/igb/igb_main.c |   4 +-
> > > > >   drivers/net/ethernet/intel/igb/igb_ptp.c  | 110
> > > > > ++++++++++++++++----
> > > > ---------
> > > > >
> > > > > -
> > > > >   3 files changed, 68 insertions(+), 54 deletions(-)
> > > > I have not isolated it to the exact patch yet, but one of the
> > > > patches in
> > > > this series is causing my systems to lock up with a call trace.  I
> > > > am
> > > > currently unable to capture the trace in any form other than a
> > > > bitmap
> > > > (which I'll send to Jacob but am not attaching here.)  The trace is
> > > > really several splats a few minutes apart.  The exact text /
> > > > procedure
> > > > calls of the first one seems to vary, but it seems to be in a
> > > > wakeup
> > > > routine with "do_page_fault", "? _raw_spin_lock_irq", "?
> > > > timecounter_read", "? _raw_spin_lock_irqsave",
> > > > "igb_ptp_gettime_82576"
> > > > and "igb_ptp_overflow_check" showing up prominently in at least a
> > > > few
> > > > instances.  Usually it moves to the next trace before I can get a
> > > > snapshot.  The follow on trace is where it usually stops with a
> > > > RIP:,
> > > > bunch of hex, stack info and a Call Trace saying "arch_cpu_idle",
> > > > "default_idle_call", "cpu_startup_entry" and "start_secondary"
> > > > called
> > > > out.
> > Andrew thought it was with patch 3 in the series, at least that is
> > what his
> > initial git bisect was telling him.
> >
> > I am going to go ahead and drop the entire series for now, so that we
> > can
> > work offline to resolve the issue.
>
>
> Yep. I'm investigating the traces. I'll focus my initial efforts at
> patch 3.
I can confirm Andrew's bisection and that the issue does not start occurring until patch 3 is included.
>
> Thanks,
> Jake
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 21:05 ` Brown, Aaron F
@ 2016-05-17 21:54 ` Keller, Jacob E
2016-05-17 22:01 ` Brown, Aaron F
0 siblings, 1 reply; 13+ messages in thread
From: Keller, Jacob E @ 2016-05-17 21:54 UTC (permalink / raw)
To: intel-wired-lan
On Tue, 2016-05-17 at 21:05 +0000, Brown, Aaron F wrote:
> >
> > From: Keller, Jacob E
> > Sent: Tuesday, May 17, 2016 1:47 PM
> > To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown, Aaron
> > F
> > <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> > Cc: sagar.tv at gmail.com
> > Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > suspend/resume issue
> >
> > On Mon, 2016-05-16 at 19:29 -0700, Jeff Kirsher wrote:
> > >
> > > On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> > > >
> > > >
> > > > >
> > > > >
> > > > > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.o
> > > > > suos
> > > > > l.org]
> > > > On
> > > > >
> > > > >
> > > > > Behalf Of Jacob Keller
> > > > > Sent: Wednesday, May 11, 2016 4:18 PM
> > > > > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > > > > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > > > > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > > > suspend/resume
> > > > issue
> > > > >
> > > > >
> > > > >
> > > > > This patch series (properly) fixes the issue with igb's
> > > > > workqueue
> > > > > item
> > > > > for overflow check from causing a surprise remove event. To
> > > > > do
> > > > > this,
> > > > > properly suspend the workqueue items in suspend and then
> > > > > resume
> > > > > them
> > > > > again during the resume flow.
> > > > >
> > > > > The patch series has a few extra steps to reduce code
> > > > > duplication
> > > > > and
> > > > > implement suspend and resume properly, which makes the
> > > > > overall
> > > > > fix a
> > > > bit
> > > > >
> > > > >
> > > > > more complicated, and thus review is welcome.
> > > > >
> > > > > A smaller fix would be to implement suspend and resume
> > > > > irrespective of
> > > > > the current igb_ptp_stop and igb_ptp_init but this seems more
> > > > > likely to
> > > > > introduce bugs especially if either function ever changes in
> > > > > the
> > > > future.
> > > > >
> > > > >
> > > > >
> > > > > In addition, the ptp_flags variable is added mostly to
> > > > > simplify
> > > > > the
> > > > work
> > > > >
> > > > >
> > > > > of writing several complex MAC type checks in the ptp code
> > > > > while
> > > > > doing
> > > > > this.
> > > > >
> > > > > Jacob Keller (5):
> > > > > ?? igb: introduce ptp_flags variable and use it to replace
> > > > > IGB_FLAG_PTP
> > > > > ?? igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> > > > > ?? igb: introduce igb_ptp_resume function
> > > > > ?? igb: implement igb_ptp_suspend
> > > > > ?? igb: call igb_ptp_suspend/igb_ptp_resume during
> > > > > suspend/resume
> > > > > cycle
> > > > >
> > > > > ? drivers/net/ethernet/intel/igb/igb.h????? |?? 8 ++-
> > > > > ? drivers/net/ethernet/intel/igb/igb_main.c |?? 4 +-
> > > > > ? drivers/net/ethernet/intel/igb/igb_ptp.c? | 110
> > > > > ++++++++++++++++----
> > > > ---------
> > > > >
> > > > >
> > > > > -
> > > > > ? 3 files changed, 68 insertions(+), 54 deletions(-)
> > > > I have not isolated it to the exact patch yet, but one of the
> > > > patches in
> > > > this series is causing my systems to lock up with a call
> > > > trace.? I
> > > > am
> > > > currently unable to capture the trace in any form other than a
> > > > bitmap
> > > > (which I'll send to Jacob but am not attaching here.)? The
> > > > trace is
> > > > really several splats a few minutes apart.? The exact text /
> > > > procedure
> > > > calls of the first one seems to vary, but it seems to be in a
> > > > wakeup
> > > > routing with "do_page_fault", "? _raw_spin_lock_irq", "?
> > > > timecounter_read", "? _raw_spin_lock_irqsave",
> > > > "igb_ptp_gettime_82576"
> > > > and "igb_ptp_overflow_check" showing up prominently in at least
> > > > a
> > > > few
> > > > instances.? Usually it moves to the next trace before I can get
> > > > a
> > > > snapshot.? The follow on trace is where it usually stops with a
> > > > RIP:,
> > > > bunch of hex, stack info and a Call Trace saying
> > > > "arch_cpu_idle",
> > > > "default_idle_call", "cpu_startup_entry" and "start_secondary"
> > > > called
> > > > out.
> > > Andrew thought it was with patch 3 in the series, at least that
> > > is
> > > what his
> > > initial git bisect was telling him.
> > >
> > > I am going to go ahead and drop the entire series for now, so
> > > that we
> > > can
> > > work offline to resolve the issue.
> >
> > Yep. I'm investigating the traces. I'll focus my initial efforts at
> > patch 3.
> I can confirm Andrew's bisection and that the issue does not start
> occurring until patch 3 is included.
Does this just occur from loading, or does it occur after some action
on your part?
Thanks,
Jake
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 21:54 ` Keller, Jacob E
@ 2016-05-17 22:01 ` Brown, Aaron F
2016-05-17 22:04 ` Keller, Jacob E
0 siblings, 1 reply; 13+ messages in thread
From: Brown, Aaron F @ 2016-05-17 22:01 UTC (permalink / raw)
To: intel-wired-lan
> From: Keller, Jacob E
> Sent: Tuesday, May 17, 2016 2:54 PM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown, Aaron F
> <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> Cc: sagar.tv at gmail.com
> Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
>
> On Tue, 2016-05-17 at 21:05 +0000, Brown, Aaron F wrote:
> > >
> > > From: Keller, Jacob E
> > > Sent: Tuesday, May 17, 2016 1:47 PM
> > > To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown, Aaron
> > > F
> > > <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> > > Cc: sagar.tv at gmail.com
> > > Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > suspend/resume issue
> > >
> > > On Mon, 2016-05-16 at 19:29 -0700, Jeff Kirsher wrote:
> > > >
> > > > On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lists.o
> > > > > > suos
> > > > > > l.org]
> > > > > On
> > > > > >
> > > > > >
> > > > > > Behalf Of Jacob Keller
> > > > > > Sent: Wednesday, May 11, 2016 4:18 PM
> > > > > > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > > > > > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > > > > > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > > > > suspend/resume
> > > > > issue
> > > > > >
> > > > > >
> > > > > >
> > > > > > This patch series (properly) fixes the issue with igb's
> > > > > > workqueue
> > > > > > item
> > > > > > for overflow check from causing a surprise remove event. To
> > > > > > do
> > > > > > this,
> > > > > > properly suspend the workqueue items in suspend and then
> > > > > > resume
> > > > > > them
> > > > > > again during the resume flow.
> > > > > >
> > > > > > The patch series has a few extra steps to reduce code
> > > > > > duplication
> > > > > > and
> > > > > > implement suspend and resume properly, which makes the
> > > > > > overall
> > > > > > fix a
> > > > > bit
> > > > > >
> > > > > >
> > > > > > more complicated, and thus review is welcome.
> > > > > >
> > > > > > A smaller fix would be to implement suspend and resume
> > > > > > irrespective of
> > > > > > the current igb_ptp_stop and igb_ptp_init but this seems more
> > > > > > likely to
> > > > > > introduce bugs especially if either function ever changes in
> > > > > > the
> > > > > future.
> > > > > >
> > > > > >
> > > > > >
> > > > > > In addition, the ptp_flags variable is added mostly to
> > > > > > simplify
> > > > > > the
> > > > > work
> > > > > >
> > > > > >
> > > > > > of writing several complex MAC type checks in the ptp code
> > > > > > while
> > > > > > doing
> > > > > > this.
> > > > > >
> > > > > > Jacob Keller (5):
> > > > > > ?? igb: introduce ptp_flags variable and use it to replace
> > > > > > IGB_FLAG_PTP
> > > > > > ?? igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> > > > > > ?? igb: introduce igb_ptp_resume function
> > > > > > ?? igb: implement igb_ptp_suspend
> > > > > > ?? igb: call igb_ptp_suspend/igb_ptp_resume during
> > > > > > suspend/resume
> > > > > > cycle
> > > > > >
> > > > > > ? drivers/net/ethernet/intel/igb/igb.h????? |?? 8 ++-
> > > > > > ? drivers/net/ethernet/intel/igb/igb_main.c |?? 4 +-
> > > > > > ? drivers/net/ethernet/intel/igb/igb_ptp.c? | 110
> > > > > > ++++++++++++++++----
> > > > > ---------
> > > > > >
> > > > > >
> > > > > > -
> > > > > > ? 3 files changed, 68 insertions(+), 54 deletions(-)
> > > > > I have not isolated it to the exact patch yet, but one of the
> > > > > patches in
> > > > > this series is causing my systems to lock up with a call
> > > > > trace.? I
> > > > > am
> > > > > currently unable to capture the trace in any form other than a
> > > > > bitmap
> > > > > (which I'll send to Jacob but am not attaching here.)? The
> > > > > trace is
> > > > > really several splats a few minutes apart.? The exact text /
> > > > > procedure
> > > > > calls of the first one seems to vary, but it seems to be in a
> > > > > wakeup
> > > > > routing with "do_page_fault", "? _raw_spin_lock_irq", "?
> > > > > timecounter_read", "? _raw_spin_lock_irqsave",
> > > > > "igb_ptp_gettime_82576"
> > > > > and "igb_ptp_overflow_check" showing up prominently in at least
> > > > > a
> > > > > few
> > > > > instances.? Usually it moves to the next trace before I can get
> > > > > a
> > > > > snapshot.? The follow on trace is where it usually stops with a
> > > > > RIP:,
> > > > > bunch of hex, stack info and a Call Trace saying
> > > > > "arch_cpu_idle",
> > > > > "default_idle_call", "cpu_startup_entry" and "start_secondary"
> > > > > called
> > > > > out.
> > > > Andrew thought it was with patch 3 in the series, at least that
> > > > is
> > > > what his
> > > > initial git bisect was telling him.
> > > >
> > > > I am going to go ahead and drop the entire series for now, so
> > > > that we
> > > > can
> > > > work offline to resolve the issue.
> > >
> > > Yep. I'm investigating the traces. I'll focus my initial efforts at
> > > patch 3.
> > I can confirm Andrew's bisection and that the issue does not start
> > occurring until patch 3 is included.
>
>
> Does this just occur from loading? or does it occur after some action
> on your part?
I initially ran into it just loading the driver and launching a BAT. But after trying to figure out which section of the BAT was breaking it, I noticed another system I had simply loaded the driver on would lock up after a while as well. I do not know if it shows up sooner or later if the interface is getting traffic across it, but it shows up either way.
>
> Thanks,
> Jake
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Intel-wired-lan] [net v2 0/5] igb: fix ptp suspend/resume issue
2016-05-17 22:01 ` Brown, Aaron F
@ 2016-05-17 22:04 ` Keller, Jacob E
0 siblings, 0 replies; 13+ messages in thread
From: Keller, Jacob E @ 2016-05-17 22:04 UTC (permalink / raw)
To: intel-wired-lan
On Tue, 2016-05-17 at 22:01 +0000, Brown, Aaron F wrote:
> >
> > From: Keller, Jacob E
> > Sent: Tuesday, May 17, 2016 2:54 PM
> > To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown, Aaron
> > F
> > <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> > Cc: sagar.tv at gmail.com
> > Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > suspend/resume issue
> >
> > On Tue, 2016-05-17 at 21:05 +0000, Brown, Aaron F wrote:
> > >
> > > >
> > > >
> > > > From: Keller, Jacob E
> > > > Sent: Tuesday, May 17, 2016 1:47 PM
> > > > To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Brown,
> > > > Aaron
> > > > F
> > > > <aaron.f.brown@intel.com>; intel-wired-lan at lists.osuosl.org
> > > > Cc: sagar.tv at gmail.com
> > > > Subject: Re: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > > suspend/resume issue
> > > >
> > > > On Mon, 2016-05-16 at 19:29 -0700, Jeff Kirsher wrote:
> > > > >
> > > > >
> > > > > On Tue, 2016-05-17 at 01:57 +0000, Brown, Aaron F wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > From: Intel-wired-lan [mailto:intel-wired-lan-bounces at lis
> > > > > > > ts.o
> > > > > > > suos
> > > > > > > l.org]
> > > > > > On
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > Behalf Of Jacob Keller
> > > > > > > Sent: Wednesday, May 11, 2016 4:18 PM
> > > > > > > To: Intel Wired LAN <intel-wired-lan@lists.osuosl.org>
> > > > > > > Cc: Vidya Sagar <sagar.tv@gmail.com>
> > > > > > > Subject: [Intel-wired-lan] [net v2 0/5] igb: fix ptp
> > > > > > > suspend/resume
> > > > > > issue
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > This patch series (properly) fixes the issue with igb's
> > > > > > > workqueue
> > > > > > > item
> > > > > > > for overflow check from causing a surprise remove event.
> > > > > > > To
> > > > > > > do
> > > > > > > this,
> > > > > > > properly suspend the workqueue items in suspend and then
> > > > > > > resume
> > > > > > > them
> > > > > > > again during the resume flow.
> > > > > > >
> > > > > > > The patch series has a few extra steps to reduce code
> > > > > > > duplication
> > > > > > > and
> > > > > > > implement suspend and resume properly, which makes the
> > > > > > > overall
> > > > > > > fix a
> > > > > > bit
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > more complicated, and thus review is welcome.
> > > > > > >
> > > > > > > A smaller fix would be to implement suspend and resume
> > > > > > > irrespective of
> > > > > > > the current igb_ptp_stop and igb_ptp_init but this seems
> > > > > > > more
> > > > > > > likely to
> > > > > > > introduce bugs especially if either function ever changes
> > > > > > > in
> > > > > > > the
> > > > > > future.
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > In addition, the ptp_flags variable is added mostly to
> > > > > > > simplify
> > > > > > > the
> > > > > > work
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > of writing several complex MAC type checks in the ptp
> > > > > > > code
> > > > > > >
while
> > > > > > > doing
> > > > > > > this.
> > > > > > >
> > > > > > > Jacob Keller (5):
> > > > > > > ?? igb: introduce ptp_flags variable and use it to
> > > > > > > replace
> > > > > > > IGB_FLAG_PTP
> > > > > > > ?? igb: introduce IGB_PTP_OVERFLOW_CHECK flag
> > > > > > > ?? igb: introduce igb_ptp_resume function
> > > > > > > ?? igb: implement igb_ptp_suspend
> > > > > > > ?? igb: call igb_ptp_suspend/igb_ptp_resume during
> > > > > > > suspend/resume
> > > > > > > cycle
> > > > > > >
> > > > > > > ? drivers/net/ethernet/intel/igb/igb.h????? |?? 8 ++-
> > > > > > > ? drivers/net/ethernet/intel/igb/igb_main.c |?? 4 +-
> > > > > > > ? drivers/net/ethernet/intel/igb/igb_ptp.c? | 110
> > > > > > > ++++++++++++++++----
> > > > > > ---------
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > -
> > > > > > > ? 3 files changed, 68 insertions(+), 54 deletions(-)
> > > > > > I have not isolated it to the exact patch yet, but one of
> > > > > > the
> > > > > > patches in
> > > > > > this series is causing my systems to lock up with a call
> > > > > > trace.? I
> > > > > > am
> > > > > > currently unable to capture the trace in any form other
> > > > > > than a
> > > > > > bitmap
> > > > > > (which I'll send to Jacob but am not attaching here.)? The
> > > > > > trace is
> > > > > > really several splats a few minutes apart.? The exact text
> > > > > > /
> > > > > > procedure
> > > > > > calls of the first one seems to vary, but it seems to be in
> > > > > > a
> > > > > > wakeup
> > > > > > routing with "do_page_fault", "? _raw_spin_lock_irq", "?
> > > > > > timecounter_read", "? _raw_spin_lock_irqsave",
> > > > > > "igb_ptp_gettime_82576"
> > > > > > and "igb_ptp_overflow_check" showing up prominently in at
> > > > > > least
> > > > > > a
> > > > > > few
> > > > > > instances.? Usually it moves to the next trace before I can
> > > > > > get
> > > > > > a
> > > > > > snapshot.? The follow on trace is where it usually stops
> > > > > > with a
> > > > > > RIP:,
> > > > > > bunch of hex, stack info and a Call Trace saying
> > > > > > "arch_cpu_idle",
> > > > > > "default_idle_call", "cpu_startup_entry" and
> > > > > > "start_secondary"
> > > > > > called
> > > > > > out.
> > > > > Andrew thought it was with patch 3 in the series, at least
> > > > > that
> > > > > is
> > > > > what his
> > > > > initial git bisect was telling him.
> > > > >
> > > > > I am going to go ahead and drop the entire series for now, so
> > > > > that we
> > > > > can
> > > > > work offline to resolve the issue.
> > > > Yep. I'm investigating the traces. I'll focus my initial
> > > > efforts at
> > > > patch 3.
> > > I can confirm Andrew's bisection and that the issue does not
> > > start
> > > occurring until patch 3 is included.
> >
> > Does this just occur from loading? or does it occur after some
> > action
> > on your part?
> I initially ran into it just loading the driver and launching a
> BAT. But after trying to figure out which section of the BAT was
> breaking it, I noticed another system I had simply loaded the driver
> on would lock up after a while as well. I do not know if it shows up
> sooner or later if the interface is getting traffic across it, but it
> shows up either way.
Ok, so you're not doing suspend/resume, and it's just loading the
driver and waiting.
Thanks,
Jake
^ permalink raw reply [flat|nested] 13+ messages in thread