* [PATCH 1/3] Revert "net: fec: fix missing napi_disable call"
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
@ 2013-04-19 14:36 ` Lucas Stach
2013-04-19 14:36 ` [PATCH 2/3] Revert "net: fec: put tx to napi poll function to fix dead lock" Lucas Stach
` (4 subsequent siblings)
5 siblings, 0 replies; 18+ messages in thread
From: Lucas Stach @ 2013-04-19 14:36 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Frank Li, Shawn Guo, Fabio Estevam, Lucas Stach
This reverts commit 3f104c38259dcb3e5443c246f0805bc04d887cc3.
---
drivers/net/ethernet/freescale/fec.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index a23f298..5721ca1 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -1513,7 +1513,6 @@ fec_enet_close(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
/* Don't know what to do yet. */
- napi_disable(&fep->napi);
fep->opened = 0;
netif_stop_queue(ndev);
fec_stop(ndev);
--
1.8.2.rc2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 2/3] Revert "net: fec: put tx to napi poll function to fix dead lock"
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
2013-04-19 14:36 ` [PATCH 1/3] Revert "net: fec: fix missing napi_disable call" Lucas Stach
@ 2013-04-19 14:36 ` Lucas Stach
2013-04-19 14:36 ` [PATCH 3/3] Revert "net: fec: add napi support to improve proformance" Lucas Stach
` (3 subsequent siblings)
5 siblings, 0 replies; 18+ messages in thread
From: Lucas Stach @ 2013-04-19 14:36 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Frank Li, Shawn Guo, Fabio Estevam, Lucas Stach
This reverts commit de5fb0a053482d89262c3284b67884cd2c621adc.
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
drivers/net/ethernet/freescale/fec.c | 86 ++++++++++++++++++------------------
drivers/net/ethernet/freescale/fec.h | 3 ++
2 files changed, 47 insertions(+), 42 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index 5721ca1..d2f7983 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -247,13 +247,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
struct bufdesc *bdp;
void *bufaddr;
unsigned short status;
- unsigned int index;
+ unsigned long flags;
if (!fep->link) {
/* Link is down or autonegotiation is in progress. */
return NETDEV_TX_BUSY;
}
+ spin_lock_irqsave(&fep->hw_lock, flags);
/* Fill in a Tx ring entry */
bdp = fep->cur_tx;
@@ -264,6 +265,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
* This should not happen, since ndev->tbusy should be set.
*/
printk("%s: tx queue full!.\n", ndev->name);
+ spin_unlock_irqrestore(&fep->hw_lock, flags);
return NETDEV_TX_BUSY;
}
@@ -279,13 +281,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
* 4-byte boundaries. Use bounce buffers to copy data
* and get it aligned. Ugh.
*/
- if (fep->bufdesc_ex)
- index = (struct bufdesc_ex *)bdp -
- (struct bufdesc_ex *)fep->tx_bd_base;
- else
- index = bdp - fep->tx_bd_base;
-
if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
+ unsigned int index;
+ if (fep->bufdesc_ex)
+ index = (struct bufdesc_ex *)bdp -
+ (struct bufdesc_ex *)fep->tx_bd_base;
+ else
+ index = bdp - fep->tx_bd_base;
memcpy(fep->tx_bounce[index], skb->data, skb->len);
bufaddr = fep->tx_bounce[index];
}
@@ -299,7 +301,10 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
swap_buffer(bufaddr, skb->len);
/* Save skb pointer */
- fep->tx_skbuff[index] = skb;
+ fep->tx_skbuff[fep->skb_cur] = skb;
+
+ ndev->stats.tx_bytes += skb->len;
+ fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK;
/* Push the data cache so the CPM does not get stale memory
* data.
@@ -327,22 +332,26 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
ebdp->cbd_esc = BD_ENET_TX_INT;
}
}
+ /* Trigger transmission start */
+ writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
/* If this was the last BD in the ring, start at the beginning again. */
if (status & BD_ENET_TX_WRAP)
bdp = fep->tx_bd_base;
else
bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex);
- fep->cur_tx = bdp;
-
- if (fep->cur_tx == fep->dirty_tx)
+ if (bdp == fep->dirty_tx) {
+ fep->tx_full = 1;
netif_stop_queue(ndev);
+ }
- /* Trigger transmission start */
- writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+ fep->cur_tx = bdp;
skb_tx_timestamp(skb);
+ spin_unlock_irqrestore(&fep->hw_lock, flags);
+
return NETDEV_TX_OK;
}
@@ -374,7 +383,7 @@ static void fec_enet_bd_init(struct net_device *dev)
/* ...and the same for transmit */
bdp = fep->tx_bd_base;
- fep->cur_tx = bdp;
+ fep->dirty_tx = fep->cur_tx = bdp;
for (i = 0; i < TX_RING_SIZE; i++) {
/* Initialize the BD for every fragment in the page. */
@@ -390,7 +399,6 @@ static void fec_enet_bd_init(struct net_device *dev)
/* Set the last buffer to wrap */
bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex);
bdp->cbd_sc |= BD_SC_WRAP;
- fep->dirty_tx = bdp;
}
/* This function is called to start or restart the FEC during a link
@@ -447,7 +455,8 @@ fec_restart(struct net_device *ndev, int duplex)
writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc)
* RX_RING_SIZE, fep->hwp + FEC_X_DES_START);
-
+ /* Reset SKB transmit buffers. */
+ fep->skb_cur = fep->skb_dirty = 0;
for (i = 0; i <= TX_RING_MOD_MASK; i++) {
if (fep->tx_skbuff[i]) {
dev_kfree_skb_any(fep->tx_skbuff[i]);
@@ -610,35 +619,20 @@ fec_enet_tx(struct net_device *ndev)
struct bufdesc *bdp;
unsigned short status;
struct sk_buff *skb;
- int index = 0;
fep = netdev_priv(ndev);
+ spin_lock(&fep->hw_lock);
bdp = fep->dirty_tx;
- /* get next bdp of dirty_tx */
- if (bdp->cbd_sc & BD_ENET_TX_WRAP)
- bdp = fep->tx_bd_base;
- else
- bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex);
-
while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) {
-
- /* current queue is empty */
- if (bdp == fep->cur_tx)
+ if (bdp == fep->cur_tx && fep->tx_full == 0)
break;
- if (fep->bufdesc_ex)
- index = (struct bufdesc_ex *)bdp -
- (struct bufdesc_ex *)fep->tx_bd_base;
- else
- index = bdp - fep->tx_bd_base;
-
dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0;
- skb = fep->tx_skbuff[index];
-
+ skb = fep->tx_skbuff[fep->skb_dirty];
/* Check for errors. */
if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
BD_ENET_TX_RL | BD_ENET_TX_UN |
@@ -683,9 +677,8 @@ fec_enet_tx(struct net_device *ndev)
/* Free the sk buffer associated with this last transmit */
dev_kfree_skb_any(skb);
- fep->tx_skbuff[index] = NULL;
-
- fep->dirty_tx = bdp;
+ fep->tx_skbuff[fep->skb_dirty] = NULL;
+ fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK;
/* Update pointer to next buffer descriptor to be transmitted */
if (status & BD_ENET_TX_WRAP)
@@ -695,12 +688,14 @@ fec_enet_tx(struct net_device *ndev)
/* Since we have freed up a buffer, the ring is no longer full
*/
- if (fep->dirty_tx != fep->cur_tx) {
+ if (fep->tx_full) {
+ fep->tx_full = 0;
if (netif_queue_stopped(ndev))
netif_wake_queue(ndev);
}
}
- return;
+ fep->dirty_tx = bdp;
+ spin_unlock(&fep->hw_lock);
}
@@ -867,7 +862,7 @@ fec_enet_interrupt(int irq, void *dev_id)
int_events = readl(fep->hwp + FEC_IEVENT);
writel(int_events, fep->hwp + FEC_IEVENT);
- if (int_events & (FEC_ENET_RXF | FEC_ENET_TXF)) {
+ if (int_events & FEC_ENET_RXF) {
ret = IRQ_HANDLED;
/* Disable the RX interrupt */
@@ -878,6 +873,15 @@ fec_enet_interrupt(int irq, void *dev_id)
}
}
+ /* Transmit OK, or non-fatal error. Update the buffer
+ * descriptors. FEC handles all errors, we just discover
+ * them as part of the transmit process.
+ */
+ if (int_events & FEC_ENET_TXF) {
+ ret = IRQ_HANDLED;
+ fec_enet_tx(ndev);
+ }
+
if (int_events & FEC_ENET_MII) {
ret = IRQ_HANDLED;
complete(&fep->mdio_done);
@@ -893,8 +897,6 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
int pkts = fec_enet_rx(ndev, budget);
struct fec_enet_private *fep = netdev_priv(ndev);
- fec_enet_tx(ndev);
-
if (pkts < budget) {
napi_complete(napi);
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index fdc0037..ca20331 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -213,6 +213,8 @@ struct fec_enet_private {
unsigned char *tx_bounce[TX_RING_SIZE];
struct sk_buff *tx_skbuff[TX_RING_SIZE];
struct sk_buff *rx_skbuff[RX_RING_SIZE];
+ ushort skb_cur;
+ ushort skb_dirty;
/* CPM dual port RAM relative addresses */
dma_addr_t bd_dma;
@@ -224,6 +226,7 @@ struct fec_enet_private {
/* The ring entries to be free()ed */
struct bufdesc *dirty_tx;
+ uint tx_full;
/* hold while accessing the HW like ringbuffer for tx/rx but not MAC */
spinlock_t hw_lock;
--
1.8.2.rc2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 3/3] Revert "net: fec: add napi support to improve proformance"
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
2013-04-19 14:36 ` [PATCH 1/3] Revert "net: fec: fix missing napi_disable call" Lucas Stach
2013-04-19 14:36 ` [PATCH 2/3] Revert "net: fec: put tx to napi poll function to fix dead lock" Lucas Stach
@ 2013-04-19 14:36 ` Lucas Stach
2013-04-19 17:38 ` [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction David Miller
` (2 subsequent siblings)
5 siblings, 0 replies; 18+ messages in thread
From: Lucas Stach @ 2013-04-19 14:36 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Frank Li, Shawn Guo, Fabio Estevam, Lucas Stach
This reverts commit dc975382d2ef36be7e78fac3717927de1a5abcd8.
Conflicts:
drivers/net/ethernet/freescale/fec.c
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
drivers/net/ethernet/freescale/fec.c | 41 ++++++------------------------------
drivers/net/ethernet/freescale/fec.h | 2 --
2 files changed, 7 insertions(+), 36 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index d2f7983..b01c612 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -68,7 +68,6 @@
#endif
#define DRIVER_NAME "fec"
-#define FEC_NAPI_WEIGHT 64
/* Pause frame feild and FIFO threshold */
#define FEC_ENET_FCE (1 << 5)
@@ -170,7 +169,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */
#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
-#define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
/* The FEC stores dest/src/type, data, and checksum for receive packets.
*/
@@ -704,8 +702,8 @@ fec_enet_tx(struct net_device *ndev)
* not been given to the system, we just set the empty indicator,
* effectively tossing the packet.
*/
-static int
-fec_enet_rx(struct net_device *ndev, int budget)
+static void
+fec_enet_rx(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
const struct platform_device_id *id_entry =
@@ -715,12 +713,13 @@ fec_enet_rx(struct net_device *ndev, int budget)
struct sk_buff *skb;
ushort pkt_len;
__u8 *data;
- int pkt_received = 0;
#ifdef CONFIG_M532x
flush_cache_all();
#endif
+ spin_lock(&fep->hw_lock);
+
/* First, grab all of the stats for the incoming packet.
* These get messed up if we get called due to a busy condition.
*/
@@ -728,10 +727,6 @@ fec_enet_rx(struct net_device *ndev, int budget)
while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
- if (pkt_received >= budget)
- break;
- pkt_received++;
-
/* Since we have allocated space to hold a complete frame,
* the last indicator should be set.
*/
@@ -813,7 +808,7 @@ fec_enet_rx(struct net_device *ndev, int budget)
}
if (!skb_defer_rx_timestamp(skb))
- napi_gro_receive(&fep->napi, skb);
+ netif_rx(skb);
}
bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
@@ -847,7 +842,7 @@ rx_processing_done:
}
fep->cur_rx = bdp;
- return pkt_received;
+ spin_unlock(&fep->hw_lock);
}
static irqreturn_t
@@ -864,13 +859,7 @@ fec_enet_interrupt(int irq, void *dev_id)
if (int_events & FEC_ENET_RXF) {
ret = IRQ_HANDLED;
-
- /* Disable the RX interrupt */
- if (napi_schedule_prep(&fep->napi)) {
- writel(FEC_RX_DISABLED_IMASK,
- fep->hwp + FEC_IMASK);
- __napi_schedule(&fep->napi);
- }
+ fec_enet_rx(ndev);
}
/* Transmit OK, or non-fatal error. Update the buffer
@@ -891,18 +880,7 @@ fec_enet_interrupt(int irq, void *dev_id)
return ret;
}
-static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
-{
- struct net_device *ndev = napi->dev;
- int pkts = fec_enet_rx(ndev, budget);
- struct fec_enet_private *fep = netdev_priv(ndev);
- if (pkts < budget) {
- napi_complete(napi);
- writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
- }
- return pkts;
-}
/* ------------------------------------------------------------------------- */
static void fec_get_mac(struct net_device *ndev)
@@ -1487,8 +1465,6 @@ fec_enet_open(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
int ret;
- napi_enable(&fep->napi);
-
/* I should reset the ring buffers here, but I don't yet know
* a simple way to do that.
*/
@@ -1700,9 +1676,6 @@ static int fec_enet_init(struct net_device *ndev)
ndev->netdev_ops = &fec_netdev_ops;
ndev->ethtool_ops = &fec_enet_ethtool_ops;
- writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
- netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT);
-
fec_restart(ndev, 0);
return 0;
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index ca20331..4195842 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -250,8 +250,6 @@ struct fec_enet_private {
int bufdesc_ex;
int pause_flag;
- struct napi_struct napi;
-
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_caps;
unsigned long last_overflow_check;
--
1.8.2.rc2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
` (2 preceding siblings ...)
2013-04-19 14:36 ` [PATCH 3/3] Revert "net: fec: add napi support to improve proformance" Lucas Stach
@ 2013-04-19 17:38 ` David Miller
2013-04-25 12:44 ` Lucas Stach
2013-04-19 18:22 ` Fabio Estevam
2013-04-19 20:05 ` Fabio Estevam
5 siblings, 1 reply; 18+ messages in thread
From: David Miller @ 2013-04-19 17:38 UTC (permalink / raw)
To: l.stach; +Cc: netdev, Frank.Li, shawn.guo, festevam
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 19 Apr 2013 16:36:01 +0200
> As it's way too late in the cycle to try and fix this up just revert the
> relevant patches for now.
I disagree, fix this properly, the revert is more intrusive.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 17:38 ` [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction David Miller
@ 2013-04-25 12:44 ` Lucas Stach
2013-04-25 19:14 ` David Miller
0 siblings, 1 reply; 18+ messages in thread
From: Lucas Stach @ 2013-04-25 12:44 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Frank.Li, shawn.guo, festevam
Hi David,
Am Freitag, den 19.04.2013, 13:38 -0400 schrieb David Miller:
> From: Lucas Stach <l.stach@pengutronix.de>
> Date: Fri, 19 Apr 2013 16:36:01 +0200
>
> > As it's way too late in the cycle to try and fix this up just revert the
> > relevant patches for now.
>
> I disagree, fix this properly, the revert is more intrusive.
In the last days I've tried to fix the failures I'm seeing with adding
back the locks and disabling NAPI while restarting the controller. Both
approaches are neither minimally intrusive, nor do they fix all the
failures. I suspect there is a subtler error in some of the changed ring
buffer handling code also, but I wasn't able to track this down further
to a specific single-line change.
The only way to reliably fix all the failures I'm seeing is really
reverting the changes, as done with this series. So David please
consider taking the reverts, even while they are a bit intrusive, they
are the only way to get a 3.9 release out of the door with a working FEC
driver.
Regards,
Lucas
--
Pengutronix e.K. | Lucas Stach |
Industrial Linux Solutions | http://www.pengutronix.de/ |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-25 12:44 ` Lucas Stach
@ 2013-04-25 19:14 ` David Miller
0 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2013-04-25 19:14 UTC (permalink / raw)
To: l.stach; +Cc: netdev, Frank.Li, shawn.guo, festevam
From: Lucas Stach <l.stach@pengutronix.de>
Date: Thu, 25 Apr 2013 14:44:14 +0200
> The only way to reliably fix all the failures I'm seeing is really
> reverting the changes, as done with this series. So David please
> consider taking the reverts, even while they are a bit intrusive, they
> are the only way to get a 3.9 release out of the door with a working FEC
> driver.
You'll need to formally resubmit it.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
` (3 preceding siblings ...)
2013-04-19 17:38 ` [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction David Miller
@ 2013-04-19 18:22 ` Fabio Estevam
2013-04-19 18:26 ` David Miller
2013-04-19 20:05 ` Fabio Estevam
5 siblings, 1 reply; 18+ messages in thread
From: Fabio Estevam @ 2013-04-19 18:22 UTC (permalink / raw)
To: Lucas Stach; +Cc: netdev, David Miller, Frank Li, Shawn Guo
Hi Lucas,
On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> Those patches introduce instability to the point of kernel OOPSes with
> NULL-ptr dereferences.
>
> The patches drop locks from the code without justifying why this would
> be safe at all. In fact it isn't safe as now the controller restart can
> happily free the RX and TX ring buffers while the NAPI poll function is
> still accessing them. So with a heavily loaded but slightly instable
> link we regularly end up with OOPSes because link change restarts
> the FEC and bombs away buffers still in use.
Can you please let us know how to reproduce the oopses? Which SoC did
you use in your tests?
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 18:22 ` Fabio Estevam
@ 2013-04-19 18:26 ` David Miller
0 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2013-04-19 18:26 UTC (permalink / raw)
To: festevam; +Cc: l.stach, netdev, Frank.Li, shawn.guo
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 19 Apr 2013 15:22:38 -0300
> Hi Lucas,
>
> On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
>> Those patches introduce instability to the point of kernel OOPSes with
>> NULL-ptr dereferences.
>>
>> The patches drop locks from the code without justifying why this would
>> be safe at all. In fact it isn't safe as now the controller restart can
>> happily free the RX and TX ring buffers while the NAPI poll function is
>> still accessing them. So with a heavily loaded but slightly instable
>> link we regularly end up with OOPSes because link change restarts
>> the FEC and bombs away buffers still in use.
>
> Can you please let us know how to reproduce the oopses? Which SoC did
> you use in your tests?
You don't need to know this, he told you exactly and in detail what
the race is, so you should work on fixing that based solely upon his
explanation.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 14:36 [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction Lucas Stach
` (4 preceding siblings ...)
2013-04-19 18:22 ` Fabio Estevam
@ 2013-04-19 20:05 ` Fabio Estevam
2013-04-20 12:35 ` Frank Li
2013-04-25 12:37 ` Lucas Stach
5 siblings, 2 replies; 18+ messages in thread
From: Fabio Estevam @ 2013-04-19 20:05 UTC (permalink / raw)
To: Lucas Stach; +Cc: netdev, David Miller, Frank Li, Shawn Guo
Lucas,
On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> Those patches introduce instability to the point of kernel OOPSes with
> NULL-ptr dereferences.
>
> The patches drop locks from the code without justifying why this would
> be safe at all. In fact it isn't safe as now the controller restart can
> happily free the RX and TX ring buffers while the NAPI poll function is
> still accessing them. So with a heavily loaded but slightly instable
> link we regularly end up with OOPSes because link change restarts
> the FEC and bombs away buffers still in use.
>
> Also the NAPI enabled interrupt handler ACKs the INT and only later
> masks it, this way introducing a window where new interrupts could sneak
> in while we are already in polling mode.
>
> As it's way too late in the cycle to try and fix this up just revert the
> relevant patches for now.
What about restoring the spinlocks and masking the int first?
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -247,12 +247,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
net_device *ndev)
void *bufaddr;
unsigned short status;
unsigned int index;
+ unsigned long flags;
if (!fep->link) {
/* Link is down or autonegotiation is in progress. */
return NETDEV_TX_BUSY;
}
+ spin_lock_irqsave(&fep->hw_lock, flags);
/* Fill in a Tx ring entry */
bdp = fep->cur_tx;
@@ -263,6 +265,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
net_device *ndev)
* This should not happen, since ndev->tbusy should be set.
*/
printk("%s: tx queue full!.\n", ndev->name);
+ spin_unlock_irqrestore(&fep->hw_lock, flags);
return NETDEV_TX_BUSY;
}
@@ -342,6 +345,8 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
net_device *ndev)
skb_tx_timestamp(skb);
+ spin_unlock_irqrestore(&fep->hw_lock, flags);
+
return NETDEV_TX_OK;
}
@@ -612,6 +617,7 @@ fec_enet_tx(struct net_device *ndev)
int index = 0;
fep = netdev_priv(ndev);
+ spin_lock(&fep->hw_lock);
bdp = fep->dirty_tx;
/* get next bdp of dirty_tx */
@@ -699,6 +705,7 @@ fec_enet_tx(struct net_device *ndev)
netif_wake_queue(ndev);
}
}
+ spin_unlock(&fep->hw_lock);
return;
}
@@ -892,12 +899,12 @@ static int fec_enet_rx_napi(struct napi_struct
*napi, int budget)
int pkts = fec_enet_rx(ndev, budget);
struct fec_enet_private *fep = netdev_priv(ndev);
- fec_enet_tx(ndev);
-
if (pkts < budget) {
napi_complete(napi);
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
}
+
+ fec_enet_tx(ndev);
return pkts;
}
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 20:05 ` Fabio Estevam
@ 2013-04-20 12:35 ` Frank Li
2013-04-22 8:56 ` Lucas Stach
2013-04-25 12:37 ` Lucas Stach
1 sibling, 1 reply; 18+ messages in thread
From: Frank Li @ 2013-04-20 12:35 UTC (permalink / raw)
To: Fabio Estevam
Cc: Lucas Stach, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
2013/4/20 Fabio Estevam <festevam@gmail.com>
>
> Lucas,
>
> On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> > Those patches introduce instability to the point of kernel OOPSes with
> > NULL-ptr dereferences.
> >
> > The patches drop locks from the code without justifying why this would
> > be safe at all. In fact it isn't safe as now the controller restart can
> > happily free the RX and TX ring buffers while the NAPI poll function is
> > still accessing them. So with a heavily loaded but slightly instable
I think a possible solution is disable NAPI in restart function.
So only one thread can reset BD queue.
BD queue is nolock design.
Can you provide test case?
> > link we regularly end up with OOPSes because link change restarts
> > the FEC and bombs away buffers still in use.
> >
> > Also the NAPI enabled interrupt handler ACKs the INT and only later
> > masks it, this way introducing a window where new interrupts could sneak
> > in while we are already in polling mode.
> >
> > As it's way too late in the cycle to try and fix this up just revert the
> > relevant patches for now.
>
> What about restoring the spinlocks and masking the int first?
>
> --- a/drivers/net/ethernet/freescale/fec.c
> +++ b/drivers/net/ethernet/freescale/fec.c
> @@ -247,12 +247,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> void *bufaddr;
> unsigned short status;
> unsigned int index;
> + unsigned long flags;
>
> if (!fep->link) {
> /* Link is down or autonegotiation is in progress. */
> return NETDEV_TX_BUSY;
> }
>
> + spin_lock_irqsave(&fep->hw_lock, flags);
> /* Fill in a Tx ring entry */
> bdp = fep->cur_tx;
>
> @@ -263,6 +265,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> * This should not happen, since ndev->tbusy should be set.
> */
> printk("%s: tx queue full!.\n", ndev->name);
> + spin_unlock_irqrestore(&fep->hw_lock, flags);
> return NETDEV_TX_BUSY;
> }
>
> @@ -342,6 +345,8 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>
> skb_tx_timestamp(skb);
>
> + spin_unlock_irqrestore(&fep->hw_lock, flags);
> +
> return NETDEV_TX_OK;
> }
>
> @@ -612,6 +617,7 @@ fec_enet_tx(struct net_device *ndev)
> int index = 0;
>
> fep = netdev_priv(ndev);
> + spin_lock(&fep->hw_lock);
> bdp = fep->dirty_tx;
>
> /* get next bdp of dirty_tx */
> @@ -699,6 +705,7 @@ fec_enet_tx(struct net_device *ndev)
> netif_wake_queue(ndev);
> }
> }
> + spin_unlock(&fep->hw_lock);
> return;
> }
>
> @@ -892,12 +899,12 @@ static int fec_enet_rx_napi(struct napi_struct
> *napi, int budget)
> int pkts = fec_enet_rx(ndev, budget);
> struct fec_enet_private *fep = netdev_priv(ndev);
>
> - fec_enet_tx(ndev);
> -
> if (pkts < budget) {
> napi_complete(napi);
> writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
> }
> +
> + fec_enet_tx(ndev);
> return pkts;
> }
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-20 12:35 ` Frank Li
@ 2013-04-22 8:56 ` Lucas Stach
2013-04-22 9:17 ` Frank Li
0 siblings, 1 reply; 18+ messages in thread
From: Lucas Stach @ 2013-04-22 8:56 UTC (permalink / raw)
To: Frank Li
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
Hi all,
Am Samstag, den 20.04.2013, 20:35 +0800 schrieb Frank Li:
> 2013/4/20 Fabio Estevam <festevam@gmail.com>
> >
> > Lucas,
> >
> > On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> > > Those patches introduce instability to the point of kernel OOPSes with
> > > NULL-ptr dereferences.
> > >
> > > The patches drop locks from the code without justifying why this would
> > > be safe at all. In fact it isn't safe as now the controller restart can
> > > happily free the RX and TX ring buffers while the NAPI poll function is
> > > still accessing them. So with a heavily loaded but slightly instable
>
> I think a possible solution is disable NAPI in restart function.
> So only one thread can reset BD queue.
>
> BD queue is nolock design.
>
It doesn't matter at all that the hardware BD queue is designed to be
operated lockless, you still have to synchronize the driver functions to
each other and explicit locks are a far better way to achieve this than
some implicit tunneling through a single thread or other such things.
Let us please try and concentrate on making things safe and easy to
understand and not introduce possibilities for breakage in the future,
when the next change goes into the driver.
> Can you provide test case?
>
The test case is already described in my original mail: heavily loaded
link, so NAPI has to do some spinning in the receive function while
having the link flapping.
> > > link we regularly end up with OOPSes because link change restarts
> > > the FEC and bombs away buffers still in use.
> > >
> > > Also the NAPI enabled interrupt handler ACKs the INT and only later
> > > masks it, this way introducing a window where new interrupts could sneak
> > > in while we are already in polling mode.
> > >
> > > As it's way too late in the cycle to try and fix this up just revert the
> > > relevant patches for now.
> >
> > What about restoring the spinlocks and masking the int first?
> >
While reintroducing the spinlocks might fix the problem (I'll retest
that today) we are now holding a big lock for extended periods of time,
so while we are spinning in the receive poll function we are not able to
enqueue new TX buffers. This is also a problem with the original
patches, as they are mashing together the TX and RX interrupts.
Dave, even if the reverts are intrusive I'm still not convinced that we
should try and fix this up in the short period of time we have left
until the 3.9 final release.
To fix all this properly we would have to fix at least the following
things:
1. Split up the spinlock into two independent locks for RX and TX.
Interrupt handlers should only take their respective lock, things like
the FEC restart, who want to mess with both queues have to take both
locks.
2. Move locking to the right places, there is zero reason why the
adjust_link PHY callback has to take the locks, but rather FEC restart
should take them.
3. Introduce separate NAPI contexts for RX and TX, to get around one of
them blocking the other.
I doubt this will be less intrusive than reverting the offending patches
for now and taking a new stab at NAPI support in the next cycle.
Also I suspect the patch "net: fec: put tx to napi poll function to fix
dead lock" to introduce a more subtle problem in the ring buffer
accounting (why does this patch even change the way ring buffers are
tracked?) which triggers on rarer occasions, but I have to test if this
is still there with the lock added back.
Regards,
Lucas
--
Pengutronix e.K. | Lucas Stach |
Industrial Linux Solutions | http://www.pengutronix.de/ |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-22 8:56 ` Lucas Stach
@ 2013-04-22 9:17 ` Frank Li
2013-04-25 12:31 ` Lucas Stach
0 siblings, 1 reply; 18+ messages in thread
From: Frank Li @ 2013-04-22 9:17 UTC (permalink / raw)
To: Lucas Stach
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
2013/4/22 Lucas Stach <l.stach@pengutronix.de>:
> Hi all,
>
> Am Samstag, den 20.04.2013, 20:35 +0800 schrieb Frank Li:
>> 2013/4/20 Fabio Estevam <festevam@gmail.com>
>> >
>> > Lucas,
>> >
>> > On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
>> > > Those patches introduce instability to the point of kernel OOPSes with
>> > > NULL-ptr dereferences.
>> > >
>> > > The patches drop locks from the code without justifying why this would
>> > > be safe at all. In fact it isn't safe as now the controller restart can
>> > > happily free the RX and TX ring buffers while the NAPI poll function is
>> > > still accessing them. So with a heavily loaded but slightly instable
>>
>> I think a possible solution is disable NAPI in restart function.
>> So only one thread can reset BD queue.
>>
>> BD queue is nolock design.
>>
> It doesn't matter at all that the hardware BD queue is designed to be
> operated lockless, you still have to synchronize the driver functions to
> each other and explicit locks are a far better way to achieve this than
> some implicit tunneling through a single thread or other such things.
Not hardware BD queue.
I redesign software BD queue as lockless queue.
After put actual queue process work to NAPI, interrupt handle will
not interrupt xmit and NAPI function again.
There are just one entry xmit to push new data to bd queue.
One entry fec_enet_tx to pull old data from bd queue.
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
}
}
HARD_TX_UNLOCK(dev, txq);
Restart function will only called at suspend/resume, init, and speed change.
So risk should not in heave loading.
The other reason of remove lock is that fix deadlock detected by kernel.
>
> Let us please try and concentrate on making things safe and easy to
> understand and not introduce possibilities for breakage in the future,
> when the next change goes into the driver.
>
>> Can you provide test case?
>>
> The test case is already described in my original mail: heavily loaded
> link, so NAPI has to do some spinning in the receive function while
> having the link flapping.
>
>> > > link we regularly end up with OOPSes because link change restarts
>> > > the FEC and bombs away buffers still in use.
>> > >
>> > > Also the NAPI enabled interrupt handler ACKs the INT and only later
>> > > masks it, this way introducing a window where new interrupts could sneak
>> > > in while we are already in polling mode.
>> > >
>> > > As it's way too late in the cycle to try and fix this up just revert the
>> > > relevant patches for now.
>> >
>> > What about restoring the spinlocks and masking the int first?
>> >
> While reintroducing the spinlocks might fix the problem (I'll retest
> that today) we are now holding a big lock for extended periods of time,
> so while we are spinning in the receive poll function we are not able to
> enqueue new TX buffers. This is also a problem with the original
> patches, as they are mashing together the TX and RX interrupts.
>
> Dave, even if the reverts are intrusive I'm still not convinced that we
> should try and fix this up in the short period of time we have left
> until the 3.9 final release.
>
> To fix all this properly we would have to fix at least the following
> things:
> 1. Split up the spinlock into two independent locks for RX and TX.
> Interrupt handlers should only take their respective lock, things like
> the FEC restart, who want to mess with both queues have to take both
> locks.
> 2. Move locking to the right places, there is zero reason why the
> adjust_link PHY callback has to take the locks, but rather FEC restart
> should take them.
> 3. Introduce separate NAPI contexts for RX and TX, to get around one of
> them blocking the other.
>
> I doubt this will be less intrusive than reverting the offending patches
> for now and taking a new stab at NAPI support in the next cycle.
>
> Also I suspect the patch "net: fec: put tx to napi poll function to fix
> dead lock" to introduce a more subtle problem in the ring buffer
> accounting (why does this patch even change the way ring buffers are
> tracked?) which triggers on rarer occasions, but I have to test if this
> is still there with the lock added back.
>
> Regards,
> Lucas
> --
> Pengutronix e.K. | Lucas Stach |
> Industrial Linux Solutions | http://www.pengutronix.de/ |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
> Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-22 9:17 ` Frank Li
@ 2013-04-25 12:31 ` Lucas Stach
2013-04-25 14:45 ` Frank Li
0 siblings, 1 reply; 18+ messages in thread
From: Lucas Stach @ 2013-04-25 12:31 UTC (permalink / raw)
To: Frank Li
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
Am Montag, den 22.04.2013, 17:17 +0800 schrieb Frank Li:
> 2013/4/22 Lucas Stach <l.stach@pengutronix.de>:
> > Hi all,
> >
> > Am Samstag, den 20.04.2013, 20:35 +0800 schrieb Frank Li:
> >> 2013/4/20 Fabio Estevam <festevam@gmail.com>
> >> >
> >> > Lucas,
> >> >
> >> > On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> >> > > Those patches introduce instability to the point of kernel OOPSes with
> >> > > NULL-ptr dereferences.
> >> > >
> >> > > The patches drop locks from the code without justifying why this would
> >> > > be safe at all. In fact it isn't safe as now the controller restart can
> >> > > happily free the RX and TX ring buffers while the NAPI poll function is
> >> > > still accessing them. So with a heavily loaded but slightly instable
> >>
> >> I think a possible solution is disable NAPI in restart function.
> >> So only one thread can reset BD queue.
> >>
> >> BD queue is nolock design.
> >>
> > It doesn't matter at all that the hardware BD queue is designed to be
> > operated lockless, you still have to synchronize the driver functions to
> > each other and explicit locks are a far better way to achieve this than
> > some implicit tunneling through a single thread or other such things.
>
> Not hardware BD queue.
> I redesign software BD queue as lockless queue.
>
> After put actual queue process work to NAPI, interrupt handle will
> not interrupt xmit and NAPI function again.
>
> There are just one entry xmit to push new data to bd queue.
> One entry fec_enet_tx to pull old data from bd queue.
>
> HARD_TX_LOCK(dev, txq, cpu);
>
> if (!netif_xmit_stopped(txq)) {
> __this_cpu_inc(xmit_recursion);
> rc = dev_hard_start_xmit(skb, dev, txq);
> __this_cpu_dec(xmit_recursion);
> if (dev_xmit_complete(rc)) {
> HARD_TX_UNLOCK(dev, txq);
> goto out;
> }
> }
> HARD_TX_UNLOCK(dev, txq);
>
> Restart function will only called at suspend/resume, init, and speed change.
> So risk should not in heave loading.
>
> The other reason of remove lock is that fix deadlock detected by kernel.
While I agree that lockless queues and NAPI disable while doing FEC
restart is the way to go for further development, I tried to implement
that yesterday and it needs some bigger changes to the driver to split
things up properly, otherwise we need a lot of ad-hoc hackery to check
if NAPI is enabled, which seems really error prone.
think such a change in the driver is not acceptable in the current state
of the cycle. I for one will not submit this change, as I'm not sure at
all that it won't regress in other situations.
Regards,
Lucas
--
Pengutronix e.K. | Lucas Stach |
Industrial Linux Solutions | http://www.pengutronix.de/ |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-25 12:31 ` Lucas Stach
@ 2013-04-25 14:45 ` Frank Li
2013-04-25 14:57 ` Lucas Stach
0 siblings, 1 reply; 18+ messages in thread
From: Frank Li @ 2013-04-25 14:45 UTC (permalink / raw)
To: Lucas Stach
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
2013/4/25 Lucas Stach <l.stach@pengutronix.de>:
> Am Montag, den 22.04.2013, 17:17 +0800 schrieb Frank Li:
>> 2013/4/22 Lucas Stach <l.stach@pengutronix.de>:
>> > Hi all,
>> >
>> > Am Samstag, den 20.04.2013, 20:35 +0800 schrieb Frank Li:
>> >> 2013/4/20 Fabio Estevam <festevam@gmail.com>
>> >> >
>> >> > Lucas,
>> >> >
>> >> > On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
>> >> > > Those patches introduce instability to the point of kernel OOPSes with
>> >> > > NULL-ptr dereferences.
>> >> > >
>> >> > > The patches drop locks from the code without justifying why this would
>> >> > > be safe at all. In fact it isn't safe as now the controller restart can
>> >> > > happily free the RX and TX ring buffers while the NAPI poll function is
>> >> > > still accessing them. So with a heavily loaded but slightly instable
>> >>
>> >> I think a possible solution is disable NAPI in restart function.
>> >> So only one thread can reset BD queue.
>> >>
>> >> BD queue is nolock design.
>> >>
>> > It doesn't matter at all that the hardware BD queue is designed to be
>> > operated lockless, you still have to synchronize the driver functions to
>> > each other and explicit locks are a far better way to achieve this than
>> > some implicit tunneling through a single thread or other such things.
>>
>> Not hardware BD queue.
>> I redesign software BD queue as lockless queue.
>>
>> After put actual queue process work to NAPI, interrupt handle will
>> not interrupt xmit and NAPI function again.
>>
>> There are just one entry xmit to push new data to bd queue.
>> One entry fec_enet_tx to pull old data from bd queue.
>>
>> HARD_TX_LOCK(dev, txq, cpu);
>>
>> if (!netif_xmit_stopped(txq)) {
>> __this_cpu_inc(xmit_recursion);
>> rc = dev_hard_start_xmit(skb, dev, txq);
>> __this_cpu_dec(xmit_recursion);
>> if (dev_xmit_complete(rc)) {
>> HARD_TX_UNLOCK(dev, txq);
>> goto out;
>> }
>> }
>> HARD_TX_UNLOCK(dev, txq);
>>
>> Restart function will only called at suspend/resume, init, and speed change.
>> So risk should not in heave loading.
>>
>> The other reason of remove lock is that fix deadlock detected by kernel.
>
> While I agree that lockless queues and NAPI disable while doing FEC
> restart is the way to go for further development, I tried to implement
> that yesterday and it needs some bigger changes to the driver to split
> things up properly, otherwise we need a lot of ad-hoc hackery to check
> if NAPI is enabled, which seems really error prone.
>
> think such a change in the driver is not acceptable in the current state
> of the cycle. I for one will not submit this change, as I'm not sure at
> all that it won't regress in other situations.
NAPI is direction. Can you send me your detail test step? run what command?
So we can easily reproduce your problem.
>
> Regards,
> Lucas
> --
> Pengutronix e.K. | Lucas Stach |
> Industrial Linux Solutions | http://www.pengutronix.de/ |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
> Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-25 14:45 ` Frank Li
@ 2013-04-25 14:57 ` Lucas Stach
2013-04-27 7:12 ` Frank Li
0 siblings, 1 reply; 18+ messages in thread
From: Lucas Stach @ 2013-04-25 14:57 UTC (permalink / raw)
To: Frank Li
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
Am Donnerstag, den 25.04.2013, 22:45 +0800 schrieb Frank Li:
> 2013/4/25 Lucas Stach <l.stach@pengutronix.de>:
> > Am Montag, den 22.04.2013, 17:17 +0800 schrieb Frank Li:
> >> 2013/4/22 Lucas Stach <l.stach@pengutronix.de>:
> >> > Hi all,
> >> >
> >> > Am Samstag, den 20.04.2013, 20:35 +0800 schrieb Frank Li:
> >> >> 2013/4/20 Fabio Estevam <festevam@gmail.com>
> >> >> >
> >> >> > Lucas,
> >> >> >
> >> >> > On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> >> >> > > Those patches introduce instability to the point of kernel OOPSes with
> >> >> > > NULL-ptr dereferences.
> >> >> > >
> >> >> > > The patches drop locks from the code without justifying why this would
> >> >> > > be safe at all. In fact it isn't safe as now the controller restart can
> >> >> > > happily free the RX and TX ring buffers while the NAPI poll function is
> >> >> > > still accessing them. So with a heavily loaded but slightly instable
> >> >>
> >> >> I think a possible solution is disable NAPI in restart function.
> >> >> So only one thread can reset BD queue.
> >> >>
> >> >> BD queue is nolock design.
> >> >>
> >> > It doesn't matter at all that the hardware BD queue is designed to be
> >> > operated lockless, you still have to synchronize the driver functions to
> >> > each other and explicit locks are a far better way to achieve this than
> >> > some implicit tunneling through a single thread or other such things.
> >>
> >> Not hardware BD queue.
> >> I redesign software BD queue as lockless queue.
> >>
> >> After put actual queue process work to NAPI, interrupt handle will
> >> not interrupt xmit and NAPI function again.
> >>
> >> There are just one entry xmit to push new data to bd queue.
> >> One entry fec_enet_tx to pull old data from bd queue.
> >>
> >> HARD_TX_LOCK(dev, txq, cpu);
> >>
> >> if (!netif_xmit_stopped(txq)) {
> >> __this_cpu_inc(xmit_recursion);
> >> rc = dev_hard_start_xmit(skb, dev, txq);
> >> __this_cpu_dec(xmit_recursion);
> >> if (dev_xmit_complete(rc)) {
> >> HARD_TX_UNLOCK(dev, txq);
> >> goto out;
> >> }
> >> }
> >> HARD_TX_UNLOCK(dev, txq);
> >>
> >> Restart function will only called at suspend/resume, init, and speed change.
> >> So risk should not in heave loading.
> >>
> >> The other reason of remove lock is that fix deadlock detected by kernel.
> >
> > While I agree that lockless queues and NAPI disable while doing FEC
> > restart is the way to go for further development, I tried to implement
> > that yesterday and it needs some bigger changes to the driver to split
> > things up properly, otherwise we need a lot of ad-hoc hackery to check
> > if NAPI is enabled, which seems really error prone.
> >
> > think such a change in the driver is not acceptable in the current state
> > of the cycle. I for one will not submit this change, as I'm not sure at
> > all that it won't regress in other situations.
>
> NAPI is direction. Can you send me your detail test step? run what command?
> So we can easily reproduce your problem.
>
I'm working with a i.MX6q, connected to a fast-ethernet port (100MBit).
For me all that is need to trigger the failures is generating load on
the port by using a floodping (ping -f -i 0 -s 1400) from a remote host
and then bringing down the link by means of physically removing the plug
from the connector.
As soon as the PHY signals a link down things start to fall apart.
Regards,
Lucas
--
Pengutronix e.K. | Lucas Stach |
Industrial Linux Solutions | http://www.pengutronix.de/ |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-25 14:57 ` Lucas Stach
@ 2013-04-27 7:12 ` Frank Li
0 siblings, 0 replies; 18+ messages in thread
From: Frank Li @ 2013-04-27 7:12 UTC (permalink / raw)
To: Lucas Stach
Cc: Fabio Estevam, netdev@vger.kernel.org, David Miller, Frank Li,
Shawn Guo
2013/4/25 Lucas Stach <l.stach@pengutronix.de>:
> s soon as the PHY signals a link down things start to fall apart.
Sorry, I can't reproduce your problem.
I use two MX6.
Run below command in one MX6
> ping -f -i 0 -s 1400 10.192.242.122
unplug/plug cable at the other boards.
>uname -a
Linux freescale 3.9.0-rc8+#214
I base on net tree.
I static analyse code. I know the possible problem.
fec_enet_adjust_link run at delay work without hold any lock.
fec_enet_adjust_link will call fec_restart reset BD queue.
fec_resart will reset BD pointer.
I think disable napi and TX queue by netif_stop_queue should resolve
your problem.
Can you give detail guide to reproduce your problem?
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 10/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
root@freescale /proc/irq/150$ libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
libphy: 2188000.ethernet:01 - Link is Down
libphy: 2188000.ethernet:01 - Link is Up - 100/Full
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 0/3] URGENT for 3.9: net: fec: revert NAPI introduction
2013-04-19 20:05 ` Fabio Estevam
2013-04-20 12:35 ` Frank Li
@ 2013-04-25 12:37 ` Lucas Stach
1 sibling, 0 replies; 18+ messages in thread
From: Lucas Stach @ 2013-04-25 12:37 UTC (permalink / raw)
To: Fabio Estevam; +Cc: netdev, David Miller, Frank Li, Shawn Guo
Am Freitag, den 19.04.2013, 17:05 -0300 schrieb Fabio Estevam:
> Lucas,
>
> On Fri, Apr 19, 2013 at 11:36 AM, Lucas Stach <l.stach@pengutronix.de> wrote:
> > Those patches introduce instability to the point of kernel OOPSes with
> > NULL-ptr dereferences.
> >
> > The patches drop locks from the code without justifying why this would
> > be safe at all. In fact it isn't safe as now the controller restart can
> > happily free the RX and TX ring buffers while the NAPI poll function is
> > still accessing them. So with a heavily loaded but slightly instable
> > link we regularly end up with OOPSes because link change restarts
> > the FEC and bombs away buffers still in use.
> >
> > Also the NAPI enabled interrupt handler ACKs the INT and only later
> > masks it, this way introducing a window where new interrupts could sneak
> > in while we are already in polling mode.
> >
> > As it's way too late in the cycle to try and fix this up just revert the
> > relevant patches for now.
>
> What about restoring the spinlocks and masking the int first?
>
> --- a/drivers/net/ethernet/freescale/fec.c
> +++ b/drivers/net/ethernet/freescale/fec.c
> @@ -247,12 +247,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> void *bufaddr;
> unsigned short status;
> unsigned int index;
> + unsigned long flags;
>
> if (!fep->link) {
> /* Link is down or autonegotiation is in progress. */
> return NETDEV_TX_BUSY;
> }
>
> + spin_lock_irqsave(&fep->hw_lock, flags);
> /* Fill in a Tx ring entry */
> bdp = fep->cur_tx;
>
> @@ -263,6 +265,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> * This should not happen, since ndev->tbusy should be set.
> */
> printk("%s: tx queue full!.\n", ndev->name);
> + spin_unlock_irqrestore(&fep->hw_lock, flags);
> return NETDEV_TX_BUSY;
> }
>
> @@ -342,6 +345,8 @@ fec_enet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>
> skb_tx_timestamp(skb);
>
> + spin_unlock_irqrestore(&fep->hw_lock, flags);
> +
> return NETDEV_TX_OK;
> }
>
> @@ -612,6 +617,7 @@ fec_enet_tx(struct net_device *ndev)
> int index = 0;
>
> fep = netdev_priv(ndev);
> + spin_lock(&fep->hw_lock);
> bdp = fep->dirty_tx;
>
> /* get next bdp of dirty_tx */
> @@ -699,6 +705,7 @@ fec_enet_tx(struct net_device *ndev)
> netif_wake_queue(ndev);
> }
> }
> + spin_unlock(&fep->hw_lock);
> return;
> }
>
> @@ -892,12 +899,12 @@ static int fec_enet_rx_napi(struct napi_struct
> *napi, int budget)
> int pkts = fec_enet_rx(ndev, budget);
> struct fec_enet_private *fep = netdev_priv(ndev);
>
> - fec_enet_tx(ndev);
> -
> if (pkts < budget) {
> napi_complete(napi);
> writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
> }
> +
> + fec_enet_tx(ndev);
> return pkts;
> }
This change isn't enough to fix the kernel OOPSes. The RX function is
also operating on the same buffers, but simply adding back the locks
there doesn't fix all the failures I'm seeing.
Regards,
Lucas
--
Pengutronix e.K. | Lucas Stach |
Industrial Linux Solutions | http://www.pengutronix.de/ |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-5076 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply [flat|nested] 18+ messages in thread