* [PATCH 0/2] e1000e msi-x fixes
@ 2015-10-23 0:32 Benjamin Poirier
2015-10-23 0:32 ` [PATCH 1/2] e1000e: remove unreachable code Benjamin Poirier
2015-10-23 0:32 ` [PATCH 2/2] e1000e: Fix msi-x interrupt automask Benjamin Poirier
0 siblings, 2 replies; 4+ messages in thread
From: Benjamin Poirier @ 2015-10-23 0:32 UTC (permalink / raw)
To: Jeff Kirsher
Cc: Jesse Brandeburg, Shannon Nelson, Carolyn Wyborny, Don Skidmore,
Matthew Vick, John Ronciak, Mitch Williams, intel-wired-lan,
netdev, linux-kernel
Hi,
For this series:
Benjamin Poirier (2):
e1000e: remove unreachable code
e1000e: Fix msi-x interrupt automask
drivers/net/ethernet/intel/e1000e/netdev.c | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
The first patch is a cleanup but the second one is the real deal. Please
consider reading the description for that patch before proceeding. I
believe that the following simple tracing statements are helpful in
detecting the problem fixed by the second patch.
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 8881256..707a525 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1952,6 +1952,9 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *rx_ring = adapter->rx_ring;
+ struct e1000_hw *hw = &adapter->hw;
+
+ trace_printk("%s: rxq0 irq ims 0x%08x\n", netdev->name, er32(IMS));
/* Write the ITR value calculated at the end of the
* previous interrupt.
@@ -1966,6 +1969,7 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data)
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(&adapter->napi);
+ trace_printk("%s: scheduling napi\n", netdev->name);
}
return IRQ_HANDLED;
}
@@ -2672,6 +2676,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
struct net_device *poll_dev = adapter->netdev;
int tx_cleaned = 1, work_done = 0;
+ trace_printk("%s: poll starting ims 0x%08x\n", poll_dev->name,
+ er32(IMS));
adapter = netdev_priv(poll_dev);
if (!adapter->msix_entries ||
@@ -2689,6 +2695,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
e1000_set_itr(adapter);
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
+ trace_printk("%s: will enable rxq0 irq\n",
+ poll_dev->name);
if (adapter->msix_entries)
ew32(IMS, adapter->rx_ring->ims_val);
else
-------- 8< --------
With that patch but without the patches in this series we can see that rx irqs
occur at unexpected times:
<idle>-0 [000] .Ns. 1986.887517: e1000e_poll: eth1: will enable rxq0 irq
<idle>-0 [000] d.h. 1986.896654: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01500004
<idle>-0 [000] d.h. 1986.896657: e1000_intr_msix_rx: eth1: scheduling napi
<idle>-0 [000] d.H. 1986.896662: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01500004
<idle>-0 [000] ..s. 1986.896667: e1000e_poll: eth1: poll starting ims 0x01500004
Warning: many interrupts (2) before napi
<idle>-0 [000] ..s. 1986.896685: e1000e_poll: eth1: will enable rxq0 irq
<idle>-0 [000] d.h. 1990.688870: e1000_intr_msix_rx: eth1: scheduling napi
<idle>-0 [000] ..s. 1990.688875: e1000e_poll: eth1: poll starting ims 0x01500004
<idle>-0 [000] dNH. 1990.688913: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01500004
Warning: interrupt inside napi
<idle>-0 [000] .Ns. 1990.688916: e1000e_poll: eth1: will enable rxq0 irq
<idle>-0 [000] d.h. 1990.729688: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01500004
Here's a typical sequence after applying the patches in this series. Notice
that ims is changed. Another printk at the end of e1000e_poll would show it to
be 0x01500004.
<idle>-0 [000] d.h. 3896.134376: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01400004
<idle>-0 [000] d.h. 3896.134379: e1000_intr_msix_rx: eth1: scheduling napi
<idle>-0 [000] ..s. 3896.134384: e1000e_poll: eth1: poll starting ims 0x01400004
<idle>-0 [000] ..s. 3896.134398: e1000e_poll: eth1: will enable rxq0 irq
Finally, here's the script I used to generate the warnings above:
#!/usr/bin/python3
import sys
import re
import pprint
class NaE(Exception):
    """Not an Event: raised by Event when a line does not parse as a trace record."""
class Event:
    """One trace_printk record parsed from a line of ftrace output.

    Every captured field is stored as a string attribute: comm, pid, cpu,
    flags, time, funcname, ifname and args.

    Raises:
        NaE: if the line does not look like a trace record.
    """

    # sample events:
    #           <idle>-0     [000] d.h.  2025.256536: e1000_intr_msix_rx: eth1: rxq0 irq ims 0x01500004
    #           <idle>-0     [000] d.h.  2025.256539: e1000_intr_msix_rx: eth1: scheduling napi
    #           <idle>-0     [000] ..s.  2025.256544: e1000e_poll: eth1: poll starting ims 0x01500004
    #           <idle>-0     [000] ..s.  2025.256558: e1000e_poll: eth1: will enable rxq0 irq
    #
    # Raw string so that "\[" is a regex escape rather than an (invalid)
    # string escape.  The comm group is non-greedy so the closing ">" of
    # "<idle>" is not swallowed into the name, and leading whitespace is
    # optional so traces whose indentation was trimmed still parse.
    _PATTERN = re.compile(
        r" *<?(?P<comm>.*?)>?-(?P<pid>[0-9]+) +"
        r"\[(?P<cpu>[^\]]*)\] (?P<flags>[^ ]+) +"
        r"(?P<time>[0-9.]+): (?P<funcname>[^:]+): "
        r"(?P<ifname>[^:]+): (?P<args>.*)"
    )

    def __init__(self, line):
        match = self._PATTERN.match(line)
        if match is None:
            raise NaE
        self.comm = match.group("comm")
        self.pid = match.group("pid")
        self.cpu = match.group("cpu")
        self.flags = match.group("flags")
        self.time = match.group("time")
        self.funcname = match.group("funcname")
        self.ifname = match.group("ifname")
        self.args = match.group("args")
class Machine:
    """Empty attribute holder; the states attach their shared data to it."""
class State:
    """Base class for the states of the trace-checking state machine.

    Subclasses override the entry/transition/run/exit hooks; advance()
    drives them for each event.
    """

    def __init__(self, machine):
        self.machine = machine

    def entry(self, event):
        """Hook called on a state when it becomes the current one."""

    def transition(self, event):
        """Return the state that should handle `event`.

        self.machine should be considered read-only in transition.
        """
        return State(self.machine)

    def run(self, event):
        """Hook called on the (possibly new) current state for every event."""

    def exit(self, event):
        """Hook called on a state when it stops being the current one."""

    def advance(self, event):
        """Feed one event to the machine and return the resulting state."""
        successor = self.transition(event)
        if successor is not self:
            # state change: leave the old state before entering the new one
            self.exit(event)
            successor.entry(event)
        successor.run(event)
        return successor
# general receive processing machine
def enteringNapi(machine, event):
    """Return True when `event` is a "poll starting" trace line."""
    return event.args.startswith("poll starting")
def exitingNapi(machine, event):
    """Return True when `event` is a "will enable" trace line."""
    return event.args.startswith("will enable")
class OutsideNapi(State):
    """State covering the interval between two napi polls.

    Records the "rxq0 irq" events seen in that interval and, when the next
    poll starts, warns if more than one was recorded.
    """

    def entry(self, event):
        # begin every interval with an empty interrupt list
        self.machine.intr = []

    def transition(self, event):
        if not enteringNapi(self.machine, event):
            return self
        return InsideNapi(self.machine)

    def run(self, event):
        if not event.args.startswith("rxq0 irq"):
            return
        self.machine.intr.append(event)

    def exit(self, event):
        count = len(self.machine.intr)
        if count > 1:
            print("Warning: many interrupts (%d) before napi at %s" % (
                count, event.time,))
class InsideNapi(State):
    """State covering a napi poll, from "poll starting" to "will enable".

    Any "rxq0 irq" event seen while in this state is reported.
    """

    def transition(self, event):
        if not exitingNapi(self.machine, event):
            return self
        return OutsideNapi(self.machine)

    def run(self, event):
        saw_rx_irq = event.args.startswith("rxq0 irq")
        if saw_rx_irq:
            print("Warning: interrupt inside napi")
class UnknownState(State):
    """Initial state, used until the trace shows where we are.

    Stays put until a "poll starting" or "will enable" event is seen, then
    hands over to InsideNapi or OutsideNapi respectively.
    """

    def transition(self, event):
        if enteringNapi(self.machine, event):
            return InsideNapi(self.machine)
        elif exitingNapi(self.machine, event):
            # A poll just finished, so we are now outside napi.  (The
            # original named a non-existent ExitingNapi class here, which
            # raised NameError on the first "will enable" event.)
            return OutsideNapi(self.machine)
        else:
            return self
if __name__ == '__main__':
    # Set to True to dump the parsed fields of every event.
    debug = False
    state = UnknownState(Machine())
    for line in sys.stdin:
        # Echo the trace so that warnings appear next to the line that
        # triggered them.
        print(line, end='')
        try:
            event = Event(line)
        except NaE:
            # Not a trace record; nothing to feed to the state machine.
            continue
        if debug:
            # The module is imported as "pprint"; the original called an
            # undefined "pprinter".  Dump the attributes rather than the
            # bare object so the output is actually informative.
            pprint.pprint(vars(event))
        state = state.advance(event)
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 1/2] e1000e: remove unreachable code
2015-10-23 0:32 [PATCH 0/2] e1000e msi-x fixes Benjamin Poirier
@ 2015-10-23 0:32 ` Benjamin Poirier
2015-10-23 0:32 ` [PATCH 2/2] e1000e: Fix msi-x interrupt automask Benjamin Poirier
1 sibling, 0 replies; 4+ messages in thread
From: Benjamin Poirier @ 2015-10-23 0:32 UTC (permalink / raw)
To: Jeff Kirsher
Cc: Jesse Brandeburg, Shannon Nelson, Carolyn Wyborny, Don Skidmore,
Matthew Vick, John Ronciak, Mitch Williams, intel-wired-lan,
netdev, linux-kernel
msi-x interrupts are not shared so there's no need to check if the
interrupt was really from this adapter.
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
---
drivers/net/ethernet/intel/e1000e/netdev.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0a854a4..a228167 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
- if (!(icr & E1000_ICR_INT_ASSERTED)) {
- if (!test_bit(__E1000_DOWN, &adapter->state))
- ew32(IMS, E1000_IMS_OTHER);
- return IRQ_NONE;
- }
-
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] e1000e: Fix msi-x interrupt automask
2015-10-23 0:32 [PATCH 0/2] e1000e msi-x fixes Benjamin Poirier
2015-10-23 0:32 ` [PATCH 1/2] e1000e: remove unreachable code Benjamin Poirier
@ 2015-10-23 0:32 ` Benjamin Poirier
2015-10-28 23:08 ` Alexander Duyck
1 sibling, 1 reply; 4+ messages in thread
From: Benjamin Poirier @ 2015-10-23 0:32 UTC (permalink / raw)
To: Jeff Kirsher
Cc: Jesse Brandeburg, Shannon Nelson, Carolyn Wyborny, Don Skidmore,
Matthew Vick, John Ronciak, Mitch Williams, intel-wired-lan,
netdev, linux-kernel
Since the introduction of 82574 support in e1000e, the driver has worked on
the assumption that msi-x interrupt generation is automatically disabled
after each irq. As it turns out, this is not the case. Currently, rx
interrupts can fire multiple times before and during napi processing. This
can be a problem for users because frames that arrive in a certain window
(after adapter->clean_rx() but before napi_complete_done() has cleared
NAPI_STATE_SCHED) generate an interrupt which does not lead to
napi_schedule(). These frames sit in the rx queue until another frame
arrives (a tcp retransmit for example).
While the EIAC and CTRL_EXT registers are properly configured for irq
automask, the modification of IAM in e1000_configure_msix() is what
prevents automask from working as intended.
This patch removes that erroneous write and fixes interrupt rearming for tx
and "other" interrupts. Since e1000_msix_other() reads ICR, all interrupts
must be rearmed in that function.
Reported-by: Frank Steiner <steiner-reg@bio.ifi.lmu.de>
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
---
drivers/net/ethernet/intel/e1000e/netdev.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a228167..8881256 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1921,7 +1921,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
no_link_interrupt:
if (!test_bit(__E1000_DOWN, &adapter->state))
- ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
+ ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+ E1000_IMS_LSC);
return IRQ_HANDLED;
}
@@ -1940,6 +1941,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data)
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+ ew32(IMS, E1000_IMS_TXQ0);
+
return IRQ_HANDLED;
}
@@ -2027,11 +2031,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
/* enable MSI-X PBA support */
ctrl_ext = er32(CTRL_EXT);
- ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
- /* Auto-Mask Other interrupts upon ICR read */
- ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
- ctrl_ext |= E1000_CTRL_EXT_EIAME;
+ ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
}
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] e1000e: Fix msi-x interrupt automask
2015-10-23 0:32 ` [PATCH 2/2] e1000e: Fix msi-x interrupt automask Benjamin Poirier
@ 2015-10-28 23:08 ` Alexander Duyck
0 siblings, 0 replies; 4+ messages in thread
From: Alexander Duyck @ 2015-10-28 23:08 UTC (permalink / raw)
To: Benjamin Poirier, Jeff Kirsher
Cc: Jesse Brandeburg, Shannon Nelson, Carolyn Wyborny, Don Skidmore,
Matthew Vick, John Ronciak, Mitch Williams, intel-wired-lan,
netdev, linux-kernel
On 10/22/2015 05:32 PM, Benjamin Poirier wrote:
> Since the introduction of 82574 support in e1000e, the driver has worked on
> the assumption that msi-x interrupt generation is automatically disabled
> after each irq. As it turns out, this is not the case. Currently, rx
> interrupts can fire multiple times before and during napi processing. This
> can be a problem for users because frames that arrive in a certain window
> (after adapter->clean_rx() but before napi_complete_done() has cleared
> NAPI_STATE_SCHED) generate an interrupt which does not lead to
> napi_schedule(). These frames sit in the rx queue until another frame
> arrives (a tcp retransmit for example).
>
> While the EIAC and CTRL_EXT registers are properly configured for irq
> automask, the modification of IAM in e1000_configure_msix() is what
> prevents automask from working as intended.
>
> This patch removes that erroneous write and fixes interrupt rearming for tx
> and "other" interrupts. Since e1000_msix_other() reads ICR, all interrupts
> must be rearmed in that function.
>
> Reported-by: Frank Steiner <steiner-reg@bio.ifi.lmu.de>
> Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
> ---
> drivers/net/ethernet/intel/e1000e/netdev.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index a228167..8881256 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -1921,7 +1921,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
>
> no_link_interrupt:
> if (!test_bit(__E1000_DOWN, &adapter->state))
> - ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
> + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
> + E1000_IMS_LSC);
>
> return IRQ_HANDLED;
> }
I would argue your first patch probably didn't go far enough to remove
dead code. Specifically you should only ever get into this function if
LSC is set. There are no other causes that should trigger this. As
such you could probably remove the ICR read, and instead replace it with
an ICR write of the LSC bit since OTHER is already cleared via EIAC.
> @@ -1940,6 +1941,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data)
> /* Ring was not completely cleaned, so fire another interrupt */
> ew32(ICS, tx_ring->ims_val);
>
> + if (!test_bit(__E1000_DOWN, &adapter->state))
> + ew32(IMS, E1000_IMS_TXQ0);
> +
> return IRQ_HANDLED;
> }
>
I think what you need to set here is tx_ring->ims_val, not E1000_IMS_TXQ0.
> @@ -2027,11 +2031,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
>
> /* enable MSI-X PBA support */
> ctrl_ext = er32(CTRL_EXT);
> - ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
> -
> - /* Auto-Mask Other interrupts upon ICR read */
> - ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
> - ctrl_ext |= E1000_CTRL_EXT_EIAME;
> + ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
> ew32(CTRL_EXT, ctrl_ext);
> e1e_flush();
> }
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-10-28 23:08 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-23 0:32 [PATCH 0/2] e1000e msi-x fixes Benjamin Poirier
2015-10-23 0:32 ` [PATCH 1/2] e1000e: remove unreachable code Benjamin Poirier
2015-10-23 0:32 ` [PATCH 2/2] e1000e: Fix msi-x interrupt automask Benjamin Poirier
2015-10-28 23:08 ` Alexander Duyck
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox