From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
To: davem@davemloft.net
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>,
netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
jogreene@redhat.com, Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Subject: [net v2 1/3] i40e: fix un-necessary Tx hangs
Date: Tue, 6 Jan 2015 17:31:55 -0800 [thread overview]
Message-ID: <1420594317-6191-2-git-send-email-jeffrey.t.kirsher@intel.com> (raw)
In-Reply-To: <1420594317-6191-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
When the driver was polling with interrupts disabled the hardware
will occasionally not write back descriptors. This patch causes
the driver to detect this situation and force an interrupt to
fire which will flush the stuck descriptor. Does not conflict
with napi because if we are already polling the napi_schedule is
ignored. Additionally the extra interrupts are rate limited, so
don't cause a burden to the CPU.
Change-ID: Iba4616d2a71288672a5f08e4512e2704b97335e8
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
v2: fixed a bug where the interrupt rate impacted 4 port workloads by
reducing throughput.
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 56 ++++++++++++++++++++++++-----
drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 +
2 files changed, 49 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 04b4414..f145aaf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -658,6 +658,8 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
return le32_to_cpu(*(volatile __le32 *)head);
}
+#define WB_STRIDE 0x3
+
/**
* i40e_clean_tx_irq - Reclaim resources after transmit completes
* @tx_ring: tx ring to clean
@@ -759,6 +761,18 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
+ /* check to see if there are any non-cache aligned descriptors
+ * waiting to be written back, and kick the hardware to force
+ * them to be written back in case of napi polling
+ */
+ if (budget &&
+ !((i & WB_STRIDE) == WB_STRIDE) &&
+ !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+ (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+ tx_ring->arm_wb = true;
+ else
+ tx_ring->arm_wb = false;
+
if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
/* schedule immediate reset if we believe we hung */
dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
@@ -777,13 +791,16 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
dev_info(tx_ring->dev,
- "tx hang detected on queue %d, resetting adapter\n",
+ "tx hang detected on queue %d, reset requested\n",
tx_ring->queue_index);
- tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+ /* do not fire the reset immediately, wait for the stack to
+ * decide we are truly stuck, also prevents every queue from
+ * simultaneously requesting a reset
+ */
- /* the adapter is about to reset, no point in enabling stuff */
- return true;
+ /* the adapter is about to reset, no point in enabling polling */
+ budget = 1;
}
netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -806,7 +823,25 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
}
}
- return budget > 0;
+ return !!budget;
+}
+
+/**
+ * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+ u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK
+ /* allow 00 to be written to the index */;
+
+ wr32(&vsi->back->hw,
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+ val);
}
/**
@@ -1581,6 +1616,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
struct i40e_vsi *vsi = q_vector->vsi;
struct i40e_ring *ring;
bool clean_complete = true;
+ bool arm_wb = false;
int budget_per_ring;
if (test_bit(__I40E_DOWN, &vsi->state)) {
@@ -1591,8 +1627,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
- i40e_for_each_ring(ring, q_vector->tx)
+ i40e_for_each_ring(ring, q_vector->tx) {
clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+ arm_wb |= ring->arm_wb;
+ }
/* We attempt to distribute budget to each Rx queue fairly, but don't
* allow the budget to go below 1 because that would exit polling early.
@@ -1603,8 +1641,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
/* If work not completed, return budget and polling will return */
- if (!clean_complete)
+ if (!clean_complete) {
+ if (arm_wb)
+ i40e_force_wb(vsi, q_vector);
return budget;
+ }
/* Work is done so exit the polling mode and re-enable the interrupt */
napi_complete(napi);
@@ -2198,7 +2239,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* Place RS bit on last descriptor of any packet that spans across the
* 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
*/
-#define WB_STRIDE 0x3
if (((i & WB_STRIDE) != WB_STRIDE) &&
(first <= &tx_ring->tx_bi[i]) &&
(first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e60d3ac..18b0023 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -241,6 +241,7 @@ struct i40e_ring {
unsigned long last_rx_timestamp;
bool ring_active; /* is ring online or not */
+ bool arm_wb; /* do something to arm write back */
/* stats structs */
struct i40e_queue_stats stats;
--
1.9.3
next prev parent reply other threads:[~2015-01-07 1:32 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-07 1:31 [net v2 0/3][pull request] Intel Wired LAN Driver Updates 2015-01-06 Jeff Kirsher
2015-01-07 1:31 ` Jeff Kirsher [this message]
2015-01-07 1:31 ` [net v2 2/3] i40e: Fix Rx checksum error counter Jeff Kirsher
2015-01-07 5:43 ` Tom Herbert
2015-01-07 20:14 ` Singhai, Anjali
2015-01-07 21:34 ` Tom Herbert
2015-01-08 20:33 ` Singhai, Anjali
2015-01-07 1:31 ` [net v2 3/3] i40e: Fix bug with TCP over IPv6 over VXLAN Jeff Kirsher
2015-01-09 3:43 ` [net v2 0/3][pull request] Intel Wired LAN Driver Updates 2015-01-06 David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1420594317-6191-2-git-send-email-jeffrey.t.kirsher@intel.com \
--to=jeffrey.t.kirsher@intel.com \
--cc=davem@davemloft.net \
--cc=jesse.brandeburg@intel.com \
--cc=jogreene@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=nhorman@redhat.com \
--cc=sassmann@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).