From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: "Liming Sun" <limings@nvidia.com>,
"Ilpo Järvinen" <ilpo.jarvinen@linux.intel.com>,
"Hans de Goede" <hdegoede@redhat.com>,
"Sasha Levin" <sashal@kernel.org>,
vadimp@nvidia.com, platform-driver-x86@vger.kernel.org
Subject: [PATCH AUTOSEL 6.7 06/23] platform/mellanox: mlxbf-tmfifo: Drop Tx network packet when Tx TmFIFO is full
Date: Fri, 2 Feb 2024 13:39:02 -0500 [thread overview]
Message-ID: <20240202183926.540467-6-sashal@kernel.org> (raw)
In-Reply-To: <20240202183926.540467-1-sashal@kernel.org>
From: Liming Sun <limings@nvidia.com>
[ Upstream commit 8cbc756b802605dee3dd40019bd75960772bacf5 ]
Starting from Linux 5.16 kernel, Tx timeout mechanism was added
in the virtio_net driver which prints the "Tx timeout" warning
message when a packet stays in Tx queue for too long. Below is an
example of the reported message:
"[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
last trans: 3079892256".
This issue could happen when external host driver which drains the
FIFO is restared, stopped or upgraded. To avoid such confusing
"Tx timeout" messages, this commit adds logic to drop the outstanding
Tx packet if it's not able to transmit in two seconds due to Tx FIFO
full, which can be considered as congestion or out-of-resource drop.
This commit also handles the special case that the packet is half-
transmitted into the Tx FIFO. In such case, the packet is discarded
with remaining length stored in vring->rem_padding. So paddings with
zeros can be sent out when Tx space is available to maintain the
integrity of the packet format. The padded packet will be dropped on
the receiving side.
Signed-off-by: Liming Sun <limings@nvidia.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20240111173106.96958-1-limings@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 5c683b4eaf10..f39b7b9d2bfe 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -47,6 +47,9 @@
/* Message with data needs at least two words (for header & data). */
#define MLXBF_TMFIFO_DATA_MIN_WORDS 2
+/* Tx timeout in milliseconds. */
+#define TMFIFO_TX_TIMEOUT 2000
+
/* ACPI UID for BlueField-3. */
#define TMFIFO_BF3_UID 1
@@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
* @drop_desc: dummy desc for packet dropping
* @cur_len: processed length of the current descriptor
* @rem_len: remaining length of the pending packet
+ * @rem_padding: remaining bytes to send as paddings
* @pkt_len: total length of the pending packet
* @next_avail: next avail descriptor id
* @num: vring size (number of descriptors)
* @align: vring alignment size
* @index: vring index
* @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @tx_timeout: expire time of last tx packet
* @fifo: pointer to the tmfifo structure
*/
struct mlxbf_tmfifo_vring {
@@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
struct vring_desc drop_desc;
int cur_len;
int rem_len;
+ int rem_padding;
u32 pkt_len;
u16 next_avail;
int num;
int align;
int index;
int vdev_id;
+ unsigned long tx_timeout;
struct mlxbf_tmfifo *fifo;
};
@@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
return true;
}
+static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
+{
+ unsigned long flags;
+
+ /* Only handle Tx timeout for network vdev. */
+ if (vring->vdev_id != VIRTIO_ID_NET)
+ return;
+
+ /* Initialize the timeout or return if not expired. */
+ if (!vring->tx_timeout) {
+ /* Initialize the timeout. */
+ vring->tx_timeout = jiffies +
+ msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
+ return;
+ } else if (time_before(jiffies, vring->tx_timeout)) {
+ /* Return if not timeout yet. */
+ return;
+ }
+
+ /*
+ * Drop the packet after timeout. The outstanding packet is
+ * released and the remaining bytes will be sent with padding byte 0x00
+ * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
+ * either dropped directly, or appended into existing outstanding packet
+ * thus dropped as corrupted network packet.
+ */
+ vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
+ mlxbf_tmfifo_release_pkt(vring);
+ vring->cur_len = 0;
+ vring->rem_len = 0;
+ vring->fifo->vring[0] = NULL;
+
+ /*
+ * Make sure the load/store are in order before
+ * returning back to virtio.
+ */
+ virtio_mb(false);
+
+ /* Notify upper layer. */
+ spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
+ vring_interrupt(0, vring->vq);
+ spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
+}
+
/* Rx & Tx processing of a queue. */
static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
{
@@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
return;
do {
+retry:
/* Get available FIFO space. */
if (avail == 0) {
if (is_rx)
@@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
break;
}
+ /* Insert paddings for discarded Tx packet. */
+ if (!is_rx) {
+ vring->tx_timeout = 0;
+ while (vring->rem_padding >= sizeof(u64)) {
+ writeq(0, vring->fifo->tx.data);
+ vring->rem_padding -= sizeof(u64);
+ if (--avail == 0)
+ goto retry;
+ }
+ }
+
/* Console output always comes from the Tx buffer. */
if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
mlxbf_tmfifo_console_tx(fifo, avail);
@@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
/* Handle one descriptor. */
more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
} while (more);
+
+ /* Check Tx timeout. */
+ if (avail <= 0 && !is_rx)
+ mlxbf_tmfifo_check_tx_timeout(vring);
}
/* Handle Rx or Tx queues. */
--
2.43.0
next prev parent reply other threads:[~2024-02-02 18:39 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-02 18:38 [PATCH AUTOSEL 6.7 01/23] wifi: cfg80211: fix missing interfaces when dumping Sasha Levin
2024-02-02 18:38 ` [PATCH AUTOSEL 6.7 02/23] wifi: mac80211: fix race condition on enabling fast-xmit Sasha Levin
2024-02-02 18:38 ` [PATCH AUTOSEL 6.7 03/23] fbdev: savage: Error out if pixclock equals zero Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 04/23] fbdev: sis: " Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 05/23] dpll: fix broken error path in dpll_pin_alloc(..) Sasha Levin
2024-02-02 18:39 ` Sasha Levin [this message]
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 07/23] spi: intel-pci: Add support for Arrow Lake SPI serial flash Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 08/23] x86/cpu: Add model number for Intel Clearwater Forest processor Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 09/23] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 10/23] block: Fix WARNING in _copy_from_iter Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 11/23] smb: Work around Clang __bdos() type confusion Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 12/23] cifs: cifs_pick_channel should try selecting active channels Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 13/23] cifs: translate network errors on send to -ECONNABORTED Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 14/23] cifs: helper function to check replayable error codes Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 15/23] ahci: asm1166: correct count of reported ports Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 16/23] aoe: avoid potential deadlock at set_capacity Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 17/23] spi: cs42l43: Handle error from devm_pm_runtime_enable Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 18/23] exec: Distinguish in_execve from in_exec Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 19/23] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers Sasha Levin
2024-02-22 14:58 ` Niklas Cassel
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 20/23] ARM: dts: Fix TPM schema violations Sasha Levin
2024-02-02 19:20 ` Lukas Wunner
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 21/23] drm/amd/display: Disable ips before dc interrupt setting Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 22/23] MIPS: reserve exception vector space ONLY ONCE Sasha Levin
2024-02-02 18:39 ` [PATCH AUTOSEL 6.7 23/23] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240202183926.540467-6-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=hdegoede@redhat.com \
--cc=ilpo.jarvinen@linux.intel.com \
--cc=limings@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=platform-driver-x86@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=vadimp@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox