From: Jakub Kicinski <kuba@kernel.org>
To: netdev@vger.kernel.org, davem@davemloft.net, edumazet@google.com,
pabeni@redhat.com
Cc: alexanderduyck@fb.com, roman.gushchin@linux.dev,
Jakub Kicinski <kuba@kernel.org>
Subject: [RFC net-next 1/3] net: provide macros for commonly copied lockless queue stop/wake code
Date: Fri, 10 Mar 2023 21:01:28 -0800 [thread overview]
Message-ID: <20230311050130.115138-1-kuba@kernel.org> (raw)
A lot of drivers follow the same scheme to stop / start queues
without introducing locks between xmit and NAPI tx completions.
I'm guessing they all copy'n'paste each other's code.
Smaller drivers shy away from the scheme and introduce a lock
which may cause deadlocks in netpoll.
Provide macros which encapsulate the necessary logic.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
include/net/netdev_queues.h | 166 ++++++++++++++++++++++++++++++++++++
1 file changed, 166 insertions(+)
create mode 100644 include/net/netdev_queues.h
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
new file mode 100644
index 000000000000..2a857faf28d8
--- /dev/null
+++ b/include/net/netdev_queues.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NET_QUEUES_H
+#define _LINUX_NET_QUEUES_H
+
+#include <linux/netdevice.h>
+
+/* Lockless queue stopping / waking helpers.
+ *
+ * These macros are designed to safely implement stopping and waking
+ * netdev queues without any lock protection. We assume that there can
+ * be no concurrent stop attempts and no concurrent wake attempts.
+ * This is usually true as stop attempts happen from the xmit handler,
+ * while wake up is triggered from NAPI poll context. The two may run
+ * concurrently but are each protected by a lock (SPSC of sorts).
+ *
+ * All descriptor ring indexes (and other relevant shared state) must
+ * be updated before invoking the macros.
+ */
+
+/* Stop @txq, then re-check for room freed by a concurrent completion.
+ * Returns 0 if the queue stays stopped, -1 if it was woken up again.
+ */
+#define netif_tx_queue_try_stop(txq, get_desc, start_thrs) \
+	({ \
+		int _res; \
+		\
+		netif_tx_stop_queue(txq); \
+		/* Full barrier between the stop and re-reading @get_desc. */ \
+		smp_mb(); \
+		/* Check again in case another CPU has just made room. */ \
+		if (likely((get_desc) < (start_thrs))) { \
+			_res = 0; \
+		} else { \
+			netif_tx_wake_queue(txq); \
+			_res = -1; \
+		} \
+		_res; \
+	})
+
+/**
+ * netif_tx_queue_maybe_stop() - locklessly stop a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @stop_thrs: minimal number of available descriptors for queue to be left
+ *	enabled, i.e. the queue is stopped only when fewer are free
+ * @start_thrs: minimal number of descriptors to re-enable the queue, can be
+ *	equal to @stop_thrs or higher to avoid frequent waking
+ *
+ * All arguments may be evaluated multiple times, beware of side effects.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ *
+ * Returns:
+ *	 0 if the queue was stopped
+ *	 1 if the queue was left enabled
+ *	-1 if the queue was re-enabled (raced with waking)
+ */
+#define netif_tx_queue_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \
+	({ \
+		int _res; \
+		/* ">=": exactly @stop_thrs free descriptors is enough. */ \
+		if (likely((get_desc) >= (stop_thrs))) \
+			_res = 1; \
+		else \
+			_res = netif_tx_queue_try_stop(txq, get_desc, \
+						       start_thrs); \
+		_res; \
+	})
+
+/* Wake @txq if it is stopped and @down_cond is false; @get_desc and
+ * @start_thrs are not evaluated here. Returns 0 if woken, 1 otherwise.
+ */
+#define __netif_tx_queue_try_wake(txq, get_desc, start_thrs, down_cond) \
+	({ \
+		int _res = 1; \
+		/* Barrier first, so that anybody stopping the queue \
+		 * after this sees the new next_to_clean. \
+		 */ \
+		smp_mb(); \
+		if (netif_tx_queue_stopped(txq) && !(down_cond)) { \
+			netif_tx_wake_queue(txq); \
+			_res = 0; \
+		} \
+		_res; \
+	})
+
+#define netif_tx_queue_try_wake(txq, get_desc, start_thrs) \
+ __netif_tx_queue_try_wake(txq, get_desc, start_thrs, false) /* __netif_tx_queue_try_wake() with @down_cond fixed to false */
+
+/**
+ * __netif_tx_queue_maybe_wake() - locklessly wake a Tx queue, if needed
+ * @txq: struct netdev_queue to stop/start
+ * @get_desc: get current number of free descriptors (see requirements below!)
+ * @start_thrs: minimal number of descriptors to re-enable the queue
+ * @down_cond: down condition, predicate indicating that the queue should
+ *	not be woken up even if descriptors are available
+ *
+ * All arguments may be evaluated multiple times, beware of side effects.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ *
+ * Returns:
+ *	 0 if the queue was woken up
+ *	 1 if the queue was already enabled (or disabled but @down_cond is true)
+ *	-1 if the queue was left unchanged (fewer than @start_thrs free)
+ */
+#define __netif_tx_queue_maybe_wake(txq, get_desc, start_thrs, down_cond) \
+	({ \
+		int _res; \
+		/* Below the threshold the queue state is not touched at all. */ \
+		if (likely((get_desc) < (start_thrs))) \
+			_res = -1; \
+		else \
+			_res = __netif_tx_queue_try_wake(txq, get_desc, \
+							 start_thrs, \
+							 down_cond); \
+		_res; \
+	})
+
+#define netif_tx_queue_maybe_wake(txq, get_desc, start_thrs) \
+ __netif_tx_queue_maybe_wake(txq, get_desc, start_thrs, false) /* __netif_tx_queue_maybe_wake() with @down_cond fixed to false */
+
+/* subqueue variants follow */
+
+/* netif_tx_queue_try_stop() applied to Tx queue @idx of @dev. */
+#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
+	({ \
+		/* "_txq", not "txq": arguments may name a caller's "txq". */ \
+		struct netdev_queue *_txq = netdev_get_tx_queue(dev, idx); \
+		netif_tx_queue_try_stop(_txq, get_desc, start_thrs); \
+	})
+
+/* netif_tx_queue_maybe_stop() applied to Tx queue @idx of @dev. */
+#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
+	({ \
+		/* "_txq", not "txq": arguments may name a caller's "txq". */ \
+		struct netdev_queue *_txq = netdev_get_tx_queue(dev, idx); \
+		netif_tx_queue_maybe_stop(_txq, get_desc, \
+					  stop_thrs, start_thrs); \
+	})
+
+/* __netif_tx_queue_try_wake() applied to Tx queue @idx of @dev. */
+#define __netif_subqueue_try_wake(dev, idx, get_desc, start_thrs, down_cond) \
+	({ \
+		/* "_txq", not "txq": arguments may name a caller's "txq". */ \
+		struct netdev_queue *_txq = netdev_get_tx_queue(dev, idx); \
+		__netif_tx_queue_try_wake(_txq, get_desc, \
+					  start_thrs, down_cond); \
+	})
+
+#define netif_subqueue_try_wake(dev, idx, get_desc, start_thrs) \
+ __netif_subqueue_try_wake(dev, idx, get_desc, start_thrs, false) /* __netif_subqueue_try_wake() with @down_cond fixed to false */
+
+/* __netif_tx_queue_maybe_wake() applied to Tx queue @idx of @dev. */
+#define __netif_subqueue_maybe_wake(dev, idx, get_desc, start_thrs, down_cond) \
+	({ \
+		/* "_txq", not "txq": arguments may name a caller's "txq". */ \
+		struct netdev_queue *_txq = netdev_get_tx_queue(dev, idx); \
+		__netif_tx_queue_maybe_wake(_txq, get_desc, \
+					    start_thrs, down_cond); \
+	})
+
+#define netif_subqueue_maybe_wake(dev, idx, get_desc, start_thrs) \
+ __netif_subqueue_maybe_wake(dev, idx, get_desc, start_thrs, false) /* __netif_subqueue_maybe_wake() with @down_cond fixed to false */
+
+#endif
--
2.39.2
next reply other threads:[~2023-03-11 5:01 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-11 5:01 Jakub Kicinski [this message]
2023-03-11 5:01 ` [RFC net-next 2/3] ixgbe: use new queue try_stop/try_wake macros Jakub Kicinski
2023-03-11 5:01 ` [RFC net-next 3/3] bnxt: " Jakub Kicinski
2023-03-11 16:28 ` [RFC net-next 1/3] net: provide macros for commonly copied lockless queue stop/wake code Stephen Hemminger
2023-03-13 1:37 ` Willem de Bruijn
2023-03-13 1:45 ` Stephen Hemminger
2023-03-13 20:56 ` Jakub Kicinski
2023-04-04 3:29 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230311050130.115138-1-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=alexanderduyck@fb.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=roman.gushchin@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.