From: John Fastabend <john.fastabend@gmail.com>
To: fw@strlen.de, jhs@mojatatu.com, alexei.starovoitov@gmail.com,
eric.dumazet@gmail.com, brouer@redhat.com
Cc: netdev@vger.kernel.org
Subject: [RFC PATCH v2 01/10] net: sched: allow qdiscs to handle locking
Date: Wed, 13 Jul 2016 23:19:57 -0700 [thread overview]
Message-ID: <20160714061956.8270.93434.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20160714061852.8270.66271.stgit@john-Precision-Tower-5810>
This patch adds a flag for queueing disciplines to indicate the stack
does not need to use the qdisc lock to protect operations. This can
be used to build lockless scheduling algorithms and improving
performance.
The flag is checked in the tx path and the qdisc lock is only taken
if it is not set. For now use a conditional if statement. Later we
could be more aggressive if it proves worthwhile and use a static key
or wrap this in a likely().
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
include/net/sch_generic.h | 1 +
net/core/dev.c | 32 ++++++++++++++++++++++++++++----
net/sched/sch_generic.c | 24 ++++++++++++++++--------
3 files changed, 45 insertions(+), 12 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 909aff2..3de6a8c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -58,6 +58,7 @@ struct Qdisc {
#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy :
* qdisc_tree_decrease_qlen() should stop.
*/
+#define TCQ_F_NOLOCK 0x80 /* qdisc does not require locking */
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
diff --git a/net/core/dev.c b/net/core/dev.c
index b92d63b..f35d449 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3075,6 +3075,27 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int rc;
qdisc_calculate_pkt_len(skb, q);
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q)) {
+ qdisc_bstats_cpu_update(q, skb);
+ __qdisc_run(q);
+ if (sch_direct_xmit(skb, q, dev, txq, root_lock, true))
+ __qdisc_run(q);
+ rc = NET_XMIT_SUCCESS;
+ } else {
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ __qdisc_run(q);
+ }
+
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
+ return rc;
+ }
+
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3896,19 +3917,22 @@ static void net_tx_action(struct softirq_action *h)
while (head) {
struct Qdisc *q = head;
- spinlock_t *root_lock;
+ spinlock_t *root_lock = NULL;
head = head->next_sched;
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
- spin_unlock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK))
+ spin_unlock(root_lock);
}
}
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e95b67c..2c3e23b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -170,7 +170,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
int ret = NETDEV_TX_BUSY;
/* And release qdisc */
- spin_unlock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK))
+ spin_unlock(root_lock);
/* Note that we validate skb (GSO, checksum, ...) outside of locks */
if (validate)
@@ -183,10 +184,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_UNLOCK(dev, txq);
} else {
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK))
+ spin_lock(root_lock);
return qdisc_qlen(q);
}
- spin_lock(root_lock);
+
+ if (!(q->flags & TCQ_F_NOLOCK))
+ spin_lock(root_lock);
if (dev_xmit_complete(ret)) {
/* Driver sent out skb successfully or skb was consumed */
@@ -868,14 +872,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
dev_queue = netdev_get_tx_queue(dev, i);
q = dev_queue->qdisc_sleeping;
- root_lock = qdisc_lock(q);
- spin_lock_bh(root_lock);
+ if (q->flags & TCQ_F_NOLOCK) {
+ val = test_bit(__QDISC_STATE_SCHED, &q->state);
+ } else {
+ root_lock = qdisc_lock(q);
+ spin_lock_bh(root_lock);
- val = (qdisc_is_running(q) ||
- test_bit(__QDISC_STATE_SCHED, &q->state));
+ val = (qdisc_is_running(q) ||
+ test_bit(__QDISC_STATE_SCHED, &q->state));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(root_lock);
+ }
if (val)
return true;
next prev parent reply other threads:[~2016-07-14 6:20 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-07-14 6:19 [RFC PATCH v2 00/10] running qdiscs without qdisc_lock John Fastabend
2016-07-14 6:19 ` John Fastabend [this message]
2016-07-14 6:44 ` [RFC PATCH v2 01/10] net: sched: allow qdiscs to handle locking John Fastabend
2016-07-14 6:20 ` [RFC PATCH v2 02/10] net: sched: qdisc_qlen for per cpu logic John Fastabend
2016-07-14 6:20 ` [RFC PATCH v2 03/10] net: sched: provide per cpu qstat helpers John Fastabend
2016-07-14 6:21 ` [RFC PATCH v2 04/10] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2016-07-14 6:21 ` [RFC PATCH v2 05/10] net: sched: per cpu gso handlers John Fastabend
2016-07-14 6:22 ` [RFC PATCH v2 06/10] net: sched: support qdisc_reset on NOLOCK qdisc John Fastabend
2016-07-14 6:22 ` [RFC PATCH v2 07/10] net: sched: support skb_bad_tx with lockless qdisc John Fastabend
2016-07-14 6:23 ` [RFC PATCH v2 08/10] net: sched: pfifo_fast use alf_queue John Fastabend
2016-07-14 15:11 ` Jesper Dangaard Brouer
2016-07-15 0:09 ` John Fastabend
2016-07-15 10:09 ` Jesper Dangaard Brouer
2016-07-15 17:29 ` John Fastabend
2016-07-14 23:42 ` Alexei Starovoitov
2016-07-15 0:07 ` John Fastabend
2016-07-15 11:23 ` Jesper Dangaard Brouer
2016-07-15 22:18 ` John Fastabend
2016-07-15 22:48 ` Alexei Starovoitov
2016-07-14 6:23 ` [RFC PATCH v2 09/10] net: sched: helper to sum qlen John Fastabend
2016-07-14 6:24 ` [RFC PATCH v2 10/10] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160714061956.8270.93434.stgit@john-Precision-Tower-5810 \
--to=john.fastabend@gmail.com \
--cc=alexei.starovoitov@gmail.com \
--cc=brouer@redhat.com \
--cc=eric.dumazet@gmail.com \
--cc=fw@strlen.de \
--cc=jhs@mojatatu.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox