From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, jhs@mojatatu.com,
alexei.starovoitov@gmail.com, eric.dumazet@gmail.com,
brouer@redhat.com
Cc: john.r.fastabend@intel.com, netdev@vger.kernel.org,
john.fastabend@gmail.com, davem@davemloft.net
Subject: [RFC PATCH 10/13] net: sched: lockless support for netif_schedule
Date: Wed, 17 Aug 2016 12:37:38 -0700 [thread overview]
Message-ID: <20160817193738.27032.25592.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20160817193120.27032.20918.stgit@john-Precision-Tower-5810>
netif_schedule uses a bit QDISC_STATE_SCHED to tell the qdisc layer
if a run of the qdisc has been scheduled. This is important when
tearing down qdisc instances. We can rcu_free an instance, for example,
if it's possible that we might have outstanding references to it.
Perhaps more importantly in the per cpu lockless case we need to
schedule a run of the qdisc on all qdiscs that are enqueuing packets
and hitting the gso_skb requeue logic or else the skb may get stuck
on the gso_skb queue without anything to finish the xmit.
This patch uses a reference counter instead of a bit to account for
the multiple CPUs.
---
include/net/sch_generic.h | 1 +
net/core/dev.c | 32 +++++++++++++++++++++++---------
net/sched/sch_api.c | 5 +++++
net/sched/sch_generic.c | 16 +++++++++++++++-
4 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cc28af0..2e0e5b0 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -94,6 +94,7 @@ struct Qdisc {
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
+ unsigned long __percpu *cpu_state;
struct Qdisc *next_sched;
struct sk_buff *skb_bad_txq;
struct rcu_head rcu_head;
diff --git a/net/core/dev.c b/net/core/dev.c
index 5db395d..f491845 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2272,8 +2272,14 @@ static void __netif_reschedule(struct Qdisc *q)
void __netif_schedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+ if (q->flags & TCQ_F_NOLOCK) {
+ unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, s))
+ __netif_reschedule(q);
+ } else if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
__netif_reschedule(q);
+ }
}
EXPORT_SYMBOL(__netif_schedule);
@@ -3925,15 +3931,23 @@ static void net_tx_action(struct softirq_action *h)
if (!(q->flags & TCQ_F_NOLOCK)) {
root_lock = qdisc_lock(q);
spin_lock(root_lock);
- }
- /* We need to make sure head->next_sched is read
- * before clearing __QDISC_STATE_SCHED
- */
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED, &q->state);
- qdisc_run(q);
- if (!(q->flags & TCQ_F_NOLOCK))
+
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+
+ qdisc_run(q);
+
spin_unlock(root_lock);
+ } else {
+ unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, s);
+ __qdisc_run(q);
+ }
}
}
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6c5bf13..89989a6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -975,6 +975,10 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
alloc_percpu(struct bad_txq_cell);
if (!sch->skb_bad_txq_cpu)
goto err_out4;
+
+ sch->cpu_state = alloc_percpu(unsigned long);
+ if (!sch->cpu_state)
+ goto err_out4;
}
if (tca[TCA_STAB]) {
@@ -1027,6 +1031,7 @@ err_out4:
free_percpu(sch->cpu_qstats);
free_percpu(sch->gso_cpu_skb);
free_percpu(sch->skb_bad_txq_cpu);
+ free_percpu(sch->cpu_state);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d10b762..f5b7254 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -171,6 +171,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, nskb);
qdisc_qstats_cpu_qlen_inc(q);
+ set_thread_flag(TIF_NEED_RESCHED);
} else {
qdisc_qstats_backlog_inc(q, nskb);
q->q.qlen++;
@@ -768,6 +769,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
sch->skb_bad_txq_cpu = alloc_percpu(struct bad_txq_cell);
if (!sch->skb_bad_txq_cpu)
goto errout;
+
+ sch->cpu_state = alloc_percpu(unsigned long);
+ if (!sch->cpu_state)
+ goto errout;
}
return sch;
@@ -1037,7 +1042,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
q = dev_queue->qdisc_sleeping;
if (q->flags & TCQ_F_NOLOCK) {
- val = test_bit(__QDISC_STATE_SCHED, &q->state);
+ int i;
+
+ for_each_possible_cpu(i) {
+ unsigned long *s;
+
+ s = per_cpu_ptr(q->cpu_state, i);
+ val = test_bit(__QDISC_STATE_SCHED, s);
+ if (val)
+ break;
+ }
} else {
root_lock = qdisc_lock(q);
spin_lock_bh(root_lock);
next prev parent reply other threads:[~2016-08-17 19:38 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-17 19:33 [RFC PATCH 00/13] Series short description John Fastabend
2016-08-17 19:33 ` [RFC PATCH 01/13] net: sched: allow qdiscs to handle locking John Fastabend
2016-08-17 22:33 ` Eric Dumazet
2016-08-17 22:49 ` John Fastabend
2016-08-17 22:34 ` Eric Dumazet
2016-08-17 22:48 ` John Fastabend
2016-08-17 19:34 ` [RFC PATCH 02/13] net: sched: qdisc_qlen for per cpu logic John Fastabend
2016-08-17 19:34 ` [RFC PATCH 03/13] net: sched: provide per cpu qstat helpers John Fastabend
2016-08-17 19:35 ` [RFC PATCH 04/13] net: sched: provide atomic qlen helpers for bypass case John Fastabend
2016-08-17 19:35 ` [RFC PATCH 05/13] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2016-08-17 19:35 ` [RFC PATCH 06/13] net: sched: per cpu gso handlers John Fastabend
2016-08-17 19:36 ` [RFC PATCH 07/13] net: sched: support qdisc_reset on NOLOCK qdisc John Fastabend
2016-08-17 22:53 ` Eric Dumazet
2016-08-17 22:59 ` John Fastabend
2016-08-17 19:36 ` [RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc John Fastabend
2016-08-17 22:58 ` Eric Dumazet
2016-08-17 23:00 ` John Fastabend
2016-08-23 20:11 ` John Fastabend
2016-08-17 19:37 ` [RFC PATCH 09/13] net: sched: helper to sum qlen John Fastabend
2016-08-17 19:37 ` John Fastabend [this message]
2016-08-17 19:46 ` [RFC PATCH 10/13] net: sched: lockless support for netif_schedule John Fastabend
2016-08-17 23:01 ` Eric Dumazet
2016-08-17 23:17 ` John Fastabend
2016-08-17 23:33 ` Eric Dumazet
2016-08-17 19:38 ` [RFC PATCH 11/13] net: sched: pfifo_fast use alf_queue John Fastabend
2016-08-19 10:13 ` Jesper Dangaard Brouer
2016-08-19 15:44 ` John Fastabend
2016-08-17 19:38 ` [RFC PATCH 12/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2016-08-17 19:49 ` John Fastabend
2016-08-17 23:04 ` Eric Dumazet
2016-08-17 23:18 ` John Fastabend
2016-08-17 19:39 ` [RFC PATCH 13/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160817193738.27032.25592.stgit@john-Precision-Tower-5810 \
--to=john.fastabend@gmail.com \
--cc=alexei.starovoitov@gmail.com \
--cc=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=jhs@mojatatu.com \
--cc=john.r.fastabend@intel.com \
--cc=netdev@vger.kernel.org \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox