From: John Fastabend <john.fastabend@gmail.com>
To: eric.dumazet@gmail.com, jhs@mojatatu.com, davem@davemloft.net,
brouer@redhat.com, xiyou.wangcong@gmail.com,
alexei.starovoitov@gmail.com
Cc: john.r.fastabend@intel.com, netdev@vger.kernel.org,
john.fastabend@gmail.com
Subject: [net-next PATCH 12/15] net: sched: lockless support for netif_schedule
Date: Tue, 23 Aug 2016 13:27:46 -0700 [thread overview]
Message-ID: <20160823202746.14368.6466.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20160823202135.14368.62466.stgit@john-Precision-Tower-5810>
netif_schedule uses a bit QDISC_STATE_SCHED to tell the qdisc layer
if a run of the qdisc has been scheduler. This is important when
tearing down qdisc instances. We can not rcu_free an instance for
example if its possible that we might have outstanding references to
it.
Perhaps more importantly in the per cpu lockless case we need to
schedule a run of the qdisc on all qdiscs that are enqueu'ing packets
and hitting the gso_skb requeue logic or else the skb may get stuck
on the gso_skb queue without anything to finish the xmit.
This patch uses a reference counter instead of a bit to account for
the multiple CPUs.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
include/net/sch_generic.h | 1 +
net/core/dev.c | 32 +++++++++++++++++++++++---------
net/sched/sch_api.c | 5 +++++
net/sched/sch_generic.c | 15 ++++++++++++++-
4 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 12df73d..cf4b3ea 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -93,6 +93,7 @@ struct Qdisc {
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
+ unsigned long __percpu *cpu_state;
struct Qdisc *next_sched;
struct sk_buff *skb_bad_txq;
struct rcu_head rcu_head;
diff --git a/net/core/dev.c b/net/core/dev.c
index a0effa6..f638571 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2272,8 +2272,14 @@ static void __netif_reschedule(struct Qdisc *q)
void __netif_schedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+ if (q->flags & TCQ_F_NOLOCK) {
+ unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, s))
+ __netif_reschedule(q);
+ } else if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
__netif_reschedule(q);
+ }
}
EXPORT_SYMBOL(__netif_schedule);
@@ -3920,15 +3926,23 @@ static void net_tx_action(struct softirq_action *h)
if (!(q->flags & TCQ_F_NOLOCK)) {
root_lock = qdisc_lock(q);
spin_lock(root_lock);
- }
- /* We need to make sure head->next_sched is read
- * before clearing __QDISC_STATE_SCHED
- */
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED, &q->state);
- qdisc_run(q);
- if (!(q->flags & TCQ_F_NOLOCK))
+
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+
+ qdisc_run(q);
+
spin_unlock(root_lock);
+ } else {
+ unsigned long *s = this_cpu_ptr(q->cpu_state);
+
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, s);
+ __qdisc_run(q);
+ }
}
}
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6c5bf13..89989a6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -975,6 +975,10 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
alloc_percpu(struct bad_txq_cell);
if (!sch->skb_bad_txq_cpu)
goto err_out4;
+
+ sch->cpu_state = alloc_percpu(unsigned long);
+ if (!sch->cpu_state)
+ goto err_out4;
}
if (tca[TCA_STAB]) {
@@ -1027,6 +1031,7 @@ err_out4:
free_percpu(sch->cpu_qstats);
free_percpu(sch->gso_cpu_skb);
free_percpu(sch->skb_bad_txq_cpu);
+ free_percpu(sch->cpu_state);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0b61b14..a6ec01c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -769,6 +769,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
sch->skb_bad_txq_cpu = alloc_percpu(struct bad_txq_cell);
if (!sch->skb_bad_txq_cpu)
goto errout;
+
+ sch->cpu_state = alloc_percpu(unsigned long);
+ if (!sch->cpu_state)
+ goto errout;
}
return sch;
@@ -1038,7 +1042,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
q = dev_queue->qdisc_sleeping;
if (q->flags & TCQ_F_NOLOCK) {
- val = test_bit(__QDISC_STATE_SCHED, &q->state);
+ int i;
+
+ for_each_possible_cpu(i) {
+ unsigned long *s;
+
+ s = per_cpu_ptr(q->cpu_state, i);
+ val = test_bit(__QDISC_STATE_SCHED, s);
+ if (val)
+ break;
+ }
} else {
root_lock = qdisc_lock(q);
spin_lock_bh(root_lock);
next prev parent reply other threads:[~2016-08-23 20:28 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-23 20:22 [net-next PATCH 00/15] support lockless qdisc John Fastabend
2016-08-23 20:22 ` [net-next PATCH 01/15] net: sched: cleanup qdisc_run and __qdisc_run semantics John Fastabend
2016-08-23 20:38 ` Eric Dumazet
2016-08-23 20:23 ` [net-next PATCH 02/15] net: sched: allow qdiscs to handle locking John Fastabend
2016-08-23 21:08 ` Eric Dumazet
2016-08-23 22:32 ` John Fastabend
2016-08-23 20:23 ` [net-next PATCH 03/15] net: sched: remove remaining uses for qdisc_qlen in xmit path John Fastabend
2016-08-23 21:10 ` Eric Dumazet
2016-08-23 20:24 ` [net-next PATCH 04/15] net: sched: provide per cpu qstat helpers John Fastabend
2016-08-23 23:25 ` Eric Dumazet
2016-08-23 23:50 ` John Fastabend
2016-08-23 20:24 ` [net-next PATCH 05/15] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2016-08-24 16:29 ` Eric Dumazet
2016-08-24 16:41 ` Eric Dumazet
2016-08-24 17:13 ` John Fastabend
2016-08-24 17:26 ` Eric Dumazet
2016-08-24 17:50 ` John Fastabend
2016-08-24 19:08 ` Eric Dumazet
2016-08-23 20:25 ` [net-next PATCH 06/15] net: sched: per cpu gso handlers John Fastabend
2016-08-23 20:25 ` [net-next PATCH 07/15] net: sched: drop qdisc_reset from dev_graft_qdisc John Fastabend
2016-08-23 20:26 ` [net-next PATCH 08/15] net: sched: support qdisc_reset on NOLOCK qdisc John Fastabend
2016-08-23 20:26 ` [net-next PATCH 09/15] net: sched: support skb_bad_tx with lockless qdisc John Fastabend
2016-08-23 20:26 ` [net-next PATCH 10/15] net: sched: qdisc_qlen for per cpu logic John Fastabend
2016-08-23 20:27 ` [net-next PATCH 11/15] net: sched: helper to sum qlen John Fastabend
2016-08-23 20:27 ` John Fastabend [this message]
2016-09-07 14:50 ` [net-next PATCH 12/15] net: sched: lockless support for netif_schedule John Fastabend
2016-08-23 20:28 ` [net-next PATCH 13/15] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2016-08-23 20:28 ` [net-next PATCH 14/15] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend
2016-08-23 20:28 ` [net-next PATCH 15/15] net: sched: pfifo_fast use skb_array John Fastabend
2016-09-01 8:26 ` [lkp] [net] c4c75f963d: inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160823202746.14368.6466.stgit@john-Precision-Tower-5810 \
--to=john.fastabend@gmail.com \
--cc=alexei.starovoitov@gmail.com \
--cc=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=jhs@mojatatu.com \
--cc=john.r.fastabend@intel.com \
--cc=netdev@vger.kernel.org \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).