From: John Fastabend <john.fastabend@gmail.com>
To: eric.dumazet@gmail.com, jhs@mojatatu.com, davem@davemloft.net,
brouer@redhat.com, xiyou.wangcong@gmail.com,
alexei.starovoitov@gmail.com
Cc: john.r.fastabend@intel.com, netdev@vger.kernel.org,
john.fastabend@gmail.com
Subject: [net-next PATCH 09/15] net: sched: support skb_bad_tx with lockless qdisc
Date: Tue, 23 Aug 2016 13:26:34 -0700
Message-ID: <20160823202634.14368.16784.stgit@john-Precision-Tower-5810>
In-Reply-To: <20160823202135.14368.62466.stgit@john-Precision-Tower-5810>

Similar to how the gso skb is handled, skb_bad_txq needs to be a per-CPU
slot so that a lockless qdisc can be driven by multiple concurrent
writers/producers.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
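[ Note: the per-cpu handling below follows the same single-skb cell
pattern already used for gso_cpu_skb: one pointer per CPU, touched with
this_cpu_ptr() on the fast path and walked with for_each_possible_cpu()
on reset/destroy. A minimal standalone sketch of that pattern, with
hypothetical demo_* names that are not part of this patch:

	#include <linux/percpu.h>
	#include <linux/skbuff.h>

	struct demo_cell {
		struct sk_buff *skb;
	};

	static struct demo_cell __percpu *demo_cells;

	static int demo_init(void)
	{
		/* one zero-initialized cell per possible CPU */
		demo_cells = alloc_percpu(struct demo_cell);
		return demo_cells ? 0 : -ENOMEM;
	}

	/* fast path: touch only this CPU's cell, no qdisc lock needed
	 * (assumes the caller is pinned to the CPU, e.g. BH context,
	 * as in the qdisc paths below)
	 */
	static void demo_stash(struct sk_buff *skb)
	{
		this_cpu_ptr(demo_cells)->skb = skb;
	}

	/* slow path (reset/destroy): walk every CPU's cell */
	static void demo_flush(void)
	{
		int cpu;

		for_each_possible_cpu(cpu) {
			struct demo_cell *cell = per_cpu_ptr(demo_cells, cpu);

			kfree_skb(cell->skb);
			cell->skb = NULL;
		}
		free_percpu(demo_cells);
	}

A single pointer per CPU is enough here because each CPU stashes at most
one skb before rescheduling the qdisc. ]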
 include/net/sch_generic.h |    7 +++
 net/sched/sch_api.c       |    6 +++
 net/sched/sch_generic.c   |   95 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 926da18..3597c63 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -40,6 +40,10 @@ struct gso_cell {
struct sk_buff *skb;
};
+struct bad_txq_cell {
+ struct sk_buff *skb;
+};
+
struct Qdisc {
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
@@ -77,7 +81,8 @@ struct Qdisc {
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
- struct gso_cell __percpu *gso_cpu_skb;
+ struct gso_cell __percpu *gso_cpu_skb;
+ struct bad_txq_cell __percpu *skb_bad_txq_cpu;
/*
* For performance sake on SMP, we put highly modified fields at the end
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d713052..b90a23a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -970,6 +970,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
if (!sch->gso_cpu_skb)
goto err_out4;
+
+ sch->skb_bad_txq_cpu =
+ alloc_percpu(struct bad_txq_cell);
+ if (!sch->skb_bad_txq_cpu)
+ goto err_out4;
}
if (tca[TCA_STAB]) {
@@ -1021,6 +1026,7 @@ err_out4:
free_percpu(sch->cpu_bstats);
free_percpu(sch->cpu_qstats);
free_percpu(sch->gso_cpu_skb);
+ free_percpu(sch->skb_bad_txq_cpu);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fd4a2b9..0b61b14 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,6 +44,43 @@ EXPORT_SYMBOL(default_qdisc_ops);
* - ingress filtering is also serialized via qdisc root lock
* - updates to tree and tree walking are only done under the rtnl mutex.
*/
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *sch)
+{
+ if (sch->skb_bad_txq_cpu) {
+ struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+ return cell->skb;
+ }
+
+ return sch->skb_bad_txq;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *sch,
+ struct sk_buff *skb)
+{
+ if (sch->skb_bad_txq_cpu) {
+ struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+ cell->skb = skb;
+ __netif_schedule(sch);
+ return;
+ }
+
+ sch->skb_bad_txq = skb;
+}
+
+static inline void qdisc_null_skb_bad_txq(struct Qdisc *sch)
+{
+ if (sch->skb_bad_txq_cpu) {
+ struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+ cell->skb = NULL;
+ return;
+ }
+
+ sch->skb_bad_txq = NULL;
+}
+
static inline struct sk_buff *qdisc_dequeue_gso_skb(struct Qdisc *sch)
{
if (sch->gso_cpu_skb)
@@ -129,9 +166,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
if (!nskb)
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
- q->skb_bad_txq = nskb;
- qdisc_qstats_backlog_inc(q, nskb);
- q->q.qlen++;
+ qdisc_enqueue_skb_bad_txq(q, nskb);
+
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_inc(q, nskb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ qdisc_qstats_backlog_inc(q, nskb);
+ q->q.qlen++;
+ }
break;
}
skb->next = nskb;
@@ -160,7 +203,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
qdisc_null_gso_skb(q);
if (qdisc_is_percpu_stats(q)) {
- qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_backlog_dec(q, skb);
qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
@@ -171,14 +214,19 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
return skb;
}
*validate = true;
- skb = q->skb_bad_txq;
+ skb = qdisc_dequeue_skb_bad_txq(q);
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
- q->skb_bad_txq = NULL;
- qdisc_qstats_backlog_dec(q, skb);
- q->q.qlen--;
+ qdisc_null_skb_bad_txq(q);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_dec(q, skb);
+ qdisc_qstats_cpu_qlen_dec(q);
+ } else {
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ }
goto bulk;
}
return NULL;
@@ -717,6 +765,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
if (!sch->gso_cpu_skb)
goto errout;
+
+ sch->skb_bad_txq_cpu = alloc_percpu(struct bad_txq_cell);
+ if (!sch->skb_bad_txq_cpu)
+ goto errout;
}
return sch;
@@ -752,6 +804,20 @@ void qdisc_reset(struct Qdisc *qdisc)
}
}
+ if (qdisc->skb_bad_txq_cpu) {
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct bad_txq_cell *cell;
+
+ cell = per_cpu_ptr(qdisc->skb_bad_txq_cpu, i);
+ if (cell) {
+ kfree_skb(cell->skb);
+ cell->skb = NULL;
+ }
+ }
+ }
+
if (qdisc->gso_skb) {
kfree_skb_list(qdisc->gso_skb);
qdisc->gso_skb = NULL;
@@ -782,6 +848,19 @@ static void qdisc_rcu_free(struct rcu_head *head)
free_percpu(qdisc->gso_cpu_skb);
}
+ if (qdisc->skb_bad_txq_cpu) {
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct bad_txq_cell *cell;
+
+ cell = per_cpu_ptr(qdisc->skb_bad_txq_cpu, i);
+ kfree_skb(cell->skb);
+ }
+
+ free_percpu(qdisc->skb_bad_txq_cpu);
+ }
+
kfree((char *) qdisc - qdisc->padded);
}