[RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc

Netdev List
 help / color / mirror / Atom feed

From: John Fastabend <john.fastabend@gmail.com>
To: xiyou.wangcong@gmail.com, jhs@mojatatu.com,
	alexei.starovoitov@gmail.com, eric.dumazet@gmail.com,
	brouer@redhat.com
Cc: john.r.fastabend@intel.com, netdev@vger.kernel.org,
	john.fastabend@gmail.com, davem@davemloft.net
Subject: [RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc
Date: Wed, 17 Aug 2016 12:36:46 -0700	[thread overview]
Message-ID: <20160817193646.27032.75592.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20160817193120.27032.20918.stgit@john-Precision-Tower-5810>

Similar to how gso is handled, skb_bad_txq needs to be per-cpu to
support lockless qdiscs with multiple writers/producers.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/net/sch_generic.h |    7 +++
 net/sched/sch_api.c       |    6 +++
 net/sched/sch_generic.c   |   95 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0864813..d465fb9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -40,6 +40,10 @@ struct gso_cell {
 	struct sk_buff *skb;
 };
 
+struct bad_txq_cell {
+	struct sk_buff *skb; /* single skb slot; cells are allocated per-cpu */
+};
+
 struct Qdisc {
 	int 			(*enqueue)(struct sk_buff *skb,
 					   struct Qdisc *sch,
@@ -77,7 +81,8 @@ struct Qdisc {
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue	__percpu *cpu_qstats;
 
-	struct gso_cell __percpu *gso_cpu_skb;
+	struct gso_cell     __percpu *gso_cpu_skb;
+	struct bad_txq_cell __percpu *skb_bad_txq_cpu;
 
 	/*
 	 * For performance sake on SMP, we put highly modified fields at the end
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d713052..b90a23a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -970,6 +970,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
 			if (!sch->gso_cpu_skb)
 				goto err_out4;
+
+			sch->skb_bad_txq_cpu =
+				alloc_percpu(struct bad_txq_cell);
+			if (!sch->skb_bad_txq_cpu)
+				goto err_out4;
 		}
 
 		if (tca[TCA_STAB]) {
@@ -1021,6 +1026,7 @@ err_out4:
 	free_percpu(sch->cpu_bstats);
 	free_percpu(sch->cpu_qstats);
 	free_percpu(sch->gso_cpu_skb);
+	free_percpu(sch->skb_bad_txq_cpu);
 	/*
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 29238c4..d10b762 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,6 +44,43 @@ EXPORT_SYMBOL(default_qdisc_ops);
  * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *sch)
+{
+	if (sch->skb_bad_txq_cpu) { /* per-cpu cells exist: use this CPU's */
+		struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+		return cell->skb; /* slot is only read here, not cleared */
+	}
+
+	return sch->skb_bad_txq; /* otherwise the single shared slot */
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *sch,
+					     struct sk_buff *skb)
+{
+	if (sch->skb_bad_txq_cpu) { /* per-cpu cells exist: use this CPU's */
+		struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+		cell->skb = skb; /* overwrites whatever the slot held */
+		__netif_schedule(sch); /* only done on the per-cpu path */
+		return;
+	}
+
+	sch->skb_bad_txq = skb; /* no per-cpu cells: shared slot */
+}
+
+static inline void qdisc_null_skb_bad_txq(struct Qdisc *sch)
+{
+	if (sch->skb_bad_txq_cpu) { /* per-cpu cells exist: use this CPU's */
+		struct bad_txq_cell *cell = this_cpu_ptr(sch->skb_bad_txq_cpu);
+
+		cell->skb = NULL; /* empties the slot; the skb is not freed */
+		return;
+	}
+
+	sch->skb_bad_txq = NULL; /* no per-cpu cells: shared slot */
+}
+
 static inline struct sk_buff *qdisc_dequeue_gso_skb(struct Qdisc *sch)
 {
 	if (sch->gso_cpu_skb)
@@ -129,9 +166,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 		if (!nskb)
 			break;
 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
-			q->skb_bad_txq = nskb;
-			qdisc_qstats_backlog_inc(q, nskb);
-			q->q.qlen++;
+			qdisc_enqueue_skb_bad_txq(q, nskb);
+
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_inc(q, nskb);
+				qdisc_qstats_cpu_qlen_inc(q);
+			} else {
+				qdisc_qstats_backlog_inc(q, nskb);
+				q->q.qlen++;
+			}
 			break;
 		}
 		skb->next = nskb;
@@ -160,7 +203,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			qdisc_null_gso_skb(q);
 
 			if (qdisc_is_percpu_stats(q)) {
-				qdisc_qstats_cpu_backlog_inc(q, skb);
+				qdisc_qstats_cpu_backlog_dec(q, skb);
 				qdisc_qstats_cpu_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
@@ -171,14 +214,19 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 		return skb;
 	}
 	*validate = true;
-	skb = q->skb_bad_txq;
+	skb = qdisc_dequeue_skb_bad_txq(q);
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
-			q->skb_bad_txq = NULL;
-			qdisc_qstats_backlog_dec(q, skb);
-			q->q.qlen--;
+			qdisc_null_skb_bad_txq(q);
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_dec(q, skb);
+				qdisc_qstats_cpu_qlen_dec(q);
+			} else {
+				qdisc_qstats_backlog_dec(q, skb);
+				q->q.qlen--;
+			}
 			goto bulk;
 		}
 		return NULL;
@@ -716,6 +764,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 		sch->gso_cpu_skb = alloc_percpu(struct gso_cell);
 		if (!sch->gso_cpu_skb)
 			goto errout;
+
+		sch->skb_bad_txq_cpu = alloc_percpu(struct bad_txq_cell);
+		if (!sch->skb_bad_txq_cpu)
+			goto errout;
 	}
 
 	return sch;
@@ -746,6 +798,20 @@ void qdisc_reset(struct Qdisc *qdisc)
 			cell = per_cpu_ptr(qdisc->gso_cpu_skb, i);
 			if (cell) {
 				kfree_skb_list(cell->skb);
+				cell->skb = NULL;
+			}
+		}
+	}
+
+	if (qdisc->skb_bad_txq_cpu) {
+		int i;
+
+		for_each_possible_cpu(i) {
+			struct bad_txq_cell *cell;
+
+			cell = per_cpu_ptr(qdisc->skb_bad_txq_cpu, i);
+			if (cell) {
+				kfree_skb(cell->skb);
+				cell->skb = NULL; /* clear slot, not local */
 			}
 		}
@@ -781,6 +847,19 @@ static void qdisc_rcu_free(struct rcu_head *head)
 		free_percpu(qdisc->gso_cpu_skb);
 	}
 
+	if (qdisc->skb_bad_txq_cpu) {
+		int i;
+
+		for_each_possible_cpu(i) {
+			struct bad_txq_cell *cell;
+
+			cell = per_cpu_ptr(qdisc->skb_bad_txq_cpu, i);
+			kfree_skb(cell->skb);
+		}
+
+		free_percpu(qdisc->skb_bad_txq_cpu);
+	}
+
 	kfree((char *) qdisc - qdisc->padded);
 }

next prev parent reply	other threads:[~2016-08-17 19:37 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-17 19:33 [RFC PATCH 00/13] Series short description John Fastabend
2016-08-17 19:33 ` [RFC PATCH 01/13] net: sched: allow qdiscs to handle locking John Fastabend
2016-08-17 22:33   ` Eric Dumazet
2016-08-17 22:49     ` John Fastabend
2016-08-17 22:34   ` Eric Dumazet
2016-08-17 22:48     ` John Fastabend
2016-08-17 19:34 ` [RFC PATCH 02/13] net: sched: qdisc_qlen for per cpu logic John Fastabend
2016-08-17 19:34 ` [RFC PATCH 03/13] net: sched: provide per cpu qstat helpers John Fastabend
2016-08-17 19:35 ` [RFC PATCH 04/13] net: sched: provide atomic qlen helpers for bypass case John Fastabend
2016-08-17 19:35 ` [RFC PATCH 05/13] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2016-08-17 19:35 ` [RFC PATCH 06/13] net: sched: per cpu gso handlers John Fastabend
2016-08-17 19:36 ` [RFC PATCH 07/13] net: sched: support qdisc_reset on NOLOCK qdisc John Fastabend
2016-08-17 22:53   ` Eric Dumazet
2016-08-17 22:59     ` John Fastabend
2016-08-17 19:36 ` John Fastabend [this message]
2016-08-17 22:58   ` [RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc Eric Dumazet
2016-08-17 23:00     ` John Fastabend
2016-08-23 20:11       ` John Fastabend
2016-08-17 19:37 ` [RFC PATCH 09/13] net: sched: helper to sum qlen John Fastabend
2016-08-17 19:37 ` [RFC PATCH 10/13] net: sched: lockless support for netif_schedule John Fastabend
2016-08-17 19:46   ` John Fastabend
2016-08-17 23:01   ` Eric Dumazet
2016-08-17 23:17     ` John Fastabend
2016-08-17 23:33       ` Eric Dumazet
2016-08-17 19:38 ` [RFC PATCH 11/13] net: sched: pfifo_fast use alf_queue John Fastabend
2016-08-19 10:13   ` Jesper Dangaard Brouer
2016-08-19 15:44     ` John Fastabend
2016-08-17 19:38 ` [RFC PATCH 12/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2016-08-17 19:49   ` John Fastabend
2016-08-17 23:04   ` Eric Dumazet
2016-08-17 23:18     ` John Fastabend
2016-08-17 19:39 ` [RFC PATCH 13/13] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0864813 dfblob:d465fb9 dfblob:d713052 dfblob:b90a23a
dfblob:29238c4 dfblob:d10b762 )
 OR (
bs:"[RFC PATCH 08/13] net: sched: support skb_bad_tx with lockless qdisc" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160817193646.27032.75592.stgit@john-Precision-Tower-5810 \
    --to=john.fastabend@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=brouer@redhat.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=john.r.fastabend@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox