public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: John Fastabend <john.fastabend@gmail.com>
To: eric.dumazet@gmail.com
Cc: netdev@vger.kernel.org, john.fastabend@gmail.com
Subject: [RFC PATCH 08/17] net: sched: support skb_bad_tx with lockless qdisc
Date: Tue, 02 May 2017 08:27:21 -0700	[thread overview]
Message-ID: <20170502152720.8871.47840.stgit@john-Precision-Tower-5810> (raw)
In-Reply-To: <20170502151445.8871.43089.stgit@john-Precision-Tower-5810>

Similar to how gso is handled skb_bad_tx needs to be per cpu to handle
lockless qdisc with multiple writer/producers.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/net/sch_generic.h |    2 -
 net/sched/sch_generic.c   |  100 ++++++++++++++++++++++++++++++++++++---------
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3021bbb..4288222 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -92,7 +92,7 @@ struct Qdisc {
 	struct gnet_stats_queue	qstats;
 	unsigned long		state;
 	struct Qdisc            *next_sched;
-	struct sk_buff		*skb_bad_txq;
+	struct sk_buff_head	skb_bad_txq;
 	struct rcu_head		rcu_head;
 	int			padded;
 	atomic_t		refcnt;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37cd54e..d4194c2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,6 +44,68 @@
  * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
+
+static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
+{
+	const struct netdev_queue *txq = q->dev_queue;
+	spinlock_t *lock = NULL;
+	struct sk_buff *skb;
+
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
+	}
+
+	skb = skb_peek(&q->skb_bad_txq);
+	if (skb) {
+		/* check the reason of requeuing without tx lock first */
+		txq = skb_get_tx_queue(txq->dev, skb);
+		if (!netif_xmit_frozen_or_stopped(txq)) {
+			skb = __skb_dequeue(&q->skb_bad_txq);
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_dec(q, skb);
+				qdisc_qstats_cpu_qlen_dec(q);
+			} else {
+				qdisc_qstats_backlog_dec(q, skb);
+				q->q.qlen--;
+			}
+		} else {
+			skb = NULL;
+		}
+	}
+
+	if (lock)
+		spin_unlock(lock);
+
+	return skb;
+}
+
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
+{
+	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
+
+	if (unlikely(skb))
+		skb = __skb_dequeue_bad_txq(q);
+
+	return skb;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
+					     struct sk_buff *skb)
+{
+	spinlock_t *lock = NULL;
+
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
+	}
+
+	__skb_queue_tail(&q->skb_bad_txq, skb);
+
+	if (lock)
+		spin_unlock(lock);
+}
+
 static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	__skb_queue_head(&q->gso_skb, skb);
@@ -116,9 +178,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 		if (!nskb)
 			break;
 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
-			q->skb_bad_txq = nskb;
-			qdisc_qstats_backlog_inc(q, nskb);
-			q->q.qlen++;
+			qdisc_enqueue_skb_bad_txq(q, nskb);
+
+			if (qdisc_is_percpu_stats(q)) {
+				qdisc_qstats_cpu_backlog_inc(q, nskb);
+				qdisc_qstats_cpu_qlen_inc(q);
+			} else {
+				qdisc_qstats_backlog_inc(q, nskb);
+				q->q.qlen++;
+			}
 			break;
 		}
 		skb->next = nskb;
@@ -179,18 +247,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 	}
 validate:
 	*validate = true;
-	skb = q->skb_bad_txq;
-	if (unlikely(skb)) {
-		/* check the reason of requeuing without tx lock first */
-		txq = skb_get_tx_queue(txq->dev, skb);
-		if (!netif_xmit_frozen_or_stopped(txq)) {
-			q->skb_bad_txq = NULL;
-			qdisc_qstats_backlog_dec(q, skb);
-			q->q.qlen--;
-			goto bulk;
-		}
-		return NULL;
-	}
+	skb = qdisc_dequeue_skb_bad_txq(q);
+	if (unlikely(skb))
+		goto bulk;
 	if (!(q->flags & TCQ_F_ONETXQUEUE) ||
 	    !netif_xmit_frozen_or_stopped(txq))
 		skb = q->dequeue(q);
@@ -673,6 +732,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 		sch->padded = (char *) sch - (char *) p;
 	}
 	__skb_queue_head_init(&sch->gso_skb);
+	__skb_queue_head_init(&sch->skb_bad_txq);
 	qdisc_skb_head_init(&sch->q);
 	spin_lock_init(&sch->q.lock);
 
@@ -746,12 +806,12 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->skb_bad_txq);
-	qdisc->skb_bad_txq = NULL;
-
 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp)
 		kfree_skb_list(skb);
 
+	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp)
+		kfree_skb_list(skb);
+
 	qdisc->q.qlen = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
@@ -793,8 +853,9 @@ void qdisc_destroy(struct Qdisc *qdisc)
 
 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp)
 		kfree_skb_list(skb);
+	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp)
+		kfree_skb_list(skb);
 
-	kfree_skb(qdisc->skb_bad_txq);
 	/*
 	 * gen_estimator est_timer() might access qdisc->q.lock,
 	 * wait a RCU grace period before freeing qdisc.
@@ -1036,6 +1097,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 	rcu_assign_pointer(dev_queue->qdisc, qdisc);
 	dev_queue->qdisc_sleeping = qdisc;
 	__skb_queue_head_init(&qdisc->gso_skb);
+	__skb_queue_head_init(&qdisc->skb_bad_txq);
 }
 
 void dev_init_scheduler(struct net_device *dev)

  parent reply	other threads:[~2017-05-02 15:27 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-02 15:24 [RFC PATCH 00/17] latest qdisc patch series John Fastabend
2017-05-02 15:25 ` [RFC PATCH 01/17] net: sched: cleanup qdisc_run and __qdisc_run semantics John Fastabend
2017-05-02 15:25 ` [RFC PATCH 02/17] net: sched: allow qdiscs to handle locking John Fastabend
2017-05-02 15:25 ` [RFC PATCH 03/17] net: sched: remove remaining uses for qdisc_qlen in xmit path John Fastabend
2017-05-02 15:26 ` [RFC PATCH 04/17] net: sched: provide per cpu qstat helpers John Fastabend
2017-05-02 15:26 ` [RFC PATCH 05/17] net: sched: a dflt qdisc may be used with per cpu stats John Fastabend
2017-05-02 15:26 ` [RFC PATCH 06/17] net: sched: explicit locking in gso_cpu fallback John Fastabend
2017-05-02 15:27 ` [RFC PATCH 07/17] net: sched: drop qdisc_reset from dev_graft_qdisc John Fastabend
2017-05-02 15:27 ` John Fastabend [this message]
2017-05-02 15:27 ` [RFC PATCH 09/17] net: sched: check for frozen queue before skb_bad_txq check John Fastabend
2017-05-02 15:27 ` [RFC PATCH 10/17] net: sched: qdisc_qlen for per cpu logic John Fastabend
2017-05-02 15:28 ` [RFC PATCH 11/17] net: sched: helper to sum qlen John Fastabend
2017-05-02 15:28 ` [RFC PATCH 12/17] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mq John Fastabend
2017-05-02 15:28 ` [RFC PATCH 13/17] net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio John Fastabend
2017-05-02 15:29 ` [RFC PATCH 14/17] net: skb_array: expose peek API John Fastabend
2017-05-02 15:32 ` [RFC PATCH 00/17] latest qdisc patch series John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170502152720.8871.47840.stgit@john-Precision-Tower-5810 \
    --to=john.fastabend@gmail.com \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox