From: Ben Greear <greearb@candelatech.com>
To: netdev@vger.kernel.org
Cc: Ben Greear <greearb@candelatech.com>
Subject: [net-next 1/2] qdisc: Allow qdiscs to provide backpressure up the stack.
Date: Wed, 25 Aug 2010 12:00:50 -0700 [thread overview]
Message-ID: <1282762851-3612-1-git-send-email-greearb@candelatech.com> (raw)
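Some qdiscs can, in some cases, reliably detect that they are about
to drop a packet in the dev_queue_xmit() path. In that case it would
be nice to provide backpressure up the stack instead, and NOT free
the skb in the qdisc logic, leaving it for the caller to deal with.
To support this, add a NET_XMIT_BUSY return code, an optional
try_enqueue qdisc op (implemented for pfifo_fast), and
try_dev_queue_xmit(), which may return NET_XMIT_BUSY without
freeing the skb when try_no_consume is non-zero.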
Signed-off-by: Ben Greear <greearb@candelatech.com>
---
:100644 100644 59962db... 20be932... M include/linux/netdevice.h
:100644 100644 3c8728a... 146a97a... M include/net/sch_generic.h
:100644 100644 859e30f... f360a9b... M net/core/dev.c
:100644 100644 2aeb3a4... 0692717... M net/sched/sch_generic.c
include/linux/netdevice.h | 7 +++++++
include/net/sch_generic.h | 19 +++++++++++++++++++
net/core/dev.c | 19 ++++++++++++++-----
net/sched/sch_generic.c | 20 ++++++++++++++++++++
4 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59962db..20be932 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -97,6 +97,7 @@ struct wireless_dev;
#define NET_XMIT_DROP 0x01 /* skb dropped */
#define NET_XMIT_CN 0x02 /* congestion notification */
#define NET_XMIT_POLICED 0x03 /* skb is shot by police */
+#define NET_XMIT_BUSY 0x04 /* congestion, but skb was NOT freed */
#define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */
/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
@@ -1296,6 +1297,12 @@ extern int dev_open(struct net_device *dev);
extern int dev_close(struct net_device *dev);
extern void dev_disable_lro(struct net_device *dev);
extern int dev_queue_xmit(struct sk_buff *skb);
+
+/* Similar to dev_queue_xmit, but if try_no_consume != 0,
+ * it may return NET_XMIT_BUSY and NOT free the skb if it detects congestion
+ */
+extern int try_dev_queue_xmit(struct sk_buff *skb, int try_no_consume);
+
extern int register_netdevice(struct net_device *dev);
extern void unregister_netdevice_queue(struct net_device *dev,
struct list_head *head);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3c8728a..146a97a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -43,6 +43,7 @@ struct qdisc_size_table {
struct Qdisc {
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
+ int (*try_enqueue)(struct sk_buff *, struct Qdisc *dev); /* May return NET_XMIT_BUSY and NOT free skb. */
struct sk_buff * (*dequeue)(struct Qdisc *dev);
unsigned flags;
#define TCQ_F_BUILTIN 1
@@ -135,6 +136,7 @@ struct Qdisc_ops {
int priv_size;
int (*enqueue)(struct sk_buff *, struct Qdisc *);
+ int (*try_enqueue)(struct sk_buff *, struct Qdisc *); /* May return NET_XMIT_BUSY and NOT free skb. */
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
unsigned int (*drop)(struct Qdisc *);
@@ -426,6 +428,23 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
}
+static inline int try_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+#ifdef CONFIG_NET_SCHED
+ if (sch->stab)
+ qdisc_calculate_pkt_len(skb, sch->stab);
+#endif
+ if (sch->try_enqueue)
+ return sch->try_enqueue(skb, sch);
+ return sch->enqueue(skb, sch);
+}
+
+static inline int try_qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
+{
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ return try_qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
+}
+
static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
{
sch->bstats.bytes += len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 859e30f..f360a9b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2087,7 +2087,8 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
- struct netdev_queue *txq)
+ struct netdev_queue *txq,
+ bool try_no_consume)
{
spinlock_t *root_lock = qdisc_lock(q);
bool contended = qdisc_is_running(q);
@@ -2128,7 +2129,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
skb_dst_force(skb);
- rc = qdisc_enqueue_root(skb, q);
+ if (try_no_consume)
+ rc = try_qdisc_enqueue_root(skb, q);
+ else
+ rc = qdisc_enqueue_root(skb, q);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -2168,7 +2172,12 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
-int dev_queue_xmit(struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb) {
+ return try_dev_queue_xmit(skb, 0);
+}
+EXPORT_SYMBOL(dev_queue_xmit);
+
+int try_dev_queue_xmit(struct sk_buff *skb, int try_no_consume)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -2187,7 +2196,7 @@ int dev_queue_xmit(struct sk_buff *skb)
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
if (q->enqueue) {
- rc = __dev_xmit_skb(skb, q, dev, txq);
+ rc = __dev_xmit_skb(skb, q, dev, txq, try_no_consume);
goto out;
}
@@ -2239,7 +2248,7 @@ out:
rcu_read_unlock_bh();
return rc;
}
-EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(try_dev_queue_xmit);
/*=======================================================================
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4..0692717 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -460,6 +460,24 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
return qdisc_drop(skb, qdisc);
}
+static int pfifo_fast_try_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+ int band = prio2band[skb->priority & TC_PRIO_MAX];
+ struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ struct sk_buff_head *list = band2list(priv, band);
+
+ priv->bitmap |= (1 << band);
+ qdisc->q.qlen++;
+ return __qdisc_enqueue_tail(skb, qdisc, list);
+ }
+
+ /* No room to enqueue: tell the calling code to back off.
+ * Do NOT free the skb here; it is the calling code's to deal with.
+ */
+ return NET_XMIT_BUSY;
+}
+
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -533,6 +551,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.id = "pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
.enqueue = pfifo_fast_enqueue,
+ .try_enqueue = pfifo_fast_try_enqueue,
.dequeue = pfifo_fast_dequeue,
.peek = pfifo_fast_peek,
.init = pfifo_fast_init,
@@ -564,6 +583,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
spin_lock_init(&sch->busylock);
sch->ops = ops;
sch->enqueue = ops->enqueue;
+ sch->try_enqueue = ops->try_enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
dev_hold(qdisc_dev(sch));
--
1.6.2.5
next reply other threads:[~2010-08-25 19:00 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-25 19:00 Ben Greear [this message]
2010-08-25 19:00 ` [net-next 2/2] macvlan: Enable qdisc backoff logic Ben Greear
2010-08-25 19:24 ` Arnd Bergmann
2010-08-25 19:27 ` Ben Greear
2010-08-25 19:38 ` Hagen Paul Pfeifer
2010-08-25 19:49 ` Ben Greear
2010-08-25 19:59 ` Arnd Bergmann
2010-08-25 20:49 ` Ben Greear
2010-08-26 13:55 ` Arnd Bergmann
2010-08-26 15:33 ` Ben Greear
2010-08-26 17:45 ` Ben Greear
2010-08-27 13:16 ` Arnd Bergmann
2010-08-25 20:44 ` [net-next 1/2] qdisc: Allow qdiscs to provide backpressure up the stack Stephen Hemminger
2010-08-25 20:56 ` Ben Greear
2010-08-26 22:59 ` David Miller
2010-08-27 4:14 ` Ben Greear
2010-08-27 4:34 ` David Miller
2010-08-27 5:22 ` Ben Greear
2010-08-27 5:36 ` David Miller
2010-08-27 5:58 ` Ben Greear
2010-08-27 6:11 ` David Miller
2010-08-27 15:26 ` Ben Greear
2010-08-27 15:59 ` Eric Dumazet
2010-08-27 17:00 ` Ben Greear
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1282762851-3612-1-git-send-email-greearb@candelatech.com \
--to=greearb@candelatech.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.