From: Daniel Borkmann <daniel@iogearbox.net>
To: Jiri Pirko <jiri@resnulli.us>, netdev@vger.kernel.org
Cc: davem@davemloft.net, jhs@mojatatu.com, xiyou.wangcong@gmail.com,
mlxsw@mellanox.com, edumazet@google.com,
alexander.h.duyck@intel.com, willemb@google.com,
john.fastabend@gmail.com, alexei.starovoitov@gmail.com
Subject: Re: [patch net-next] net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath
Date: Tue, 24 Oct 2017 12:50:21 +0200 [thread overview]
Message-ID: <59EF1AED.2010401@iogearbox.net> (raw)
In-Reply-To: <20171023212832.1332-1-jiri@resnulli.us>
On 10/23/2017 11:28 PM, Jiri Pirko wrote:
[...]
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 031dffd..c7ddbdb 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -143,6 +143,36 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
> #endif
> }
>
> +/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
> + * The fast path only needs to access filter list and to update stats
> + */
> +struct mini_Qdisc {
> + struct tcf_proto __rcu *filter_list;
> + struct gnet_stats_basic_cpu __percpu *cpu_bstats;
> + struct gnet_stats_queue __percpu *cpu_qstats;
> + struct mini_Qdisc __rcu **p_miniq;
> +};
> +
> +static inline void mini_qdisc_init(struct mini_Qdisc *miniq,
> + struct Qdisc *qdisc,
> + struct mini_Qdisc __rcu **p_miniq)
> +{
> + miniq->cpu_bstats = qdisc->cpu_bstats;
> + miniq->cpu_qstats = qdisc->cpu_qstats;
> + miniq->p_miniq = p_miniq;
> +}
> +
> +static inline void mini_qdisc_enable(struct mini_Qdisc *miniq)
> +{
> + rcu_assign_pointer(*miniq->p_miniq, miniq);
> +}
> +
> +static inline void mini_qdisc_disable(struct mini_Qdisc *miniq)
> +{
> + RCU_INIT_POINTER(*miniq->p_miniq, NULL);
> + rcu_barrier();
Can you add a comment against which call_rcu() above barrier
protects against?
> +}
> +
> struct Qdisc_class_ops {
> /* Child qdisc manipulation */
> struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *);
> @@ -259,9 +289,13 @@ struct qdisc_skb_cb {
> unsigned char data[QDISC_CB_PRIV_LEN];
> };
>
> +typedef void tcf_chain_change_empty_t(struct tcf_proto __rcu **p_filter_chain,
> + bool empty);
> +
> struct tcf_chain {
> struct tcf_proto __rcu *filter_chain;
> struct tcf_proto __rcu **p_filter_chain;
> + tcf_chain_change_empty_t *chain_change_empty;
> struct list_head list;
> struct tcf_block *block;
> u32 index; /* chain index */
> @@ -605,6 +639,12 @@ static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
> bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
> }
>
> +static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
> + const struct sk_buff *skb)
> +{
> + bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
> +}
> +
> static inline void qdisc_bstats_update(struct Qdisc *sch,
> const struct sk_buff *skb)
> {
> @@ -648,6 +688,11 @@ static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
> this_cpu_inc(sch->cpu_qstats->drops);
> }
>
> +static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
> +{
> + this_cpu_inc(miniq->cpu_qstats->drops);
> +}
> +
> static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
> {
> sch->qstats.overlimits++;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 24ac908..b4a5812 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3274,14 +3274,16 @@ EXPORT_SYMBOL(dev_loopback_xmit);
> static struct sk_buff *
> sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> {
> - struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
> + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_egress);
We already have dev passed here, so lets use it as done previously.
> struct tcf_result cl_res;
> + struct tcf_proto *cl;
>
> - if (!cl)
> + if (!miniq)
> return skb;
> + cl = rcu_dereference_bh(miniq->filter_list);
This one still has two RCU dereferences instead of just one. Could
we bind the lifetime of the miniq 1:1 to the filter_list head such
that we can then also get rid of the 2nd rcu_dereference_bh() and
piggy-back on the first one for the filter_list there, thus we push
this into control slow-path instead?
> /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
> - qdisc_bstats_cpu_update(cl->q, skb);
> + mini_qdisc_bstats_cpu_update(miniq, skb);
>
> switch (tcf_classify(skb, cl, &cl_res, false)) {
> case TC_ACT_OK:
> @@ -3289,7 +3291,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> skb->tc_index = TC_H_MIN(cl_res.classid);
> break;
> case TC_ACT_SHOT:
> - qdisc_qstats_cpu_drop(cl->q);
> + mini_qdisc_qstats_cpu_drop(miniq);
> *ret = NET_XMIT_DROP;
> kfree_skb(skb);
> return NULL;
> @@ -4189,16 +4191,19 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> struct net_device *orig_dev)
> {
> #ifdef CONFIG_NET_CLS_ACT
> - struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
> + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
> struct tcf_result cl_res;
> + struct tcf_proto *cl;
>
> /* If there's at least one ingress present somewhere (so
> * we get here via enabled static key), remaining devices
> * that are not configured with an ingress qdisc will bail
> * out here.
> */
> - if (!cl)
> + if (!miniq)
> return skb;
> + cl = rcu_dereference_bh(miniq->filter_list);
> +
> if (*pt_prev) {
> *ret = deliver_skb(skb, *pt_prev, orig_dev);
> *pt_prev = NULL;
> @@ -4206,7 +4211,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>
> qdisc_skb_cb(skb)->pkt_len = skb->len;
> skb->tc_at_ingress = 1;
> - qdisc_bstats_cpu_update(cl->q, skb);
> + mini_qdisc_bstats_cpu_update(miniq, skb);
>
> switch (tcf_classify(skb, cl, &cl_res, false)) {
> case TC_ACT_OK:
> @@ -4214,7 +4219,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> skb->tc_index = TC_H_MIN(cl_res.classid);
> break;
> case TC_ACT_SHOT:
> - qdisc_qstats_cpu_drop(cl->q);
> + mini_qdisc_qstats_cpu_drop(miniq);
> kfree_skb(skb);
> return NULL;
> case TC_ACT_STOLEN:
Thanks,
Daniel
next prev parent reply other threads:[~2017-10-24 10:50 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-23 21:28 [patch net-next] net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath Jiri Pirko
2017-10-24 10:50 ` Daniel Borkmann [this message]
2017-10-24 14:30 ` Jiri Pirko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=59EF1AED.2010401@iogearbox.net \
--to=daniel@iogearbox.net \
--cc=alexander.h.duyck@intel.com \
--cc=alexei.starovoitov@gmail.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jhs@mojatatu.com \
--cc=jiri@resnulli.us \
--cc=john.fastabend@gmail.com \
--cc=mlxsw@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=willemb@google.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.