* [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
@ 2023-12-19 18:16 ` Victor Nogueira
2023-12-28 11:40 ` Ido Schimmel
2023-12-28 11:50 ` Ido Schimmel
2023-12-19 18:16 ` [PATCH net-next v8 2/5] net/sched: cls_api: Expose tc block to the datapath Victor Nogueira
` (4 subsequent siblings)
5 siblings, 2 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-19 18:16 UTC (permalink / raw)
To: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri
Cc: mleitner, vladbu, paulb, pctammela, netdev, kernel
This commit makes tc blocks track which ports have been added to them.
With that, we'll be able to use this new information to send packets to
the block's ports, which is done in patch #5 of this series.
Suggested-by: Jiri Pirko <jiri@nvidia.com>
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/sch_generic.h | 2 ++
net/sched/cls_api.c | 2 ++
net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++++++++++++
net/sched/sch_generic.c | 18 ++++++++++++++++-
4 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index dcb9160e6467..248692ec3697 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
+#include <linux/xarray.h>
struct Qdisc_ops;
struct qdisc_walker;
@@ -457,6 +458,7 @@ struct tcf_chain {
};
struct tcf_block {
+ struct xarray ports; /* datapath accessible */
/* Lock protects tcf_block and lifetime-management data of chains
* attached to the block (refcnt, action_refcnt, explicitly_created).
*/
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dc1c19a25882..6020a32ecff2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
mutex_destroy(&block->proto_destroy_lock);
+ xa_destroy(&block->ports);
kfree_rcu(block, rcu);
}
@@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
+ xa_init(&block->ports);
/* Don't store q pointer for blocks which are shared */
if (!tcf_block_shared(block))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9eaf637220e..299086bb6205 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1180,6 +1180,43 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
return 0;
}
+/* Record @dev in the ports xarray of each tc block bound to @sch.
+ *
+ * @sch:    qdisc that was just created on @dev
+ * @dev:    net device to register, keyed by dev->ifindex
+ * @extack: netlink extended ack that receives the failure message
+ *
+ * The block lookups are guarded on ops->ingress_block_get and
+ * ops->egress_block_get, the same checks __qdisc_destroy() uses before it
+ * erases the entries. Without the guards, a qdisc that has no class ops
+ * dereferences a NULL cl_ops, and a qdisc whose tcf_block() callback ignores
+ * the class argument gets the same block twice, so the second xa_insert()
+ * fails.
+ *
+ * Returns 0 on success or the xa_insert() error. If the egress insert
+ * fails, the ingress entry added by this call is removed again.
+ */
+static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
+			       struct netlink_ext_ack *extack)
+{
+	const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
+	struct tcf_block *ingress_block = NULL;
+	struct tcf_block *block;
+	int err;
+
+	if (sch->ops->ingress_block_get) {
+		ingress_block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
+		if (ingress_block) {
+			err = xa_insert(&ingress_block->ports, dev->ifindex,
+					dev, GFP_KERNEL);
+			if (err) {
+				NL_SET_ERR_MSG(extack,
+					       "ingress block dev insert failed");
+				return err;
+			}
+		}
+	}
+
+	if (sch->ops->egress_block_get) {
+		block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
+		if (block) {
+			err = xa_insert(&block->ports, dev->ifindex, dev,
+					GFP_KERNEL);
+			if (err) {
+				NL_SET_ERR_MSG(extack,
+					       "Egress block dev insert failed");
+				goto err_out;
+			}
+		}
+	}
+
+	return 0;
+
+err_out:
+	/* Undo the ingress registration so a failed create leaves no entry. */
+	if (ingress_block)
+		xa_erase(&ingress_block->ports, dev->ifindex);
+
+	return err;
+}
+
static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
struct netlink_ext_ack *extack)
{
@@ -1350,6 +1387,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);
+ err = qdisc_block_add_dev(sch, dev, extack);
+ if (err)
+ goto err_out4;
+
return sch;
err_out4:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8dd0e5925342..e33568df97a5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1051,6 +1051,9 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct net_device *dev = qdisc_dev(qdisc);
+ const struct Qdisc_class_ops *cops;
+ struct tcf_block *block;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -1061,11 +1064,24 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
+ cops = ops->cl_ops;
+ if (ops->ingress_block_get) {
+ block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+ if (block)
+ xa_erase(&block->ports, dev->ifindex);
+ }
+
+ if (ops->egress_block_get) {
+ block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL);
+ if (block)
+ xa_erase(&block->ports, dev->ifindex);
+ }
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra
2023-12-19 18:16 ` [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
@ 2023-12-28 11:40 ` Ido Schimmel
2023-12-28 11:50 ` Ido Schimmel
1 sibling, 0 replies; 11+ messages in thread
From: Ido Schimmel @ 2023-12-28 11:40 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri,
mleitner, vladbu, paulb, pctammela, netdev, kernel
On Tue, Dec 19, 2023 at 03:16:19PM -0300, Victor Nogueira wrote:
> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> + struct netlink_ext_ack *extack)
> +{
> + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> + struct tcf_block *block;
> + int err;
> +
> + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> + if (block) {
> + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack,
> + "ingress block dev insert failed");
> + return err;
> + }
> + }
> +
> + block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> + if (block) {
> + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack,
> + "Egress block dev insert failed");
> + goto err_out;
> + }
> + }
The following fails after this patch:
# tc qdisc add dev swp1 ingress
Error: Egress block dev insert failed.
Probably because ingress_tcf_block() ignores the 'cl' argument.
> +
> + return 0;
> +
> +err_out:
> + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> + if (block)
> + xa_erase(&block->ports, dev->ifindex);
> +
> + return err;
> +}
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra
2023-12-19 18:16 ` [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
2023-12-28 11:40 ` Ido Schimmel
@ 2023-12-28 11:50 ` Ido Schimmel
2023-12-28 12:35 ` Jamal Hadi Salim
1 sibling, 1 reply; 11+ messages in thread
From: Ido Schimmel @ 2023-12-28 11:50 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri,
mleitner, vladbu, paulb, pctammela, netdev, kernel
On Tue, Dec 19, 2023 at 03:16:19PM -0300, Victor Nogueira wrote:
> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> + struct netlink_ext_ack *extack)
> +{
> + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> + struct tcf_block *block;
> + int err;
> +
> + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
Another problem, shouldn't there be a check that these operations are
actually implemented? The following now crashes with a NULL pointer
dereference:
# tc qdisc replace dev swp1 root handle 1: tbf rate 1Mbit burst 256k limit 1M
> + if (block) {
> + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack,
> + "ingress block dev insert failed");
> + return err;
> + }
> + }
> +
> + block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> + if (block) {
> + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack,
> + "Egress block dev insert failed");
> + goto err_out;
> + }
> + }
> +
> + return 0;
> +
> +err_out:
> + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> + if (block)
> + xa_erase(&block->ports, dev->ifindex);
> +
> + return err;
> +}
> +
> static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> struct netlink_ext_ack *extack)
> {
> @@ -1350,6 +1387,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> qdisc_hash_add(sch, false);
> trace_qdisc_create(ops, dev, parent);
>
> + err = qdisc_block_add_dev(sch, dev, extack);
> + if (err)
> + goto err_out4;
> +
> return sch;
>
> err_out4:
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra
2023-12-28 11:50 ` Ido Schimmel
@ 2023-12-28 12:35 ` Jamal Hadi Salim
2023-12-28 14:13 ` Victor Nogueira
0 siblings, 1 reply; 11+ messages in thread
From: Jamal Hadi Salim @ 2023-12-28 12:35 UTC (permalink / raw)
To: Ido Schimmel
Cc: Victor Nogueira, davem, edumazet, kuba, pabeni, xiyou.wangcong,
jiri, mleitner, vladbu, paulb, pctammela, netdev, kernel
On Thu, Dec 28, 2023 at 6:50 AM Ido Schimmel <idosch@idosch.org> wrote:
>
> On Tue, Dec 19, 2023 at 03:16:19PM -0300, Victor Nogueira wrote:
> > +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> > + struct netlink_ext_ack *extack)
> > +{
> > + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> > + struct tcf_block *block;
> > + int err;
> > +
> > + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
>
> Another problem, shouldn't there be a check that these operations are
> actually implemented? The following now crashes with a NULL pointer
> dereference:
>
> # tc qdisc replace dev swp1 root handle 1: tbf rate 1Mbit burst 256k limit 1M
I think this broke from v7->v8. Thanks for catching this. We'll send a
fix shortly.
cheers,
jamal
> > + if (block) {
> > + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> > + if (err) {
> > + NL_SET_ERR_MSG(extack,
> > + "ingress block dev insert failed");
> > + return err;
> > + }
> > + }
> > +
> > + block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
> > + if (block) {
> > + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
> > + if (err) {
> > + NL_SET_ERR_MSG(extack,
> > + "Egress block dev insert failed");
> > + goto err_out;
> > + }
> > + }
> > +
> > + return 0;
> > +
> > +err_out:
> > + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
> > + if (block)
> > + xa_erase(&block->ports, dev->ifindex);
> > +
> > + return err;
> > +}
> > +
> > static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> > struct netlink_ext_ack *extack)
> > {
> > @@ -1350,6 +1387,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> > qdisc_hash_add(sch, false);
> > trace_qdisc_create(ops, dev, parent);
> >
> > + err = qdisc_block_add_dev(sch, dev, extack);
> > + if (err)
> > + goto err_out4;
> > +
> > return sch;
> >
> > err_out4:
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra
2023-12-28 12:35 ` Jamal Hadi Salim
@ 2023-12-28 14:13 ` Victor Nogueira
0 siblings, 0 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-28 14:13 UTC (permalink / raw)
To: Jamal Hadi Salim, Ido Schimmel
Cc: davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri, mleitner,
vladbu, paulb, pctammela, netdev, kernel
On 28/12/2023 09:35, Jamal Hadi Salim wrote:
> On Thu, Dec 28, 2023 at 6:50 AM Ido Schimmel <idosch@idosch.org> wrote:
>>
>> On Tue, Dec 19, 2023 at 03:16:19PM -0300, Victor Nogueira wrote:
>>> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
>>> + struct netlink_ext_ack *extack)
>>> +{
>>> + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
>>> + struct tcf_block *block;
>>> + int err;
>>> +
>>> + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
>>
>> Another problem, shouldn't there be a check that these operations are
>> actually implemented? The following now crashes with a NULL pointer
>> dereference:
>>
>> # tc qdisc replace dev swp1 root handle 1: tbf rate 1Mbit burst 256k limit 1M
>
>
> I think this broke from v7->v8. Thanks for catching this. We'll send a
> fix shortly.
Just sent a fix to net-next because the original patch hasn't been
propagated to net yet.
cheers,
Victor
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH net-next v8 2/5] net/sched: cls_api: Expose tc block to the datapath
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
@ 2023-12-19 18:16 ` Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 3/5] net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability Victor Nogueira
` (3 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-19 18:16 UTC (permalink / raw)
To: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri
Cc: mleitner, vladbu, paulb, pctammela, netdev, kernel
The datapath can now find the block of the port on which the packet
arrived.
Later in this series we show a possible usage of this patch in a new
version of mirred that multicasts to all ports except for the port on
which the packet arrived.
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/sch_generic.h | 2 ++
net/sched/cls_api.c | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 248692ec3697..3b2c5b03c4cc 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -485,6 +485,8 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);
+
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6020a32ecff2..618f68733012 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1011,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return block;
}
-static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
return idr_find(&tn->idr, block_index);
}
+EXPORT_SYMBOL(tcf_block_lookup);
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH net-next v8 3/5] net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 1/5] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 2/5] net/sched: cls_api: Expose tc block to the datapath Victor Nogueira
@ 2023-12-19 18:16 ` Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 4/5] net/sched: act_mirred: Add helper function tcf_mirred_replace_dev Victor Nogueira
` (2 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-19 18:16 UTC (permalink / raw)
To: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri
Cc: mleitner, vladbu, paulb, pctammela, netdev, kernel
As a preparation for adding block ID to mirred, separate the part of
mirred that redirects/mirrors to a dev into a specific function so that it
can be called by blockcast for each dev.
Also improve readability. Eg. rename use_reinsert to dont_clone and skb2
to skb_to_send.
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
net/sched/act_mirred.c | 129 +++++++++++++++++++++++------------------
1 file changed, 72 insertions(+), 57 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a711c184c29..6f2544c1e396 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -225,48 +225,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
return err;
}
-TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
- const struct tc_action *a,
- struct tcf_result *res)
+static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
+ struct net_device *dev,
+ const bool m_mac_header_xmit, int m_eaction,
+ int retval)
{
- struct tcf_mirred *m = to_mirred(a);
- struct sk_buff *skb2 = skb;
- bool m_mac_header_xmit;
- struct net_device *dev;
- unsigned int nest_level;
- int retval, err = 0;
- bool use_reinsert;
+ struct sk_buff *skb_to_send = skb;
bool want_ingress;
bool is_redirect;
bool expects_nh;
bool at_ingress;
- int m_eaction;
+ bool dont_clone;
int mac_len;
bool at_nh;
+ int err;
- nest_level = __this_cpu_inc_return(mirred_nest_level);
- if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
- net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
- netdev_name(skb->dev));
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_SHOT;
- }
-
- tcf_lastuse_update(&m->tcf_tm);
- tcf_action_update_bstats(&m->common, skb);
-
- m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
- m_eaction = READ_ONCE(m->tcfm_eaction);
- retval = READ_ONCE(m->tcf_action);
- dev = rcu_dereference_bh(m->tcfm_dev);
- if (unlikely(!dev)) {
- pr_notice_once("tc mirred: target device is gone\n");
- goto out;
- }
-
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
+ err = -ENODEV;
goto out;
}
@@ -274,61 +252,98 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
* since we can't easily detect the clsact caller, skip clone only for
* ingress - that covers the TC S/W datapath.
*/
- is_redirect = tcf_mirred_is_act_redirect(m_eaction);
at_ingress = skb_at_tc_ingress(skb);
- use_reinsert = at_ingress && is_redirect &&
- tcf_mirred_can_reinsert(retval);
- if (!use_reinsert) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
+ dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!dont_clone) {
+ skb_to_send = skb_clone(skb, GFP_ATOMIC);
+ if (!skb_to_send) {
+ err = -ENOMEM;
goto out;
+ }
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
/* All mirred/redirected skbs should clear previous ct info */
- nf_reset_ct(skb2);
+ nf_reset_ct(skb_to_send);
if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
- skb_dst_drop(skb2);
+ skb_dst_drop(skb_to_send);
expects_nh = want_ingress || !m_mac_header_xmit;
at_nh = skb->data == skb_network_header(skb);
if (at_nh != expects_nh) {
- mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ mac_len = at_ingress ? skb->mac_len :
skb_network_offset(skb);
if (expects_nh) {
/* target device/action expect data at nh */
- skb_pull_rcsum(skb2, mac_len);
+ skb_pull_rcsum(skb_to_send, mac_len);
} else {
/* target device/action expect data at mac */
- skb_push_rcsum(skb2, mac_len);
+ skb_push_rcsum(skb_to_send, mac_len);
}
}
- skb2->skb_iif = skb->dev->ifindex;
- skb2->dev = dev;
+ skb_to_send->skb_iif = skb->dev->ifindex;
+ skb_to_send->dev = dev;
- /* mirror is always swallowed */
if (is_redirect) {
- skb_set_redirected(skb2, skb2->tc_at_ingress);
-
- /* let's the caller reinsert the packet, if possible */
- if (use_reinsert) {
- err = tcf_mirred_forward(want_ingress, skb);
- if (err)
- tcf_action_inc_overlimit_qstats(&m->common);
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_CONSUMED;
- }
+ if (skb == skb_to_send)
+ retval = TC_ACT_CONSUMED;
+
+ skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
+
+ err = tcf_mirred_forward(want_ingress, skb_to_send);
+ } else {
+ err = tcf_mirred_forward(want_ingress, skb_to_send);
}
- err = tcf_mirred_forward(want_ingress, skb2);
if (err) {
out:
tcf_action_inc_overlimit_qstats(&m->common);
- if (tcf_mirred_is_act_redirect(m_eaction))
+ if (is_redirect)
retval = TC_ACT_SHOT;
}
+
+ return retval;
+}
+
+TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_mirred *m = to_mirred(a);
+ int retval = READ_ONCE(m->tcf_action);
+ unsigned int nest_level;
+ bool m_mac_header_xmit;
+ struct net_device *dev;
+ int m_eaction;
+
+ nest_level = __this_cpu_inc_return(mirred_nest_level);
+ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ retval = TC_ACT_SHOT;
+ goto dec_nest_level;
+ }
+
+ tcf_lastuse_update(&m->tcf_tm);
+ tcf_action_update_bstats(&m->common, skb);
+
+ dev = rcu_dereference_bh(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
+ tcf_action_inc_overlimit_qstats(&m->common);
+ goto dec_nest_level;
+ }
+
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
+
+ retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction,
+ retval);
+
+dec_nest_level:
__this_cpu_dec(mirred_nest_level);
return retval;
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH net-next v8 4/5] net/sched: act_mirred: Add helper function tcf_mirred_replace_dev
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
` (2 preceding siblings ...)
2023-12-19 18:16 ` [PATCH net-next v8 3/5] net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability Victor Nogueira
@ 2023-12-19 18:16 ` Victor Nogueira
2023-12-19 18:16 ` [PATCH net-next v8 5/5] net/sched: act_mirred: Allow mirred to block Victor Nogueira
2023-12-26 21:30 ` [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use patchwork-bot+netdevbpf
5 siblings, 0 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-19 18:16 UTC (permalink / raw)
To: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri
Cc: mleitner, vladbu, paulb, pctammela, netdev, kernel
The act of replacing a device will be repeated by the init logic for the
block ID in the patch that allows mirred to a block. Therefore we
encapsulate this functionality in a function (tcf_mirred_replace_dev) so
that we can reuse it and avoid code repetition.
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
net/sched/act_mirred.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6f2544c1e396..a1be8f3c4a8e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -89,6 +89,16 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
static struct tc_action_ops act_mirred_ops;
+/* Swap the target device of mirred action @m for @ndev (may be NULL) and
+ * release the tracked reference on the device that was installed before.
+ * The lockdep expression requires m->tcf_lock to be held by the caller.
+ */
+static void tcf_mirred_replace_dev(struct tcf_mirred *m,
+				   struct net_device *ndev)
+{
+	struct net_device *old_dev =
+		rcu_replace_pointer(m->tcfm_dev, ndev,
+				    lockdep_is_held(&m->tcf_lock));
+
+	netdev_put(old_dev, &m->tcfm_dev_tracker);
+}
+
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp,
@@ -170,7 +180,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&m->tcf_lock);
if (parm->ifindex) {
- struct net_device *odev, *ndev;
+ struct net_device *ndev;
ndev = dev_get_by_index(net, parm->ifindex);
if (!ndev) {
@@ -179,9 +189,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(ndev);
- odev = rcu_replace_pointer(m->tcfm_dev, ndev,
- lockdep_is_held(&m->tcf_lock));
- netdev_put(odev, &m->tcfm_dev_tracker);
+ tcf_mirred_replace_dev(m, ndev);
netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH net-next v8 5/5] net/sched: act_mirred: Allow mirred to block
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
` (3 preceding siblings ...)
2023-12-19 18:16 ` [PATCH net-next v8 4/5] net/sched: act_mirred: Add helper function tcf_mirred_replace_dev Victor Nogueira
@ 2023-12-19 18:16 ` Victor Nogueira
2023-12-26 21:30 ` [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use patchwork-bot+netdevbpf
5 siblings, 0 replies; 11+ messages in thread
From: Victor Nogueira @ 2023-12-19 18:16 UTC (permalink / raw)
To: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri
Cc: mleitner, vladbu, paulb, pctammela, netdev, kernel
So far the mirred action has dealt with syntax that handles
mirror/redirection for netdev. A matching packet is redirected or mirrored
to a target netdev.
In this patch we enable mirred to mirror to a tc block as well.
IOW, the new syntax looks as follows:
... mirred <ingress | egress> <mirror | redirect> [index INDEX] < <blockid BLOCKID> | <dev <devname>> >
Examples of mirroring or redirecting to a tc block:
$ tc filter add block 22 protocol ip pref 25 \
flower dst_ip 192.168.0.0/16 action mirred egress mirror blockid 22
$ tc filter add block 22 protocol ip pref 25 \
flower dst_ip 10.10.10.10/32 action mirred egress redirect blockid 22
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/tc_act/tc_mirred.h | 1 +
include/uapi/linux/tc_act/tc_mirred.h | 1 +
net/sched/act_mirred.c | 119 +++++++++++++++++++++++++-
3 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 32ce8ea36950..75722d967bf2 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,6 +8,7 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
+ u32 tcfm_blockid;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
netdevice_tracker tcfm_dev_tracker;
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 2500a0005d05..c61e76f3c23b 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -21,6 +21,7 @@ enum {
TCA_MIRRED_TM,
TCA_MIRRED_PARMS,
TCA_MIRRED_PAD,
+ TCA_MIRRED_BLOCKID,
__TCA_MIRRED_MAX
};
#define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index a1be8f3c4a8e..d1f9794ca9b7 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -85,6 +85,7 @@ static void tcf_mirred_release(struct tc_action *a)
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
+ [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1),
};
static struct tc_action_ops act_mirred_ops;
@@ -136,6 +137,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (exists && bind)
return 0;
+ if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot specify Block ID and dev simultaneously");
+ if (exists)
+ tcf_idr_release(*a, bind);
+ else
+ tcf_idr_cleanup(tn, index);
+
+ return -EINVAL;
+ }
+
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
case TCA_EGRESS_REDIR:
@@ -152,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (!parm->ifindex) {
+ if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) {
tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Must specify device or block");
return -EINVAL;
}
ret = tcf_idr_create_from_flags(tn, index, est, a,
@@ -192,6 +205,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_mirred_replace_dev(m, ndev);
netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC);
m->tcfm_mac_header_xmit = mac_header_xmit;
+ m->tcfm_blockid = 0;
+ } else if (tb[TCA_MIRRED_BLOCKID]) {
+ tcf_mirred_replace_dev(m, NULL);
+ m->tcfm_mac_header_xmit = false;
+ m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]);
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
m->tcfm_eaction = parm->eaction;
@@ -316,6 +334,89 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
return retval;
}
+/* Redirect @skb to every port of @block except @exception_ifindex.
+ *
+ * @skb:               packet being acted on
+ * @m:                 mirred action instance
+ * @block:             tc block whose ports xarray is walked
+ * @m_eaction:         configured redirect action
+ * @exception_ifindex: ifindex to skip (the caller passes skb->dev's)
+ * @retval:            verdict returned when no port is eligible
+ *
+ * Sending lags one port behind the walk. Every port except the last
+ * eligible one is sent to with the mirror action for the same direction;
+ * only the last one receives the real redirect, and its verdict is
+ * returned. Each send passes the mac_header_xmit property of the device it
+ * is sending to.
+ */
+static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
+			       struct tcf_block *block, int m_eaction,
+			       const u32 exception_ifindex, int retval)
+{
+	struct net_device *dev_prev = NULL;
+	struct net_device *dev = NULL;
+	unsigned long index;
+	int mirred_eaction;
+
+	mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ?
+		TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR;
+
+	xa_for_each(&block->ports, index, dev) {
+		if (index == exception_ifindex)
+			continue;
+
+		/* Another eligible port follows, so dev_prev is not the last
+		 * one: mirror to it using its own mac_header_xmit setting.
+		 */
+		if (dev_prev)
+			tcf_mirred_to_dev(skb, m, dev_prev,
+					  dev_is_mac_header_xmit(dev_prev),
+					  mirred_eaction, retval);
+
+		dev_prev = dev;
+	}
+
+	/* The last eligible port gets the redirect proper. */
+	if (dev_prev)
+		return tcf_mirred_to_dev(skb, m, dev_prev,
+					 dev_is_mac_header_xmit(dev_prev),
+					 m_eaction, retval);
+
+	return retval;
+}
+
+/* Mirror @skb to every port of @block other than @exception_ifindex.
+ * The results of the individual sends are ignored; @retval is returned
+ * unchanged.
+ */
+static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m,
+				struct tcf_block *block, int m_eaction,
+				const u32 exception_ifindex, int retval)
+{
+	struct net_device *port = NULL;
+	unsigned long ifindex;
+
+	xa_for_each(&block->ports, ifindex, port) {
+		if (ifindex != exception_ifindex)
+			tcf_mirred_to_dev(skb, m, port,
+					  dev_is_mac_header_xmit(port),
+					  m_eaction, retval);
+	}
+
+	return retval;
+}
+
+/* Send @skb to the ports of the tc block identified by @blockid, skipping
+ * the device the packet is currently on (skb->dev).
+ *
+ * If the block cannot be found or has no ports, the overlimit counter of
+ * @m is bumped and @retval is returned as is. Otherwise the work is handed
+ * to the redirect or mirror helper according to the action's eaction.
+ * @res is not used here.
+ */
+static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m,
+			 const u32 blockid, struct tcf_result *res,
+			 int retval)
+{
+	const u32 exception_ifindex = skb->dev->ifindex;
+	const int m_eaction = READ_ONCE(m->tcfm_eaction);
+	struct tcf_block *block;
+
+	/* we are already under rcu protection, so can call block lookup
+	 * directly.
+	 */
+	block = tcf_block_lookup(dev_net(skb->dev), blockid);
+	if (block && !xa_empty(&block->ports)) {
+		if (tcf_mirred_is_act_redirect(m_eaction))
+			return tcf_blockcast_redir(skb, m, block, m_eaction,
+						   exception_ifindex, retval);
+
+		/* If it's not redirect, it is mirror */
+		return tcf_blockcast_mirror(skb, m, block, m_eaction,
+					    exception_ifindex, retval);
+	}
+
+	tcf_action_inc_overlimit_qstats(&m->common);
+	return retval;
+}
+
TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
@@ -326,6 +427,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
bool m_mac_header_xmit;
struct net_device *dev;
int m_eaction;
+ u32 blockid;
nest_level = __this_cpu_inc_return(mirred_nest_level);
if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
@@ -338,6 +440,12 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
tcf_lastuse_update(&m->tcf_tm);
tcf_action_update_bstats(&m->common, skb);
+ blockid = READ_ONCE(m->tcfm_blockid);
+ if (blockid) {
+ retval = tcf_blockcast(skb, m, blockid, res, retval);
+ goto dec_nest_level;
+ }
+
dev = rcu_dereference_bh(m->tcfm_dev);
if (unlikely(!dev)) {
pr_notice_once("tc mirred: target device is gone\n");
@@ -379,6 +487,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
};
struct net_device *dev;
struct tcf_t t;
+ u32 blockid;
spin_lock_bh(&m->tcf_lock);
opt.action = m->tcf_action;
@@ -390,6 +499,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ blockid = m->tcfm_blockid;
+ if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid))
+ goto nla_put_failure;
+
tcf_tm_dump(&t, &m->tcf_tm);
if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
@@ -420,6 +533,8 @@ static int mirred_device_event(struct notifier_block *unused,
* net_device are already rcu protected.
*/
RCU_INIT_POINTER(m->tcfm_dev, NULL);
+ } else if (m->tcfm_blockid) {
+ m->tcfm_blockid = 0;
}
spin_unlock_bh(&m->tcf_lock);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use
2023-12-19 18:16 [PATCH net-next v8 0/5] net/sched: Introduce tc block ports tracking and use Victor Nogueira
` (4 preceding siblings ...)
2023-12-19 18:16 ` [PATCH net-next v8 5/5] net/sched: act_mirred: Allow mirred to block Victor Nogueira
@ 2023-12-26 21:30 ` patchwork-bot+netdevbpf
5 siblings, 0 replies; 11+ messages in thread
From: patchwork-bot+netdevbpf @ 2023-12-26 21:30 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, davem, edumazet, kuba, pabeni, xiyou.wangcong, jiri,
mleitner, vladbu, paulb, pctammela, netdev, kernel
Hello:
This series was applied to netdev/net-next.git (main)
by David S. Miller <davem@davemloft.net>:
On Tue, 19 Dec 2023 15:16:18 -0300 you wrote:
> __context__
> The "tc block" is a collection of netdevs/ports which allow qdiscs to share
> match-action block instances (as opposed to the traditional tc filter per
> netdev/port)[1].
>
> Up to this point in the implementation, the block is unaware of its ports.
> This patch makes the tc block ports available to the datapath.
>
> [...]
Here is the summary with links:
- [net-next,v8,1/5] net/sched: Introduce tc block netdev tracking infra
https://git.kernel.org/netdev/net-next/c/913b47d3424e
- [net-next,v8,2/5] net/sched: cls_api: Expose tc block to the datapath
https://git.kernel.org/netdev/net-next/c/a7042cf8f231
- [net-next,v8,3/5] net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability
https://git.kernel.org/netdev/net-next/c/16085e48cb48
- [net-next,v8,4/5] net/sched: act_mirred: Add helper function tcf_mirred_replace_dev
https://git.kernel.org/netdev/net-next/c/415e38bf1d8d
- [net-next,v8,5/5] net/sched: act_mirred: Allow mirred to block
https://git.kernel.org/netdev/net-next/c/42f39036cda8
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 11+ messages in thread