* [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra
2023-08-19 16:35 [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Victor Nogueira
@ 2023-08-19 16:35 ` Victor Nogueira
2023-08-21 19:12 ` Vlad Buslov
2023-08-19 16:35 ` [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath Victor Nogueira
` (2 subsequent siblings)
3 siblings, 1 reply; 16+ messages in thread
From: Victor Nogueira @ 2023-08-19 16:35 UTC (permalink / raw)
To: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev
Cc: mleitner, vladbu, horms, pctammela, kernel
The tc block is a collection of netdevs/ports which allow qdiscs to share
filter block instances (as opposed to the traditional tc filter per port).
Example:
$ tc qdisc add dev ens7 ingress block 22
$ tc qdisc add dev ens8 ingress block 22
Now we can add a filter using the block index:
$ tc filter add block 22 protocol ip pref 25 \
flower dst_ip 192.168.0.0/16 action drop
Up to this point, the block is unaware of its ports. This patch fixes that
and makes the tc block ports available to the datapath as well as control
path on offloading.
Suggested-by: Jiri Pirko <jiri@nvidia.com>
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/sch_generic.h | 4 ++
net/sched/cls_api.c | 1 +
net/sched/sch_api.c | 79 +++++++++++++++++++++++++++++++++++++--
net/sched/sch_generic.c | 34 ++++++++++++++++-
4 files changed, 112 insertions(+), 6 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e92f73bb3198..824a0ecb5afc 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
+#include <linux/xarray.h>
struct Qdisc_ops;
struct qdisc_walker;
@@ -126,6 +127,8 @@ struct Qdisc {
struct rcu_head rcu;
netdevice_tracker dev_tracker;
+ netdevice_tracker in_block_tracker;
+ netdevice_tracker eg_block_tracker;
/* private data */
long privdata[] ____cacheline_aligned;
};
@@ -458,6 +461,7 @@ struct tcf_chain {
};
struct tcf_block {
+ struct xarray ports; /* datapath accessible */
/* Lock protects tcf_block and lifetime-management data of chains
* attached to the block (refcnt, action_refcnt, explicitly_created).
*/
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a193cc7b3241..a976792ef02f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
+ xa_init(&block->ports);
/* Don't store q pointer for blocks which are shared */
if (!tcf_block_shared(block))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..6c0c220cdb21 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
return 0;
}
+static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
+{
+ if (tca[TCA_INGRESS_BLOCK])
+ sch->ops->ingress_block_set(sch, 0);
+
+ if (tca[TCA_EGRESS_BLOCK])
+ sch->ops->egress_block_set(sch, 0);
+}
+
+static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
+ struct nlattr **tca,
+ struct netlink_ext_ack *extack)
+{
+ const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
+ struct tcf_block *in_block = NULL;
+ struct tcf_block *eg_block = NULL;
+ unsigned long cl = 0;
+ int err;
+
+ if (tca[TCA_INGRESS_BLOCK]) {
+ /* works for both ingress and clsact */
+ cl = TC_H_MIN_INGRESS;
+ in_block = cl_ops->tcf_block(sch, cl, NULL);
+ if (!in_block) {
+ NL_SET_ERR_MSG(extack, "Shared ingress block missing");
+ return -EINVAL;
+ }
+
+ err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
+ return err;
+ }
+
+ netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
+ }
+
+ if (tca[TCA_EGRESS_BLOCK]) {
+ cl = TC_H_MIN_EGRESS;
+ eg_block = cl_ops->tcf_block(sch, cl, NULL);
+ if (!eg_block) {
+ NL_SET_ERR_MSG(extack, "Shared egress block missing");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
+ if (err) {
+ netdev_put(dev, &sch->eg_block_tracker);
+ NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
+ goto err_out;
+ }
+ netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
+ }
+
+ return 0;
+err_out:
+ if (in_block) {
+ xa_erase(&in_block->ports, dev->ifindex);
+ netdev_put(dev, &sch->in_block_tracker);
+ NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
+ }
+ return err;
+}
+
static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
struct netlink_ext_ack *extack)
{
@@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch = qdisc_alloc(dev_queue, ops, extack);
if (IS_ERR(sch)) {
err = PTR_ERR(sch);
- goto err_out2;
+ goto err_out1;
}
sch->parent = parent;
@@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (handle == 0) {
NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
err = -ENOSPC;
- goto err_out3;
+ goto err_out2;
}
}
if (!netif_is_multiqueue(dev))
@@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = qdisc_block_indexes_set(sch, tca, extack);
if (err)
- goto err_out3;
+ goto err_out2;
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB], extack);
@@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);
+ err = qdisc_block_add_dev(sch, dev, tca, extack);
+ if (err)
+ goto err_out4;
+
return sch;
err_out4:
@@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ qdisc_block_undo_set(sch, tca);
+err_out2:
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
-err_out2:
+err_out1:
module_put(ops->owner);
err_out:
*errp = err;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d7e23f4cc0e..0fb51fd6f01e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
- const struct Qdisc_ops *ops = qdisc->ops;
+ struct net_device *dev = qdisc_dev(qdisc);
+ const struct Qdisc_ops *ops = qdisc->ops;
+ const struct Qdisc_class_ops *cops;
+ struct tcf_block *block;
+ unsigned long cl;
+ u32 block_index;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
+ cops = ops->cl_ops;
+ if (ops->ingress_block_get) {
+ block_index = ops->ingress_block_get(qdisc);
+ if (block_index) {
+ cl = TC_H_MIN_INGRESS;
+ block = cops->tcf_block(qdisc, cl, NULL);
+ if (block) {
+ if (xa_erase(&block->ports, dev->ifindex))
+ netdev_put(dev, &qdisc->in_block_tracker);
+ }
+ }
+ }
+
+ if (ops->egress_block_get) {
+ block_index = ops->egress_block_get(qdisc);
+ if (block_index) {
+ cl = TC_H_MIN_EGRESS;
+ block = cops->tcf_block(qdisc, cl, NULL);
+ if (block) {
+ if (xa_erase(&block->ports, dev->ifindex))
+ netdev_put(dev, &qdisc->eg_block_tracker);
+ }
+ }
+ }
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra
2023-08-19 16:35 ` [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
@ 2023-08-21 19:12 ` Vlad Buslov
2023-08-24 14:05 ` Jamal Hadi Salim
0 siblings, 1 reply; 16+ messages in thread
From: Vlad Buslov @ 2023-08-21 19:12 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev,
mleitner, horms, pctammela, kernel
On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> The tc block is a collection of netdevs/ports which allow qdiscs to share
> filter block instances (as opposed to the traditional tc filter per port).
> Example:
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
>
> Now we can add a filter using the block index:
> $ tc filter add block 22 protocol ip pref 25 \
> flower dst_ip 192.168.0.0/16 action drop
>
> Up to this point, the block is unaware of its ports. This patch fixes that
> and makes the tc block ports available to the datapath as well as control
> path on offloading.
>
> Suggested-by: Jiri Pirko <jiri@nvidia.com>
> Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> ---
> include/net/sch_generic.h | 4 ++
> net/sched/cls_api.c | 1 +
> net/sched/sch_api.c | 79 +++++++++++++++++++++++++++++++++++++--
> net/sched/sch_generic.c | 34 ++++++++++++++++-
> 4 files changed, 112 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index e92f73bb3198..824a0ecb5afc 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -19,6 +19,7 @@
> #include <net/gen_stats.h>
> #include <net/rtnetlink.h>
> #include <net/flow_offload.h>
> +#include <linux/xarray.h>
>
> struct Qdisc_ops;
> struct qdisc_walker;
> @@ -126,6 +127,8 @@ struct Qdisc {
>
> struct rcu_head rcu;
> netdevice_tracker dev_tracker;
> + netdevice_tracker in_block_tracker;
> + netdevice_tracker eg_block_tracker;
> /* private data */
> long privdata[] ____cacheline_aligned;
> };
> @@ -458,6 +461,7 @@ struct tcf_chain {
> };
>
> struct tcf_block {
> + struct xarray ports; /* datapath accessible */
> /* Lock protects tcf_block and lifetime-management data of chains
> * attached to the block (refcnt, action_refcnt, explicitly_created).
> */
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index a193cc7b3241..a976792ef02f 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> refcount_set(&block->refcnt, 1);
> block->net = net;
> block->index = block_index;
> + xa_init(&block->ports);
Missing dual call to xa_destroy() for this.
>
> /* Don't store q pointer for blocks which are shared */
> if (!tcf_block_shared(block))
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index aa6b1fe65151..6c0c220cdb21 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> return 0;
> }
>
> +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> +{
> + if (tca[TCA_INGRESS_BLOCK])
> + sch->ops->ingress_block_set(sch, 0);
> +
> + if (tca[TCA_EGRESS_BLOCK])
> + sch->ops->egress_block_set(sch, 0);
> +}
> +
> +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> + struct nlattr **tca,
> + struct netlink_ext_ack *extack)
> +{
> + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> + struct tcf_block *in_block = NULL;
> + struct tcf_block *eg_block = NULL;
> + unsigned long cl = 0;
> + int err;
> +
> + if (tca[TCA_INGRESS_BLOCK]) {
> + /* works for both ingress and clsact */
> + cl = TC_H_MIN_INGRESS;
> + in_block = cl_ops->tcf_block(sch, cl, NULL);
> + if (!in_block) {
> + NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> + return -EINVAL;
> + }
> +
> + err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> + return err;
> + }
> +
> + netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> + }
> +
> + if (tca[TCA_EGRESS_BLOCK]) {
> + cl = TC_H_MIN_EGRESS;
> + eg_block = cl_ops->tcf_block(sch, cl, NULL);
> + if (!eg_block) {
> + NL_SET_ERR_MSG(extack, "Shared egress block missing");
> + err = -EINVAL;
> + goto err_out;
> + }
> +
> + err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> + if (err) {
> + netdev_put(dev, &sch->eg_block_tracker);
> + NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> + goto err_out;
> + }
> + netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> + }
> +
> + return 0;
> +err_out:
> + if (in_block) {
> + xa_erase(&in_block->ports, dev->ifindex);
> + netdev_put(dev, &sch->in_block_tracker);
> + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> + }
> + return err;
> +}
> +
> static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> struct netlink_ext_ack *extack)
> {
> @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> sch = qdisc_alloc(dev_queue, ops, extack);
> if (IS_ERR(sch)) {
> err = PTR_ERR(sch);
> - goto err_out2;
> + goto err_out1;
> }
>
> sch->parent = parent;
> @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> if (handle == 0) {
> NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
> err = -ENOSPC;
> - goto err_out3;
> + goto err_out2;
> }
> }
> if (!netif_is_multiqueue(dev))
> @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
>
> err = qdisc_block_indexes_set(sch, tca, extack);
> if (err)
> - goto err_out3;
> + goto err_out2;
>
> if (tca[TCA_STAB]) {
> stab = qdisc_get_stab(tca[TCA_STAB], extack);
> @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> qdisc_hash_add(sch, false);
> trace_qdisc_create(ops, dev, parent);
>
> + err = qdisc_block_add_dev(sch, dev, tca, extack);
> + if (err)
> + goto err_out4;
> +
> return sch;
>
> err_out4:
> @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> ops->destroy(sch);
> qdisc_put_stab(rtnl_dereference(sch->stab));
> err_out3:
> + qdisc_block_undo_set(sch, tca);
Is this a bugfix? This new call is for all sites that jump to
err_out{3|4} even though you only added new code to the end of the
function.
> +err_out2:
> netdev_put(dev, &sch->dev_tracker);
> qdisc_free(sch);
> -err_out2:
> +err_out1:
> module_put(ops->owner);
> err_out:
> *errp = err;
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 5d7e23f4cc0e..0fb51fd6f01e 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
>
> static void __qdisc_destroy(struct Qdisc *qdisc)
> {
> - const struct Qdisc_ops *ops = qdisc->ops;
> + struct net_device *dev = qdisc_dev(qdisc);
> + const struct Qdisc_ops *ops = qdisc->ops;
> + const struct Qdisc_class_ops *cops;
> + struct tcf_block *block;
> + unsigned long cl;
> + u32 block_index;
>
> #ifdef CONFIG_NET_SCHED
> qdisc_hash_del(qdisc);
> @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
>
> qdisc_reset(qdisc);
>
> + cops = ops->cl_ops;
> + if (ops->ingress_block_get) {
> + block_index = ops->ingress_block_get(qdisc);
> + if (block_index) {
> + cl = TC_H_MIN_INGRESS;
> + block = cops->tcf_block(qdisc, cl, NULL);
> + if (block) {
> + if (xa_erase(&block->ports, dev->ifindex))
> + netdev_put(dev, &qdisc->in_block_tracker);
> + }
> + }
> + }
> +
> + if (ops->egress_block_get) {
> + block_index = ops->egress_block_get(qdisc);
> + if (block_index) {
> + cl = TC_H_MIN_EGRESS;
> + block = cops->tcf_block(qdisc, cl, NULL);
> + if (block) {
> + if (xa_erase(&block->ports, dev->ifindex))
> + netdev_put(dev, &qdisc->eg_block_tracker);
> + }
> + }
> + }
> +
> if (ops->destroy)
> ops->destroy(qdisc);
>
> module_put(ops->owner);
> - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> + netdev_put(dev, &qdisc->dev_tracker);
>
> trace_qdisc_destroy(qdisc);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra
2023-08-21 19:12 ` Vlad Buslov
@ 2023-08-24 14:05 ` Jamal Hadi Salim
0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 14:05 UTC (permalink / raw)
To: Vlad Buslov
Cc: Victor Nogueira, xiyou.wangcong, jiri, davem, edumazet, kuba,
pabeni, netdev, mleitner, horms, pctammela, kernel
On Mon, Aug 21, 2023 at 3:18 PM Vlad Buslov <vladbu@nvidia.com> wrote:
>
>
> On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> > The tc block is a collection of netdevs/ports which allow qdiscs to share
> > filter block instances (as opposed to the traditional tc filter per port).
> > Example:
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> >
> > Now we can add a filter using the block index:
> > $ tc filter add block 22 protocol ip pref 25 \
> > flower dst_ip 192.168.0.0/16 action drop
> >
> > Up to this point, the block is unaware of its ports. This patch fixes that
> > and makes the tc block ports available to the datapath as well as control
> > path on offloading.
> >
> > Suggested-by: Jiri Pirko <jiri@nvidia.com>
> > Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> > ---
> > include/net/sch_generic.h | 4 ++
> > net/sched/cls_api.c | 1 +
> > net/sched/sch_api.c | 79 +++++++++++++++++++++++++++++++++++++--
> > net/sched/sch_generic.c | 34 ++++++++++++++++-
> > 4 files changed, 112 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index e92f73bb3198..824a0ecb5afc 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -19,6 +19,7 @@
> > #include <net/gen_stats.h>
> > #include <net/rtnetlink.h>
> > #include <net/flow_offload.h>
> > +#include <linux/xarray.h>
> >
> > struct Qdisc_ops;
> > struct qdisc_walker;
> > @@ -126,6 +127,8 @@ struct Qdisc {
> >
> > struct rcu_head rcu;
> > netdevice_tracker dev_tracker;
> > + netdevice_tracker in_block_tracker;
> > + netdevice_tracker eg_block_tracker;
> > /* private data */
> > long privdata[] ____cacheline_aligned;
> > };
> > @@ -458,6 +461,7 @@ struct tcf_chain {
> > };
> >
> > struct tcf_block {
> > + struct xarray ports; /* datapath accessible */
> > /* Lock protects tcf_block and lifetime-management data of chains
> > * attached to the block (refcnt, action_refcnt, explicitly_created).
> > */
> > diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> > index a193cc7b3241..a976792ef02f 100644
> > --- a/net/sched/cls_api.c
> > +++ b/net/sched/cls_api.c
> > @@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> > refcount_set(&block->refcnt, 1);
> > block->net = net;
> > block->index = block_index;
> > + xa_init(&block->ports);
>
> Missing dual call to xa_destroy() for this.
>
Good catch - that should go in block destroy. I am not sure why
the kmemleak test didn't catch this.
> >
> > /* Don't store q pointer for blocks which are shared */
> > if (!tcf_block_shared(block))
> > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> > index aa6b1fe65151..6c0c220cdb21 100644
> > --- a/net/sched/sch_api.c
> > +++ b/net/sched/sch_api.c
> > @@ -1180,6 +1180,71 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
> > return 0;
> > }
> >
> > +static void qdisc_block_undo_set(struct Qdisc *sch, struct nlattr **tca)
> > +{
> > + if (tca[TCA_INGRESS_BLOCK])
> > + sch->ops->ingress_block_set(sch, 0);
> > +
> > + if (tca[TCA_EGRESS_BLOCK])
> > + sch->ops->egress_block_set(sch, 0);
> > +}
> > +
> > +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
> > + struct nlattr **tca,
> > + struct netlink_ext_ack *extack)
> > +{
> > + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
> > + struct tcf_block *in_block = NULL;
> > + struct tcf_block *eg_block = NULL;
> > + unsigned long cl = 0;
> > + int err;
> > +
> > + if (tca[TCA_INGRESS_BLOCK]) {
> > + /* works for both ingress and clsact */
> > + cl = TC_H_MIN_INGRESS;
> > + in_block = cl_ops->tcf_block(sch, cl, NULL);
> > + if (!in_block) {
> > + NL_SET_ERR_MSG(extack, "Shared ingress block missing");
> > + return -EINVAL;
> > + }
> > +
> > + err = xa_insert(&in_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > + if (err) {
> > + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > + return err;
> > + }
> > +
> > + netdev_hold(dev, &sch->in_block_tracker, GFP_KERNEL);
> > + }
> > +
> > + if (tca[TCA_EGRESS_BLOCK]) {
> > + cl = TC_H_MIN_EGRESS;
> > + eg_block = cl_ops->tcf_block(sch, cl, NULL);
> > + if (!eg_block) {
> > + NL_SET_ERR_MSG(extack, "Shared egress block missing");
> > + err = -EINVAL;
> > + goto err_out;
> > + }
> > +
> > + err = xa_insert(&eg_block->ports, dev->ifindex, dev, GFP_KERNEL);
> > + if (err) {
> > + netdev_put(dev, &sch->eg_block_tracker);
> > + NL_SET_ERR_MSG(extack, "Egress block dev insert failed");
> > + goto err_out;
> > + }
> > + netdev_hold(dev, &sch->eg_block_tracker, GFP_KERNEL);
> > + }
> > +
> > + return 0;
> > +err_out:
> > + if (in_block) {
> > + xa_erase(&in_block->ports, dev->ifindex);
> > + netdev_put(dev, &sch->in_block_tracker);
> > + NL_SET_ERR_MSG(extack, "ingress block dev insert failed");
> > + }
> > + return err;
> > +}
> > +
> > static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
> > struct netlink_ext_ack *extack)
> > {
> > @@ -1270,7 +1335,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> > sch = qdisc_alloc(dev_queue, ops, extack);
> > if (IS_ERR(sch)) {
> > err = PTR_ERR(sch);
> > - goto err_out2;
> > + goto err_out1;
> > }
> >
> > sch->parent = parent;
> > @@ -1289,7 +1354,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> > if (handle == 0) {
> > NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
> > err = -ENOSPC;
> > - goto err_out3;
> > + goto err_out2;
> > }
> > }
> > if (!netif_is_multiqueue(dev))
> > @@ -1311,7 +1376,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> >
> > err = qdisc_block_indexes_set(sch, tca, extack);
> > if (err)
> > - goto err_out3;
> > + goto err_out2;
> >
> > if (tca[TCA_STAB]) {
> > stab = qdisc_get_stab(tca[TCA_STAB], extack);
> > @@ -1350,6 +1415,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> > qdisc_hash_add(sch, false);
> > trace_qdisc_create(ops, dev, parent);
> >
> > + err = qdisc_block_add_dev(sch, dev, tca, extack);
> > + if (err)
> > + goto err_out4;
> > +
> > return sch;
> >
> > err_out4:
> > @@ -1360,9 +1429,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
> > ops->destroy(sch);
> > qdisc_put_stab(rtnl_dereference(sch->stab));
> > err_out3:
> > + qdisc_block_undo_set(sch, tca);
>
> Is this a bugfix? This new call is for all sites that jump to
> err_out{3|4} even though you only added new code to the end of the
> function.
I guess it could be labelled as a "bug fix" - the existing code did
not "rewind" the block ID setting when the TCA_EGRESS_BLOCK or
TCA_INGRESS_BLOCK attribute is present, the block ID has been set, and
then something fails later on in the code path.
Maybe we need to separate this out into a different patch, or even
send it as a bug fix.
cheers,
jamal
> > +err_out2:
> > netdev_put(dev, &sch->dev_tracker);
> > qdisc_free(sch);
> > -err_out2:
> > +err_out1:
> > module_put(ops->owner);
> > err_out:
> > *errp = err;
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index 5d7e23f4cc0e..0fb51fd6f01e 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -1048,7 +1048,12 @@ static void qdisc_free_cb(struct rcu_head *head)
> >
> > static void __qdisc_destroy(struct Qdisc *qdisc)
> > {
> > - const struct Qdisc_ops *ops = qdisc->ops;
> > + struct net_device *dev = qdisc_dev(qdisc);
> > + const struct Qdisc_ops *ops = qdisc->ops;
> > + const struct Qdisc_class_ops *cops;
> > + struct tcf_block *block;
> > + unsigned long cl;
> > + u32 block_index;
> >
> > #ifdef CONFIG_NET_SCHED
> > qdisc_hash_del(qdisc);
> > @@ -1059,11 +1064,36 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
> >
> > qdisc_reset(qdisc);
> >
> > + cops = ops->cl_ops;
> > + if (ops->ingress_block_get) {
> > + block_index = ops->ingress_block_get(qdisc);
> > + if (block_index) {
> > + cl = TC_H_MIN_INGRESS;
> > + block = cops->tcf_block(qdisc, cl, NULL);
> > + if (block) {
> > + if (xa_erase(&block->ports, dev->ifindex))
> > + netdev_put(dev, &qdisc->in_block_tracker);
> > + }
> > + }
> > + }
> > +
> > + if (ops->egress_block_get) {
> > + block_index = ops->egress_block_get(qdisc);
> > + if (block_index) {
> > + cl = TC_H_MIN_EGRESS;
> > + block = cops->tcf_block(qdisc, cl, NULL);
> > + if (block) {
> > + if (xa_erase(&block->ports, dev->ifindex))
> > + netdev_put(dev, &qdisc->eg_block_tracker);
> > + }
> > + }
> > + }
> > +
> > if (ops->destroy)
> > ops->destroy(qdisc);
> >
> > module_put(ops->owner);
> > - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
> > + netdev_put(dev, &qdisc->dev_tracker);
> >
> > trace_qdisc_destroy(qdisc);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath
2023-08-19 16:35 [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Victor Nogueira
2023-08-19 16:35 ` [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
@ 2023-08-19 16:35 ` Victor Nogueira
2023-08-23 17:33 ` Marcelo Ricardo Leitner
2023-08-19 16:35 ` [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action Victor Nogueira
2023-08-21 19:07 ` [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Vlad Buslov
3 siblings, 1 reply; 16+ messages in thread
From: Victor Nogueira @ 2023-08-19 16:35 UTC (permalink / raw)
To: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev
Cc: mleitner, vladbu, horms, pctammela, kernel
The datapath can now find the block of the port in which the packet arrived
at. It can then use it for various activities.
In the next patch we show a simple action that multicasts to all ports
excep for the port in which the packet arrived on.
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/sch_generic.h | 4 ++++
net/sched/cls_api.c | 10 +++++++++-
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 824a0ecb5afc..c5defb166ef6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -440,6 +440,8 @@ struct qdisc_skb_cb {
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
+ /* This should allow eBPF to continue to align */
+ u32 block_index;
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
@@ -488,6 +490,8 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);
+
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a976792ef02f..00e776cdd3fc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1011,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return block;
}
-static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
return idr_find(&tn->idr, block_index);
}
+EXPORT_SYMBOL(tcf_block_lookup);
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
@@ -1737,9 +1738,13 @@ int tcf_classify(struct sk_buff *skb,
const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
+ struct qdisc_skb_cb *qdisc_cb = qdisc_skb_cb(skb);
+
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 last_executed_chain = 0;
+ qdisc_cb->block_index = block ? block->index : 0;
+
return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0,
&last_executed_chain);
#else
@@ -1751,6 +1756,7 @@ int tcf_classify(struct sk_buff *skb,
int ret;
if (block) {
+ qdisc_cb->block_index = block->index;
ext = skb_ext_find(skb, TC_SKB_EXT);
if (ext && (ext->chain || ext->act_miss)) {
@@ -1778,6 +1784,8 @@ int tcf_classify(struct sk_buff *skb,
tp = rcu_dereference_bh(fchain->filter_chain);
last_executed_chain = fchain->index;
}
+ } else {
+ qdisc_cb->block_index = 0;
}
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index,
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath
2023-08-19 16:35 ` [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath Victor Nogueira
@ 2023-08-23 17:33 ` Marcelo Ricardo Leitner
2023-08-24 14:09 ` Jamal Hadi Salim
0 siblings, 1 reply; 16+ messages in thread
From: Marcelo Ricardo Leitner @ 2023-08-23 17:33 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev,
vladbu, horms, pctammela, kernel
On Sat, Aug 19, 2023 at 01:35:13PM -0300, Victor Nogueira wrote:
> The datapath can now find the block of the port in which the packet arrived
> at. It can then use it for various activities.
I think $subject needs a s/ports//. Because, well, the patch is
exposing the block, which contains the ports.. The first sentence here
goes along with this rationale.
more below
>
> In the next patch we show a simple action that multicasts to all ports
> excep for the port in which the packet arrived on.
"except"
>
> Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> ---
> include/net/sch_generic.h | 4 ++++
> net/sched/cls_api.c | 10 +++++++++-
> 2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 824a0ecb5afc..c5defb166ef6 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -440,6 +440,8 @@ struct qdisc_skb_cb {
> };
> #define QDISC_CB_PRIV_LEN 20
> unsigned char data[QDISC_CB_PRIV_LEN];
> + /* This should allow eBPF to continue to align */
Not sure if this comment really belongs in here. Up to you but it
seems better suited in the patch description. Hopefully the next one
won't do something like:
/* This should allow eBPF to continue to align */
u32 block_index;
+ /* This one too */
+ u32 my_var;
:-)
> + u32 block_index;
> };
>
> typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath
2023-08-23 17:33 ` Marcelo Ricardo Leitner
@ 2023-08-24 14:09 ` Jamal Hadi Salim
0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 14:09 UTC (permalink / raw)
To: Marcelo Ricardo Leitner
Cc: Victor Nogueira, xiyou.wangcong, jiri, davem, edumazet, kuba,
pabeni, netdev, vladbu, horms, pctammela, kernel
On Wed, Aug 23, 2023 at 1:33 PM Marcelo Ricardo Leitner
<mleitner@redhat.com> wrote:
>
> On Sat, Aug 19, 2023 at 01:35:13PM -0300, Victor Nogueira wrote:
> > The datapath can now find the block of the port in which the packet arrived
> > at. It can then use it for various activities.
>
> I think $subject needs a s/ports//. Because, well, the patch is
> exposing the block, which contains the ports.. The first sentence here
> goes along with this rationale.
>
> more below
>
> >
> > In the next patch we show a simple action that multicasts to all ports
> > excep for the port in which the packet arrived on.
>
> "except"
>
Thanks Marcelo. We'll fix both in the next version.
cheers,
jamal
> > Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> > ---
> > include/net/sch_generic.h | 4 ++++
> > net/sched/cls_api.c | 10 +++++++++-
> > 2 files changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index 824a0ecb5afc..c5defb166ef6 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -440,6 +440,8 @@ struct qdisc_skb_cb {
> > };
> > #define QDISC_CB_PRIV_LEN 20
> > unsigned char data[QDISC_CB_PRIV_LEN];
> > + /* This should allow eBPF to continue to align */
>
> Not sure if this comment really belongs in here. Up to you but it
> seems better suited in the patch description. Hopefully the next one
> won't do something like:
>
> /* This should allow eBPF to continue to align */
> u32 block_index;
> + /* This one too */
> + u32 my_var;
>
> :-)
>
> > + u32 block_index;
> > };
> >
> > typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-19 16:35 [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Victor Nogueira
2023-08-19 16:35 ` [PATCH net-next v2 1/3] net/sched: Introduce tc block netdev tracking infra Victor Nogueira
2023-08-19 16:35 ` [PATCH net-next v2 2/3] net/sched: cls_api: Expose tc block ports to the datapath Victor Nogueira
@ 2023-08-19 16:35 ` Victor Nogueira
2023-08-23 17:58 ` Marcelo Ricardo Leitner
2023-08-24 14:30 ` Weird sparse error WAS( " Jamal Hadi Salim
2023-08-21 19:07 ` [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Vlad Buslov
3 siblings, 2 replies; 16+ messages in thread
From: Victor Nogueira @ 2023-08-19 16:35 UTC (permalink / raw)
To: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev
Cc: mleitner, vladbu, horms, pctammela, kernel
This action takes advantage of the presence of the tc block ports set in the
datapath and broadcasts a packet to all ports in that set, with the exception
of the port on which it arrived.
Example usage:
$ tc qdisc add dev ens7 ingress block 22
$ tc qdisc add dev ens8 ingress block 22
Now we can add a filter using the block index:
$ tc filter add block 22 protocol ip pref 25 \
flower dst_ip 192.168.0.0/16 action blockcast
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/tc_wrapper.h | 5 +
net/sched/Kconfig | 13 ++
net/sched/Makefile | 1 +
net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
4 files changed, 318 insertions(+)
create mode 100644 net/sched/act_blockcast.c
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index a6d481b5bcbc..8ef848968be7 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -28,6 +28,7 @@ TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_blockcast_run);
TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
@@ -57,6 +58,10 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
if (a->ops->act == tcf_mirred_act)
return tcf_mirred_act(skb, a, res);
#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_BLOCKCAST)
+ if (a->ops->act == tcf_blockcast_run)
+ return tcf_blockcast_run(skb, a, res);
+#endif
#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
if (a->ops->act == tcf_pedit_act)
return tcf_pedit_act(skb, a, res);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..1b0edf1287d0 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -780,6 +780,19 @@ config NET_ACT_SIMP
To compile this code as a module, choose M here: the
module will be called act_simple.
+config NET_ACT_BLOCKCAST
+ tristate "TC block Multicast"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to add an action that will multicast an skb to egress of
+ all netdevs that belong to a tc block except for the netdev on which
+ the skb arrived.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_blockcast.
+
config NET_ACT_SKBEDIT
tristate "SKB Editing"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b5fd49641d91..2cdcf30645eb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_ACT_BLOCKCAST) += act_blockcast.o
obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_MPLS) += act_mpls.o
diff --git a/net/sched/act_blockcast.c b/net/sched/act_blockcast.c
new file mode 100644
index 000000000000..85fd0289927c
--- /dev/null
+++ b/net/sched/act_blockcast.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * net/sched/act_blockcast.c Block Cast action
+ * Copyright (c) 2023, Mojatatu Networks
+ * Authors: Jamal Hadi Salim <jhs@mojatatu.com>
+ * Victor Nogueira <victor@mojatatu.com>
+ * Pedro Tammela <pctammela@mojatatu.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <linux/if_arp.h>
+#include <net/tc_wrapper.h>
+
+#include <linux/tc_act/tc_defact.h>
+
+static struct tc_action_ops act_blockcast_ops;
+
+struct tcf_blockcast_act {
+ struct tc_action common;
+};
+
+#define to_blockcast_act(a) ((struct tcf_blockcast_act *)a)
+
+#define TCA_ID_BLOCKCAST 123
+#define CAST_RECURSION_LIMIT 4
+
+static DEFINE_PER_CPU(unsigned int, redirect_rec_level);
+
+static int cast_one(struct sk_buff *skb, const u32 ifindex)
+{
+ struct sk_buff *skb2 = skb;
+ int retval = TC_ACT_PIPE;
+ struct net_device *dev;
+ unsigned int rec_level;
+ bool expects_nh;
+ int mac_len;
+ bool at_nh;
+ int err;
+
+ rec_level = __this_cpu_inc_return(redirect_rec_level);
+ if (unlikely(rec_level > CAST_RECURSION_LIMIT)) {
+ net_warn_ratelimited("blockcast: exceeded redirect recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+ if (unlikely(!dev)) {
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ if (unlikely(!(dev->flags & IFF_UP) || !netif_carrier_ok(dev))) {
+ net_notice_ratelimited("blockcast: device %s is down\n",
+ dev->name);
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ __this_cpu_dec(redirect_rec_level);
+ return retval;
+ }
+
+ nf_reset_ct(skb2);
+
+ expects_nh = !dev_is_mac_header_xmit(dev);
+ at_nh = skb->data == skb_network_header(skb);
+ if (at_nh != expects_nh) {
+ mac_len = skb_at_tc_ingress(skb) ?
+ skb->mac_len :
+ skb_network_header(skb) - skb_mac_header(skb);
+
+ if (expects_nh) {
+ /* target device/action expect data at nh */
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+ /* target device/action expect data at mac */
+ skb_push_rcsum(skb2, mac_len);
+ }
+ }
+
+ skb2->skb_iif = skb->dev->ifindex;
+ skb2->dev = dev;
+
+ err = dev_queue_xmit(skb2);
+ if (err)
+ retval = TC_ACT_SHOT;
+
+ __this_cpu_dec(redirect_rec_level);
+
+ return retval;
+}
+
+TC_INDIRECT_SCOPE int tcf_blockcast_run(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
+{
+ u32 block_index = qdisc_skb_cb(skb)->block_index;
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ int action = READ_ONCE(p->tcf_action);
+ struct net *net = dev_net(skb->dev);
+ struct tcf_block *block;
+ struct net_device *dev;
+ u32 exception_ifindex;
+ unsigned long index;
+
+ block = tcf_block_lookup(net, block_index);
+ exception_ifindex = skb->dev->ifindex;
+
+ tcf_action_update_bstats(&p->common, skb);
+ tcf_lastuse_update(&p->tcf_tm);
+
+ if (!block || xa_empty(&block->ports))
+ goto act_done;
+
+ /* we are already under rcu protection, so iterating block is safe*/
+ xa_for_each(&block->ports, index, dev) {
+ int err;
+
+ if (index == exception_ifindex)
+ continue;
+
+ err = cast_one(skb, dev->ifindex);
+ if (err != TC_ACT_PIPE)
+ printk("(%d)Failed to send to dev\t%d: %s\n", err,
+ dev->ifindex, dev->name);
+ }
+
+act_done:
+ if (action == TC_ACT_SHOT)
+ tcf_action_inc_drop_qstats(&p->common);
+ return action;
+}
+
+static const struct nla_policy blockcast_policy[TCA_DEF_MAX + 1] = {
+ [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
+};
+
+static int tcf_blockcast_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
+ struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
+ struct tc_defact *parm;
+ bool exists = false;
+ int ret = 0, err;
+ u32 index;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla,
+ blockcast_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_DEF_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_DEF_PARMS]);
+ index = parm->index;
+
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (err < 0)
+ return err;
+
+ exists = err;
+ if (exists && bind)
+ return 0;
+
+ if (!exists) {
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_blockcast_ops, bind, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+ }
+
+ ret = ACT_P_CREATED;
+ } else {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ err = -EEXIST;
+ goto release_idr;
+ }
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ if (exists)
+ spin_lock_bh(&p->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ if (exists)
+ spin_unlock_bh(&p->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
+}
+
+static int tcf_blockcast_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ struct tc_defact opt = {
+ .index = p->tcf_index,
+ .refcnt = refcount_read(&p->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
+ };
+ struct tcf_t t;
+
+ spin_lock_bh(&p->tcf_lock);
+ opt.action = p->tcf_action;
+ if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ tcf_tm_dump(&t, &p->tcf_tm);
+ if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
+ goto nla_put_failure;
+ spin_unlock_bh(&p->tcf_lock);
+
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(&p->tcf_lock);
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_blockcast_ops = {
+ .kind = "blockcast",
+ .id = TCA_ID_BLOCKCAST,
+ .owner = THIS_MODULE,
+ .act = tcf_blockcast_run,
+ .dump = tcf_blockcast_dump,
+ .init = tcf_blockcast_init,
+ .size = sizeof(struct tcf_blockcast_act),
+};
+
+static __net_init int blockcast_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
+
+ return tc_action_net_init(net, tn, &act_blockcast_ops);
+}
+
+static void __net_exit blockcast_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, act_blockcast_ops.net_id);
+}
+
+static struct pernet_operations blockcast_net_ops = {
+ .init = blockcast_init_net,
+ .exit_batch = blockcast_exit_net,
+ .id = &act_blockcast_ops.net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Mojatatu Networks, Inc");
+MODULE_LICENSE("GPL");
+
+static int __init blockcast_init_module(void)
+{
+ int ret = tcf_register_action(&act_blockcast_ops, &blockcast_net_ops);
+
+ if (!ret)
+ pr_info("blockcast TC action Loaded\n");
+ return ret;
+}
+
+static void __exit blockcast_cleanup_module(void)
+{
+ tcf_unregister_action(&act_blockcast_ops, &blockcast_net_ops);
+}
+
+module_init(blockcast_init_module);
+module_exit(blockcast_cleanup_module);
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-19 16:35 ` [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action Victor Nogueira
@ 2023-08-23 17:58 ` Marcelo Ricardo Leitner
2023-08-24 14:19 ` Jamal Hadi Salim
2023-08-24 14:30 ` Weird sparse error WAS( " Jamal Hadi Salim
1 sibling, 1 reply; 16+ messages in thread
From: Marcelo Ricardo Leitner @ 2023-08-23 17:58 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev,
vladbu, horms, pctammela, kernel
On Sat, Aug 19, 2023 at 01:35:14PM -0300, Victor Nogueira wrote:
> This action takes advantage of the presence of tc block ports set in the
> datapath and broadcast a packet to all ports on that set with exception of
> the port in which it arrived on..
I couldn't find anything in the code blocking this action from being
used in the egress path as well. So what about: s/arrived/& or is
being transmitted/ , making it explicit that it is an expected usage?
more below
>
> Example usage:
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
>
> Now we can add a filter using the block index:
> $ tc filter add block 22 protocol ip pref 25 \
> flower dst_ip 192.168.0.0/16 action blockcast
>
> Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> ---
> include/net/tc_wrapper.h | 5 +
> net/sched/Kconfig | 13 ++
> net/sched/Makefile | 1 +
> net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
> 4 files changed, 318 insertions(+)
> create mode 100644 net/sched/act_blockcast.c
>
> diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
> index a6d481b5bcbc..8ef848968be7 100644
> --- a/include/net/tc_wrapper.h
> +++ b/include/net/tc_wrapper.h
> @@ -28,6 +28,7 @@ TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
> TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
> TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
> TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
> +TC_INDIRECT_ACTION_DECLARE(tcf_blockcast_run);
> TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
> TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
> TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
> @@ -57,6 +58,10 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
> if (a->ops->act == tcf_mirred_act)
> return tcf_mirred_act(skb, a, res);
> #endif
> +#if IS_BUILTIN(CONFIG_NET_ACT_BLOCKCAST)
> + if (a->ops->act == tcf_blockcast_run)
> + return tcf_blockcast_run(skb, a, res);
> +#endif
> #if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
> if (a->ops->act == tcf_pedit_act)
> return tcf_pedit_act(skb, a, res);
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index 4b95cb1ac435..1b0edf1287d0 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -780,6 +780,19 @@ config NET_ACT_SIMP
> To compile this code as a module, choose M here: the
> module will be called act_simple.
>
> +config NET_ACT_BLOCKCAST
> + tristate "TC block Multicast"
> + depends on NET_CLS_ACT
> + help
> + Say Y here to add an action that will multicast an skb to egress of
> + all netdevs that belong to a tc block except for the netdev on which
> + the skb arrived on
> +
> + If unsure, say N.
> +
> + To compile this code as a module, choose M here: the
> + module will be called act_blockcast.
> +
> config NET_ACT_SKBEDIT
> tristate "SKB Editing"
> depends on NET_CLS_ACT
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index b5fd49641d91..2cdcf30645eb 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
> obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
> obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
> obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
> +obj-$(CONFIG_NET_ACT_BLOCKCAST) += act_blockcast.o
> obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
> obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
> obj-$(CONFIG_NET_ACT_MPLS) += act_mpls.o
> diff --git a/net/sched/act_blockcast.c b/net/sched/act_blockcast.c
> new file mode 100644
> index 000000000000..85fd0289927c
> --- /dev/null
> +++ b/net/sched/act_blockcast.c
> @@ -0,0 +1,299 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * net/sched/act_blockcast.c Block Cast action
> + * Copyright (c) 2023, Mojatatu Networks
> + * Authors: Jamal Hadi Salim <jhs@mojatatu.com>
> + * Victor Nogueira <victor@mojatatu.com>
> + * Pedro Tammela <pctammela@mojatatu.com>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/skbuff.h>
> +#include <linux/rtnetlink.h>
> +#include <net/netlink.h>
> +#include <net/pkt_sched.h>
> +#include <net/pkt_cls.h>
> +#include <linux/if_arp.h>
> +#include <net/tc_wrapper.h>
> +
> +#include <linux/tc_act/tc_defact.h>
> +
> +static struct tc_action_ops act_blockcast_ops;
> +
> +struct tcf_blockcast_act {
> + struct tc_action common;
> +};
> +
> +#define to_blockcast_act(a) ((struct tcf_blockcast_act *)a)
> +
> +#define TCA_ID_BLOCKCAST 123
This needs to be part of enum tca_id instead, as this is uapi.
> +#define CAST_RECURSION_LIMIT 4
> +
> +static DEFINE_PER_CPU(unsigned int, redirect_rec_level);
> +
> +static int cast_one(struct sk_buff *skb, const u32 ifindex)
> +{
> + struct sk_buff *skb2 = skb;
> + int retval = TC_ACT_PIPE;
> + struct net_device *dev;
> + unsigned int rec_level;
> + bool expects_nh;
> + int mac_len;
> + bool at_nh;
> + int err;
> +
> + rec_level = __this_cpu_inc_return(redirect_rec_level);
> + if (unlikely(rec_level > CAST_RECURSION_LIMIT)) {
> + net_warn_ratelimited("blockcast: exceeded redirect recursion limit on dev %s\n",
> + netdev_name(skb->dev));
I wrote the comment below earlier than this one :-)
Here, I would think this is really an exception path, and if this
shows up, it needs to be addressed. So this msg IMHO is fine.
> + __this_cpu_dec(redirect_rec_level);
> + return TC_ACT_SHOT;
> + }
> +
> + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
> + if (unlikely(!dev)) {
> + __this_cpu_dec(redirect_rec_level);
> + return TC_ACT_SHOT;
> + }
> +
> + if (unlikely(!(dev->flags & IFF_UP) || !netif_carrier_ok(dev))) {
> + net_notice_ratelimited("blockcast: device %s is down\n",
> + dev->name);
Please no, not this warning. We already have a situation with mirred
and ovs bridges often being down and getting dmesg spammed. We
couldn't remove that log msg because of fear of some sysadmin missing
the hint. But here, that doesn't apply, and dmesg is not the right way
to debug packet drops.
> + __this_cpu_dec(redirect_rec_level);
> + return TC_ACT_SHOT;
> + }
> +
> + skb2 = skb_clone(skb, GFP_ATOMIC);
> + if (!skb2) {
> + __this_cpu_dec(redirect_rec_level);
> + return retval;
> + }
> +
> + nf_reset_ct(skb2);
> +
> + expects_nh = !dev_is_mac_header_xmit(dev);
> + at_nh = skb->data == skb_network_header(skb);
> + if (at_nh != expects_nh) {
> + mac_len = skb_at_tc_ingress(skb) ?
> + skb->mac_len :
> + skb_network_header(skb) - skb_mac_header(skb);
> +
> + if (expects_nh) {
> + /* target device/action expect data at nh */
> + skb_pull_rcsum(skb2, mac_len);
> + } else {
> + /* target device/action expect data at mac */
> + skb_push_rcsum(skb2, mac_len);
> + }
> + }
> +
> + skb2->skb_iif = skb->dev->ifindex;
> + skb2->dev = dev;
> +
> + err = dev_queue_xmit(skb2);
> + if (err)
> + retval = TC_ACT_SHOT;
> +
> + __this_cpu_dec(redirect_rec_level);
> +
> + return retval;
> +}
> +
> +TC_INDIRECT_SCOPE int tcf_blockcast_run(struct sk_buff *skb,
> + const struct tc_action *a,
> + struct tcf_result *res)
> +{
> + u32 block_index = qdisc_skb_cb(skb)->block_index;
> + struct tcf_blockcast_act *p = to_blockcast_act(a);
> + int action = READ_ONCE(p->tcf_action);
> + struct net *net = dev_net(skb->dev);
> + struct tcf_block *block;
> + struct net_device *dev;
> + u32 exception_ifindex;
> + unsigned long index;
> +
> + block = tcf_block_lookup(net, block_index);
> + exception_ifindex = skb->dev->ifindex;
> +
> + tcf_action_update_bstats(&p->common, skb);
> + tcf_lastuse_update(&p->tcf_tm);
> +
> + if (!block || xa_empty(&block->ports))
> + goto act_done;
> +
> + /* we are already under rcu protection, so iterating block is safe*/
> + xa_for_each(&block->ports, index, dev) {
> + int err;
> +
> + if (index == exception_ifindex)
> + continue;
> +
> + err = cast_one(skb, dev->ifindex);
> + if (err != TC_ACT_PIPE)
> + printk("(%d)Failed to send to dev\t%d: %s\n", err,
> + dev->ifindex, dev->name);
Same comment here about logging.
> + }
> +
> +act_done:
> + if (action == TC_ACT_SHOT)
> + tcf_action_inc_drop_qstats(&p->common);
> + return action;
> +}
> +
> +static const struct nla_policy blockcast_policy[TCA_DEF_MAX + 1] = {
> + [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
> +};
> +
> +static int tcf_blockcast_init(struct net *net, struct nlattr *nla,
> + struct nlattr *est, struct tc_action **a,
> + struct tcf_proto *tp, u32 flags,
> + struct netlink_ext_ack *extack)
> +{
> + struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
> + struct tcf_blockcast_act *p = to_blockcast_act(a);
> + bool bind = flags & TCA_ACT_FLAGS_BIND;
> + struct nlattr *tb[TCA_DEF_MAX + 1];
> + struct tcf_chain *goto_ch = NULL;
> + struct tc_defact *parm;
> + bool exists = false;
> + int ret = 0, err;
> + u32 index;
> +
> + if (!nla)
> + return -EINVAL;
> +
> + err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla,
> + blockcast_policy, NULL);
Why the _deprecated one again please? This one doesn't need backwards
compatibility.
Thanks,
Marcelo
> + if (err < 0)
> + return err;
> +
> + if (!tb[TCA_DEF_PARMS])
> + return -EINVAL;
> +
> + parm = nla_data(tb[TCA_DEF_PARMS]);
> + index = parm->index;
> +
> + err = tcf_idr_check_alloc(tn, &index, a, bind);
> + if (err < 0)
> + return err;
> +
> + exists = err;
> + if (exists && bind)
> + return 0;
> +
> + if (!exists) {
> + ret = tcf_idr_create_from_flags(tn, index, est, a,
> + &act_blockcast_ops, bind, flags);
> + if (ret) {
> + tcf_idr_cleanup(tn, index);
> + return ret;
> + }
> +
> + ret = ACT_P_CREATED;
> + } else {
> + if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
> + err = -EEXIST;
> + goto release_idr;
> + }
> + }
> +
> + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
> + if (err < 0)
> + goto release_idr;
> +
> + if (exists)
> + spin_lock_bh(&p->tcf_lock);
> + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> + if (exists)
> + spin_unlock_bh(&p->tcf_lock);
> +
> + if (goto_ch)
> + tcf_chain_put_by_act(goto_ch);
> +
> + return ret;
> +release_idr:
> + tcf_idr_release(*a, bind);
> + return err;
> +}
> +
> +static int tcf_blockcast_dump(struct sk_buff *skb, struct tc_action *a,
> + int bind, int ref)
> +{
> + unsigned char *b = skb_tail_pointer(skb);
> + struct tcf_blockcast_act *p = to_blockcast_act(a);
> + struct tc_defact opt = {
> + .index = p->tcf_index,
> + .refcnt = refcount_read(&p->tcf_refcnt) - ref,
> + .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
> + };
> + struct tcf_t t;
> +
> + spin_lock_bh(&p->tcf_lock);
> + opt.action = p->tcf_action;
> + if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt))
> + goto nla_put_failure;
> +
> + tcf_tm_dump(&t, &p->tcf_tm);
> + if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
> + goto nla_put_failure;
> + spin_unlock_bh(&p->tcf_lock);
> +
> + return skb->len;
> +
> +nla_put_failure:
> + spin_unlock_bh(&p->tcf_lock);
> + nlmsg_trim(skb, b);
> + return -1;
> +}
> +
> +static struct tc_action_ops act_blockcast_ops = {
> + .kind = "blockcast",
> + .id = TCA_ID_BLOCKCAST,
> + .owner = THIS_MODULE,
> + .act = tcf_blockcast_run,
> + .dump = tcf_blockcast_dump,
> + .init = tcf_blockcast_init,
> + .size = sizeof(struct tcf_blockcast_act),
> +};
> +
> +static __net_init int blockcast_init_net(struct net *net)
> +{
> + struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
> +
> + return tc_action_net_init(net, tn, &act_blockcast_ops);
> +}
> +
> +static void __net_exit blockcast_exit_net(struct list_head *net_list)
> +{
> + tc_action_net_exit(net_list, act_blockcast_ops.net_id);
> +}
> +
> +static struct pernet_operations blockcast_net_ops = {
> + .init = blockcast_init_net,
> + .exit_batch = blockcast_exit_net,
> + .id = &act_blockcast_ops.net_id,
> + .size = sizeof(struct tc_action_net),
> +};
> +
> +MODULE_AUTHOR("Mojatatu Networks, Inc");
> +MODULE_LICENSE("GPL");
> +
> +static int __init blockcast_init_module(void)
> +{
> + int ret = tcf_register_action(&act_blockcast_ops, &blockcast_net_ops);
> +
> + if (!ret)
> + pr_info("blockcast TC action Loaded\n");
> + return ret;
> +}
> +
> +static void __exit blockcast_cleanup_module(void)
> +{
> + tcf_unregister_action(&act_blockcast_ops, &blockcast_net_ops);
> +}
> +
> +module_init(blockcast_init_module);
> +module_exit(blockcast_cleanup_module);
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-23 17:58 ` Marcelo Ricardo Leitner
@ 2023-08-24 14:19 ` Jamal Hadi Salim
0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 14:19 UTC (permalink / raw)
To: Marcelo Ricardo Leitner
Cc: Victor Nogueira, xiyou.wangcong, jiri, davem, edumazet, kuba,
pabeni, netdev, vladbu, horms, pctammela, kernel
On Wed, Aug 23, 2023 at 1:58 PM Marcelo Ricardo Leitner
<mleitner@redhat.com> wrote:
>
> On Sat, Aug 19, 2023 at 01:35:14PM -0300, Victor Nogueira wrote:
> > This action takes advantage of the presence of tc block ports set in the
> > datapath and broadcast a packet to all ports on that set with exception of
> > the port on which it arrived.
>
> I couldn't find anything in the code blocking this action from being
> used in the egress path as well. So what about: s/arrived/& or is
> being transmitted/ , making it explicit that it is an expected usage?
>
sure.
> more below
>
> >
> > Example usage:
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> >
> > Now we can add a filter using the block index:
> > $ tc filter add block 22 protocol ip pref 25 \
> > flower dst_ip 192.168.0.0/16 action blockcast
> >
> > Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
> > Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
> > Signed-off-by: Victor Nogueira <victor@mojatatu.com>
> > ---
> > include/net/tc_wrapper.h | 5 +
> > net/sched/Kconfig | 13 ++
> > net/sched/Makefile | 1 +
> > net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
> > 4 files changed, 318 insertions(+)
> > create mode 100644 net/sched/act_blockcast.c
> >
> > diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
> > index a6d481b5bcbc..8ef848968be7 100644
> > --- a/include/net/tc_wrapper.h
> > +++ b/include/net/tc_wrapper.h
> > @@ -28,6 +28,7 @@ TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
> > TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
> > TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
> > TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
> > +TC_INDIRECT_ACTION_DECLARE(tcf_blockcast_run);
> > TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
> > TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
> > TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
> > @@ -57,6 +58,10 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
> > if (a->ops->act == tcf_mirred_act)
> > return tcf_mirred_act(skb, a, res);
> > #endif
> > +#if IS_BUILTIN(CONFIG_NET_ACT_BLOCKCAST)
> > + if (a->ops->act == tcf_blockcast_run)
> > + return tcf_blockcast_run(skb, a, res);
> > +#endif
> > #if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
> > if (a->ops->act == tcf_pedit_act)
> > return tcf_pedit_act(skb, a, res);
> > diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> > index 4b95cb1ac435..1b0edf1287d0 100644
> > --- a/net/sched/Kconfig
> > +++ b/net/sched/Kconfig
> > @@ -780,6 +780,19 @@ config NET_ACT_SIMP
> > To compile this code as a module, choose M here: the
> > module will be called act_simple.
> >
> > +config NET_ACT_BLOCKCAST
> > + tristate "TC block Multicast"
> > + depends on NET_CLS_ACT
> > + help
> > + Say Y here to add an action that will multicast an skb to egress of
> > + all netdevs that belong to a tc block except for the netdev on which
> > + the skb arrived on
> > +
> > + If unsure, say N.
> > +
> > + To compile this code as a module, choose M here: the
> > + module will be called act_blockcast.
> > +
> > config NET_ACT_SKBEDIT
> > tristate "SKB Editing"
> > depends on NET_CLS_ACT
> > diff --git a/net/sched/Makefile b/net/sched/Makefile
> > index b5fd49641d91..2cdcf30645eb 100644
> > --- a/net/sched/Makefile
> > +++ b/net/sched/Makefile
> > @@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
> > obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
> > obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
> > obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
> > +obj-$(CONFIG_NET_ACT_BLOCKCAST) += act_blockcast.o
> > obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
> > obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
> > obj-$(CONFIG_NET_ACT_MPLS) += act_mpls.o
> > diff --git a/net/sched/act_blockcast.c b/net/sched/act_blockcast.c
> > new file mode 100644
> > index 000000000000..85fd0289927c
> > --- /dev/null
> > +++ b/net/sched/act_blockcast.c
> > @@ -0,0 +1,299 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * net/sched/act_blockcast.c Block Cast action
> > + * Copyright (c) 2023, Mojatatu Networks
> > + * Authors: Jamal Hadi Salim <jhs@mojatatu.com>
> > + * Victor Nogueira <victor@mojatatu.com>
> > + * Pedro Tammela <pctammela@mojatatu.com>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/skbuff.h>
> > +#include <linux/rtnetlink.h>
> > +#include <net/netlink.h>
> > +#include <net/pkt_sched.h>
> > +#include <net/pkt_cls.h>
> > +#include <linux/if_arp.h>
> > +#include <net/tc_wrapper.h>
> > +
> > +#include <linux/tc_act/tc_defact.h>
> > +
> > +static struct tc_action_ops act_blockcast_ops;
> > +
> > +struct tcf_blockcast_act {
> > + struct tc_action common;
> > +};
> > +
> > +#define to_blockcast_act(a) ((struct tcf_blockcast_act *)a)
> > +
> > +#define TCA_ID_BLOCKCAST 123
>
> This needs to be part of enum tca_id instead, as this is uapi.
>
> > +#define CAST_RECURSION_LIMIT 4
> > +
> > +static DEFINE_PER_CPU(unsigned int, redirect_rec_level);
> > +
> > +static int cast_one(struct sk_buff *skb, const u32 ifindex)
> > +{
> > + struct sk_buff *skb2 = skb;
> > + int retval = TC_ACT_PIPE;
> > + struct net_device *dev;
> > + unsigned int rec_level;
> > + bool expects_nh;
> > + int mac_len;
> > + bool at_nh;
> > + int err;
> > +
> > + rec_level = __this_cpu_inc_return(redirect_rec_level);
> > + if (unlikely(rec_level > CAST_RECURSION_LIMIT)) {
> > + net_warn_ratelimited("blockcast: exceeded redirect recursion limit on dev %s\n",
> > + netdev_name(skb->dev));
>
> I wrote the comment below earlier than this one :-)
> Here, I would think this is really an exception path, and if this
> shows up, it needs to be addressed. So this msg IMHO is fine.
>
ok;->
> > + __this_cpu_dec(redirect_rec_level);
> > + return TC_ACT_SHOT;
> > + }
> > +
> > + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
> > + if (unlikely(!dev)) {
> > + __this_cpu_dec(redirect_rec_level);
> > + return TC_ACT_SHOT;
> > + }
> > +
> > + if (unlikely(!(dev->flags & IFF_UP) || !netif_carrier_ok(dev))) {
> > + net_notice_ratelimited("blockcast: device %s is down\n",
> > + dev->name);
>
> Please no, not this warning. We already have a situation with mirred
> and ovs bridges often being down and getting dmesg spammed. We
> couldn't remove that log msg because of fear of some sysadmin missing
> the hint. But here, that doesn't apply, and dmesg is not the right way
> to debug packet drops.
>
I think we could probably increment the action error counter here
instead. The error is not catastrophic really, one of the ports in the
block is not up - big deal.
> > + __this_cpu_dec(redirect_rec_level);
> > + return TC_ACT_SHOT;
> > + }
> > +
> > + skb2 = skb_clone(skb, GFP_ATOMIC);
> > + if (!skb2) {
> > + __this_cpu_dec(redirect_rec_level);
> > + return retval;
> > + }
> > +
> > + nf_reset_ct(skb2);
> > +
> > + expects_nh = !dev_is_mac_header_xmit(dev);
> > + at_nh = skb->data == skb_network_header(skb);
> > + if (at_nh != expects_nh) {
> > + mac_len = skb_at_tc_ingress(skb) ?
> > + skb->mac_len :
> > + skb_network_header(skb) - skb_mac_header(skb);
> > +
> > + if (expects_nh) {
> > + /* target device/action expect data at nh */
> > + skb_pull_rcsum(skb2, mac_len);
> > + } else {
> > + /* target device/action expect data at mac */
> > + skb_push_rcsum(skb2, mac_len);
> > + }
> > + }
> > +
> > + skb2->skb_iif = skb->dev->ifindex;
> > + skb2->dev = dev;
> > +
> > + err = dev_queue_xmit(skb2);
> > + if (err)
> > + retval = TC_ACT_SHOT;
> > +
> > + __this_cpu_dec(redirect_rec_level);
> > +
> > + return retval;
> > +}
> > +
> > +TC_INDIRECT_SCOPE int tcf_blockcast_run(struct sk_buff *skb,
> > + const struct tc_action *a,
> > + struct tcf_result *res)
> > +{
> > + u32 block_index = qdisc_skb_cb(skb)->block_index;
> > + struct tcf_blockcast_act *p = to_blockcast_act(a);
> > + int action = READ_ONCE(p->tcf_action);
> > + struct net *net = dev_net(skb->dev);
> > + struct tcf_block *block;
> > + struct net_device *dev;
> > + u32 exception_ifindex;
> > + unsigned long index;
> > +
> > + block = tcf_block_lookup(net, block_index);
> > + exception_ifindex = skb->dev->ifindex;
> > +
> > + tcf_action_update_bstats(&p->common, skb);
> > + tcf_lastuse_update(&p->tcf_tm);
> > +
> > + if (!block || xa_empty(&block->ports))
> > + goto act_done;
> > +
> > + /* we are already under rcu protection, so iterating block is safe*/
> > + xa_for_each(&block->ports, index, dev) {
> > + int err;
> > +
> > + if (index == exception_ifindex)
> > + continue;
> > +
> > + err = cast_one(skb, dev->ifindex);
> > + if (err != TC_ACT_PIPE)
> > + printk("(%d)Failed to send to dev\t%d: %s\n", err,
> > + dev->ifindex, dev->name);
>
> Same comment here about logging.
Yep, error count increment will do.
>
> > + }
> > +
> > +act_done:
> > + if (action == TC_ACT_SHOT)
> > + tcf_action_inc_drop_qstats(&p->common);
> > + return action;
> > +}
> > +
> > +static const struct nla_policy blockcast_policy[TCA_DEF_MAX + 1] = {
> > + [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
> > +};
> > +
> > +static int tcf_blockcast_init(struct net *net, struct nlattr *nla,
> > + struct nlattr *est, struct tc_action **a,
> > + struct tcf_proto *tp, u32 flags,
> > + struct netlink_ext_ack *extack)
> > +{
> > + struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
> > + struct tcf_blockcast_act *p = to_blockcast_act(a);
> > + bool bind = flags & TCA_ACT_FLAGS_BIND;
> > + struct nlattr *tb[TCA_DEF_MAX + 1];
> > + struct tcf_chain *goto_ch = NULL;
> > + struct tc_defact *parm;
> > + bool exists = false;
> > + int ret = 0, err;
> > + u32 index;
> > +
> > + if (!nla)
> > + return -EINVAL;
> > +
> > + err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla,
> > + blockcast_policy, NULL);
>
> Why the _deprecated one again please? This one doesn't need backwards
> compatibility.
>
Ah, it's just TheLinuxWay(tm). Original code was cutnpasted from act_simple.
Thanks Marcelo.
cheers,
jamal
> Thanks,
> Marcelo
>
> > + if (err < 0)
> > + return err;
> > +
> > + if (!tb[TCA_DEF_PARMS])
> > + return -EINVAL;
> > +
> > + parm = nla_data(tb[TCA_DEF_PARMS]);
> > + index = parm->index;
> > +
> > + err = tcf_idr_check_alloc(tn, &index, a, bind);
> > + if (err < 0)
> > + return err;
> > +
> > + exists = err;
> > + if (exists && bind)
> > + return 0;
> > +
> > + if (!exists) {
> > + ret = tcf_idr_create_from_flags(tn, index, est, a,
> > + &act_blockcast_ops, bind, flags);
> > + if (ret) {
> > + tcf_idr_cleanup(tn, index);
> > + return ret;
> > + }
> > +
> > + ret = ACT_P_CREATED;
> > + } else {
> > + if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
> > + err = -EEXIST;
> > + goto release_idr;
> > + }
> > + }
> > +
> > + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
> > + if (err < 0)
> > + goto release_idr;
> > +
> > + if (exists)
> > + spin_lock_bh(&p->tcf_lock);
> > + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> > + if (exists)
> > + spin_unlock_bh(&p->tcf_lock);
> > +
> > + if (goto_ch)
> > + tcf_chain_put_by_act(goto_ch);
> > +
> > + return ret;
> > +release_idr:
> > + tcf_idr_release(*a, bind);
> > + return err;
> > +}
> > +
> > +static int tcf_blockcast_dump(struct sk_buff *skb, struct tc_action *a,
> > + int bind, int ref)
> > +{
> > + unsigned char *b = skb_tail_pointer(skb);
> > + struct tcf_blockcast_act *p = to_blockcast_act(a);
> > + struct tc_defact opt = {
> > + .index = p->tcf_index,
> > + .refcnt = refcount_read(&p->tcf_refcnt) - ref,
> > + .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
> > + };
> > + struct tcf_t t;
> > +
> > + spin_lock_bh(&p->tcf_lock);
> > + opt.action = p->tcf_action;
> > + if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt))
> > + goto nla_put_failure;
> > +
> > + tcf_tm_dump(&t, &p->tcf_tm);
> > + if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
> > + goto nla_put_failure;
> > + spin_unlock_bh(&p->tcf_lock);
> > +
> > + return skb->len;
> > +
> > +nla_put_failure:
> > + spin_unlock_bh(&p->tcf_lock);
> > + nlmsg_trim(skb, b);
> > + return -1;
> > +}
> > +
> > +static struct tc_action_ops act_blockcast_ops = {
> > + .kind = "blockcast",
> > + .id = TCA_ID_BLOCKCAST,
> > + .owner = THIS_MODULE,
> > + .act = tcf_blockcast_run,
> > + .dump = tcf_blockcast_dump,
> > + .init = tcf_blockcast_init,
> > + .size = sizeof(struct tcf_blockcast_act),
> > +};
> > +
> > +static __net_init int blockcast_init_net(struct net *net)
> > +{
> > + struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
> > +
> > + return tc_action_net_init(net, tn, &act_blockcast_ops);
> > +}
> > +
> > +static void __net_exit blockcast_exit_net(struct list_head *net_list)
> > +{
> > + tc_action_net_exit(net_list, act_blockcast_ops.net_id);
> > +}
> > +
> > +static struct pernet_operations blockcast_net_ops = {
> > + .init = blockcast_init_net,
> > + .exit_batch = blockcast_exit_net,
> > + .id = &act_blockcast_ops.net_id,
> > + .size = sizeof(struct tc_action_net),
> > +};
> > +
> > +MODULE_AUTHOR("Mojatatu Networks, Inc");
> > +MODULE_LICENSE("GPL");
> > +
> > +static int __init blockcast_init_module(void)
> > +{
> > + int ret = tcf_register_action(&act_blockcast_ops, &blockcast_net_ops);
> > +
> > + if (!ret)
> > + pr_info("blockcast TC action Loaded\n");
> > + return ret;
> > +}
> > +
> > +static void __exit blockcast_cleanup_module(void)
> > +{
> > + tcf_unregister_action(&act_blockcast_ops, &blockcast_net_ops);
> > +}
> > +
> > +module_init(blockcast_init_module);
> > +module_exit(blockcast_cleanup_module);
> > --
> > 2.25.1
> >
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Weird sparse error WAS( [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-19 16:35 ` [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action Victor Nogueira
2023-08-23 17:58 ` Marcelo Ricardo Leitner
@ 2023-08-24 14:30 ` Jamal Hadi Salim
2023-08-24 14:41 ` Paolo Abeni
2023-09-05 9:18 ` Dan Carpenter
1 sibling, 2 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 14:30 UTC (permalink / raw)
To: Dan Carpenter; +Cc: Simon Horman, Linux Kernel Network Developers
Dan/Simon,
Can someone help explain this error on the code below:
../net/sched/act_blockcast.c:213:9: warning: context imbalance in
'tcf_blockcast_init' - different lock contexts for basic block
Looks like a false positive ...
cheers,
jamal
---------- Forwarded message ---------
From: Victor Nogueira <victor@mojatatu.com>
Date: Sat, Aug 19, 2023 at 12:35 PM
Subject: [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce
blockcast tc action
To: <jhs@mojatatu.com>, <xiyou.wangcong@gmail.com>,
<jiri@resnulli.us>, <davem@davemloft.net>, <edumazet@google.com>,
<kuba@kernel.org>, <pabeni@redhat.com>, <netdev@vger.kernel.org>
Cc: <mleitner@redhat.com>, <vladbu@nvidia.com>, <horms@kernel.org>,
<pctammela@mojatatu.com>, <kernel@mojatatu.com>
This action takes advantage of the presence of tc block ports set in the
datapath and broadcast a packet to all ports on that set with exception of
the port on which it arrived.
Example usage:
$ tc qdisc add dev ens7 ingress block 22
$ tc qdisc add dev ens8 ingress block 22
Now we can add a filter using the block index:
$ tc filter add block 22 protocol ip pref 25 \
flower dst_ip 192.168.0.0/16 action blockcast
Co-developed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
---
include/net/tc_wrapper.h | 5 +
net/sched/Kconfig | 13 ++
net/sched/Makefile | 1 +
net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
4 files changed, 318 insertions(+)
create mode 100644 net/sched/act_blockcast.c
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index a6d481b5bcbc..8ef848968be7 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -28,6 +28,7 @@ TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_blockcast_run);
TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
@@ -57,6 +58,10 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
if (a->ops->act == tcf_mirred_act)
return tcf_mirred_act(skb, a, res);
#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_BLOCKCAST)
+ if (a->ops->act == tcf_blockcast_run)
+ return tcf_blockcast_run(skb, a, res);
+#endif
#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
if (a->ops->act == tcf_pedit_act)
return tcf_pedit_act(skb, a, res);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..1b0edf1287d0 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -780,6 +780,19 @@ config NET_ACT_SIMP
To compile this code as a module, choose M here: the
module will be called act_simple.
+config NET_ACT_BLOCKCAST
+ tristate "TC block Multicast"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to add an action that will multicast an skb to egress of
+ all netdevs that belong to a tc block except for the netdev on which
+ the skb arrived on
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_blockcast.
+
config NET_ACT_SKBEDIT
tristate "SKB Editing"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b5fd49641d91..2cdcf30645eb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_ACT_BLOCKCAST) += act_blockcast.o
obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_MPLS) += act_mpls.o
diff --git a/net/sched/act_blockcast.c b/net/sched/act_blockcast.c
new file mode 100644
index 000000000000..85fd0289927c
--- /dev/null
+++ b/net/sched/act_blockcast.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * net/sched/act_blockcast.c Block Cast action
+ * Copyright (c) 2023, Mojatatu Networks
+ * Authors: Jamal Hadi Salim <jhs@mojatatu.com>
+ * Victor Nogueira <victor@mojatatu.com>
+ * Pedro Tammela <pctammela@mojatatu.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <linux/if_arp.h>
+#include <net/tc_wrapper.h>
+
+#include <linux/tc_act/tc_defact.h>
+
+static struct tc_action_ops act_blockcast_ops;
+
+struct tcf_blockcast_act {
+ struct tc_action common;
+};
+
+#define to_blockcast_act(a) ((struct tcf_blockcast_act *)a)
+
+#define TCA_ID_BLOCKCAST 123
+#define CAST_RECURSION_LIMIT 4
+
+static DEFINE_PER_CPU(unsigned int, redirect_rec_level);
+
+static int cast_one(struct sk_buff *skb, const u32 ifindex)
+{
+ struct sk_buff *skb2 = skb;
+ int retval = TC_ACT_PIPE;
+ struct net_device *dev;
+ unsigned int rec_level;
+ bool expects_nh;
+ int mac_len;
+ bool at_nh;
+ int err;
+
+ rec_level = __this_cpu_inc_return(redirect_rec_level);
+ if (unlikely(rec_level > CAST_RECURSION_LIMIT)) {
+ net_warn_ratelimited("blockcast: exceeded redirect recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+ if (unlikely(!dev)) {
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ if (unlikely(!(dev->flags & IFF_UP) || !netif_carrier_ok(dev))) {
+ net_notice_ratelimited("blockcast: device %s is down\n",
+ dev->name);
+ __this_cpu_dec(redirect_rec_level);
+ return TC_ACT_SHOT;
+ }
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ __this_cpu_dec(redirect_rec_level);
+ return retval;
+ }
+
+ nf_reset_ct(skb2);
+
+ expects_nh = !dev_is_mac_header_xmit(dev);
+ at_nh = skb->data == skb_network_header(skb);
+ if (at_nh != expects_nh) {
+ mac_len = skb_at_tc_ingress(skb) ?
+ skb->mac_len :
+ skb_network_header(skb) - skb_mac_header(skb);
+
+ if (expects_nh) {
+ /* target device/action expect data at nh */
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+ /* target device/action expect data at mac */
+ skb_push_rcsum(skb2, mac_len);
+ }
+ }
+
+ skb2->skb_iif = skb->dev->ifindex;
+ skb2->dev = dev;
+
+ err = dev_queue_xmit(skb2);
+ if (err)
+ retval = TC_ACT_SHOT;
+
+ __this_cpu_dec(redirect_rec_level);
+
+ return retval;
+}
+
+TC_INDIRECT_SCOPE int tcf_blockcast_run(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
+{
+ u32 block_index = qdisc_skb_cb(skb)->block_index;
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ int action = READ_ONCE(p->tcf_action);
+ struct net *net = dev_net(skb->dev);
+ struct tcf_block *block;
+ struct net_device *dev;
+ u32 exception_ifindex;
+ unsigned long index;
+
+ block = tcf_block_lookup(net, block_index);
+ exception_ifindex = skb->dev->ifindex;
+
+ tcf_action_update_bstats(&p->common, skb);
+ tcf_lastuse_update(&p->tcf_tm);
+
+ if (!block || xa_empty(&block->ports))
+ goto act_done;
+
+ /* we are already under rcu protection, so iterating block is safe*/
+ xa_for_each(&block->ports, index, dev) {
+ int err;
+
+ if (index == exception_ifindex)
+ continue;
+
+ err = cast_one(skb, dev->ifindex);
+ if (err != TC_ACT_PIPE)
+ printk("(%d)Failed to send to dev\t%d: %s\n", err,
+ dev->ifindex, dev->name);
+ }
+
+act_done:
+ if (action == TC_ACT_SHOT)
+ tcf_action_inc_drop_qstats(&p->common);
+ return action;
+}
+
+static const struct nla_policy blockcast_policy[TCA_DEF_MAX + 1] = {
+ [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
+};
+
+static int tcf_blockcast_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
+ struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
+ struct tc_defact *parm;
+ bool exists = false;
+ int ret = 0, err;
+ u32 index;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla,
+ blockcast_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_DEF_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_DEF_PARMS]);
+ index = parm->index;
+
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (err < 0)
+ return err;
+
+ exists = err;
+ if (exists && bind)
+ return 0;
+
+ if (!exists) {
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_blockcast_ops, bind, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+ }
+
+ ret = ACT_P_CREATED;
+ } else {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ err = -EEXIST;
+ goto release_idr;
+ }
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ if (exists)
+ spin_lock_bh(&p->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ if (exists)
+ spin_unlock_bh(&p->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
+}
+
+static int tcf_blockcast_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_blockcast_act *p = to_blockcast_act(a);
+ struct tc_defact opt = {
+ .index = p->tcf_index,
+ .refcnt = refcount_read(&p->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
+ };
+ struct tcf_t t;
+
+ spin_lock_bh(&p->tcf_lock);
+ opt.action = p->tcf_action;
+ if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ tcf_tm_dump(&t, &p->tcf_tm);
+ if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
+ goto nla_put_failure;
+ spin_unlock_bh(&p->tcf_lock);
+
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(&p->tcf_lock);
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_blockcast_ops = {
+ .kind = "blockcast",
+ .id = TCA_ID_BLOCKCAST,
+ .owner = THIS_MODULE,
+ .act = tcf_blockcast_run,
+ .dump = tcf_blockcast_dump,
+ .init = tcf_blockcast_init,
+ .size = sizeof(struct tcf_blockcast_act),
+};
+
+static __net_init int blockcast_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, act_blockcast_ops.net_id);
+
+ return tc_action_net_init(net, tn, &act_blockcast_ops);
+}
+
+static void __net_exit blockcast_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, act_blockcast_ops.net_id);
+}
+
+static struct pernet_operations blockcast_net_ops = {
+ .init = blockcast_init_net,
+ .exit_batch = blockcast_exit_net,
+ .id = &act_blockcast_ops.net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Mojatatu Networks, Inc");
+MODULE_LICENSE("GPL");
+
+static int __init blockcast_init_module(void)
+{
+ int ret = tcf_register_action(&act_blockcast_ops, &blockcast_net_ops);
+
+ if (!ret)
+ pr_info("blockcast TC action Loaded\n");
+ return ret;
+}
+
+static void __exit blockcast_cleanup_module(void)
+{
+ tcf_unregister_action(&act_blockcast_ops, &blockcast_net_ops);
+}
+
+module_init(blockcast_init_module);
+module_exit(blockcast_cleanup_module);
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: Weird sparse error WAS( [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-24 14:30 ` Weird sparse error WAS( " Jamal Hadi Salim
@ 2023-08-24 14:41 ` Paolo Abeni
2023-08-24 14:57 ` Jamal Hadi Salim
2023-09-05 9:18 ` Dan Carpenter
1 sibling, 1 reply; 16+ messages in thread
From: Paolo Abeni @ 2023-08-24 14:41 UTC (permalink / raw)
To: Jamal Hadi Salim, Dan Carpenter
Cc: Simon Horman, Linux Kernel Network Developers
On Thu, 2023-08-24 at 10:30 -0400, Jamal Hadi Salim wrote:
> Dan/Simon,
> Can someone help explain this error on the code below:
>
> ../net/sched/act_blockcast.c:213:9: warning: context imbalance in
> 'tcf_blockcast_init' - different lock contexts for basic block
IIRC sparse is fooled by lock under conditionals, in this case:
if (exists)
spin_lock_bh(&p->tcf_lock);
a possible solution would be:
if (exists) {
spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
spin_unlock_bh(&p->tcf_lock);
} else {
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
}
Using some additional helpers the code could be less ugly...
Cheers,
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: Weird sparse error WAS( [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-24 14:41 ` Paolo Abeni
@ 2023-08-24 14:57 ` Jamal Hadi Salim
0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 14:57 UTC (permalink / raw)
To: Paolo Abeni; +Cc: Dan Carpenter, Simon Horman, Linux Kernel Network Developers
On Thu, Aug 24, 2023 at 10:41 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Thu, 2023-08-24 at 10:30 -0400, Jamal Hadi Salim wrote:
> > Dan/Simon,
> > Can someone help explain this error on the code below:
> >
> > ../net/sched/act_blockcast.c:213:9: warning: context imbalance in
> > 'tcf_blockcast_init' - different lock contexts for basic block
>
> IIRC sparse is fooled by lock under conditionals, in this case:
>
> if (exists)
> spin_lock_bh(&p->tcf_lock);
>
> a possible solution would be:
>
> if (exists) {
> spin_lock_bh(&p->tcf_lock);
> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> spin_unlock_bh(&p->tcf_lock);
> } else {
> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> }
>
aha;->
Thanks - this should fix it. We will fix it to follow this pattern.
> Using some additional helpers the code could be less ugly...
I think only one other action(ife) has this pattern - we should be
able to fix that one instead.
cheers,
jamal
> Cheers,
>
> Paolo
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: Weird sparse error WAS( [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action
2023-08-24 14:30 ` Weird sparse error WAS( " Jamal Hadi Salim
2023-08-24 14:41 ` Paolo Abeni
@ 2023-09-05 9:18 ` Dan Carpenter
1 sibling, 0 replies; 16+ messages in thread
From: Dan Carpenter @ 2023-09-05 9:18 UTC (permalink / raw)
To: Jamal Hadi Salim; +Cc: Simon Horman, Linux Kernel Network Developers
On Thu, Aug 24, 2023 at 10:30:18AM -0400, Jamal Hadi Salim wrote:
> Dan/Simon,
> Can someone help explain this error on the code below:
>
> ../net/sched/act_blockcast.c:213:9: warning: context imbalance in
> 'tcf_blockcast_init' - different lock contexts for basic block
>
> Looks like a false positive ...
I maintain Smatch and not Sparse. It is a false positive. Smatch will
parse that code correctly. ;)
regards,
dan carpenter
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use
2023-08-19 16:35 [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Victor Nogueira
` (2 preceding siblings ...)
2023-08-19 16:35 ` [PATCH net-next v2 3/3] net/sched: act_blockcast: Introduce blockcast tc action Victor Nogueira
@ 2023-08-21 19:07 ` Vlad Buslov
2023-08-24 13:47 ` Jamal Hadi Salim
3 siblings, 1 reply; 16+ messages in thread
From: Vlad Buslov @ 2023-08-21 19:07 UTC (permalink / raw)
To: Victor Nogueira
Cc: jhs, xiyou.wangcong, jiri, davem, edumazet, kuba, pabeni, netdev,
mleitner, horms, pctammela, kernel
On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> __context__
> The "tc block" is a collection of netdevs/ports which allow qdiscs to share
> match-action block instances (as opposed to the traditional tc filter per
> netdev/port)[1].
>
> Example setup:
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
>
> Once the block is created we can add a filter using the block index:
> $ tc filter add block 22 protocol ip pref 25 \
> flower dst_ip 192.168.0.0/16 action drop
>
> A packet with dst IP matching 192.168.0.0/16 arriving on the ingress of
> either ens7 or ens8 is dropped.
>
> __this patchset__
> Up to this point in the implementation, the block is unaware of its ports.
> This patch fixes that and makes the tc block ports available to the
> datapath as well as the offload control path (by virtue of the ports being
> in the tc block structure).
Could you elaborate on offload control path? I guess I'm missing
something here because struct flow_cls_offload doesn't seem to include
pointer to the parent tcf_block instance.
>
> For the datapath we provide a use case of the tc block in an action
> we call "blockcast" in patch 3. This action can be used in an example as
> such:
>
> $ tc qdisc add dev ens7 ingress block 22
> $ tc qdisc add dev ens8 ingress block 22
> $ tc qdisc add dev ens9 ingress block 22
> $ tc filter add block 22 protocol ip pref 25 \
> flower dst_ip 192.168.0.0/16 action blockcast
>
> When a packet(matching dst IP 192.168.0.0/16) arrives on the ingress of any
> of ens7, ens8 or ens9 it will be copied to all ports other than itself.
> For example, if it arrives on ens8 then a copy of the packet will be
> "blockcasted";-> to both ens7 and ens9 (unmodified), but not to ens8.
>
> For an offload path, one use case is to "group" all ports belonging to a
> PCI device into the same tc block.
>
> Patch 1 introduces the required infra. Patch 2 exposes the tc block to the
> tc datapath and patch 3 implements datapath usage via a new tc action
> "blockcast".
>
> __Acknowledgements__
> Suggestions from Vlad Buslov and Marcelo Ricardo Leitner made this patchset
> better. The idea of integrating the ports into the tc block was suggested
> by Jiri Pirko.
>
> [1] See commit ca46abd6f89f ("Merge branch 'net-sched-allow-qdiscs-to-share-filter-block-instances'")
>
> Changes in v2:
> - Remove RFC tag
> - Add more details in patch 0(Jiri)
> - When CONFIG_NET_TC_SKB_EXT is selected we have unused qdisc_cb
> Reported-by: kernel test robot <lkp@intel.com> (and horms@kernel.org)
> - Fix bad dev dereference in printk of blockcast action (Simon)
>
> Victor Nogueira (3):
> net/sched: Introduce tc block netdev tracking infra
> net/sched: cls_api: Expose tc block ports to the datapath
> Introduce blockcast tc action
>
> include/net/sch_generic.h | 8 +
> include/net/tc_wrapper.h | 5 +
> net/sched/Kconfig | 13 ++
> net/sched/Makefile | 1 +
> net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
> net/sched/cls_api.c | 11 +-
> net/sched/sch_api.c | 79 +++++++++-
> net/sched/sch_generic.c | 40 ++++-
> 8 files changed, 449 insertions(+), 7 deletions(-)
> create mode 100644 net/sched/act_blockcast.c
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use
2023-08-21 19:07 ` [PATCH net-next v2 0/3] net/sched: Introduce tc block ports tracking and use Vlad Buslov
@ 2023-08-24 13:47 ` Jamal Hadi Salim
0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2023-08-24 13:47 UTC (permalink / raw)
To: Vlad Buslov
Cc: Victor Nogueira, xiyou.wangcong, jiri, davem, edumazet, kuba,
pabeni, netdev, mleitner, horms, pctammela, kernel
On Mon, Aug 21, 2023 at 3:12 PM Vlad Buslov <vladbu@nvidia.com> wrote:
>
> On Sat 19 Aug 2023 at 13:35, Victor Nogueira <victor@mojatatu.com> wrote:
> > __context__
> > The "tc block" is a collection of netdevs/ports which allow qdiscs to share
> > match-action block instances (as opposed to the traditional tc filter per
> > netdev/port)[1].
> >
> > Example setup:
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> >
> > Once the block is created we can add a filter using the block index:
> > $ tc filter add block 22 protocol ip pref 25 \
> > flower dst_ip 192.168.0.0/16 action drop
> >
> > A packet with dst IP matching 192.168.0.0/16 arriving on the ingress of
> > either ens7 or ens8 is dropped.
> >
> > __this patchset__
> > Up to this point in the implementation, the block is unaware of its ports.
> > This patch fixes that and makes the tc block ports available to the
> > datapath as well as the offload control path (by virtue of the ports being
> > in the tc block structure).
>
> Could you elaborate on offload control path? I guess I'm missing
> something here because struct flow_cls_offload doesn't seem to include
> pointer to the parent tcf_block instance.
>
Sorry - that statement was subconsciously over-reaching as far as this
patch is concerned, but talking from P4TC pov, (even though the
current submission for P4TC is s/w only):
A single PCI device is mapped to at least one PF and possibly many VFs
- this gets mapped to a tc block...
Then the tc filter adds the P4 program to a block. The goal then is to
send a table entry towards the driver once, instead of replicating it
many times.
This can be achieved either a) at the tc layer, by keeping the entries
per block and invoking the driver only once, or b) by letting the driver
maintain the state (with or without the tc block).
For P4TC either is achievable because the tables are "global". The
challenge is how to get the driver to be aware of the tc block.
To answer your question, the idea is to be able to pass this list of
ports per block to the driver (which, as you point out, doesn't exist
today, but should be easy to add).
Thoughts?
cheers,
jamal
> >
> > For the datapath we provide a use case of the tc block in an action
> > we call "blockcast" in patch 3. This action can be used in an example as
> > such:
> >
> > $ tc qdisc add dev ens7 ingress block 22
> > $ tc qdisc add dev ens8 ingress block 22
> > $ tc qdisc add dev ens9 ingress block 22
> > $ tc filter add block 22 protocol ip pref 25 \
> > flower dst_ip 192.168.0.0/16 action blockcast
> >
> > When a packet (matching dst IP 192.168.0.0/16) arrives on the ingress of any
> > of ens7, ens8 or ens9 it will be copied to all ports other than the one it
> > arrived on. For example, if it arrives on ens8 then a copy of the packet will
> > be "blockcasted" ;-> to both ens7 and ens9 (unmodified), but not to ens8.
> >
> > For an offload path, one use case is to "group" all ports belonging to a
> > PCI device into the same tc block.
> >
> > Patch 1 introduces the required infra. Patch 2 exposes the tc block to the
> > tc datapath and patch 3 implements datapath usage via a new tc action
> > "blockcast".
> >
> > __Acknowledgements__
> > Suggestions from Vlad Buslov and Marcelo Ricardo Leitner made this patchset
> > better. The idea of integrating the ports into the tc block was suggested
> > by Jiri Pirko.
> >
> > [1] See commit ca46abd6f89f ("Merge branch 'net-sched-allow-qdiscs-to-share-filter-block-instances'")
> >
> > Changes in v2:
> > - Remove RFC tag
> > - Add more details in patch 0(Jiri)
> > - When CONFIG_NET_TC_SKB_EXT is selected we have unused qdisc_cb
> > Reported-by: kernel test robot <lkp@intel.com> (and horms@kernel.org)
> > - Fix bad dev dereference in printk of blockcast action (Simon)
> >
> > Victor Nogueira (3):
> > net/sched: Introduce tc block netdev tracking infra
> > net/sched: cls_api: Expose tc block ports to the datapath
> > Introduce blockcast tc action
> >
> > include/net/sch_generic.h | 8 +
> > include/net/tc_wrapper.h | 5 +
> > net/sched/Kconfig | 13 ++
> > net/sched/Makefile | 1 +
> > net/sched/act_blockcast.c | 299 ++++++++++++++++++++++++++++++++++++++
> > net/sched/cls_api.c | 11 +-
> > net/sched/sch_api.c | 79 +++++++++-
> > net/sched/sch_generic.c | 40 ++++-
> > 8 files changed, 449 insertions(+), 7 deletions(-)
> > create mode 100644 net/sched/act_blockcast.c
>
^ permalink raw reply [flat|nested] 16+ messages in thread