* [Bridge] [RFC PATCH net-next 1/5] skbuff: bridge: Add layer 2 miss indication
2023-05-09 7:04 [Bridge] [RFC PATCH net-next 0/5] Add layer 2 miss indication and filtering Ido Schimmel
@ 2023-05-09 7:04 ` Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 2/5] net/sched: flower: Allow matching on layer 2 miss Ido Schimmel
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Ido Schimmel @ 2023-05-09 7:04 UTC (permalink / raw)
To: netdev, bridge
Cc: petrm, jiri, taspelund, xiyou.wangcong, Ido Schimmel, razor, jhs,
edumazet, roopa, kuba, pabeni, davem
Allow the bridge driver to mark packets that did not match a layer 2
entry during forwarding by adding a 'l2_miss' bit to the skb.
Clear the bit whenever a packet enters the bridge (received from a
bridge port or transmitted via the bridge) and set it if the packet did
not match an FDB/MDB entry.
Subsequent patches will allow the flower classifier to match on this
bit. The motivating use case is non-DF (Designated Forwarder) filtering
where we would like to prevent decapsulated packets from being flooded
to a multi-homed host.
Do not allocate the bit if the kernel was not compiled with bridge
support and place it after the two bit fields in accordance with commit
4c60d04c2888 ("net: skbuff: push nf_trace down the bitfield"). The bit
does not increase the size of the structure as it is placed at an
existing hole. Layout with allmodconfig:
struct sk_buff {
[...]
__u8 csum_not_inet:1; /* 132: 3 1 */
__u8 l2_miss:1; /* 132: 4 1 */
/* XXX 3 bits hole, try to pack */
/* XXX 1 byte hole, try to pack */
__u16 tc_index; /* 134 2 */
u16 alloc_cpu; /* 136 2 */
[...]
} __attribute__((__aligned__(8)));
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
include/linux/skbuff.h | 4 ++++
net/bridge/br_device.c | 1 +
net/bridge/br_forward.c | 3 +++
net/bridge/br_input.c | 1 +
4 files changed, 9 insertions(+)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 738776ab8838..c7a84767ed48 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -801,6 +801,7 @@ typedef unsigned char *sk_buff_data_t;
* @encap_hdr_csum: software checksum is needed
* @csum_valid: checksum is already valid
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
+ * @l2_miss: Packet did not match an L2 entry during forwarding
* @csum_complete_sw: checksum was completed by software
* @csum_level: indicates the number of consecutive checksums found in
* the packet minus one that have been verified as
@@ -991,6 +992,9 @@ struct sk_buff {
#if IS_ENABLED(CONFIG_IP_SCTP)
__u8 csum_not_inet:1;
#endif
+#if IS_ENABLED(CONFIG_BRIDGE)
+ __u8 l2_miss:1;
+#endif
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8eca8a5c80c6..91dbdae4afd4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vid = 0;
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+ skb->l2_miss = 0;
rcu_read_lock();
nf_ops = rcu_dereference(nf_br_ops);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 57744704ff69..5893648c4da2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -203,6 +203,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
+ skb->l2_miss = 1;
+
list_for_each_entry_rcu(p, &br->port_list, list) {
/* Do not flood unicast traffic to ports that turn it off, nor
* other traffic if flood off, except for traffic we originate
@@ -295,6 +297,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
allow_mode_include = false;
} else {
p = NULL;
+ skb->l2_miss = 1;
}
while (p || rp) {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fc17b9fd93e6..d8ab5890cbe6 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -334,6 +334,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+ skb->l2_miss = 0;
p = br_port_get_rcu(skb->dev);
if (p->flags & BR_VLAN_TUNNEL)
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Bridge] [RFC PATCH net-next 2/5] net/sched: flower: Allow matching on layer 2 miss
2023-05-09 7:04 [Bridge] [RFC PATCH net-next 0/5] Add layer 2 miss indication and filtering Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 1/5] skbuff: bridge: Add layer 2 miss indication Ido Schimmel
@ 2023-05-09 7:04 ` Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 3/5] flow_offload: Reject " Ido Schimmel
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Ido Schimmel @ 2023-05-09 7:04 UTC (permalink / raw)
To: netdev, bridge
Cc: petrm, jiri, taspelund, xiyou.wangcong, Ido Schimmel, razor, jhs,
edumazet, roopa, kuba, pabeni, davem
Add the 'TCA_FLOWER_L2_MISS' netlink attribute that allows user space to
match on packets that encountered a layer 2 miss. The miss indication is
set as metadata in the skb by the bridge driver upon FDB/MDB lookup
miss.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
include/net/flow_dissector.h | 2 ++
include/uapi/linux/pkt_cls.h | 2 ++
net/core/flow_dissector.c | 3 +++
net/sched/cls_flower.c | 14 ++++++++++++--
4 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 85b2281576ed..8b41668c77fc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -243,10 +243,12 @@ struct flow_dissector_key_ip {
* struct flow_dissector_key_meta:
* @ingress_ifindex: ingress ifindex
* @ingress_iftype: ingress interface type
+ * @l2_miss: packet did not match an L2 entry during forwarding
*/
struct flow_dissector_key_meta {
int ingress_ifindex;
u16 ingress_iftype;
+ u8 l2_miss;
};
/**
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 648a82f32666..00933dda7b10 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -594,6 +594,8 @@ enum {
TCA_FLOWER_KEY_L2TPV3_SID, /* be32 */
+ TCA_FLOWER_L2_MISS, /* u8 */
+
__TCA_FLOWER_MAX,
};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 25fb0bbc310f..3776c7bdd228 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -241,6 +241,9 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_META,
target_container);
meta->ingress_ifindex = skb->skb_iif;
+#if IS_ENABLED(CONFIG_BRIDGE)
+ meta->l2_miss = skb->l2_miss;
+#endif
}
EXPORT_SYMBOL(skb_flow_dissect_meta);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9dbc43388e57..4eb06c6367fc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -615,7 +615,8 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
- [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_UNSPEC] = { .strict_start_type =
+ TCA_FLOWER_L2_MISS },
[TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
[TCA_FLOWER_INDEV] = { .type = NLA_STRING,
.len = IFNAMSIZ },
@@ -720,7 +721,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
-
+ [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
};
static const struct nla_policy
@@ -1668,6 +1669,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
mask->meta.ingress_ifindex = 0xffffffff;
}
+ fl_set_key_val(tb, &key->meta.l2_miss, TCA_FLOWER_L2_MISS,
+ &mask->meta.l2_miss, TCA_FLOWER_UNSPEC,
+ sizeof(key->meta.l2_miss));
+
fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst));
@@ -3074,6 +3079,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
}
+ if (fl_dump_key_val(skb, &key->meta.l2_miss,
+ TCA_FLOWER_L2_MISS, &mask->meta.l2_miss,
+ TCA_FLOWER_UNSPEC, sizeof(key->meta.l2_miss)))
+ goto nla_put_failure;
+
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst)) ||
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Bridge] [RFC PATCH net-next 3/5] flow_offload: Reject matching on layer 2 miss
2023-05-09 7:04 [Bridge] [RFC PATCH net-next 0/5] Add layer 2 miss indication and filtering Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 1/5] skbuff: bridge: Add layer 2 miss indication Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 2/5] net/sched: flower: Allow matching on layer 2 miss Ido Schimmel
@ 2023-05-09 7:04 ` Ido Schimmel
2023-05-10 9:57 ` [Bridge] [EXT] " Elad Nachman
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 4/5] mlxsw: spectrum_flower: Add ability to match " Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 5/5] selftests: forwarding: Add layer 2 miss test cases Ido Schimmel
4 siblings, 1 reply; 7+ messages in thread
From: Ido Schimmel @ 2023-05-09 7:04 UTC (permalink / raw)
To: netdev, bridge
Cc: petrm, jiri, taspelund, xiyou.wangcong, Ido Schimmel, razor, jhs,
edumazet, roopa, kuba, pabeni, davem
Adjust drivers that support the 'FLOW_DISSECTOR_KEY_META' key to reject
filters that try to match on the newly added layer 2 miss option. Add an
extack message to clearly communicate the failure reason to user space.
Example:
# tc filter add dev swp1 egress pref 1 proto all flower skip_sw l2_miss true action drop
Error: mlxsw_spectrum: Can't match on "l2_miss".
We have an error talking to the kernel
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
.../net/ethernet/marvell/prestera/prestera_flower.c | 6 ++++++
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++++++
drivers/net/ethernet/mscc/ocelot_flower.c | 10 ++++++++++
4 files changed, 28 insertions(+)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
index 91a478b75cbf..3e20e71b0f81 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -148,6 +148,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
__be16 key, mask;
flow_rule_match_meta(f_rule, &match);
+
+ if (match.mask->l2_miss) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
+ return -EOPNOTSUPP;
+ }
+
if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
NL_SET_ERR_MSG_MOD(f->common.extack,
"Unsupported ingress ifindex mask");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 728b82ce4031..516653568330 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2586,6 +2586,12 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
return 0;
flow_rule_match_meta(rule, &match);
+
+ if (match.mask->l2_miss) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
+ return -EOPNOTSUPP;
+ }
+
if (!match.mask->ingress_ifindex)
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 594cdcb90b3d..6fec9223250b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -294,6 +294,12 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
return 0;
flow_rule_match_meta(rule, &match);
+
+ if (match.mask->l2_miss) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
+ return -EOPNOTSUPP;
+ }
+
if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask");
return -EINVAL;
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index ee052404eb55..e0916afcddfb 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -592,6 +592,16 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
return -EOPNOTSUPP;
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+ if (match.mask->l2_miss) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't match on \"l2_miss\"");
+ return -EOPNOTSUPP;
+ }
+ }
+
/* For VCAP ES0 (egress rewriter) we can match on the ingress port */
if (!ingress) {
ret = ocelot_flower_parse_indev(ocelot, port, f, filter);
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Bridge] [EXT] [RFC PATCH net-next 3/5] flow_offload: Reject matching on layer 2 miss
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 3/5] flow_offload: Reject " Ido Schimmel
@ 2023-05-10 9:57 ` Elad Nachman
0 siblings, 0 replies; 7+ messages in thread
From: Elad Nachman @ 2023-05-10 9:57 UTC (permalink / raw)
To: Ido Schimmel, netdev@vger.kernel.org,
bridge@lists.linux-foundation.org
Cc: petrm@nvidia.com, jiri@resnulli.us, taspelund@nvidia.com,
xiyou.wangcong@gmail.com, razor@blackwall.org, jhs@mojatatu.com,
edumazet@google.com, roopa@nvidia.com, kuba@kernel.org,
pabeni@redhat.com, davem@davemloft.net
> -----Original Message-----
> From: Ido Schimmel <idosch@nvidia.com>
> Sent: Tuesday, May 9, 2023 10:05 AM
> To: netdev@vger.kernel.org; bridge@lists.linux-foundation.org
> Cc: davem@davemloft.net; kuba@kernel.org; pabeni@redhat.com;
> edumazet@google.com; razor@blackwall.org; roopa@nvidia.com;
> jhs@mojatatu.com; xiyou.wangcong@gmail.com; jiri@resnulli.us;
> petrm@nvidia.com; taspelund@nvidia.com; Ido Schimmel
> <idosch@nvidia.com>
> Subject: [EXT] [RFC PATCH net-next 3/5] flow_offload: Reject matching on
> layer 2 miss
>
> External Email
>
> ----------------------------------------------------------------------
> Adjust drivers that support the 'FLOW_DISSECTOR_KEY_META' key to reject
> filters that try to match on the newly added layer 2 miss option. Add an
> extack message to clearly communicate the failure reason to user space.
>
> Example:
>
> # tc filter add dev swp1 egress pref 1 proto all flower skip_sw l2_miss true
> action drop
> Error: mlxsw_spectrum: Can't match on "l2_miss".
> We have an error talking to the kernel
>
> Signed-off-by: Ido Schimmel <idosch@nvidia.com>
> ---
> .../net/ethernet/marvell/prestera/prestera_flower.c | 6 ++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
> drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++++++
> drivers/net/ethernet/mscc/ocelot_flower.c | 10 ++++++++++
> 4 files changed, 28 insertions(+)
>
> diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
> b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
> index 91a478b75cbf..3e20e71b0f81 100644
> --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
> +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
> @@ -148,6 +148,12 @@ static int prestera_flower_parse_meta(struct
> prestera_acl_rule *rule,
> __be16 key, mask;
>
> flow_rule_match_meta(f_rule, &match);
> +
> + if (match.mask->l2_miss) {
> + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on
> \"l2_miss\"");
> + return -EOPNOTSUPP;
> + }
> +
> if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
> NL_SET_ERR_MSG_MOD(f->common.extack,
> "Unsupported ingress ifindex mask"); diff --
> git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> index 728b82ce4031..516653568330 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> @@ -2586,6 +2586,12 @@ static int mlx5e_flower_parse_meta(struct
> net_device *filter_dev,
> return 0;
>
> flow_rule_match_meta(rule, &match);
> +
> + if (match.mask->l2_miss) {
> + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on
> \"l2_miss\"");
> + return -EOPNOTSUPP;
> + }
> +
> if (!match.mask->ingress_ifindex)
> return 0;
>
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
> b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
> index 594cdcb90b3d..6fec9223250b 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
> @@ -294,6 +294,12 @@ static int mlxsw_sp_flower_parse_meta(struct
> mlxsw_sp_acl_rule_info *rulei,
> return 0;
>
> flow_rule_match_meta(rule, &match);
> +
> + if (match.mask->l2_miss) {
> + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on
> \"l2_miss\"");
> + return -EOPNOTSUPP;
> + }
> +
> if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
> NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported
> ingress ifindex mask");
> return -EINVAL;
> diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c
> b/drivers/net/ethernet/mscc/ocelot_flower.c
> index ee052404eb55..e0916afcddfb 100644
> --- a/drivers/net/ethernet/mscc/ocelot_flower.c
> +++ b/drivers/net/ethernet/mscc/ocelot_flower.c
> @@ -592,6 +592,16 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int
> port, bool ingress,
> return -EOPNOTSUPP;
> }
>
> + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
> + struct flow_match_meta match;
> +
> + flow_rule_match_meta(rule, &match);
> + if (match.mask->l2_miss) {
> + NL_SET_ERR_MSG_MOD(extack, "Can't match on
> \"l2_miss\"");
> + return -EOPNOTSUPP;
> + }
> + }
> +
> /* For VCAP ES0 (egress rewriter) we can match on the ingress port
> */
> if (!ingress) {
> ret = ocelot_flower_parse_indev(ocelot, port, f, filter);
> --
> 2.40.1
>
I have reviewed the prestera part. Looks straightforward enough.
Acked-by: Elad Nachman <enachman@marvell.com>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Bridge] [RFC PATCH net-next 4/5] mlxsw: spectrum_flower: Add ability to match on layer 2 miss
2023-05-09 7:04 [Bridge] [RFC PATCH net-next 0/5] Add layer 2 miss indication and filtering Ido Schimmel
` (2 preceding siblings ...)
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 3/5] flow_offload: Reject " Ido Schimmel
@ 2023-05-09 7:04 ` Ido Schimmel
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 5/5] selftests: forwarding: Add layer 2 miss test cases Ido Schimmel
4 siblings, 0 replies; 7+ messages in thread
From: Ido Schimmel @ 2023-05-09 7:04 UTC (permalink / raw)
To: netdev, bridge
Cc: petrm, jiri, taspelund, xiyou.wangcong, Ido Schimmel, razor, jhs,
edumazet, roopa, kuba, pabeni, davem
Add the 'dmac_type' key element to supported key blocks and make use of
it to match on layer 2 miss.
This is a two-bit key in hardware with the following values:
00b - Known multicast.
01b - Broadcast.
10b - Known unicast.
11b - Unknown unicast or unregistered multicast.
When 'l2_miss' is set we need to match on 01b or 11b. Therefore, only
match on the LSB in order to differentiate between both cases of
'l2_miss'.
Tested on Spectrum-{1,2,3,4}.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
.../mellanox/mlxsw/core_acl_flex_keys.c | 1 +
.../mellanox/mlxsw/core_acl_flex_keys.h | 3 ++-
.../mellanox/mlxsw/spectrum_acl_flex_keys.c | 5 +++++
.../ethernet/mellanox/mlxsw/spectrum_flower.c | 20 ++++++++++++++-----
4 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index bd1a51a0a540..81af0b9a4329 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -42,6 +42,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
+ MLXSW_AFK_ELEMENT_INFO_U32(DMAC_TYPE, 0x40, 0, 2),
};
struct mlxsw_afk {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 3a037fe47211..6f1649cfa4cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -35,6 +35,7 @@ enum mlxsw_afk_element {
MLXSW_AFK_ELEMENT_IP_DSCP,
MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB,
+ MLXSW_AFK_ELEMENT_DMAC_TYPE,
MLXSW_AFK_ELEMENT_MAX,
};
@@ -69,7 +70,7 @@ struct mlxsw_afk_element_info {
MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \
_element, _offset, 0, _size)
-#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
+#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x44
struct mlxsw_afk_element_inst { /* element instance in actual block */
enum mlxsw_afk_element element;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index 00c32320f891..18a968cded36 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -26,6 +26,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x02, 2),
MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(DMAC_TYPE, 0x08, 0, 2),
MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
};
@@ -50,6 +51,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(DMAC_TYPE, 0x00, 24, 2),
MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
@@ -78,6 +80,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(DMAC_TYPE, 0x00, 30, 2),
MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
};
@@ -123,6 +126,7 @@ const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(DMAC_TYPE, 0x00, 0, 2),
MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4),
};
@@ -313,6 +317,7 @@ const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = {
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(DMAC_TYPE, 0x00, 2, 2),
MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 18, 12),
MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 9, -1, true), /* RX_ACL_SYSTEM_PORT */
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 6fec9223250b..170a07f35897 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -295,11 +295,6 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
flow_rule_match_meta(rule, &match);
- if (match.mask->l2_miss) {
- NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\"");
- return -EOPNOTSUPP;
- }
-
if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask");
return -EINVAL;
@@ -327,6 +322,21 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
mlxsw_sp_port->local_port,
0xFFFFFFFF);
+
+ /* This is a two bits key in hardware with the following values:
+ * 00b - Known multicast.
+ * 01b - Broadcast.
+ * 10b - Known unicast.
+ * 11b - Unknown unicast or unregistered multicast.
+ *
+ * When 'l2_miss' is set we need to match on 01b or 11b. Therefore,
+ * only match on the LSB in order to differentiate between both cases
+ * of 'l2_miss'.
+ */
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DMAC_TYPE,
+ match.key->l2_miss,
+ match.mask->l2_miss & BIT(0));
+
return 0;
}
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Bridge] [RFC PATCH net-next 5/5] selftests: forwarding: Add layer 2 miss test cases
2023-05-09 7:04 [Bridge] [RFC PATCH net-next 0/5] Add layer 2 miss indication and filtering Ido Schimmel
` (3 preceding siblings ...)
2023-05-09 7:04 ` [Bridge] [RFC PATCH net-next 4/5] mlxsw: spectrum_flower: Add ability to match " Ido Schimmel
@ 2023-05-09 7:04 ` Ido Schimmel
4 siblings, 0 replies; 7+ messages in thread
From: Ido Schimmel @ 2023-05-09 7:04 UTC (permalink / raw)
To: netdev, bridge
Cc: petrm, jiri, taspelund, xiyou.wangcong, Ido Schimmel, razor, jhs,
edumazet, roopa, kuba, pabeni, davem
Add test cases to verify that the bridge driver correctly marks layer 2
misses only when it should and that the flower classifier can match on
this metadata.
Example output:
# ./tc_flower_l2_miss.sh
TEST: L2 miss - Unicast [ OK ]
TEST: L2 miss - Multicast (IPv4) [ OK ]
TEST: L2 miss - Multicast (IPv6) [ OK ]
TEST: L2 miss - Link-local multicast (IPv4) [ OK ]
TEST: L2 miss - Link-local multicast (IPv6) [ OK ]
TEST: L2 miss - Broadcast [ OK ]
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
.../testing/selftests/net/forwarding/Makefile | 1 +
.../net/forwarding/tc_flower_l2_miss.sh | 343 ++++++++++++++++++
2 files changed, 344 insertions(+)
create mode 100755 tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index a474c60fe348..9d0062b542e5 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -83,6 +83,7 @@ TEST_PROGS = bridge_igmp.sh \
tc_chains.sh \
tc_flower_router.sh \
tc_flower.sh \
+ tc_flower_l2_miss.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
new file mode 100755
index 000000000000..fbf0a960b2c8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -0,0 +1,343 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | |
+# +----|------------------+ +------------------|---+
+# | |
+# +----|-------------------------------------------------------------------|---+
+# | SW | | |
+# | +-|-------------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_l2_miss_unicast
+ test_l2_miss_multicast
+ test_l2_miss_ll_multicast
+ test_l2_miss_broadcast
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+test_l2_miss_unicast()
+{
+ local dmac=00:01:02:03:04:05
+ local dip=192.0.2.2
+ local sip=192.0.2.1
+
+ RET=0
+
+ # Unknown unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \
+ flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+ # Known unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \
+ flower indev $swp1 l2_miss false dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ # Before adding FDB entry.
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was not hit before adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Known unicast filter was hit before adding FDB entry"
+
+ # Adding FDB entry.
+ bridge fdb replace $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was hit after adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was not hit after adding FDB entry"
+
+ # Deleting FDB entry.
+ bridge fdb del $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unknown unicast filter was not hit after deleting FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was hit after deleting FDB entry"
+
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower
+
+ log_test "L2 miss - Unicast"
+}
+
+test_l2_miss_multicast_common()
+{
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ # Unregistered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss true src_ip $sip dst_ip $dip \
+ action pass
+ # Registered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \
+ flower indev $swp1 l2_miss false src_ip $sip dst_ip $dip \
+ action pass
+
+ # Before adding MDB entry.
+ $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Registered multicast filter was hit before adding MDB entry"
+
+ # Adding MDB entry.
+ bridge mdb replace dev br1 port $swp2 grp $dip permanent
+
+ $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was hit after adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was not hit after adding MDB entry"
+
+ # Deleting MDB entry.
+ bridge mdb del dev br1 port $swp2 grp $dip
+
+ $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was hit after deleting MDB entry"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Multicast ($name)"
+}
+
+test_l2_miss_multicast_ipv4()
+{
+ local proto="ipv4"
+ local sip=192.0.2.1
+ local dip=239.1.1.1
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_multicast_common $proto $sip $dip $mode $name
+}
+
+test_l2_miss_multicast_ipv6()
+{
+ local proto="ipv6"
+ local sip=2001:db8:1::1
+ local dip=ff0e::1
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_multicast_common $proto $sip $dip $mode $name
+}
+
+test_l2_miss_multicast()
+{
+ # Configure $swp2 as a multicast router port so that it will forward
+ # both registered and unregistered multicast traffic.
+ bridge link set dev $swp2 mcast_router 2
+
+ # Forwarding according to MDB entries only takes place when the bridge
+ # detects that there is a valid querier in the network. Set the bridge
+ # as the querier and assign it a valid IPv6 link-local address to be
+ # used as the source address for MLD queries.
+ ip link set dev br1 type bridge mcast_querier 1
+ ip -6 address add fe80::1/64 nodad dev br1
+ # Wait the default Query Response Interval (10 seconds) for the bridge
+ # to determine that there are no other queriers in the network.
+ sleep 10
+
+ test_l2_miss_multicast_ipv4
+ test_l2_miss_multicast_ipv6
+
+ ip -6 address del fe80::1/64 dev br1
+ ip link set dev br1 type bridge mcast_querier 0
+ bridge link set dev $swp2 mcast_router 1
+}
+
+test_l2_miss_multicast_common2()
+{
+ local name=$1; shift
+ local dmac=$1; shift
+ local dip=224.0.0.1
+ local sip=192.0.2.1
+
+}
+
+test_l2_miss_ll_multicast_common()
+{
+ local proto=$1; shift
+ local dmac=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Filter was not hit"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Link-local multicast ($name)"
+}
+
+test_l2_miss_ll_multicast_ipv4()
+{
+ local proto=ipv4
+ local dmac=01:00:5e:00:00:01
+ local sip=192.0.2.1
+ local dip=224.0.0.1
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast_ipv6()
+{
+ local proto=ipv6
+ local dmac=33:33:00:00:00:01
+ local sip=2001:db8:1::1
+ local dip=ff02::1
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast()
+{
+ test_l2_miss_ll_multicast_ipv4
+ test_l2_miss_ll_multicast_ipv6
+}
+
+test_l2_miss_broadcast()
+{
+ local dmac=ff:ff:ff:ff:ff:ff
+ local smac=00:01:02:03:04:05
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol all handle 101 pref 1 \
+ flower indev $swp1 l2_miss true dst_mac $dmac src_mac $smac \
+ action pass
+
+ $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Filter was not hit"
+
+ tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower
+
+ log_test "L2 miss - Broadcast"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread