* [PATCH net-next v1 2/2] bridge: export multicast database via netlink
2012-11-30 9:58 [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Cong Wang
@ 2012-11-30 9:58 ` Cong Wang
2012-11-30 11:26 ` Thomas Graf
2012-11-30 9:58 ` [PATCH iproute2 v1] Add mdb command to bridge Cong Wang
` (2 subsequent siblings)
3 siblings, 1 reply; 13+ messages in thread
From: Cong Wang @ 2012-11-30 9:58 UTC (permalink / raw)
To: netdev
Cc: Cong Wang, bridge, Herbert Xu, Jesper Dangaard Brouer,
Thomas Graf, Stephen Hemminger, David S. Miller
This patch exports bridge multicast database via netlink
message type RTM_GETMDB. Similar to fdb, but currently bridge-specific.
We may need to support modify multicast database too (RTM_{ADD,DEL}MDB).
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
include/uapi/linux/if_bridge.h | 26 ++++++
include/uapi/linux/rtnetlink.h | 3 +
net/bridge/Makefile | 2 +-
net/bridge/br_mdb.c | 166 ++++++++++++++++++++++++++++++++++++++++
net/bridge/br_multicast.c | 2 +
net/bridge/br_private.h | 2 +
6 files changed, 200 insertions(+), 1 deletions(-)
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index b388579..c30c236 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -116,4 +116,30 @@ enum {
__IFLA_BRIDGE_MAX,
};
#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ * [MDBA_MCADDR]
+ * [MDBA_BRPORT_NO]
+ * }
+ * [MDBA_ROUTER] = {
+ * [MDBA_BRPORT_NO]
+ * }
+ */
+enum {
+ MDBA_UNSPEC,
+ MDBA_MDB,
+ MDBA_ROUTER,
+ __MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+ MDBA_MDB_UNSPEC,
+ MDBA_MCADDR,
+ MDBA_BRPORT_NO,
+ __MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 3dee071..0df623f 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -125,6 +125,9 @@ enum {
RTM_GETNETCONF = 82,
#define RTM_GETNETCONF RTM_GETNETCONF
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index d0359ea..e859098 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
-bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o
+bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
new file mode 100644
index 0000000..3751f7d
--- /dev/null
+++ b/net/bridge/br_mdb.c
@@ -0,0 +1,166 @@
+#include <linux/err.h>
+#include <linux/if_ether.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+struct br_port_msg {
+ int ifindex;
+};
+
+static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+ u32 seq, struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_port *p;
+ struct hlist_node *n;
+ struct nlattr *nest;
+
+ if (!br->multicast_router || hlist_empty(&br->router_list)) {
+ printk(KERN_INFO "no router on bridge\n");
+ return 0;
+ }
+
+ nest = nla_nest_start(skb, MDBA_ROUTER);
+ if (nest == NULL)
+ return -EMSGSIZE;
+
+ hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
+ if (p && nla_put_u16(skb, MDBA_BRPORT_NO, p->port_no))
+ goto fail;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+fail:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+ u32 seq, struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_mdb_htable *mdb;
+ struct nlattr *nest;
+ unsigned int i;
+ int s_idx;
+
+ if (br->multicast_disabled) {
+ printk(KERN_INFO "multicast is disabled on bridge\n");
+ return 0;
+ }
+
+ mdb = rcu_dereference(br->mdb);
+ if (!mdb) {
+ printk(KERN_INFO "no mdb on bridge\n");
+ return 0;
+ }
+
+ cb->seq = mdb->seq;
+ s_idx = cb->args[1];
+ if (s_idx >= mdb->max)
+ return 0;
+
+ nest = nla_nest_start(skb, MDBA_MDB);
+ if (nest == NULL)
+ return -EMSGSIZE;
+
+ for (i = s_idx; i < mdb->max; i++) {
+ struct hlist_node *h;
+ struct net_bridge_mdb_entry *mp;
+ struct net_bridge_port_group *p, **pp;
+ struct net_bridge_port *port;
+
+ hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
+ for (pp = &mp->ports;
+ (p = rcu_dereference(*pp)) != NULL;
+ pp = &p->next) {
+ port = p->port;
+ if (port) {
+ printk(KERN_INFO "port %u, mcaddr: %pI4\n", port->port_no, &mp->addr.u.ip4);
+ if (nla_put_u16(skb, MDBA_BRPORT_NO, port->port_no) ||
+ nla_put_be32(skb, MDBA_MCADDR, p->addr.u.ip4))
+ goto fail;
+ }
+ }
+ }
+ }
+
+ cb->args[1] = i;
+ nla_nest_end(skb, nest);
+ return 0;
+fail:
+ cb->args[1] = i;
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net_device *dev;
+ struct net *net = sock_net(skb->sk);
+ struct nlmsghdr *nlh;
+ u32 seq = cb->nlh->nlmsg_seq;
+ int idx = 0, s_idx;
+
+ s_idx = cb->args[0];
+
+ rcu_read_lock();
+
+ for_each_netdev_rcu(net, dev) {
+ if (dev->priv_flags & IFF_EBRIDGE) {
+ struct br_port_msg *bpm;
+
+ if (idx < s_idx)
+ goto cont;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ seq, RTM_GETMDB,
+ sizeof(*bpm), NLM_F_MULTI);
+ if (nlh == NULL)
+ break;
+
+ bpm = nlmsg_data(nlh);
+ bpm->ifindex = dev->ifindex;
+ if (br_mdb_fill_info(skb, cb, seq, dev) < 0) {
+ printk(KERN_INFO "br_mdb_fill_info failed\n");
+ goto fail;
+ }
+ if (br_rports_fill_info(skb, cb, seq, dev) < 0) {
+ printk(KERN_INFO "br_rports_fill_info failed\n");
+ goto fail;
+ }
+
+ nlmsg_end(skb, nlh);
+cont:
+ idx++;
+ }
+ }
+
+ rcu_read_unlock();
+ cb->args[0] = idx;
+ return skb->len;
+
+fail:
+ rcu_read_unlock();
+ nlmsg_cancel(skb, nlh);
+ return skb->len;
+}
+
+void br_mdb_init(void)
+{
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
+}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2417434..d53e4f4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -322,6 +322,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
mdb->size = old ? old->size : 0;
mdb->ver = old ? old->ver ^ 1 : 0;
+ mdb->seq = old ? (old->seq + 1): 0;
if (!old || elasticity)
get_random_bytes(&mdb->secret, sizeof(mdb->secret));
@@ -1584,6 +1585,7 @@ void br_multicast_init(struct net_bridge *br)
br_multicast_querier_expired, (unsigned long)br);
setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
(unsigned long)br);
+ br_mdb_init();
}
void br_multicast_open(struct net_bridge *br)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index eb9cd42..6484069 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -105,6 +105,7 @@ struct net_bridge_mdb_htable
u32 max;
u32 secret;
u32 ver;
+ u32 seq;
};
struct net_bridge_port
@@ -432,6 +433,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p,
extern int br_multicast_toggle(struct net_bridge *br, unsigned long val);
extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+extern void br_mdb_init(void);
static inline bool br_multicast_is_router(struct net_bridge *br)
{
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 2/2] bridge: export multicast database via netlink
2012-11-30 9:58 ` [PATCH net-next v1 2/2] bridge: export multicast database via netlink Cong Wang
@ 2012-11-30 11:26 ` Thomas Graf
2012-11-30 15:00 ` Cong Wang
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Graf @ 2012-11-30 11:26 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On 11/30/12 at 05:58pm, Cong Wang wrote:
> +enum {
> + MDBA_UNSPEC,
> + MDBA_MDB,
> + MDBA_ROUTER,
> + __MDBA_MAX,
> +};
> +#define MDBA_MAX (__MDBA_MAX - 1)
> +
> +enum {
> + MDBA_MDB_UNSPEC,
> + MDBA_MCADDR,
> + MDBA_BRPORT_NO,
> + __MDBA_MDB_MAX,
> +};
> +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
So juse use these enums from iproute2 directly instead redefining
them.
> diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
> new file mode 100644
> index 0000000..3751f7d
> --- /dev/null
> +++ b/net/bridge/br_mdb.c
> @@ -0,0 +1,166 @@
> +#include <linux/err.h>
> +#include <linux/if_ether.h>
> +#include <linux/igmp.h>
> +#include <linux/kernel.h>
> +#include <linux/netdevice.h>
> +#include <linux/rculist.h>
> +#include <linux/skbuff.h>
> +#include <linux/slab.h>
> +#include <net/ip.h>
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +#include <net/mld.h>
> +#include <net/addrconf.h>
> +#include <net/ip6_checksum.h>
> +#endif
> +
> +#include "br_private.h"
> +
> +struct br_port_msg {
> + int ifindex;
> +};
Make that __u32 ifindex and move the definition into if_bridge.h so
you can reuse it in iproute2.
> +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
> + u32 seq, struct net_device *dev)
> +{
> + struct net_bridge *br = netdev_priv(dev);
> + struct net_bridge_port *p;
> + struct hlist_node *n;
> + struct nlattr *nest;
> +
> + if (!br->multicast_router || hlist_empty(&br->router_list)) {
> + printk(KERN_INFO "no router on bridge\n");
> + return 0;
> + }
> +
> + nest = nla_nest_start(skb, MDBA_ROUTER);
> + if (nest == NULL)
> + return -EMSGSIZE;
> +
> + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
> + if (p && nla_put_u16(skb, MDBA_BRPORT_NO, p->port_no))
> + goto fail;
> + }
port_no 0 is reserved, right?
We can reduce message size here and make it easier to extend by
using p->port_no as the attribute id by doing something like this:
hlist_for_each_entry_rcu(p, n, &br->router_list, rlist)
if (nla_put_flag(skb, p->port_no))
goto fail;
This will result in an empty attribute body for now and if you ever
need to include more data you can simply start putting attributes
insde that empty body and old users will continue to function.
> +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
> + u32 seq, struct net_device *dev)
> +{
> + struct net_bridge *br = netdev_priv(dev);
> + struct net_bridge_mdb_htable *mdb;
> + struct nlattr *nest;
> + unsigned int i;
> + int s_idx;
> +
> + if (br->multicast_disabled) {
> + printk(KERN_INFO "multicast is disabled on bridge\n");
> + return 0;
> + }
> +
> + mdb = rcu_dereference(br->mdb);
> + if (!mdb) {
> + printk(KERN_INFO "no mdb on bridge\n");
> + return 0;
> + }
> +
> + cb->seq = mdb->seq;
I'm not sure how this is supposed to worl. cb->seq may not change
throughout the complete dump process or the dump will be interrupted
and the user is required to restart.
Each bridge will have its own mdb resulting in a differing seq.
> + s_idx = cb->args[1];
> + if (s_idx >= mdb->max)
> + return 0;
> +
> + nest = nla_nest_start(skb, MDBA_MDB);
> + if (nest == NULL)
> + return -EMSGSIZE;
> +
> + for (i = s_idx; i < mdb->max; i++) {
> + struct hlist_node *h;
> + struct net_bridge_mdb_entry *mp;
> + struct net_bridge_port_group *p, **pp;
> + struct net_bridge_port *port;
> +
> + hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
> + for (pp = &mp->ports;
> + (p = rcu_dereference(*pp)) != NULL;
> + pp = &p->next) {
> + port = p->port;
> + if (port) {
> + printk(KERN_INFO "port %u, mcaddr: %pI4\n", port->port_no, &mp->addr.u.ip4);
> + if (nla_put_u16(skb, MDBA_BRPORT_NO, port->port_no) ||
> + nla_put_be32(skb, MDBA_MCADDR, p->addr.u.ip4))
> + goto fail;
> + }
> + }
> + }
> + }
> +
> + cb->args[1] = i;
> + nla_nest_end(skb, nest);
> + return 0;
> +fail:
> + cb->args[1] = i;
> + nla_nest_cancel(skb, nest);
> + return -EMSGSIZE;
This still relies on the assumption that all of mdb->mhash[] will fit
into a single netlink message. Is that guaranteed? If so that would
simplify this a lot and you wouldn't have to worry about rehashes at
all.
As-is it looks broken, you store an offset into the hash but if you
run out of space you trim away the complete nested attribute again
and thus offseting will actually result in data loss because that
data has never been wired but you will skip over it next time.
You need something like this:
for (i = 0; i < mdb->max; i++) {
[...]
hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
if (idx < s_idx)
goto skip;
hentry = nla_nest_start(...)
if (nla_put_u16(...) ||
nla_put_32(...)) {
nla_nest_cancel(skb, hentry);
err = -EMSGSIZE;
goto out;
}
nla_nest_end(skb, hentry);
skip:
idx++;
}
}
out:
cb->args[1] = idx;
nla_nest_end(skb, nest);
return err;
> +}
> +
> +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +{
> + struct net_device *dev;
> + struct net *net = sock_net(skb->sk);
> + struct nlmsghdr *nlh;
> + u32 seq = cb->nlh->nlmsg_seq;
> + int idx = 0, s_idx;
> +
> + s_idx = cb->args[0];
> +
> + rcu_read_lock();
> +
> + for_each_netdev_rcu(net, dev) {
> + if (dev->priv_flags & IFF_EBRIDGE) {
> + struct br_port_msg *bpm;
> +
> + if (idx < s_idx)
> + goto cont;
> +
> + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
> + seq, RTM_GETMDB,
> + sizeof(*bpm), NLM_F_MULTI);
> + if (nlh == NULL)
> + break;
> +
> + bpm = nlmsg_data(nlh);
> + bpm->ifindex = dev->ifindex;
> + if (br_mdb_fill_info(skb, cb, seq, dev) < 0) {
> + printk(KERN_INFO "br_mdb_fill_info failed\n");
> + goto fail;
> + }
> + if (br_rports_fill_info(skb, cb, seq, dev) < 0) {
> + printk(KERN_INFO "br_rports_fill_info failed\n");
> + goto fail;
> + }
> +
> + nlmsg_end(skb, nlh);
> +cont:
> + idx++;
> + }
> + }
> +
> + rcu_read_unlock();
> + cb->args[0] = idx;
> + return skb->len;
> +
> +fail:
> + rcu_read_unlock();
> + nlmsg_cancel(skb, nlh);
> + return skb->len;
Assuming you implement partial messages as proposed above you don't
want to nlmsg_cancel() here. Instead you just want to nlmsg_end() and
send out the message with a partial MDB_MDBA and continue where you
left off.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 2/2] bridge: export multicast database via netlink
2012-11-30 11:26 ` Thomas Graf
@ 2012-11-30 15:00 ` Cong Wang
2012-11-30 15:27 ` Thomas Graf
0 siblings, 1 reply; 13+ messages in thread
From: Cong Wang @ 2012-11-30 15:00 UTC (permalink / raw)
To: Thomas Graf
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On Fri, 2012-11-30 at 11:26 +0000, Thomas Graf wrote:
> On 11/30/12 at 05:58pm, Cong Wang wrote:
> > +
> > + nest = nla_nest_start(skb, MDBA_ROUTER);
> > + if (nest == NULL)
> > + return -EMSGSIZE;
> > +
> > + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
> > + if (p && nla_put_u16(skb, MDBA_BRPORT_NO, p->port_no))
> > + goto fail;
> > + }
>
> port_no 0 is reserved, right?
>
> We can reduce message size here and make it easier to extend by
> using p->port_no as the attribute id by doing something like this:
>
> hlist_for_each_entry_rcu(p, n, &br->router_list, rlist)
> if (nla_put_flag(skb, p->port_no))
> goto fail;
>
> This will result in an empty attribute body for now and if you ever
> need to include more data you can simply start putting attributes
> insde that empty body and old users will continue to function.
I don't understand this. nla_put_flag() is used to put a flag (only one
bit set) into a netlink message, so why should we use it to put
p->port_no here? And why port_no 0 matters here?
[...]
> > +
> > + cb->seq = mdb->seq;
>
> I'm not sure how this is supposed to worl. cb->seq may not change
> throughout the complete dump process or the dump will be interrupted
> and the user is required to restart.
>
> Each bridge will have its own mdb resulting in a differing seq.
>
So I should use net->dev_base_seq + mdb->seq ?
All of the rest suggestions are taken by me.
Thanks for your detailed review!
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 2/2] bridge: export multicast database via netlink
2012-11-30 15:00 ` Cong Wang
@ 2012-11-30 15:27 ` Thomas Graf
2012-12-01 3:56 ` Cong Wang
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Graf @ 2012-11-30 15:27 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On 11/30/12 at 11:00pm, Cong Wang wrote:
> I don't understand this. nla_put_flag() is used to put a flag (only one
> bit set) into a netlink message, so why should we use it to put
> p->port_no here? And why port_no 0 matters here?
nla_put_flag() will simply add a netlink attribute with no payload,
i.e. just the header. Assuming that port_no == 0 is invalid the
port_no can be used as attribute id as both are 16bit integers.
It will look like this:
MDBA_ROUTERS = {
{
.nla_len = 4,
.nla_type = <port_no_1>,
},
{
.nla_len = 4,
.nla_type = <port_no_2>,
}
[...]
}
If you ever need to extend this you can just add payload to the
per port attribute and nothing will break.
> So I should use net->dev_base_seq + mdb->seq ?
No you can't, mdb->seq is not stable throughout a dump. What you
can do is save mdb->seq in cb->args[] and in case you continue
dumping from the same mdb in the next call to your dump function
you check if it changed and bump cb->seq if it did to trigger an
interrupt.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 2/2] bridge: export multicast database via netlink
2012-11-30 15:27 ` Thomas Graf
@ 2012-12-01 3:56 ` Cong Wang
0 siblings, 0 replies; 13+ messages in thread
From: Cong Wang @ 2012-12-01 3:56 UTC (permalink / raw)
To: Thomas Graf
Cc: netdev, bridge, Herbert Xu, Jesper Dangaard Brouer,
Stephen Hemminger, David S. Miller
On Fri, 2012-11-30 at 15:27 +0000, Thomas Graf wrote:
> On 11/30/12 at 11:00pm, Cong Wang wrote:
> > I don't understand this. nla_put_flag() is used to put a flag (only one
> > bit set) into a netlink message, so why should we use it to put
> > p->port_no here? And why port_no 0 matters here?
>
> nla_put_flag() will simply add a netlink attribute with no payload,
> i.e. just the header. Assuming that port_no == 0 is invalid the
> port_no can be used as attribute id as both are 16bit integers.
>
> It will look like this:
>
> MDBA_ROUTERS = {
> {
> .nla_len = 4,
> .nla_type = <port_no_1>,
> },
> {
> .nla_len = 4,
> .nla_type = <port_no_2>,
> }
> [...]
> }
>
> If you ever need to extend this you can just add payload to the
> per port attribute and nothing will break.
Never mind, I will use port->dev->ifindex instead of port->port_no. This
will also make the user-space easier.
>
> > So I should use net->dev_base_seq + mdb->seq ?
>
> No you can't, mdb->seq is not stable throughout a dump. What you
> can do is save mdb->seq in cb->args[] and in case you continue
> dumping from the same mdb in the next call to your dump function
> you check if it changed and bump cb->seq if it did to trigger an
> interrupt.
Ok, will do.
Thanks!
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH iproute2 v1] Add mdb command to bridge
2012-11-30 9:58 [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Cong Wang
2012-11-30 9:58 ` [PATCH net-next v1 2/2] bridge: export multicast database via netlink Cong Wang
@ 2012-11-30 9:58 ` Cong Wang
2012-11-30 10:54 ` Thomas Graf
2012-11-30 10:18 ` [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Thomas Graf
2012-11-30 15:52 ` Stephen Hemminger
3 siblings, 1 reply; 13+ messages in thread
From: Cong Wang @ 2012-11-30 9:58 UTC (permalink / raw)
To: netdev
Cc: Cong Wang, bridge, Herbert Xu, Jesper Dangaard Brouer,
Thomas Graf, Stephen Hemminger, David S. Miller
Sample output:
# ./bridge/bridge mdb
bridge dev br0
multicast database:
port no 1, multicast addr 224.8.8.9
port no 2, multicast addr 224.8.8.8
router ports: 2
TODO: display device name too
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
bridge/Makefile | 2 +-
bridge/br_common.h | 35 ++++++++++
bridge/bridge.c | 1 +
bridge/mdb.c | 157 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/rtnetlink.h | 3 +
5 files changed, 197 insertions(+), 1 deletions(-)
diff --git a/bridge/Makefile b/bridge/Makefile
index 9a6743e..67aceb4 100644
--- a/bridge/Makefile
+++ b/bridge/Makefile
@@ -1,4 +1,4 @@
-BROBJ = bridge.o fdb.o monitor.o link.o
+BROBJ = bridge.o fdb.o monitor.o link.o mdb.o
include ../Config
diff --git a/bridge/br_common.h b/bridge/br_common.h
index 718ecb9..83b2b9f 100644
--- a/bridge/br_common.h
+++ b/bridge/br_common.h
@@ -5,6 +5,7 @@ extern int print_fdb(const struct sockaddr_nl *who,
struct nlmsghdr *n, void *arg);
extern int do_fdb(int argc, char **argv);
+extern int do_mdb(int argc, char **argv);
extern int do_monitor(int argc, char **argv);
extern int preferred_family;
@@ -12,3 +13,37 @@ extern int show_stats;
extern int show_detail;
extern int timestamp;
extern struct rtnl_handle rth;
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ * [MDBA_MCADDR]
+ * [MDBA_BRPORT_NO]
+ * }
+ * [MDBA_ROUTER] = {
+ * [MDBA_BRPORT_NO]
+ * }
+ */
+enum {
+ MDBA_UNSPEC,
+ MDBA_MDB,
+ MDBA_ROUTER,
+ __MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+ MDBA_MDB_UNSPEC,
+ MDBA_MCADDR,
+ MDBA_BRPORT_NO,
+ __MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
+struct br_port_msg {
+ int ifindex;
+};
+
+#ifndef MDBA_RTA
+#define MDBA_RTA(r) \
+ ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
+#endif
diff --git a/bridge/bridge.c b/bridge/bridge.c
index e2c33b0..1fcd365 100644
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@@ -43,6 +43,7 @@ static const struct cmd {
int (*func)(int argc, char **argv);
} cmds[] = {
{ "fdb", do_fdb },
+ { "mdb", do_mdb },
{ "monitor", do_monitor },
{ "help", do_help },
{ 0 }
diff --git a/bridge/mdb.c b/bridge/mdb.c
new file mode 100644
index 0000000..f2f4ff2
--- /dev/null
+++ b/bridge/mdb.c
@@ -0,0 +1,157 @@
+/*
+ * Get mdb table with netlink
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+#include <linux/if_ether.h>
+#include <linux/neighbour.h>
+#include <string.h>
+#include <arpa/inet.h>
+
+#include "libnetlink.h"
+#include "br_common.h"
+#include "rt_names.h"
+#include "utils.h"
+
+int filter_index;
+
+static void usage(void)
+{
+ fprintf(stderr, " bridge mdb {show} [ dev DEV ]\n");
+ exit(-1);
+}
+
+static void br_print_router_ports(FILE *f, struct rtattr *attr)
+{
+ uint16_t *port_no;
+ struct rtattr *i;
+ int rem;
+
+ fprintf(f, "router ports: ");
+
+ rem = RTA_PAYLOAD(attr);
+ for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+ port_no = RTA_DATA(i);
+ fprintf(f, "%u ", *port_no);
+ }
+ fprintf(f, "\n");
+}
+
+static void br_print_mdb_entry(FILE *f, struct rtattr *attr)
+{
+ uint32_t addr;
+ uint16_t port_no;
+ struct rtattr *i;
+ int rem;
+ SPRINT_BUF(abuf);
+
+ fprintf(f, "multicast database:\n");
+
+ rem = RTA_PAYLOAD(attr);
+ for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+ port_no = rta_getattr_u16(i);
+ i = RTA_NEXT(i, rem);
+ addr = rta_getattr_u32(i);
+ fprintf(f, "port no %u, multicast addr %s\n", port_no, inet_ntop(AF_INET, &addr, abuf, sizeof(abuf)));
+ }
+}
+
+int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+ FILE *fp = arg;
+ struct br_port_msg *r = NLMSG_DATA(n);
+ int len = n->nlmsg_len;
+ struct rtattr * tb[MDBA_MAX+1];
+
+ if (n->nlmsg_type != RTM_GETMDB) {
+ fprintf(stderr, "Not RTM_MDB: %08x %08x %08x\n",
+ n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+
+ return 0;
+ }
+
+ len -= NLMSG_LENGTH(sizeof(*r));
+ if (len < 0) {
+ fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+ return -1;
+ }
+
+ if (filter_index && filter_index != r->ifindex)
+ return 0;
+
+ if (!filter_index && r->ifindex)
+ fprintf(fp, "bridge dev %s\n", ll_index_to_name(r->ifindex));
+
+ parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+
+ if (tb[MDBA_MDB])
+ br_print_mdb_entry(fp, tb[MDBA_MDB]);
+
+ if (tb[MDBA_ROUTER])
+ br_print_router_ports(fp, tb[MDBA_ROUTER]);
+
+ return 0;
+}
+
+static int mdb_show(int argc, char **argv)
+{
+ char *filter_dev = NULL;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "dev") == 0) {
+ NEXT_ARG();
+ if (filter_dev)
+ duparg("dev", *argv);
+ filter_dev = *argv;
+ }
+ argc--; argv++;
+ }
+
+ if (filter_dev) {
+ filter_index = if_nametoindex(filter_dev);
+ if (filter_index == 0) {
+ fprintf(stderr, "Cannot find device \"%s\"\n",
+ filter_dev);
+ return -1;
+ }
+ }
+
+ if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
+ perror("Cannot send dump request");
+ exit(1);
+ }
+
+ if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
+ fprintf(stderr, "Dump terminated\n");
+ exit(1);
+ }
+
+ return 0;
+}
+
+int do_mdb(int argc, char **argv)
+{
+ ll_init_map(&rth);
+
+ if (argc > 0) {
+ if (matches(*argv, "show") == 0 ||
+ matches(*argv, "lst") == 0 ||
+ matches(*argv, "list") == 0)
+ return mdb_show(argc-1, argv+1);
+ if (matches(*argv, "help") == 0)
+ usage();
+ } else
+ return mdb_show(0, NULL);
+
+ fprintf(stderr, "Command \"%s\" is unknown, try \"bridge mdb help\".\n", *argv);
+ exit(-1);
+}
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0e3e0c1..f400b5e 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -120,6 +120,9 @@ enum {
RTM_SETDCB,
#define RTM_SETDCB RTM_SETDCB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH iproute2 v1] Add mdb command to bridge
2012-11-30 9:58 ` [PATCH iproute2 v1] Add mdb command to bridge Cong Wang
@ 2012-11-30 10:54 ` Thomas Graf
2012-11-30 14:50 ` Cong Wang
0 siblings, 1 reply; 13+ messages in thread
From: Thomas Graf @ 2012-11-30 10:54 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On 11/30/12 at 05:58pm, Cong Wang wrote:
> diff --git a/bridge/br_common.h b/bridge/br_common.h
> index 718ecb9..83b2b9f 100644
> --- a/bridge/br_common.h
> +++ b/bridge/br_common.h
> @@ -5,6 +5,7 @@ extern int print_fdb(const struct sockaddr_nl *who,
> struct nlmsghdr *n, void *arg);
>
> extern int do_fdb(int argc, char **argv);
> +extern int do_mdb(int argc, char **argv);
> extern int do_monitor(int argc, char **argv);
>
> extern int preferred_family;
> @@ -12,3 +13,37 @@ extern int show_stats;
> extern int show_detail;
> extern int timestamp;
> extern struct rtnl_handle rth;
> +
> +/* Bridge multicast database attributes
> + * [MDBA_MDB] = {
> + * [MDBA_MCADDR]
> + * [MDBA_BRPORT_NO]
> + * }
> + * [MDBA_ROUTER] = {
> + * [MDBA_BRPORT_NO]
> + * }
> + */
> +enum {
> + MDBA_UNSPEC,
> + MDBA_MDB,
> + MDBA_ROUTER,
> + __MDBA_MAX,
> +};
> +#define MDBA_MAX (__MDBA_MAX - 1)
> +
> +enum {
> + MDBA_MDB_UNSPEC,
> + MDBA_MCADDR,
> + MDBA_BRPORT_NO,
> + __MDBA_MDB_MAX,
> +};
> +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
> +
> +struct br_port_msg {
> + int ifindex;
> +};
> +
> +#ifndef MDBA_RTA
> +#define MDBA_RTA(r) \
> + ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
> +#endif
You shouldn't need to duplicate the attribute ids and struct br_port_msg
in iproute2. Just put these definitions in a uapi kernel header and
include the header from iproute2.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH iproute2 v1] Add mdb command to bridge
2012-11-30 10:54 ` Thomas Graf
@ 2012-11-30 14:50 ` Cong Wang
0 siblings, 0 replies; 13+ messages in thread
From: Cong Wang @ 2012-11-30 14:50 UTC (permalink / raw)
To: Thomas Graf
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On Fri, 2012-11-30 at 10:54 +0000, Thomas Graf wrote:
>
> You shouldn't need to duplicate the attribute ids and struct br_port_msg
> in iproute2. Just put these definitions in a uapi kernel header and
> include the header from iproute2.
Right, I was confused by include/linux/rtnetlink.h, thought iproute2 has
its own copy of these headers, but they should be exported from kernel.
Thanks!
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA
2012-11-30 9:58 [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Cong Wang
2012-11-30 9:58 ` [PATCH net-next v1 2/2] bridge: export multicast database via netlink Cong Wang
2012-11-30 9:58 ` [PATCH iproute2 v1] Add mdb command to bridge Cong Wang
@ 2012-11-30 10:18 ` Thomas Graf
2012-11-30 14:51 ` Cong Wang
2012-11-30 15:52 ` Stephen Hemminger
3 siblings, 1 reply; 13+ messages in thread
From: Thomas Graf @ 2012-11-30 10:18 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, Stephen Hemminger, David S. Miller,
Jesper Dangaard Brouer
On 11/30/12 at 05:58pm, Cong Wang wrote:
> @@ -168,6 +172,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
> [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
> [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
> [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
> + [IFLA_BRPORT_NO] = { .type = NLA_U16 },
> + [IFLA_BRPORT_ID] = { .type = NLA_U16 },
> };
>
> /* Change the state of the port and notify spanning tree */
I missed this in the first round. Not much of a point in adding policy
entries if the attributes are read-only as in this case.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA
2012-11-30 10:18 ` [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Thomas Graf
@ 2012-11-30 14:51 ` Cong Wang
0 siblings, 0 replies; 13+ messages in thread
From: Cong Wang @ 2012-11-30 14:51 UTC (permalink / raw)
To: Thomas Graf
Cc: netdev, bridge, Herbert Xu, Jesper Dangaard Brouer,
Stephen Hemminger, David S. Miller
On Fri, 2012-11-30 at 10:18 +0000, Thomas Graf wrote:
> On 11/30/12 at 05:58pm, Cong Wang wrote:
> > @@ -168,6 +172,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
> > [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
> > [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
> > [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
> > + [IFLA_BRPORT_NO] = { .type = NLA_U16 },
> > + [IFLA_BRPORT_ID] = { .type = NLA_U16 },
> > };
> >
> > /* Change the state of the port and notify spanning tree */
>
> I missed this in the first round. Not much of a point in adding policy
> entries if the attributes are read-only as in this case.
Yeah, remove these lines now.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA
2012-11-30 9:58 [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Cong Wang
` (2 preceding siblings ...)
2012-11-30 10:18 ` [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA Thomas Graf
@ 2012-11-30 15:52 ` Stephen Hemminger
2012-12-01 3:53 ` Cong Wang
3 siblings, 1 reply; 13+ messages in thread
From: Stephen Hemminger @ 2012-11-30 15:52 UTC (permalink / raw)
To: Cong Wang
Cc: netdev, bridge, Herbert Xu, David S. Miller, Thomas Graf,
Jesper Dangaard Brouer
On Fri, 30 Nov 2012 17:58:32 +0800
Cong Wang <amwang@redhat.com> wrote:
> port_no will be used to get ifindex of a port in user-space,
> export it togather with port_id.
>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Thomas Graf <tgraf@suug.ch>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Cong Wang <amwang@redhat.com>
> Acked-by: Thomas Graf <tgraf@suug.ch>
> Signed-off-by: Cong Wang <amwang@redhat.com>
> ---
> include/uapi/linux/if_link.h | 2 ++
> net/bridge/br_netlink.c | 8 +++++++-
> 2 files changed, 9 insertions(+), 1 deletions(-)
>
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index bb58aeb..9cd91a9 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -218,6 +218,8 @@ enum {
> IFLA_BRPORT_MODE, /* mode (hairpin) */
> IFLA_BRPORT_GUARD, /* bpdu guard */
> IFLA_BRPORT_PROTECT, /* root port protection */
> + IFLA_BRPORT_NO, /* port no */
> + IFLA_BRPORT_ID, /* port id */
> __IFLA_BRPORT_MAX
> };
> #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index 65429b9..7b7414e 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -28,6 +28,8 @@ static inline size_t br_port_info_size(void)
> + nla_total_size(1) /* IFLA_BRPORT_MODE */
> + nla_total_size(1) /* IFLA_BRPORT_GUARD */
> + nla_total_size(1) /* IFLA_BRPORT_PROTECT */
> + + nla_total_size(2) /* IFLA_BRPORT_NO */
> + + nla_total_size(2) /* IFLA_BRPORT_ID */
> + 0;
> }
>
> @@ -53,7 +55,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
> nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
> nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
> nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
> - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)))
> + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
> + nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) ||
> + nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id))
> return -EMSGSIZE;
>
> return 0;
> @@ -168,6 +172,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
> [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
> [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
> [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
> + [IFLA_BRPORT_NO] = { .type = NLA_U16 },
> + [IFLA_BRPORT_ID] = { .type = NLA_U16 },
> };
>
> /* Change the state of the port and notify spanning tree */
I don't think these are necessary. The device is already available and the relationship
can be determined from other messages. This is what RSTP daemon does.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v1 1/2] bridge: export port_no and port_id via IFA_INFO_DATA
2012-11-30 15:52 ` Stephen Hemminger
@ 2012-12-01 3:53 ` Cong Wang
0 siblings, 0 replies; 13+ messages in thread
From: Cong Wang @ 2012-12-01 3:53 UTC (permalink / raw)
To: Stephen Hemminger
Cc: netdev, bridge, Herbert Xu, Jesper Dangaard Brouer, Thomas Graf,
David S. Miller
On Fri, 2012-11-30 at 07:52 -0800, Stephen Hemminger wrote:
> I don't think these are necessary. The device is already available and
> the relationship
> can be determined from other messages. This is what RSTP daemon does.
>
Rethinking about it, I think we can just put port->dev->ifindex instead
of port->port_no in the netlink message, so patch 1/2 is not needed at
more.
Thanks!
^ permalink raw reply [flat|nested] 13+ messages in thread