Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH V2 07/12] bridge: Add netlink interface to configure vlans on bridge ports
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

Add a netlink interface to add and remove vlan configuration on bridge port.
The interface uses the RTM_SETLINK message and encodes the vlan
configuration inside the IFLA_AF_SPEC.  It is possible to include multiple
vlans to either add or remove in a single message.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/uapi/linux/if_bridge.h |   17 ++++++
 net/bridge/br_if.c             |    1 +
 net/bridge/br_netlink.c        |  107 +++++++++++++++++++++++++++++++++------
 3 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 52aa738..d0b4f5c 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -108,15 +108,32 @@ struct __fdb_entry {
  * [IFLA_AF_SPEC] = {
  *     [IFLA_BRIDGE_FLAGS]
  *     [IFLA_BRIDGE_MODE]
+ *     [IFLA_BRIDGE_VLAN_INFO]
  * }
  */
 enum {
 	IFLA_BRIDGE_FLAGS,
 	IFLA_BRIDGE_MODE,
+	IFLA_BRIDGE_VLAN_INFO,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
 
+/* Bridge VLAN info
+ * [IFLA_BRIDGE_VLAN_INFO]
+ */
+enum {
+	BR_VLAN_ADD,
+	BR_VLAN_DEL,
+};
+
+struct bridge_vlan_info {
+	u16 op_code;
+	u16 flags;
+	u16 vid;
+	u16 unused;
+};
+
 /* Bridge multicast database attributes
  * [MDBA_MDB] = {
  *     [MDBA_MDB_ENTRY] = {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 14c7c6a..57bbb35 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -23,6 +23,7 @@
 #include <linux/if_ether.h>
 #include <linux/slab.h>
 #include <net/sock.h>
+#include <linux/if_vlan.h>
 
 #include "br_private.h"
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index dead9df..9cf2879 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,6 +16,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <uapi/linux/if_bridge.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
@@ -123,6 +124,9 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	if (!port)
+		return;
+
 	br_debug(port->br, "port %u(%s) event %d\n",
 		 (unsigned int)port->port_no, port->dev->name, event);
 
@@ -162,6 +166,60 @@ out:
 	return err;
 }
 
+const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
+	[IFLA_BRIDGE_FLAGS]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_MODE]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_VLAN_INFO]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct bridge_vlan_info), },
+};
+
+static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
+		     struct nlattr *af_spec)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MAX+1];
+	int err = 0;
+
+	if (nla_type(af_spec) != AF_BRIDGE)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_VLAN_INFO]) {
+		struct bridge_vlan_info *vinfo;
+
+		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
+
+		if (vinfo->vid > VLAN_N_VID)
+			return -EINVAL;
+
+		switch (vinfo->op_code) {
+		case BR_VLAN_ADD:
+			if (p)
+				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
+			else {
+				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
+				if (!br_vlan_add(br, vinfo->vid, flags))
+					err = -ENOMEM;
+			}
+			break;
+
+		case BR_VLAN_DEL:
+			if (p)
+				err = nbp_vlan_delete(p, vinfo->vid,
+						      vinfo->flags);
+			else {
+				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
+				err = br_vlan_delete(br, vinfo->vid, flags);
+			}
+			break;
+		}
+	}
+
+	return err;
+}
+
 static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_STATE]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_COST]	= { .type = NLA_U32 },
@@ -238,6 +296,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
+	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
 	int err;
@@ -245,35 +304,49 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 	ifm = nlmsg_data(nlh);
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
-	if (!protinfo)
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!protinfo && !afspec)
 		return 0;
 
 	p = br_port_get_rtnl(dev);
-	if (!p)
+	/* We want to accept dev as bridge itself if the AF_SPEC
+	 * is set to see if someone is setting vlan info on the brigde.
+	 */
+	if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
 		return -EINVAL;
 
-	if (protinfo->nla_type & NLA_F_NESTED) {
-		err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
-				       protinfo, ifla_brport_policy);
+	if (p && protinfo) {
+		if (protinfo->nla_type & NLA_F_NESTED) {
+			err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
+					       protinfo, ifla_brport_policy);
+			if (err)
+				return err;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_setport(p, tb);
+			spin_unlock_bh(&p->br->lock);
+		} else {
+			/* Binary compatability with old RSTP */
+			if (nla_len(protinfo) < sizeof(u8))
+				return -EINVAL;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_set_port_state(p, nla_get_u8(protinfo));
+			spin_unlock_bh(&p->br->lock);
+		}
 		if (err)
-			return err;
-
-		spin_lock_bh(&p->br->lock);
-		err = br_setport(p, tb);
-		spin_unlock_bh(&p->br->lock);
-	} else {
-		/* Binary compatability with old RSTP */
-		if (nla_len(protinfo) < sizeof(u8))
-			return -EINVAL;
+			goto out;
+	}
 
-		spin_lock_bh(&p->br->lock);
-		err = br_set_port_state(p, nla_get_u8(protinfo));
-		spin_unlock_bh(&p->br->lock);
+	if (afspec) {
+		err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+				afspec);
 	}
 
 	if (err == 0)
 		br_ifinfo_notify(RTM_NEWLINK, p);
 
+out:
 	return err;
 }
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 06/12] bridge: Add vlan id to multicast groups
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

Add vlan_id to multicast groups so that we know which vlan
each group belongs to and can correctly forward to the appropriate vlan.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/bridge/br_multicast.c |   64 +++++++++++++++++++++++++++++++--------------
 net/bridge/br_private.h   |    1 +
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 68e375a..072aa2d 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -51,6 +51,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 {
 	if (a->proto != b->proto)
 		return 0;
+	if (a->vid != b->vid)
+		return 0;
 	switch (a->proto) {
 	case htons(ETH_P_IP):
 		return a->u.ip4 == b->u.ip4;
@@ -62,16 +64,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 	return 0;
 }
 
-static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
+static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
+				__u16 vid)
 {
-	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
+	return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
-				const struct in6_addr *ip)
+				const struct in6_addr *ip,
+				__u16 vid)
 {
-	return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
+	return jhash_2words(ipv6_addr_hash(ip), vid,
+			    mdb->secret) & (mdb->max - 1);
 }
 #endif
 
@@ -80,10 +85,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
 {
 	switch (ip->proto) {
 	case htons(ETH_P_IP):
-		return __br_ip4_hash(mdb, ip->u.ip4);
+		return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return __br_ip6_hash(mdb, &ip->u.ip6);
+		return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
 #endif
 	}
 	return 0;
@@ -113,24 +118,27 @@ static struct net_bridge_mdb_entry *br_mdb_ip_get(
 }
 
 static struct net_bridge_mdb_entry *br_mdb_ip4_get(
-	struct net_bridge_mdb_htable *mdb, __be32 dst)
+	struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid)
 {
 	struct br_ip br_dst;
 
 	br_dst.u.ip4 = dst;
 	br_dst.proto = htons(ETH_P_IP);
+	br_dst.vid = vid;
 
 	return br_mdb_ip_get(mdb, &br_dst);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static struct net_bridge_mdb_entry *br_mdb_ip6_get(
-	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
+	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
+	__u16 vid)
 {
 	struct br_ip br_dst;
 
 	br_dst.u.ip6 = *dst;
 	br_dst.proto = htons(ETH_P_IPV6);
+	br_dst.vid = vid;
 
 	return br_mdb_ip_get(mdb, &br_dst);
 }
@@ -692,7 +700,8 @@ err:
 
 static int br_ip4_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
-				      __be32 group)
+				      __be32 group,
+				      __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -701,6 +710,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
+	br_group.vid = vid;
 
 	return br_multicast_add_group(br, port, &br_group);
 }
@@ -708,7 +718,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
-				      const struct in6_addr *group)
+				      const struct in6_addr *group,
+				      __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -717,6 +728,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
+	br_group.vid = vid;
 
 	return br_multicast_add_group(br, port, &br_group);
 }
@@ -928,7 +940,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip4_multicast_add_group(br, port, group);
+		err = br_ip4_multicast_add_group(br, port, group,
+						 br_get_vlan(skb));
 		if (err)
 			break;
 	}
@@ -988,7 +1001,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca);
+		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
+						 br_get_vlan(skb));
 		if (!err)
 			break;
 	}
@@ -1106,7 +1120,8 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
+	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group,
+			    br_get_vlan(skb));
 	if (!mp)
 		goto out;
 
@@ -1178,7 +1193,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
+	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group,
+			    br_get_vlan(skb));
 	if (!mp)
 		goto out;
 
@@ -1283,7 +1299,8 @@ out:
 
 static void br_ip4_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
-					 __be32 group)
+					 __be32 group,
+					 __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -1292,6 +1309,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 
 	br_group.u.ip4 = group;
 	br_group.proto = htons(ETH_P_IP);
+	br_group.vid = vid;
 
 	br_multicast_leave_group(br, port, &br_group);
 }
@@ -1299,7 +1317,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
-					 const struct in6_addr *group)
+					 const struct in6_addr *group,
+					 __u16 vid)
 {
 	struct br_ip br_group;
 
@@ -1308,6 +1327,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 
 	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
+	br_group.vid = vid;
 
 	br_multicast_leave_group(br, port, &br_group);
 }
@@ -1390,7 +1410,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-		err = br_ip4_multicast_add_group(br, port, ih->group);
+		err = br_ip4_multicast_add_group(br, port, ih->group,
+						 br_get_vlan(skb2));
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		err = br_ip4_multicast_igmp3_report(br, port, skb2);
@@ -1399,7 +1420,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 		err = br_ip4_multicast_query(br, port, skb2);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
-		br_ip4_multicast_leave_group(br, port, ih->group);
+		br_ip4_multicast_leave_group(br, port, ih->group,
+					     br_get_vlan(skb2));
 		break;
 	}
 
@@ -1519,7 +1541,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 		}
 		mld = (struct mld_msg *)skb_transport_header(skb2);
 		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
+		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca,
+						 br_get_vlan(skb2));
 		break;
 	    }
 	case ICMPV6_MLD2_REPORT:
@@ -1536,7 +1559,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 			goto out;
 		}
 		mld = (struct mld_msg *)skb_transport_header(skb2);
-		br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
+		br_ip6_multicast_leave_group(br, port, &mld->mld_mca,
+					     br_get_vlan(skb2));
 	    }
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7a28900..2569afb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -63,6 +63,7 @@ struct br_ip
 #endif
 	} u;
 	__be16		proto;
+	__u16		vid;
 };
 
 #define BR_INVALID_VID	(1<<15)
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 05/12] bridge: Add vlan to unicast fdb entries
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

This patch adds vlan to unicast fdb entries that are created for
learned addresses (not the manually configured ones).  It adds
vlan id into the hash mix and uses vlan as an additional parameter
for an entry match.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/uapi/linux/if_bridge.h |    2 +-
 net/bridge/br_device.c         |    6 ++-
 net/bridge/br_fdb.c            |   70 +++++++++++++++++++++++----------------
 net/bridge/br_input.c          |   16 +++++----
 net/bridge/br_private.h        |    7 +++-
 5 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 9a0f6ff..52aa738 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -94,7 +94,7 @@ struct __fdb_entry {
 	__u32 ageing_timer_value;
 	__u8 port_hi;
 	__u8 pad0;
-	__u16 unused;
+	__u16 fdb_vid;
 };
 
 /* Bridge Flags */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 57c5bac..1f9d0f9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -31,6 +31,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 	struct net_bridge_vlan *vlan;
+	u16 vid;
 
 	rcu_read_lock();
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -51,7 +52,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Any vlan transmitted by the bridge itself is permitted.
 	 * Try to cache the vlan in the CB to speed up forwarding.
 	 */
-	vlan = br_vlan_find(br, br_get_vlan(skb));
+	vid = br_get_vlan(skb);
+	vlan = br_vlan_find(br, vid);
 	if (vlan)
 		BR_INPUT_SKB_CB(skb)->vlan = vlan;
 
@@ -75,7 +77,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			br_multicast_deliver(mdst, skb);
 		else
 			br_flood_deliver(br, skb);
-	} else if ((dst = __br_fdb_get(br, dest)) != NULL)
+	} else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
 		br_deliver(dst->dst, skb);
 	else
 		br_flood_deliver(br, skb);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d9576e6..a244efc 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <asm/unaligned.h>
+#include <linux/if_vlan.h>
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -67,11 +68,11 @@ static inline int has_expired(const struct net_bridge *br,
 		time_before_eq(fdb->updated + hold_time(br), jiffies);
 }
 
-static inline int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
 {
-	/* use 1 byte of OUI cnd 3 bytes of NIC */
+	/* use 1 byte of OUI and 3 bytes of NIC */
 	u32 key = get_unaligned((u32 *)(mac + 2));
-	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
+	return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
 }
 
 static void fdb_rcu_free(struct rcu_head *head)
@@ -132,7 +133,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 	struct net_bridge_fdb_entry *f;
 
 	/* If old entry was unassociated with any port, then delete it. */
-	f = __br_fdb_get(br, br->dev->dev_addr);
+	f = __br_fdb_get(br, br->dev->dev_addr, 0);
 	if (f && f->is_local && !f->dst)
 		fdb_delete(br, f);
 
@@ -231,13 +232,16 @@ void br_fdb_delete_by_port(struct net_bridge *br,
 
 /* No locking or refcounting, assumes caller has rcu_read_lock */
 struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
-					  const unsigned char *addr)
+					  const unsigned char *addr,
+					  __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
-	hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr)) {
+	hlist_for_each_entry_rcu(fdb, h,
+				&br->hash[br_mac_hash(addr, vid)], hlist) {
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid) {
 			if (unlikely(has_expired(br, fdb)))
 				break;
 			return fdb;
@@ -261,7 +265,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 	if (!port)
 		ret = 0;
 	else {
-		fdb = __br_fdb_get(port->br, addr);
+		fdb = __br_fdb_get(port->br, addr, 0);
 		ret = fdb && fdb->dst && fdb->dst->dev != dev &&
 			fdb->dst->state == BR_STATE_FORWARDING;
 	}
@@ -313,6 +317,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 			fe->is_local = f->is_local;
 			if (!f->is_static)
 				fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
+			fe->fdb_vid = f->vlan_id;
 			++fe;
 			++num;
 		}
@@ -325,26 +330,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 }
 
 static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
-					     const unsigned char *addr)
+					     const unsigned char *addr,
+					     __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
 	hlist_for_each_entry(fdb, h, head, hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr))
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid)
 			return fdb;
 	}
 	return NULL;
 }
 
 static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
-						 const unsigned char *addr)
+						 const unsigned char *addr,
+						 __u16 vid)
 {
 	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
 	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
-		if (ether_addr_equal(fdb->addr.addr, addr))
+		if (ether_addr_equal(fdb->addr.addr, addr) &&
+		    fdb->vlan_id == vid)
 			return fdb;
 	}
 	return NULL;
@@ -352,7 +361,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
 
 static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 					       struct net_bridge_port *source,
-					       const unsigned char *addr)
+					       const unsigned char *addr,
+					       __u16 vid)
 {
 	struct net_bridge_fdb_entry *fdb;
 
@@ -360,6 +370,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 	if (fdb) {
 		memcpy(fdb->addr.addr, addr, ETH_ALEN);
 		fdb->dst = source;
+		fdb->vlan_id = vid;
 		fdb->is_local = 0;
 		fdb->is_static = 0;
 		fdb->updated = fdb->used = jiffies;
@@ -371,13 +382,13 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		  const unsigned char *addr)
 {
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
 	struct net_bridge_fdb_entry *fdb;
 
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, 0);
 	if (fdb) {
 		/* it is okay to have multiple ports with same
 		 * address, just use the first one.
@@ -390,7 +401,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		fdb_delete(br, fdb);
 	}
 
-	fdb = fdb_create(head, source, addr);
+	fdb = fdb_create(head, source, addr, 0);
 	if (!fdb)
 		return -ENOMEM;
 
@@ -412,9 +423,9 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 }
 
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-		   const unsigned char *addr)
+		   const unsigned char *addr, u16 vid)
 {
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
 	/* some users want to always flood. */
@@ -426,7 +437,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	      source->state == BR_STATE_FORWARDING))
 		return;
 
-	fdb = fdb_find_rcu(head, addr);
+	fdb = fdb_find_rcu(head, addr, vid);
 	if (likely(fdb)) {
 		/* attempt to update an entry for a local interface */
 		if (unlikely(fdb->is_local)) {
@@ -441,8 +452,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		}
 	} else {
 		spin_lock(&br->hash_lock);
-		if (likely(!fdb_find(head, addr))) {
-			fdb = fdb_create(head, source, addr);
+		if (likely(!fdb_find(head, addr, vid))) {
+			fdb = fdb_create(head, source, addr, vid);
 			if (fdb)
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
@@ -571,18 +582,18 @@ out:
 
 /* Update (create or replace) forwarding database entry */
 static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-			 __u16 state, __u16 flags)
+			 __u16 state, __u16 flags, __u16 vid)
 {
 	struct net_bridge *br = source->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, vid);
 	if (fdb == NULL) {
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		fdb = fdb_create(head, source, addr);
+		fdb = fdb_create(head, source, addr, vid);
 		if (!fdb)
 			return -ENOMEM;
 		fdb_notify(br, fdb, RTM_NEWNEIGH);
@@ -629,11 +640,12 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 	if (ndm->ndm_flags & NTF_USE) {
 		rcu_read_lock();
-		br_fdb_update(p->br, p, addr);
+		br_fdb_update(p->br, p, addr, 0);
 		rcu_read_unlock();
 	} else {
 		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
+		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags,
+				0);
 		spin_unlock_bh(&p->br->hash_lock);
 	}
 
@@ -643,10 +655,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
 {
 	struct net_bridge *br = p->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr);
+	fdb = fdb_find(head, addr, 0);
 	if (!fdb)
 		return -ENOENT;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e475f49..e51eb24 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -42,10 +42,10 @@ static int br_pass_frame_up(struct sk_buff *skb)
 		       netif_receive_skb);
 }
 
-static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb)
+static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb,
+			       u16 vid)
 {
 	struct net_port_vlan *pve;
-	u16 vid;
 
 	BR_INPUT_SKB_CB(skb)->vlan = NULL;
 
@@ -55,7 +55,6 @@ static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb)
 	if (list_empty(&p->vlan_list))
 		return true;
 
-	vid = br_get_vlan(skb);
 	pve = nbp_vlan_find(p, vid);
 	if (pve) {
 		BR_INPUT_SKB_CB(skb)->vlan = pve->vlan;
@@ -74,16 +73,18 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
 	struct sk_buff *skb2;
+	u16 vid;
 
 	if (!p || p->state == BR_STATE_DISABLED)
 		goto drop;
 
-	if (!br_allowed_ingress(p, skb))
+	vid = br_get_vlan(skb);
+	if (!br_allowed_ingress(p, skb, vid))
 		goto drop;
 
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br = p->br;
-	br_fdb_update(br, p, eth_hdr(skb)->h_source);
+	br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
 
 	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
 	    br_multicast_rcv(br, p, skb))
@@ -118,7 +119,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
 			skb2 = skb;
 
 		br->dev->stats.multicast++;
-	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
+	} else if ((dst = __br_fdb_get(br, dest, vid)) &&
+			dst->is_local) {
 		skb2 = skb;
 		/* Do not forward the packet since it's local. */
 		skb = NULL;
@@ -147,7 +149,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 
-	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source, br_get_vlan(skb));
 	return 0;	 /* process further */
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6793088..7a28900 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -95,6 +95,7 @@ struct net_bridge_fdb_entry
 	mac_addr			addr;
 	unsigned char			is_local;
 	unsigned char			is_static;
+	__u16				vlan_id;
 };
 
 struct net_bridge_port_group {
@@ -392,7 +393,8 @@ extern void br_fdb_cleanup(unsigned long arg);
 extern void br_fdb_delete_by_port(struct net_bridge *br,
 				  const struct net_bridge_port *p, int do_all);
 extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
-						 const unsigned char *addr);
+						 const unsigned char *addr,
+						 __u16 vid);
 extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
 extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 			  unsigned long count, unsigned long off);
@@ -401,7 +403,8 @@ extern int br_fdb_insert(struct net_bridge *br,
 			 const unsigned char *addr);
 extern void br_fdb_update(struct net_bridge *br,
 			  struct net_bridge_port *source,
-			  const unsigned char *addr);
+			  const unsigned char *addr,
+			  u16 vid);
 
 extern int br_fdb_delete(struct ndmsg *ndm,
 			 struct net_device *dev,
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 04/12] bridge: Cache vlan in the cb for faster egress lookup.
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

On input, cache the pointer to the bridge vlan info, so that
on egress, we can simply look at the port bitmap instead
of traversing a vlan list.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/bridge/br_device.c  |    8 ++++++++
 net/bridge/br_forward.c |   14 ++++++++++++++
 net/bridge/br_input.c   |    6 +++++-
 net/bridge/br_private.h |    1 +
 4 files changed, 28 insertions(+), 1 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9546742..57c5bac 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -30,6 +30,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
+	struct net_bridge_vlan *vlan;
 
 	rcu_read_lock();
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -47,6 +48,13 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
+	/* Any vlan transmitted by the bridge itself is permitted.
+	 * Try to cache the vlan in the CB to speed up forwarding.
+	 */
+	vlan = br_vlan_find(br, br_get_vlan(skb));
+	if (vlan)
+		BR_INPUT_SKB_CB(skb)->vlan = vlan;
+
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 0c7ffc2..4ae5f55 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -30,11 +30,25 @@ static inline bool br_allowed_egress(const struct net_bridge_port *p,
 				     const struct sk_buff *skb)
 {
 	struct net_port_vlan *pve;
+	struct net_bridge_vlan *vlan = NULL;
 	u16 vid;
 
 	if (list_empty(&p->vlan_list))
 		return true;
 
+	vlan = BR_INPUT_SKB_CB(skb)->vlan;
+	if (vlan) {
+		/* If we have cached VLAN information, use port_bitmap
+		 * of the vlan to make the decision
+		 */
+		if (test_bit(p->port_no, vlan->port_bitmap))
+			return true;
+		return false;
+	}
+
+	/* We don't have cached vlan information, so we need to do
+	 * it the hard way.
+	 */
 	vid = br_get_vlan(skb);
 	pve = nbp_vlan_find(p, vid);
 	if (pve)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 54c0894..e475f49 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -47,6 +47,8 @@ static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb)
 	struct net_port_vlan *pve;
 	u16 vid;
 
+	BR_INPUT_SKB_CB(skb)->vlan = NULL;
+
 	/* If there are no vlan in the permitted list, all packets are
 	 * permitted.
 	 */
@@ -55,8 +57,10 @@ static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb)
 
 	vid = br_get_vlan(skb);
 	pve = nbp_vlan_find(p, vid);
-	if (pve)
+	if (pve) {
+		BR_INPUT_SKB_CB(skb)->vlan = pve->vlan;
 		return true;
+	}
 
 	return false;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 5090134..6793088 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -300,6 +300,7 @@ struct net_bridge
 
 struct br_input_skb_cb {
 	struct net_device *brdev;
+	struct net_bridge_vlan *vlan;
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	int igmp;
 	int mrouters_only;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 03/12] bridge: Verify that a vlan is allowed to egress on give port
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

When bridge forwards a frame, make sure that a frame is allowed
to egress on that port.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/bridge/br_forward.c |   18 ++++++++++++++++++
 net/bridge/br_private.h |    1 +
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 02015a5..0c7ffc2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -26,11 +26,29 @@ static int deliver_clone(const struct net_bridge_port *prev,
 			 void (*__packet_hook)(const struct net_bridge_port *p,
 					       struct sk_buff *skb));
 
+static inline bool br_allowed_egress(const struct net_bridge_port *p,
+				     const struct sk_buff *skb)
+{
+	struct net_port_vlan *pve;
+	u16 vid;
+
+	if (list_empty(&p->vlan_list))
+		return true;
+
+	vid = br_get_vlan(skb);
+	pve = nbp_vlan_find(p, vid);
+	if (pve)
+		return true;
+
+	return false;
+}
+
 /* Don't forward packets to originating port or forwarding diasabled */
 static inline int should_deliver(const struct net_bridge_port *p,
 				 const struct sk_buff *skb)
 {
 	return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
+		br_allowed_egress(p, skb) &&
 		p->state == BR_STATE_FORWARDING);
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1ba76b4..5090134 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -202,6 +202,7 @@ static inline u16 br_get_vlan(const struct sk_buff *skb)
 	if (vlan_tx_tag_present(skb))
 		return vlan_tx_tag_get(skb) & VLAN_VID_MASK;
 
+	/* Untagged and VLAN 0 traffic is handled the same way */
 	if (vlan_get_tag(skb, &tag))
 		return 0;
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 00/12] Add basic VLAN support to bridges
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst

This series of patches provides an ability to add VLANs to the bridge
ports.  This is similar to what can be found in most switches.  The bridge
port may have any number of VLANs added to it including vlan 0 priority tagged
traffic.  When vlans are added to the port, only traffic tagged with a particular
vlan will be forwarded over this port.  Additionally, vlan ids are added to FDB
entries and become part of the lookup.  This way we correctly identify the FDB
entry.

A single vlan may also be designated as untagged.  Any untagged traffic
received by the port will be assigned to this vlan.  Any traffic exiting
the port with a VID matching the untagged vlan will exit untagged (the
bridge will strip the vlan header).  This is similar to "Native Vlan" support
available in most switches.

The default behavior of the bridge is unchanged if no vlans have been
configured.

Changes since v1:
 - Fixed some forwarding bugs.
 - Add vlan to local fdb entries.  New local entries are created per vlan
   to facilitate correct forwarding to the bridge interface.
 - Allow configuration of vlans directly on the bridge master device
   in addition to ports.

Changes since rfc v2:
 - Per-port vlan bitmap is gone and is replaced with a vlan list.
 - Added bridge vlan list, which is referenced by each port.  Entries in
   the bridge vlan list have a port bitmap that shows which ports are part
   of which vlan.
 - Netlink API changes.
 - Dropped sysfs support for now.  If people think this is really useful,
   I can add it back.
 - Support for native/untagged vlans.

Changes since rfc v1:
 - Comments addressed regarding formatting and RCU usage
 - ioctls have been removed and changed over to the netlink interface.
 - Added support of user added ndb entries.
 - changed sysfs interface to export a bitmap.  Also added a write interface.
   I am not sure how much I like it, but it made my testing easier/faster.  I
   might change the write interface to take text instead of binary.

Vlad Yasevich (12):
  bridge: Add vlan filtering infrastructure
  bridge: Validate that vlan is permitted on ingress
  bridge: Verify that a vlan is allowed to egress on give port
  bridge: Cache vlan in the cb for faster egress lookup.
  bridge: Add vlan to unicast fdb entries
  bridge: Add vlan id to multicast groups
  bridge: Add netlink interface to configure vlans on bridge ports
  bridge: Add vlan support to static neighbors
  bridge: Add the ability to configure untagged vlans
  bridge: Implement untagged vlan handling
  bridge: Dump vlan information from a bridge port
  bridge: Add vlan support for local fdb entries

 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    5 +-
 drivers/net/macvlan.c                         |    2 +-
 drivers/net/vxlan.c                           |    3 +-
 include/linux/netdevice.h                     |    4 +-
 include/uapi/linux/if_bridge.h                |   23 ++-
 include/uapi/linux/neighbour.h                |    1 +
 include/uapi/linux/rtnetlink.h                |    1 +
 net/bridge/br_device.c                        |   34 ++-
 net/bridge/br_fdb.c                           |  253 ++++++++++++---
 net/bridge/br_forward.c                       |  160 ++++++++++
 net/bridge/br_if.c                            |  404 ++++++++++++++++++++++++-
 net/bridge/br_input.c                         |   65 ++++-
 net/bridge/br_multicast.c                     |   71 +++--
 net/bridge/br_netlink.c                       |  178 ++++++++++--
 net/bridge/br_private.h                       |   71 ++++-
 net/core/rtnetlink.c                          |   40 ++-
 16 files changed, 1190 insertions(+), 125 deletions(-)

-- 
1.7.7.6

^ permalink raw reply

* [PATCH V2 02/12] bridge: Validate that vlan is permitted on ingress
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

When a frame arrives on a port, if we have VLANs configured,
validate that a given VLAN is allowed to ingress on a given
port.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/bridge/br_input.c   |   23 +++++++++++++++++++++++
 net/bridge/br_private.h |   15 +++++++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 4b34207..54c0894 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -17,6 +17,7 @@
 #include <linux/etherdevice.h>
 #include <linux/netfilter_bridge.h>
 #include <linux/export.h>
+#include <linux/rculist.h>
 #include "br_private.h"
 
 /* Hook for brouter */
@@ -41,6 +42,25 @@ static int br_pass_frame_up(struct sk_buff *skb)
 		       netif_receive_skb);
 }
 
+static bool br_allowed_ingress(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	struct net_port_vlan *pve;
+	u16 vid;
+
+	/* If there are no vlans in the permitted list, all packets are
+	 * permitted.
+	 */
+	if (list_empty(&p->vlan_list))
+		return true;
+
+	vid = br_get_vlan(skb);
+	pve = nbp_vlan_find(p, vid);
+	if (pve)
+		return true;
+
+	return false;
+}
+
 /* note: already called with rcu_read_lock */
 int br_handle_frame_finish(struct sk_buff *skb)
 {
@@ -54,6 +74,9 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	if (!p || p->state == BR_STATE_DISABLED)
 		goto drop;
 
+	if (!br_allowed_ingress(p, skb))
+		goto drop;
+
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br = p->br;
 	br_fdb_update(br, p, eth_hdr(skb)->h_source);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 76d9fbc..1ba76b4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -66,8 +66,6 @@ struct br_ip
 };
 
 #define BR_INVALID_VID	(1<<15)
-#define BR_UNTAGGED_VID (1<<14)
-
 #define BR_VID_HASH_SIZE (1<<6)
 #define br_vlan_hash(vid) ((vid) % (BR_VID_HASH_SIZE - 1))
 
@@ -197,6 +195,19 @@ static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
 		rtnl_dereference(dev->rx_handler_data) : NULL;
 }
 
+static inline u16 br_get_vlan(const struct sk_buff *skb)
+{
+	u16 tag;
+
+	if (vlan_tx_tag_present(skb))
+		return vlan_tx_tag_get(skb) & VLAN_VID_MASK;
+
+	if (vlan_get_tag(skb, &tag))
+		return 0;
+
+	return tag & VLAN_VID_MASK;
+}
+
 struct br_cpu_netstats {
 	u64			rx_packets;
 	u64			rx_bytes;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 01/12] bridge: Add vlan filtering infrastructure
From: Vlad Yasevich @ 2012-12-18 19:00 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, davem, or.gerlitz, jhs, mst
In-Reply-To: <1355857263-31197-1-git-send-email-vyasevic@redhat.com>

This is an infrastructure patch.  It adds 2 structure types:
  net_bridge_vlan - list element of all vlans that have been configured
                    on the bridge.
  net_port_vlan - list element of all vlans configured on a specific port.
                  references net_bridge_vlan.

In this implementation, bridge has a hash list of all vlans that have
been added to the bridge.  Each vlan element holds a vid and port_bitmap
where each port sets its bit if a given vlan is added to the port.

Each port has its own list of vlans.  Each element here references a vlan
from the bridge list.

Write access to both lists is protected by RTNL, and read access is
protected by RCU.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 net/bridge/br_device.c  |    3 +
 net/bridge/br_if.c      |  251 +++++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h |   33 ++++++
 3 files changed, 287 insertions(+), 0 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 7c78e26..9546742 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -332,6 +332,7 @@ static struct device_type br_type = {
 void br_dev_setup(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
+	int i;
 
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
@@ -354,6 +355,8 @@ void br_dev_setup(struct net_device *dev)
 	spin_lock_init(&br->lock);
 	INIT_LIST_HEAD(&br->port_list);
 	spin_lock_init(&br->hash_lock);
+	for (i = 0; i < BR_VID_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&br->vlan_hlist[i]);
 
 	br->bridge_id.prio[0] = 0x80;
 	br->bridge_id.prio[1] = 0x00;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1c8fdc3..14c7c6a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -83,6 +83,254 @@ void br_port_carrier_check(struct net_bridge_port *p)
 	spin_unlock_bh(&br->lock);
 }
 
+static void br_vlan_destroy(struct net_bridge_vlan *vlan)
+{
+	if (!bitmap_empty(vlan->port_bitmap, PORT_BITMAP_LEN)) {
+		pr_err("Attempt to delete a VLAN %d from the bridge with "
+		       "non-empty port bitmap (%p)\n", vlan->vid, vlan);
+		BUG();
+	}
+
+	hlist_del_rcu(&vlan->hlist);
+	synchronize_net();
+	kfree_rcu(vlan, rcu);
+}
+
+static void br_vlan_hold(struct net_bridge_vlan *vlan)
+{
+	atomic_inc(&vlan->refcnt);
+}
+
+static void br_vlan_put(struct net_bridge_vlan *vlan)
+{
+	if (atomic_dec_and_test(&vlan->refcnt))
+		br_vlan_destroy(vlan);
+}
+
+struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid)
+{
+	struct net_bridge_vlan *vlan;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_rcu(vlan, node,
+				 &br->vlan_hlist[br_vlan_hash(vid)], hlist) {
+		if (vlan->vid == vid)
+			return vlan;
+	}
+
+	return NULL;
+}
+
+/* Must be protected by RTNL */
+struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
+				    u16 flags)
+{
+	struct net_bridge_vlan *vlan;
+
+	ASSERT_RTNL();
+
+	vlan = br_vlan_find(br, vid);
+	if (vlan)
+		return vlan;
+
+	vlan = kzalloc(sizeof(struct net_bridge_vlan), GFP_KERNEL);
+	if (!vlan)
+		return NULL;
+
+	vlan->vid = vid;
+	atomic_set(&vlan->refcnt, 1);
+
+	if (flags & BRIDGE_FLAGS_SELF) {
+		/* Set bit 0 that is associated with the bridge master
+		 * device.  Port numbers start with 1.
+		 */
+		set_bit(0, vlan->port_bitmap);
+	}
+
+	hlist_add_head_rcu(&vlan->hlist, &br->vlan_hlist[br_vlan_hash(vid)]);
+	return vlan;
+}
+
+/* Must be protected by RTNL */
+static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
+{
+	ASSERT_RTNL();
+
+	if (flags & BRIDGE_FLAGS_SELF) {
+		/* Clear bit 0 that is associated with the bridge master
+		 * device.
+		 */
+		clear_bit(0, vlan->port_bitmap);
+	}
+
+	/* Try to remove the vlan, but only once all the ports have
+	 * been removed from the port bitmap
+	 */
+	if (!bitmap_empty(vlan->port_bitmap, PORT_BITMAP_LEN))
+		return;
+
+	vlan->vid = BR_INVALID_VID;
+
+	/* Drop the self-ref to trigger destruction. */
+	br_vlan_put(vlan);
+}
+
+/* Must be protected by RTNL */
+int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags)
+{
+	struct net_bridge_vlan *vlan;
+
+	ASSERT_RTNL();
+
+	vlan = br_vlan_find(br, vid);
+	if (!vlan)
+		return -ENOENT;
+
+	br_vlan_del(vlan, flags);
+	return 0;
+}
+
+static void br_vlan_flush(struct net_bridge *br)
+{
+	struct net_bridge_vlan *vlan;
+	struct hlist_node *node;
+	struct hlist_node *tmp;
+	int i;
+
+	/* Make sure that there are no vlans left in the bridge after
+	 * all the ports have been removed.
+	 */
+	for (i = 0; i < BR_VID_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(vlan, node, tmp,
+					  &br->vlan_hlist[i], hlist) {
+			br_vlan_del(vlan, BRIDGE_FLAGS_SELF);
+		}
+	}
+}
+
+struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p, u16 vid)
+{
+	struct net_port_vlan *pve;
+
+	/* Must be done either in rcu critical section or with RTNL held */
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
+
+	list_for_each_entry_rcu(pve, &p->vlan_list, list) {
+		if (pve->vid == vid)
+			return pve;
+	}
+
+	return NULL;
+}
+
+/* Must be protected by RTNL */
+int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
+{
+	struct net_port_vlan *pve;
+	struct net_bridge_vlan *vlan;
+	struct net_device *dev = p->dev;
+	int err;
+
+	ASSERT_RTNL();
+
+	/* Find a vlan in the bridge vlan list.  If it isn't there,
+	 * create it
+	 */
+	vlan = br_vlan_add(p->br, vid, flags);
+	if (!vlan)
+		return -ENOMEM;
+
+	/* Check to see if this port is already part of the vlan.  If
+	 * it is, there is nothing more to do.
+	 */
+	if (test_bit(p->port_no, vlan->port_bitmap))
+		return -EEXIST;
+
+	/* Create port vlan, link it to bridge vlan list, and add port to the
+	 * portgroup.
+	 */
+	pve = kmalloc(sizeof(*pve), GFP_KERNEL);
+	if (!pve) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	/* Add VLAN to the device filter if it is supported.
+	 * Strictly speaking, this is not necessary now, since devices
+	 * are made promiscuous by the bridge, but if that ever changes
+	 * this code will allow tagged traffic to enter the bridge.
+	 */
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	    dev->netdev_ops->ndo_vlan_rx_add_vid &&
+	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
+		err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid);
+		if (err)
+			goto clean_up;
+	}
+
+	pve->vid = vid;
+	pve->vlan = vlan;
+	br_vlan_hold(vlan);
+	set_bit(p->port_no, vlan->port_bitmap);
+
+	list_add_tail_rcu(&pve->list, &p->vlan_list);
+	return 0;
+
+clean_up:
+	kfree(pve);
+	br_vlan_del(vlan, flags);
+	return err;
+}
+
+/* Must be protected by RTNL */
+int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
+{
+	struct net_device *dev = p->dev;
+	struct net_port_vlan *pve;
+	struct net_bridge_vlan *vlan;
+
+	ASSERT_RTNL();
+
+	pve = nbp_vlan_find(p, vid);
+	if (!pve)
+		return -ENOENT;
+
+	/* Remove VLAN from the device filter if it is supported. */
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
+		int err;
+
+		err = dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid);
+		if (err)
+			pr_warn("failed to kill vid %d for device %s\n",
+				vid, dev->name);
+	}
+	pve->vid = BR_INVALID_VID;
+
+	vlan = pve->vlan;
+	pve->vlan = NULL;
+	clear_bit(p->port_no, vlan->port_bitmap);
+	br_vlan_put(vlan);
+
+	list_del_rcu(&pve->list);
+	kfree_rcu(pve, rcu);
+
+	br_vlan_del(vlan, flags);
+
+	return 0;
+}
+
+static void nbp_vlan_flush(struct net_bridge_port *p)
+{
+	struct net_port_vlan *pve;
+	struct net_port_vlan *tmp;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)
+		nbp_vlan_delete(p, pve->vid, BRIDGE_FLAGS_SELF);
+}
+
 static void release_nbp(struct kobject *kobj)
 {
 	struct net_bridge_port *p
@@ -139,6 +387,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	br_ifinfo_notify(RTM_DELLINK, p);
 
+	nbp_vlan_flush(p);
 	br_fdb_delete_by_port(br, p, 1);
 
 	list_del_rcu(&p->list);
@@ -170,6 +419,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
 		del_nbp(p);
 	}
 
+	br_vlan_flush(br);
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -222,6 +472,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	p->flags = 0;
 	br_init_port(p);
 	p->state = BR_STATE_DISABLED;
+	INIT_LIST_HEAD(&p->vlan_list);
 	br_stp_port_timer_init(p);
 	br_multicast_add_port(p);
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ae0a6ec..76d9fbc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@
 #include <linux/netpoll.h>
 #include <linux/u64_stats_sync.h>
 #include <net/route.h>
+#include <linux/if_vlan.h>
 
 #define BR_HASH_BITS 8
 #define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -26,6 +27,7 @@
 
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
+#define PORT_BITMAP_LEN	BITS_TO_LONGS(BR_MAX_PORTS)
 
 #define BR_VERSION	"2.3"
 
@@ -63,6 +65,27 @@ struct br_ip
 	__be16		proto;
 };
 
+#define BR_INVALID_VID	(1<<15)
+#define BR_UNTAGGED_VID (1<<14)
+
+#define BR_VID_HASH_SIZE (1<<6)
+#define br_vlan_hash(vid) ((vid) % (BR_VID_HASH_SIZE - 1))
+
+struct net_bridge_vlan {
+	struct hlist_node		hlist;
+	atomic_t			refcnt;
+	struct rcu_head			rcu;
+	u16				vid;
+	unsigned long			port_bitmap[PORT_BITMAP_LEN];
+};
+
+struct net_port_vlan {
+	struct list_head		list;
+	struct net_bridge_vlan		*vlan;
+	struct rcu_head			rcu;
+	u16				vid;
+};
+
 struct net_bridge_fdb_entry
 {
 	struct hlist_node		hlist;
@@ -155,6 +178,7 @@ struct net_bridge_port
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll			*np;
 #endif
+	struct list_head		vlan_list;
 };
 
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
@@ -259,6 +283,7 @@ struct net_bridge
 	struct timer_list		topology_change_timer;
 	struct timer_list		gc_timer;
 	struct kobject			*ifobj;
+	struct hlist_head		vlan_hlist[BR_VID_HASH_SIZE];
 };
 
 struct br_input_skb_cb {
@@ -400,6 +425,14 @@ extern int br_del_if(struct net_bridge *br,
 extern int br_min_mtu(const struct net_bridge *br);
 extern netdev_features_t br_features_recompute(struct net_bridge *br,
 	netdev_features_t features);
+extern struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
+					   u16 flags);
+extern int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags);
+extern struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid);
+extern int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags);
+extern int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags);
+extern struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p,
+					   u16 vid);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH] build: unbreak linkage of m_xt.so
From: Mike Frysinger @ 2012-12-18 18:47 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Jan Engelhardt, stephen.hemminger, netdev, jhs, urykhy, shemonc,
	pablo, netfilter-devel
In-Reply-To: <20121218092130.1d3f4239@nehalam.linuxnetplumber.net>

[-- Attachment #1: Type: Text/Plain, Size: 1346 bytes --]

On Tuesday 18 December 2012 12:21:30 Stephen Hemminger wrote:
> On Sun, 16 Dec 2012 01:32:48 +0100 Jan Engelhardt wrote:
> > Commit v3.7.0~10 caused the variable new PKG_CONFIG variable never
> > to be present at the time of calling make, leading to tc/m_xt.so
> > not linked with -lxtables (result from pkg-config xtables --libs),
> > that in turn leading to
> > 
> > tc: symbol lookup error: /usr/lib64/tc//m_xt.so: undefined symbol:
> > xtables_init_all
> > 
> > Fixing that.
> > 
> > --- a/configure
> > +++ b/configure
> > @@ -4,7 +4,6 @@
> >  INCLUDE=${1:-"$PWD/include"}
> >  : ${PKG_CONFIG:=pkg-config}
> >  : ${CC=gcc}
> > -echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
> > 
> >  # Make a temp directory in build tree.
> >  TMPDIR=$(mktemp -d config.XXXXXX)
> > @@ -224,6 +223,7 @@ rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
> >  }
> >  
> >  echo "# Generated config based on" $INCLUDE >Config
> > +echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
> >  echo "TC schedulers"
> 
> Ok, manually did the diff (conflicted with other previous changes).

this patch is no longer necessary once you merged my:
	configure: move toolchain init to a function

it's actually undesirable to apply this after that since it makes the configure 
script less clear again ...

sorry if my commit message wasn't obvious.
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: inaccurate packet scheduling
From: Stephen Hemminger @ 2012-12-18 18:43 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Eric Dumazet, Stephen Hemminger, jhs, davem, tgraf, netdev
In-Reply-To: <20121218184036.GB1690@minipsycho.orion>

On Tue, 18 Dec 2012 19:40:36 +0100
Jiri Pirko <jiri@resnulli.us> wrote:

> Tue, Dec 18, 2012 at 07:06:53PM CET, edumazet@google.com wrote:
> >> No such messages in log
> >>
> >>>Also turn off TSO since it screws up any form of rate control.
> >>
> >> Turning off TSO did not help. However, turning off scatter-gather
> >> helped, on both HP DL360G8 (igb) and IBM JS22 (ehea).
> >>
> >
> >Think to also disable GSO
> 
> On rhel6 kernel (2.6.32-based) I have to disable GSO as well.
> 
> The question is: Is this a bug or is it mandatory to get this offload
> disabled in order to TBF to be working correctly?
> 
> Thanks a bunch.
> 
> Jiri
> 
> >
> >Alternatively, you can use "mtu 65536" to allow TBF to send packets
> >bigger than 2048 bytes

It is really a bug. The packet schedulers mostly assume that a single
skb results in a single packet, and that the skb->len equals the size
of the packet on the wire.

^ permalink raw reply

* Re: inaccurate packet scheduling
From: Jiri Pirko @ 2012-12-18 18:40 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Stephen Hemminger, jhs, davem, tgraf, netdev
In-Reply-To: <CANn89iJdROkJ+dZgfDB9wPpQ3wXdUHK7SX=C2iUbHxwh9=5orw@mail.gmail.com>

Tue, Dec 18, 2012 at 07:06:53PM CET, edumazet@google.com wrote:
>> No such messages in log
>>
>>>Also turn off TSO since it screws up any form of rate control.
>>
>> Turning off TSO did not help. However, turning off scatter-gather
>> helped, on both HP DL360G8 (igb) and IBM JS22 (ehea).
>>
>
>Think to also disable GSO

On rhel6 kernel (2.6.32-based) I have to disable GSO as well.

The question is: Is this a bug or is it mandatory to get this offload
disabled in order to TBF to be working correctly?

Thanks a bunch.

Jiri

>
>Alternatively, you can use "mtu 65536" to allow TBF to send packets
>bigger than 2048 bytes

^ permalink raw reply

* Re: inaccurate packet scheduling
From: Jiri Pirko @ 2012-12-18 17:54 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: jhs, davem, edumazet, tgraf, netdev
In-Reply-To: <e2f71dc7-bee5-41ca-bd64-f4569fa953da@tahiti.vyatta.com>

Tue, Dec 18, 2012 at 05:26:31PM CET, stephen.hemminger@vyatta.com wrote:
>
>
>----- Original Message -----
>> Hi all.
>> 
>> Run one of the following 2 scripts on machine A:
>> 
>> #!/bin/bash
>> tc qdisc del dev eth0 root
>> sleep 1
>> tc -batch << EOF
>> qdisc add dev eth0 root handle 1: prio bands 2 priomap 0 0 0 0 0 0 0
>> 0 0 0 0 0 0 0 0 0
>> qdisc add dev eth0 parent 1:1 handle 10: pfifo limit 50
>> qdisc add dev eth0 parent 1:2 handle 20 tbf latency 100ms rate 4mbit
>> burst 2m
>> filter add dev eth0 parent 1: protocol ip u32 match ip dst
>> $machineB_ip flowid 1:2
>> EOF
>> 
>> #!/bin/bash
>> tc qdisc del dev eth0 root
>> sleep 1
>> tc -batch << EOF
>> qdisc add dev eth0 root handle 1: prio bands 2 priomap 0 0 0 0 0 0 0
>> 0 0 0 0 0 0 0 0 0
>> qdisc add dev eth0 parent 1:1 handle 10: pfifo limit 20
>> qdisc add dev eth0 parent 1:2 handle 20: pfifo limit 20
>> filter add dev eth0 parent 1: protocol ip pref 10 \
>> u32 match ip dst $machineB_ip \
>> flowid 1:2 \
>> police rate 4Mbit burst 2m conform-exceed drop
>> EOF
>> 
>> And run:
>> [machineB ~]# iperf -s
>> [machineA ~]# iperf -c machineB_ip -t 60
>> 
>> Expected results are: ~3.8-4.2 Mbits/s
>> But actual results are: ~130-170 Kbits/s with tbf, ~70-300 Kbits/s
>> with policy rate
>> 
>> [machineA ~]# tc -s qdisc list dev eth0
>> qdisc prio 1: root refcnt 9 bands 2 priomap  0 0 0 0 0 0 0 0 0 0 0 0
>> 0 0 0 0
>>  Sent 1512384 bytes 1032 pkt (dropped 729, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> qdisc pfifo 10: parent 1:1 limit 50p
>>  Sent 4560 bytes 32 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> qdisc tbf 20: parent 1:2 rate 4000Kbit burst 2Mb lat 100.0ms
>>  Sent 1507824 bytes 1000 pkt (dropped 729, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> 
>> 
>> Tested with kernel pulled from linus's git today. This happens with
>> older
>> kernels as well (I tried 2.6.32-based rhel6 kernels).
>> 
>> This happens to me on following machines:
>> HP DL360G8 (x86_64) http://people.redhat.com/jpirko/aThoo2Ei/dl380g8/
>> HP DL360G3 (i686)
>> IBM JS22 (ppc64) http://people.redhat.com/jpirko/aThoo2Ei/ibmjs22/
>> 
>> On following machines, I do not observe this issue:
>> qemu kvm (x86_64)
>> IBM Zseries (s390x) http://people.redhat.com/jpirko/aThoo2Ei/ibmz/
>> 
>> Please ask in case you need me to provide any other details.
>> 
>> Thanks.
>
>Check kernel log for messages about clock. It could be that on the
>machines with issues TSC is not usable for kernel clock.

No such messages in log

>Also turn off TSO since it screws up any form of rate control.

Turning off TSO did not help. However, turning off scatter-gather
helped, on both HP DL360G8 (igb) and IBM JS22 (ehea).


>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 2/2] qlcnic: update driver version
From: Sony Chacko @ 2012-12-18 17:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Signed-off-by: Sony Chacko
In-Reply-To: <1355853591-29917-1-git-send-email-sony.chacko@qlogic.com>

From: Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 5379024..bc7ec64 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -36,8 +36,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 29
-#define QLCNIC_LINUX_VERSIONID  "5.0.29"
+#define _QLCNIC_LINUX_SUBVERSION 30
+#define QLCNIC_LINUX_VERSIONID  "5.0.30"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
-- 
1.7.1

^ permalink raw reply related

* [PATCH 1/2] qlcnic: fix unused variable warnings
From: Sony Chacko @ 2012-12-18 17:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Shahed Shaikh
In-Reply-To: <1355853591-29917-1-git-send-email-sony.chacko@qlogic.com>

From: Shahed Shaikh <shahed.shaikh@qlogic.com>

qlcnic_hw.c:370: warning: variable cmd_desc set but not used
qlcnic_hw.c:368: warning: variable consumer set but not used
qlcnic_main.c:448: warning: variable ref_count set but not used
qlcnic_main.c:534: warning: variable mem_base set but not used
qlcnic_ctx.c:137: warning: variable tmp_tmpl set but not used
qlcnic_ctx.c:133: warning: variable version set but not used
qlcnic_minidump.c:200: warning: variable opcode set but not used

Signed-off-by: Shahed Shaikh <shahed.shaikh@qlogic.com>
Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c    |    5 +++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c     |    5 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |    5 -----
 .../net/ethernet/qlogic/qlcnic/qlcnic_minidump.c   |    3 +--
 4 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 58f094c..b14b8f0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -134,7 +134,7 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter)
 	__le32 *tmp_buf;
 	struct qlcnic_cmd_args cmd;
 	struct qlcnic_hardware_context *ahw;
-	struct qlcnic_dump_template_hdr *tmpl_hdr, *tmp_tmpl;
+	struct qlcnic_dump_template_hdr *tmpl_hdr;
 	dma_addr_t tmp_addr_t = 0;
 
 	ahw = adapter->ahw;
@@ -150,6 +150,8 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter)
 	}
 	temp_size = cmd.rsp.arg2;
 	version = cmd.rsp.arg3;
+	dev_info(&adapter->pdev->dev,
+		 "minidump template version = 0x%x", version);
 	if (!temp_size)
 		return -EIO;
 
@@ -174,7 +176,6 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter)
 		err = -EIO;
 		goto error;
 	}
-	tmp_tmpl = tmp_addr;
 	ahw->fw_dump.tmpl_hdr = vzalloc(temp_size);
 	if (!ahw->fw_dump.tmpl_hdr) {
 		err = -EIO;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index fc48e00..7a6d5eb 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -365,7 +365,7 @@ static int
 qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter,
 		struct cmd_desc_type0 *cmd_desc_arr, int nr_desc)
 {
-	u32 i, producer, consumer;
+	u32 i, producer;
 	struct qlcnic_cmd_buffer *pbuf;
 	struct cmd_desc_type0 *cmd_desc;
 	struct qlcnic_host_tx_ring *tx_ring;
@@ -379,7 +379,6 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter,
 	__netif_tx_lock_bh(tx_ring->txq);
 
 	producer = tx_ring->producer;
-	consumer = tx_ring->sw_consumer;
 
 	if (nr_desc >= qlcnic_tx_avail(tx_ring)) {
 		netif_tx_stop_queue(tx_ring->txq);
@@ -402,7 +401,7 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter,
 		pbuf->frag_count = 0;
 
 		memcpy(&tx_ring->desc_head[producer],
-			&cmd_desc_arr[i], sizeof(struct cmd_desc_type0));
+		       cmd_desc, sizeof(struct cmd_desc_type0));
 
 		producer = get_next_index(producer, tx_ring->num_desc);
 		i++;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a7554d9..d833f59 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -445,13 +445,10 @@ static int
 qlcnic_set_function_modes(struct qlcnic_adapter *adapter)
 {
 	u8 id;
-	u32 ref_count;
 	int i, ret = 1;
 	u32 data = QLCNIC_MGMT_FUNC;
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 
-	/* If other drivers are not in use set their privilege level */
-	ref_count = QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE);
 	ret = qlcnic_api_lock(adapter);
 	if (ret)
 		goto err_lock;
@@ -531,11 +528,9 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev,
 {
 	u32 offset;
 	void __iomem *mem_ptr0 = NULL;
-	resource_size_t mem_base;
 	unsigned long mem_len, pci_len0 = 0, bar0_len;
 
 	/* remap phys address */
-	mem_base = pci_resource_start(pdev, 0);	/* 0 is for BAR 0 */
 	mem_len = pci_resource_len(pdev, 0);
 
 	qlcnic_get_bar_length(pdev->device, &bar0_len);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index 12ff292..0b8d862 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -197,7 +197,7 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter,
 	int i, k, timeout = 0;
 	void __iomem *base = adapter->ahw->pci_base0;
 	u32 addr, data;
-	u8 opcode, no_ops;
+	u8 no_ops;
 	struct __ctrl *ctr = &entry->region.ctrl;
 	struct qlcnic_dump_template_hdr *t_hdr = adapter->ahw->fw_dump.tmpl_hdr;
 
@@ -206,7 +206,6 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter,
 
 	for (i = 0; i < no_ops; i++) {
 		k = 0;
-		opcode = 0;
 		for (k = 0; k < 8; k++) {
 			if (!(ctr->opcode & (1 << k)))
 				continue;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 0/2] qlcnic: fix warnings
From: Sony Chacko @ 2012-12-18 17:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko

From: Sony Chacko <sony.chacko@qlogic.com>

Please apply to net-next.

Thanks,
Sony

^ permalink raw reply

* Re: inaccurate packet scheduling
From: Eric Dumazet @ 2012-12-18 18:06 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Stephen Hemminger, jhs, davem, tgraf, netdev
In-Reply-To: <20121218175409.GA1690@minipsycho.orion>

> No such messages in log
>
>>Also turn off TSO since it screws up any form of rate control.
>
> Turning off TSO did not help. However, turning off scatter-gather
> helped, on both HP DL360G8 (igb) and IBM JS22 (ehea).
>

Remember to also disable GSO

Alternatively, you can use "mtu 65536" to allow TBF to send packets
bigger than 2048 bytes

^ permalink raw reply

* Re: TCP delayed ACK heuristic
From: Rick Jones @ 2012-12-18 17:54 UTC (permalink / raw)
  To: David Laight
  Cc: Cong Wang, netdev, Ben Greear, David Miller, Eric Dumazet,
	Stephen Hemminger, Thomas Graf
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B70F4@saturn3.aculab.com>

On 12/18/2012 08:39 AM, David Laight wrote:
> There are problems with only implementing the acks
> specified by RFC1122.
>
> I've seen problems when the sending side is doing (I think)
> 'slow start' with Nagle disabled.
> The sender would only send 4 segments before waiting for an
> ACK - even when it had more than a full sized segment waiting.
> Sender was Linux 2.6.something (probably low 20s).
> I changed the application flow to send data in the reverse
> direction to avoid the problem.
> That was on a ~0 delay local connection - which means that
> there is almost never outstanding data, and the 'slow start'
> happened almost all the time.
> Nagle is completely the wrong algorithm for the data flow.

If Nagle was already disabled, why the last sentence?  And from your 
description, even if Nagle were enabled, I would think that it was 
remote ACK+cwnd behaviour getting in your way, not Nagle, given that 
Nagle is to be decided on a user-send by user-send basis and release 
queued data (to the mercies of other heuristics) when it gets to be an 
MSS-worth.

The joys of intertwined heuristics I suppose.

Personally, I would love for there to be a way to have a cwnd's 
byte-limit's-worth of small segments outstanding at one time - it would 
make my netperf-life much easier as I could get rid of the netperf-level 
congestion window intended to keep successive requests (with Nagle 
already disabled) from getting coalesced by cwnd in a "burst-mode" test. 
* And perhaps make things nicer for the test when there is the 
occasional retransmission.  I used to think that netperf was just 
"unique" in that regard, but it sounds like you have an actual 
application looking to do that??

rick jones

* because I am trying to (ab)use the burst mode TCP_RR test for a 
maximum packets per second through the stack+NIC measurement that isn't 
also a context switching benchmark. But I cannot really come up with a 
real-world rationale to support further cwnd behaviour changes. 
Allowing a byte-limit-cwnd's worth of single-byte-payload TCP segments 
could easily be seen as being rather anti-social :)  And 
forcing/maintaining the original segment boundaries in retransmissions 
for small packets isn't such a hot idea either.

^ permalink raw reply

* Re: [PATCH 11/11] bridge: Dump vlan information from a bridge port
From: Vlad Yasevich @ 2012-12-18 17:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem, mst, john.r.fastabend
In-Reply-To: <20121218090332.2f18f393@nehalam.linuxnetplumber.net>

On 12/18/2012 12:03 PM, Stephen Hemminger wrote:
> On Wed, 12 Dec 2012 15:01:17 -0500
> Vlad Yasevich <vyasevic@redhat.com> wrote:
>
>> Using the RTM_GETLINK dump the vlan filter list of a given
>> bridge port.  The information depends on setting the filter
>> flag similar to how nic VF info is dumped.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>
> I will put these in the bridge tree with other patches that
> are being staged for net-next.
>

Hold on a bit.  I was running some more tests and found some
scenarios that didn't work right.

I am preparing v2 of the patches.

Thanks
-vlad

^ permalink raw reply

* Re: [PATCH 04/11] bridge: Cache vlan in the cb for faster egress lookup.
From: Vlad Yasevich @ 2012-12-18 17:50 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem, mst, john.r.fastabend
In-Reply-To: <20121218090448.69c21daa@nehalam.linuxnetplumber.net>

On 12/18/2012 12:04 PM, Stephen Hemminger wrote:
> On Wed, 12 Dec 2012 15:01:10 -0500
> Vlad Yasevich <vyasevic@redhat.com> wrote:
>
>> On input, cache the pointer to the bridge vlan info, so that
>> on egress, we can simply look at the port bitmap instead
>> of traversing a vlan list.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>
> This isn't going to be safe. Once packet is passed up, the cb[]
> can get overwritten by other things.
>

Right, but we only care about it while in bridging code.  We don't look
at it anywhere else...

Or are you saying that cb is not guaranteed to be preserved between 
br_handle_frame_finish and br_forward?

-vlad

^ permalink raw reply

* Re: [PATCH] build: unbreak linkage of m_xt.so
From: Stephen Hemminger @ 2012-12-18 17:21 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: stephen.hemminger, vapier, netdev, jhs, urykhy, shemonc, pablo,
	netfilter-devel
In-Reply-To: <1355617968-26138-1-git-send-email-jengelh@inai.de>

On Sun, 16 Dec 2012 01:32:48 +0100
Jan Engelhardt <jengelh@inai.de> wrote:

> Commit v3.7.0~10 caused the new PKG_CONFIG variable never
> to be present at the time of calling make, leading to tc/m_xt.so
> not linked with -lxtables (result from pkg-config xtables --libs),
> that in turn leading to
> 
> tc: symbol lookup error: /usr/lib64/tc//m_xt.so: undefined symbol:
> xtables_init_all
> 
> Fixing that.
> 
> Signed-off-by: Jan Engelhardt <jengelh@inai.de>
> ---
>  configure |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/configure b/configure
> index 9912114..573ee55 100755
> --- a/configure
> +++ b/configure
> @@ -4,7 +4,6 @@
>  INCLUDE=${1:-"$PWD/include"}
>  : ${PKG_CONFIG:=pkg-config}
>  : ${CC=gcc}
> -echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
>  
>  # Make a temp directory in build tree.
>  TMPDIR=$(mktemp -d config.XXXXXX)
> @@ -224,6 +223,7 @@ rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
>  }
>  
>  echo "# Generated config based on" $INCLUDE >Config
> +echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
>  
>  echo "TC schedulers"
>  

Ok, manually did the diff (conflicted with other previous changes).

^ permalink raw reply

* Re: network namespace and DNS lookups
From: Dan Williams @ 2012-12-18 17:22 UTC (permalink / raw)
  To: Ravi Aysola; +Cc: netdev
In-Reply-To: <CAFaHj6HT0c59us_7F9Uh7BYh_YXGRt30=yLOrV=hcrG29YO-qA@mail.gmail.com>

On Tue, 2012-12-18 at 10:49 -0500, Ravi Aysola wrote:
> I think I sent my earlier email a bit prematurely.  I do have
> /etc/netns/<namespace-name>/resolv.conf
> files under each of my namespaces. Now the question is, how does a
> user space process
> (say bind)  look at a namespace specific resolv.conf instead of
> default one?  Have any of
> these standard applications been modified to work with namespace
> specific config files?

Wouldn't that be the glibc resolver's domain?  DNS lookups aren't done
by the kernel, but by glibc in userspace.  And glibc is also what reads
resolv.conf, so most things DNS-namespace related would need to happen
there.

Dan

> thanks again
> ravi/
> 
> On Tue, Dec 18, 2012 at 10:09 AM, Ravi Aysola <ravi.mlists@gmail.com> wrote:
> > Has there been any work in any of the recent kernels to limit the DNS lookup
> > to a particular network namespace?  Do we have any facility to specify the
> > DNS resolvers on network namespace basis (such as /etc/ns/resolv.conf)?
> >
> > thank you
> > ravi/
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2] netlink: align attributes on 64-bits
From: Thomas Graf @ 2012-12-18 17:11 UTC (permalink / raw)
  To: David Laight; +Cc: nicolas.dichtel, bhutchings, netdev, davem
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B70F5@saturn3.aculab.com>

On 12/18/12 at 04:50pm, David Laight wrote:
> > 2/ Suppose that the attribute is:
> > 
> >    struct foo {
> >    	__u64 bar1;
> >    	__u32 bar2;
> >    }
> >    => sizeof(struct foo) = 12 (= payload)
> 
> That is only true if the host architecture aligns 64bit items
> on 32 bit boundaries (as i386 does).
> Otherwise there are 4 bytes of padding at the end and the
> size is 16.
> 
> Actually it is worse than that.
> Consider the structure:
> 	struct bar {
> 		__u32 foo1;
> 		__u64 foo2;
> 	}
> On i386 it will have size 12 and foo2 will be at offset 4.
> On sparc32 (and most 64bit) it will have size 16 with foo2
> at offset 8 (and 4 bytes of pad after foo1).

This is a known problem and I can't think of anything
that can be done about it except for memcpy()ing the
data before accessing it.

If you have ideas, I'm more than willing to listen :)

> Do these messages move between systems?
> If they do then any 64bit items need an explicit alignment
> eg tag with __attribute__((aligned(8))) (or aligned(4)).

They don't. Netlink has and will be host bound. It also
uses host byte order for that reason.

^ permalink raw reply

* Re: [PATCH v2] netlink: align attributes on 64-bits
From: Thomas Graf @ 2012-12-18 17:08 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: bhutchings, netdev, davem, David.Laight
In-Reply-To: <50D09897.8030508@6wind.com>

On 12/18/12 at 05:23pm, Nicolas Dichtel wrote:
> Le 18/12/2012 13:57, Thomas Graf a écrit :
> >-static inline int nla_padlen(int payload)
> >-{
> >-	return nla_total_size(payload) - nla_attr_size(payload);
> >+	if (!IS_ALIGNED(len, NLA_ATTR_ALIGN))
> >+		len = ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN);
> Two comments:
> 1/ should it be ALIGN(len, NLA_ATTR_ALIGN)? If we want to add a __u64:
>    => nla_attr_size(sizeof(__u64)) = 12
>    => NLA_ALIGN(nla_attr_size(sizeof(__u64))) => 12 (= len)
>    => ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN) = 0 but it should be 4

We can't add 1-3 bytes of padding, therefore we need to add
NLA_HDRLEN to len before aligning it to enforce a minimal
padding. We can't hit it right now because 4 byte alignment
of the previous attribute is a given but if we ever change
the alignment it could become an issue and the above should
be bullet proof.

Your example would come out like this:
  nla_attr_size(8) = 12
  ALIGN(12 + 4, 8) = 16

> 2/ Suppose that the attribute is:
> 
>   struct foo {
>   	__u64 bar1;
>   	__u32 bar2;
>   }
>   => sizeof(struct foo) = 12 (= payload)
>   => nla_attr_size(payload) = 16
>   => NLA_ALIGN(nla_attr_size(payload)) = 16 (= len)
>   => IS_ALIGNED(len, NLA_ATTR_ALIGN) = true
>   => extra room is not reserved
>   But it's not guaranteed that bar1 is aligned on 8 bytes, only on 4 bytes.

That's correct, that's why I have added the additional
NLA_ATTR_ALIGN of room in nlmsg_new(). It will account
for the one time padding that is needed before we add
the very first attribute.

If all attributes after that have a size aligned to 8
bytes no padding is needed. Padding will only be needed
again if a struct is missized in which case we reserve
room with the above. Correct?

> >+	offset = (size_t) skb_tail_pointer(skb);
> >+	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN)) {
> With the previous struct foo, this test may be true even if we don't
> have reserved extra room. This test depends on previous attribute.
> I think the exact size of the netlink message depends on the order
> of attributes, not only on the attribute itself.
> What about taking the assumption that the start will never be
> aligned and always allocating extra room: ALIGN(NLA_ALIGNTO,
> NLA_ATTR_ALIGN) (= 4)?

See my explanation above. I think this works. The order does not
matter, the sum of all padding required will always be the same.

> >+static bool nla_insufficient_space(struct sk_buff *skb, int attrlen)
> >+{
> >+	size_t needed = nla_pre_padlen(skb) + nla_total_size(attrlen);
> If nla_total_size() was right, nla_pre_padlen(skb) should already be
> included. Am I wrong?

No, nla_pre_padlen() contains the number of bytes needed to align
skb_tail_pointer() to an alignment of 8. If that is > 0 but the
attribute to follow is already aligned.

The tricky part here is that accounting for padding in
nla_total_size() only works for the sum of all attributes.
It does not account for the specific padding required for the
previous attribute.

Therefore the above check. The above could be changed to
nla_attr_size() theoretically as we don't need space for the
final padding eventually but we checked for space before so I
kept it that way.

I realize it's slightly confusing and needs better documentation
and please double check my thinking :-)

^ permalink raw reply

* Re: [PATCH 04/11] bridge: Cache vlan in the cb for faster egress lookup.
From: Stephen Hemminger @ 2012-12-18 17:04 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, davem, mst, john.r.fastabend
In-Reply-To: <1355342477-4971-5-git-send-email-vyasevic@redhat.com>

On Wed, 12 Dec 2012 15:01:10 -0500
Vlad Yasevich <vyasevic@redhat.com> wrote:

> On input, cache the pointer to the bridge vlan info, so that
> on egress, we can simply look at the port bitmap instead
> of traversing a vlan list.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>

This isn't going to be safe. Once packet is passed up, the cb[]
can get overwritten by other things.

^ permalink raw reply

* Re: [PATCH 11/11] bridge: Dump vlan information from a bridge port
From: Stephen Hemminger @ 2012-12-18 17:03 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, davem, mst, john.r.fastabend
In-Reply-To: <1355342477-4971-12-git-send-email-vyasevic@redhat.com>

On Wed, 12 Dec 2012 15:01:17 -0500
Vlad Yasevich <vyasevic@redhat.com> wrote:

> Using the RTM_GETLINK dump the vlan filter list of a given
> bridge port.  The information depends on setting the filter
> flag similar to how nic VF info is dumped.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>

I will put these in the bridge tree with other patches that
are being staged for net-next.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox