Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH 0/3] ath6kl: neatening
From: Kalle Valo @ 2011-07-17 20:45 UTC (permalink / raw)
  To: Joe Perches; +Cc: devel, gregkh, error27, linux-kernel, linux-wireless, netdev
In-Reply-To: <1310932335.2286.8.camel@Joe-Laptop>

On 07/17/2011 10:52 PM, Joe Perches wrote:
> On Sun, 2011-07-17 at 21:56 +0300, Kalle Valo wrote:
>> On 07/17/2011 09:43 PM, Joe Perches wrote:
>>> Joe Perches (3):
>>>   ath6kl: Add missing newlines and coalesce messages
>>>   ath6kl: Remove __func__ uses from ath6kl_err
>>>   ath6kl: cfg80211: Add and use ath6kl_cfg80211_ready helper
>> Nice, thanks a lot. I'm planning to send v3 of athk6kl patches later
>> today. Is it okay for you if I amend your three patches to that patchset?
> 
> 'course.

Great, thanks. I have now applied your patches to ath6kl-cleanup tree
and they will be included in patchset v3.

Kalle

^ permalink raw reply

* [RFC PATCH] net: vlan: 802.1ad S-VLAN support
From: David Lamparter @ 2011-07-17 20:55 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, David Lamparter

this adds support for 802.1ad S-VLANs, which basically are regular VLANs
with a different protocol field. also supported are the legacy QinQ
9100/9200/9300 ethertypes. as with the CFI bit for 802.1Q, the DEI bit
is blissfully ignored.

this patch modifies the 802.1Q code, but keeps the regular VLAN
acceleration architecture unchanged. the S-VLAN code does not use that;
I am not aware of any NIC implementing it for ethertypes other than
8100.

all in-kernel interfaces and definitions are kept compatible; 802.1Q
performance should not experience significant changes.

tested in a simple setup with kvm, including some random stackings of
S-VLANs and C-VLANs.
---

well, this isn't quite finished yet, but it's in a state where I need
some feedback... especially on:
 - int vlan_pidx(u16 protocol)
   do i do this with a table? that wastes a cacheline... as code it's
   around 32 bytes on x86_64. it's not called for regular 802.1Q frames
   from any hot paths btw, so maybe i shouldn't care?
 - is_vlan_dev / IFF_802_1Q_VLAN
   need to decide whether this should be set for S-VLAN devices, and
   after it's decided need to look at the users from drivers/net and
   drivers/scsi
 - vlan_dev_vlan_id - same as above
 - GRO & co. - basically i have no clue.

Any feedback very welcome,
David

 include/linux/if_link.h  |    1 +
 include/linux/if_vlan.h  |   43 +++++++----
 net/8021q/vlan.c         |  190 ++++++++++++++++++++++++++++++----------------
 net/8021q/vlan.h         |    9 ++-
 net/8021q/vlan_core.c    |    6 +-
 net/8021q/vlan_dev.c     |   15 +++-
 net/8021q/vlan_gvrp.c    |    6 ++
 net/8021q/vlan_netlink.c |   21 +++++-
 net/8021q/vlanproc.c     |    9 ++-
 net/core/dev.c           |    2 +-
 10 files changed, 207 insertions(+), 95 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 0ee969a..23653a7 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -225,6 +225,7 @@ enum {
 	IFLA_VLAN_FLAGS,
 	IFLA_VLAN_EGRESS_QOS,
 	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
 	__IFLA_VLAN_MAX,
 };
 
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index bc03e40..cd0269e 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -45,7 +45,7 @@ struct vlan_hdr {
  *	struct vlan_ethhdr - vlan ethernet header (ethhdr + vlan_hdr)
  *	@h_dest: destination ethernet address
  *	@h_source: source ethernet address
- *	@h_vlan_proto: ethernet protocol (always 0x8100)
+ *	@h_vlan_proto: ethernet protocol (0x8100, 0x88a8, 0x9x00)
  *	@h_vlan_TCI: priority and VLAN ID
  *	@h_vlan_encapsulated_proto: packet type ID or len
  */
@@ -71,6 +71,16 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 #define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
 #define VLAN_N_VID		4096
 
+enum {
+	VLAN_PROTOIDX_8021Q = 0,
+	VLAN_PROTOIDX_8021AD,
+	VLAN_PROTOIDX_QINQ1,
+	VLAN_PROTOIDX_QINQ2,
+	VLAN_PROTOIDX_QINQ3,
+
+	VLAN_N_PROTOCOL
+};
+
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
@@ -86,27 +96,31 @@ struct vlan_group {
 					    * the vlan is attached to.
 					    */
 	unsigned int		nr_vlans;
-	struct hlist_node	hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+	struct net_device **vlan_devices_arrays[VLAN_N_PROTOCOL]
+						[VLAN_GROUP_ARRAY_SPLIT_PARTS];
 	struct rcu_head		rcu;
 };
 
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       u16 vlan_id)
+#define vlan_group_get_device(vg, vlan_id) \
+	vlan_group_get_device_pidx(vg, VLAN_PROTOIDX_8021Q, vlan_id)
+static inline struct net_device *vlan_group_get_device_pidx(struct vlan_group *vg,
+							    int proto_idx, u16 vlan_id)
 {
 	struct net_device **array;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[proto_idx][vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
-static inline void vlan_group_set_device(struct vlan_group *vg,
-					 u16 vlan_id,
-					 struct net_device *dev)
+#define vlan_group_set_device(vg, vlan_id, dev) \
+	vlan_group_set_device_pidx(vg, VLAN_PROTOIDX_8021Q, vlan_id, dev)
+static inline void vlan_group_set_device_pidx(struct vlan_group *vg,
+					      int proto_idx, u16 vlan_id,
+					      struct net_device *dev)
 {
 	struct net_device **array;
 	if (!vg)
 		return;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[proto_idx][vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
@@ -125,7 +139,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     u16 vlan_tci, int polling);
-extern bool vlan_do_receive(struct sk_buff **skb);
+extern bool vlan_do_receive(struct sk_buff **skb, int pidx, u16 protocol);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 extern gro_result_t
 vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
@@ -226,7 +240,8 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
+					      u16 protocol, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -241,7 +256,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
 	skb->mac_header -= VLAN_HLEN;
 
 	/* first, the ethernet type */
-	veth->h_vlan_proto = htons(ETH_P_8021Q);
+	veth->h_vlan_proto = htons(protocol);
 
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
@@ -262,7 +277,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
  */
 static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 {
-	skb = vlan_insert_tag(skb, vlan_tci);
+	skb = vlan_insert_tag(skb, ETH_P_8021Q, vlan_tci);
 	if (skb)
 		skb->protocol = htons(ETH_P_8021Q);
 	return skb;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d24c464..2cdd886 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -46,17 +46,39 @@
 
 int vlan_net_id __read_mostly;
 
-const char vlan_fullname[] = "802.1Q VLAN Support";
+const char vlan_fullname[] = "802.1Q/.1ad VLAN Support";
 const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
+#if 0
+/* lookup table, use first byte */
+static const uint8_t vlan_protocol_idx[256] = {
+	[ETH_P_8021Q >> 8]	= VLAN_PROTOIDX_8021Q,
+	[ETH_P_8021AD >> 8]	= VLAN_PROTOIDX_8021AD,
+	[ETH_P_QINQ1 >> 8]	= VLAN_PROTOIDX_QINQ1,
+	[ETH_P_QINQ2 >> 8]	= VLAN_PROTOIDX_QINQ2,
+	[ETH_P_QINQ3 >> 8]	= VLAN_PROTOIDX_QINQ3,
+};
+#define vlan_pidx(protocol) (vlan_protocol_idx[(protocol) >> 8])
+#else
+static inline int vlan_pidx(u16 protocol)
+{
+	if (likely(protocol == ETH_P_8021Q))
+		return VLAN_PROTOIDX_8021Q;
+	if (protocol == ETH_P_8021AD)
+		return VLAN_PROTOIDX_8021AD;
+	return ((protocol - ETH_P_QINQ1) >> 8) + VLAN_PROTOIDX_QINQ1;
+}
+#endif
+
 static void vlan_group_free(struct vlan_group *grp)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
+	for (j = 0; j < VLAN_N_PROTOCOL; j++)
+		for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+			kfree(grp->vlan_devices_arrays[j][i]);
 	kfree(grp);
 }
 
@@ -72,14 +94,16 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 	return grp;
 }
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   u16 protocol, u16 vlan_id)
 {
 	struct net_device **array;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_pidx(protocol)]
+					[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	if (array != NULL)
 		return 0;
 
@@ -88,7 +112,8 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[vlan_pidx(protocol)]
+				[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
 	return 0;
 }
 
@@ -103,6 +128,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
+	u16 protocol = vlan->protocol;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
@@ -114,7 +140,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * HW accelerating devices or SW vlan input packet processing if
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
-	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
+	if (vlan_id && protocol == ETH_P_8021Q &&
+			(real_dev->features & NETIF_F_HW_VLAN_FILTER))
 		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
 	grp->nr_vlans--;
@@ -122,7 +149,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device_pidx(grp, vlan_pidx(protocol), vlan_id, NULL);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
@@ -145,7 +172,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			u16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
@@ -161,7 +189,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, vlan_pidx(protocol), vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -173,6 +201,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
+	u16 protocol = vlan->protocol;
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
@@ -186,7 +215,7 @@ int register_vlan_dev(struct net_device *dev)
 			goto out_free_group;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, protocol, vlan_id);
 	if (err < 0)
 		goto out_uninit_applicant;
 
@@ -203,7 +232,7 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
+	vlan_group_set_device_pidx(grp, vlan_pidx(protocol), vlan_id, dev);
 	grp->nr_vlans++;
 
 	if (ngrp) {
@@ -211,7 +240,7 @@ int register_vlan_dev(struct net_device *dev)
 			ops->ndo_vlan_rx_register(real_dev, ngrp);
 		rcu_assign_pointer(real_dev->vlgrp, ngrp);
 	}
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+	if (protocol == ETH_P_8021Q && real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
 	return 0;
@@ -230,18 +259,26 @@ out_free_group:
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns 0 if the device was created or a negative error code otherwise.
  */
-static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
+static int register_vlan_device(struct net_device *real_dev,
+				u16 protocol, u16 vlan_id)
 {
 	struct net_device *new_dev;
 	struct net *net = dev_net(real_dev);
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
 	char name[IFNAMSIZ];
+	static const char *protonames[VLAN_N_PROTOCOL] = {
+		[VLAN_PROTOIDX_8021Q]	= "",
+		[VLAN_PROTOIDX_8021AD]	= "1ad",
+		[VLAN_PROTOIDX_QINQ1]	= "91-",
+		[VLAN_PROTOIDX_QINQ2]	= "92-",
+		[VLAN_PROTOIDX_QINQ3]	= "93-"
+	};
 	int err;
 
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, protocol, vlan_id);
 	if (err < 0)
 		return err;
 
@@ -249,26 +286,30 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	switch (vn->name_type) {
 	case VLAN_NAME_TYPE_RAW_PLUS_VID:
 		/* name will look like:	 eth1.0005 */
-		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
+		snprintf(name, IFNAMSIZ, "%s.%s%.4i", real_dev->name,
+				protonames[vlan_pidx(protocol)], vlan_id);
 		break;
 	case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 vlan5
 		 */
-		snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
+		snprintf(name, IFNAMSIZ, "vlan%s%i",
+				protonames[vlan_pidx(protocol)], vlan_id);
 		break;
 	case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 eth0.5
 		 */
-		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
+		snprintf(name, IFNAMSIZ, "%s.%s%i", real_dev->name,
+				protonames[vlan_pidx(protocol)], vlan_id);
 		break;
 	case VLAN_NAME_TYPE_PLUS_VID:
 		/* Put our vlan.VID in the name.
 		 * Name will look like:	 vlan0005
 		 */
 	default:
-		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
+		snprintf(name, IFNAMSIZ, "vlan%s%.4i",
+				protonames[vlan_pidx(protocol)], vlan_id);
 	}
 
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
@@ -282,6 +323,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 */
 	new_dev->mtu = real_dev->mtu;
 
+	vlan_dev_info(new_dev)->protocol = protocol;
 	vlan_dev_info(new_dev)->vlan_id = vlan_id;
 	vlan_dev_info(new_dev)->real_dev = real_dev;
 	vlan_dev_info(new_dev)->dent = NULL;
@@ -359,6 +401,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
 	}
 }
 
+#define vlangrp_for_each_dev(i, grp, vlandev) \
+	for (i = 0; i < VLAN_N_VID * VLAN_N_PROTOCOL; i++) \
+		if ((vlandev = vlan_group_get_device_pidx(grp, \
+					i / VLAN_N_VID, i % VLAN_N_VID)))
+			/* { code here } */
+
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
@@ -391,22 +439,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			netif_stacked_transfer_operstate(dev, vlandev);
 		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -416,11 +456,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			if (vlandev->mtu <= dev->mtu)
 				continue;
 
@@ -430,11 +466,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			vlan_transfer_features(dev, vlandev);
 		}
 
@@ -442,11 +474,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -460,11 +488,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
@@ -481,17 +505,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
-			/* unregistration of last vlan destroys group, abort
-			 * afterwards */
-			if (grp->nr_vlans == 1)
-				i = VLAN_N_VID;
+		vlangrp_for_each_dev(i, grp, vlandev) {
+			unsigned int nr = grp->nr_vlans;
 
 			unregister_vlan_dev(vlandev, &list);
+
+			/* if it was the last VLAN, grp is now gone */
+			if (nr == 1)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
@@ -503,11 +524,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			call_netdevice_notifiers(event, vlandev);
 		}
 		break;
@@ -608,7 +625,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		err = register_vlan_device(dev, args.u.VID);
+		err = register_vlan_device(dev, ETH_P_8021Q, args.u.VID);
 		break;
 
 	case DEL_VLAN_CMD:
@@ -668,6 +685,39 @@ static struct pernet_operations vlan_net_ops = {
 	.size = sizeof(struct vlan_net),
 };
 
+static int vlan_rcv(struct sk_buff *skb, struct net_device *dev,
+                   struct packet_type *pt, struct net_device *orig_dev)
+{
+	u16 protocol = be16_to_cpu(pt->type);
+
+	skb = vlan_untag(skb);
+	if (unlikely(!skb))
+		return 0;
+	if (vlan_do_receive(&skb, vlan_pidx(protocol), protocol))
+		return netif_receive_skb(skb);
+
+	if (likely(skb))
+		kfree_skb(skb);
+	return 0;
+}
+
+static struct packet_type vlan_1ad_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_8021AD),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq1_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ1),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq2_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ2),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq3_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ3),
+	.func = vlan_rcv,
+};
+
 static int __init vlan_proto_init(void)
 {
 	int err;
@@ -691,6 +741,11 @@ static int __init vlan_proto_init(void)
 		goto err4;
 
 	vlan_ioctl_set(vlan_ioctl_handler);
+
+	dev_add_pack(&vlan_1ad_type);
+	dev_add_pack(&vlan_qq1_type);
+	dev_add_pack(&vlan_qq2_type);
+	dev_add_pack(&vlan_qq3_type);
 	return 0;
 
 err4:
@@ -705,6 +760,11 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
+	dev_remove_pack(&vlan_1ad_type);
+	dev_remove_pack(&vlan_qq1_type);
+	dev_remove_pack(&vlan_qq2_type);
+	dev_remove_pack(&vlan_qq3_type);
+
 	vlan_ioctl_set(NULL);
 	vlan_netlink_fini();
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index b132f54..2043b06 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -46,6 +46,7 @@ struct vlan_pcpu_stats {
  *	@ingress_priority_map: ingress priority mappings
  *	@nr_egress_mappings: number of egress priority mappings
  *	@egress_priority_map: hash of egress priority mappings
+ *	@protocol: encapsulation protocol value (8100, 88a8, 9x00)
  *	@vlan_id: VLAN identifier
  *	@flags: device flags
  *	@real_dev: underlying netdevice
@@ -59,6 +60,7 @@ struct vlan_dev_info {
 	unsigned int				nr_egress_mappings;
 	struct vlan_priority_tci_mapping	*egress_priority_map[16];
 
+	u16					protocol;
 	u16					vlan_id;
 	u16					flags;
 
@@ -76,12 +78,12 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
-					       u16 vlan_id)
+					       int pidx, u16 vlan_id)
 {
 	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
 
 	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
+		return vlan_group_get_device_pidx(grp, pidx, vlan_id);
 
 	return NULL;
 }
@@ -94,7 +96,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			u16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index fcc6846..921e240 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,14 +4,14 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-bool vlan_do_receive(struct sk_buff **skbp)
+bool vlan_do_receive(struct sk_buff **skbp, int pidx, u16 protocol)
 {
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	vlan_dev = vlan_find_dev(skb->dev, pidx, vlan_id);
 	if (!vlan_dev) {
 		if (vlan_id)
 			skb->pkt_type = PACKET_OTHERHOST;
@@ -41,7 +41,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		skb = *skbp = vlan_insert_tag(skb, protocol, skb->vlan_tci);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 49bb752..67c6a66 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -119,8 +119,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		type = vlan_dev_info(dev)->protocol;
+		skb->protocol = htons(type);
 		vhdrlen = VLAN_HLEN;
 	}
 
@@ -140,6 +140,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+	u16 protocol = vlan_dev_info(dev)->protocol;
 	unsigned int len;
 	int ret;
 
@@ -148,12 +149,15 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+	if (veth->h_vlan_proto != htons(protocol) ||
 	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		if (protocol == ETH_P_8021Q)
+			skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		else
+			skb = vlan_insert_tag(skb, protocol, vlan_tci);
 	}
 
 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
@@ -547,7 +551,8 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+	if (vlan_dev_info(dev)->protocol == ETH_P_8021Q
+			&& real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061cece..83c6728 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,9 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->protocol != ETH_P_8021Q)
+		return 0;
+
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
@@ -41,6 +44,9 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->protocol != ETH_P_8021Q)
+		return;
+
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index be9a5c1..418dc55 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -19,6 +19,7 @@
 
 static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_ID]		= { .type = NLA_U16 },
+	[IFLA_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
@@ -57,6 +58,19 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		if (id >= VLAN_VID_MASK)
 			return -ERANGE;
 	}
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		id = nla_get_u16(data[IFLA_VLAN_PROTOCOL]);
+		switch (id) {
+		case ETH_P_8021Q:
+		case ETH_P_8021AD:
+		case ETH_P_QINQ1:
+		case ETH_P_QINQ2:
+		case ETH_P_QINQ3:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
@@ -118,10 +132,12 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 		return -ENODEV;
 
 	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->protocol = data[IFLA_VLAN_PROTOCOL]
+			? nla_get_u16(data[IFLA_VLAN_PROTOCOL]) : ETH_P_8021Q;
 	vlan->real_dev = real_dev;
 	vlan->flags    = VLAN_FLAG_REORDER_HDR;
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	err = vlan_check_real_dev(real_dev, vlan->protocol, vlan->vlan_id);
 	if (err < 0)
 		return err;
 
@@ -150,7 +166,7 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(4) +	/* IFLA_VLAN_ID + _PROTOCOL */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,6 +182,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	unsigned int i;
 
 	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+	NLA_PUT_U16(skb, IFLA_VLAN_PROTOCOL, vlan_dev_info(dev)->protocol);
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d34b6da..7e6464c 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -270,8 +270,11 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 		const struct net_device *vlandev = v;
 		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 
-		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
-			   dev_info->vlan_id,    dev_info->real_dev->name);
+		seq_printf(seq, "%-15s| ", vlandev->name);
+		if (dev_info->protocol != ETH_P_8021Q)
+			seq_printf(seq, "%04x:", dev_info->protocol);
+		seq_printf(seq, "%d  | %s\n", dev_info->vlan_id,
+				dev_info->real_dev->name);
 	}
 	return 0;
 }
@@ -301,6 +304,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	if (dev_info->protocol != ETH_P_8021Q)
+		seq_printf(seq, ", protocol 0x%04x", dev_info->protocol);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
 			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
diff --git a/net/core/dev.c b/net/core/dev.c
index 9444c5c..ece5fd3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3187,7 +3187,7 @@ ncls:
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_do_receive(&skb)) {
+		if (vlan_do_receive(&skb, VLAN_PROTOIDX_8021Q, ETH_P_8021Q)) {
 			ret = __netif_receive_skb(skb);
 			goto out;
 		} else if (unlikely(!skb))
-- 
1.7.5.3


^ permalink raw reply related

* [PATCH RFC 1/2] r8169: expand received packet length indication.
From: Francois Romieu @ 2011-07-17 20:51 UTC (permalink / raw)
  To: netdev; +Cc: Hayes Wang

8168d and above allow jumbo frames beyond 8k. Bump the received
packet length check before enabling jumbo frames on these chipsets.

Frame length indication covers bits 0..13 of the first Rx descriptor
32 bits for the 8169 and 8168. I only have authoritative documentation
for the allowed use of the extra (13) bit with the 8169 and 8168c.
Realtek's drivers use the same mask for the 816x and the fast ethernet
only 810x.

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 drivers/net/r8169.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 3ddd339..ea8ee93 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -5311,7 +5311,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 		} else {
 			struct sk_buff *skb;
 			dma_addr_t addr = le64_to_cpu(desc->addr);
-			int pkt_size = (status & 0x00001FFF) - 4;
+			int pkt_size = (status & 0x00003fff) - 4;

 			/*
 			 * The driver does not support incoming fragmented
-- 
1.7.4.4

^ permalink raw reply related

* Re: [patch net-next-2.6] vlan: introduce ndo_vlan_[enable/disable]
From: Michał Mirosław @ 2011-07-17 21:06 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, shemminger, eric.dumazet, greearb
In-Reply-To: <20110717194421.GA2153@minipsycho>

W dniu 17 lipca 2011 21:44 użytkownik Jiri Pirko <jpirko@redhat.com> napisał:
> Sun, Jul 17, 2011 at 10:36:04AM CEST, mirqus@gmail.com wrote:
>>W dniu 17 lipca 2011 09:30 użytkownik Jiri Pirko <jpirko@redhat.com> napisał:
>>> Sat, Jul 16, 2011 at 04:14:36PM CEST, mirqus@gmail.com wrote:
>>>>2011/7/16 Jiri Pirko <jpirko@redhat.com>:
>>>>> Some devices are not able to enable/disable rx/tw vlan accel separately.
>>>>> they depend on ndo_vlan_rx_register to know if to enable of disable
>>>>> hw accel. And since ndo_vlan_rx_register is going to die soon,
>>>>> this must be resolved.
>>>>>
>>>>> One solution might be to enable accel on device start every time, even
>>>>> if there are no vlan up on. But this would change behaviour and might
>>>>> lead to possible regression (on older devices).
>>>>[...]
>>>>
>>>>Please describe the possible regression. As I see it, there won't be
>>>>any user visible change of behaviour - network code takes care of
>>>>reinserting VLAN tag when necessary. If you think that disabling tag
>>>>stripping is beneficial for cases where no VLANs are configured, it's
>>>>better to do that in netdev_fix_features() for devices which advertise
>>>>NETIF_F_HW_VLAN_RX in hw_features.
>>>
>>> Well I just wanted to preserve current behaviour which is that in many
>>> drivers vlan accel is enabled only if some vid is registered upon the
>>> device and it's disabled again when no vid is registered. I can see
>>> no way to do this with current code after removing ndo_vlan_rx_register.
>>>
>>> I expect unexpected
>>
>>:-D
>>
>>> ... problems on old cards when vlan accel would be
>>> enabled all the time, but maybe I'm wrong...
>>
>>Device has no way of knowing how the system uses VLAN tags, stripped
>>or not. Any problems would be driver problems and since you're making
>>it all use generic code, bugs will hit all drivers simultaneously or
>>(preferably) won't happen at all.
>>
>>> One idea is for device which do not support sepatate rx/tx vlan accel
>>> enabling/disabling they can probably use ndo_fix_features force to
>>> enable/disable rx/tx pair together. That would resolve the situation as
>>> well giving user possibility to turn off vlan accel in case of any issues.
>>
>>That is exactly the idea behind ndo_fix_features.

> In netdev_fix_features add check if either one of NETIF_F_HW_VLAN_TX or
> NETIF_F_HW_VLAN_RX is set and in that case set the other one. Of course
> this would be done only for devices what do not support separate rx/tx
> vlan on/off. But how to distinguish? NETIF_F_HW_VLAN_BOTH feature flag?

Not in netdev_fix_features(). This case you describe should be handled
in driver-specific
ndo_fix_features callback. Because if NETIF_F_HW_VLAN_TX is disabled,
the network core will insert the tag anyway, this would cover the
restriction:

if (!(features & NEITF_F_HW_VLAN_RX))
  features &= ~NETIF_F_HW_VLAN_TX;

This works, because if HW_VLAN_TX is disabled, the driver never gets
packets which need tag insertion. Besides those already in the queue,
of course.

For netdev_fix_features() I thought that if no VLANs are configured, then it
could disable NETIF_F_HW_VLAN_RX (if set in hw_features). The
conditions for disabling it need some thought. For example, if the
device is not slave to some stacking device (bonding doesn't count, as
it passes VLAN registeration to its slaves) then packets with tags
would be either dropped or have tag reinserted (for network taps).

Best Regards,
Michał Mirosław

^ permalink raw reply

* [PATCH RFC 2/2] r8169: jumbo fixes.
From: Francois Romieu @ 2011-07-17 20:53 UTC (permalink / raw)
  To: netdev; +Cc: Hayes Wang

(Hayes, I'd welcome comments regarding the jumbo enable/disable stuff)

- fix features : jumbo frames and checksumming can not be used at the
  same time.

- introduce hw_jumbo_{enable / disable} helpers. Their content has been
  creatively extracted from Realtek's own drivers. As an illustration,
  it would be nice to know how/if the MaxTxPacketSize register operates
  when the device can work with a 9k jumbo frame as its documentation
  (8168c) can not be applied beyond ~7k.

- rtl_tx_performance_tweak is moved forward. No change.

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 drivers/net/r8169.c |  302 ++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 248 insertions(+), 54 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index ea8ee93..9451958 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -141,84 +141,104 @@ enum rtl_tx_desc_version {
 	RTL_TD_1	= 1,
 };
 
-#define _R(NAME,TD,FW) \
-	{ .name = NAME, .txd_version = TD, .fw_name = FW }
+#define JUMBO_1K	ETH_DATA_LEN
+#define JUMBO_4K	(4*1024 - ETH_HLEN - 2)
+#define JUMBO_6K	(6*1024 - ETH_HLEN - 2)
+#define JUMBO_7K	(7*1024 - ETH_HLEN - 2)
+#define JUMBO_9K	(9*1024 - ETH_HLEN - 2)
+
+#define _R(NAME,TD,FW,SZ,B) {	\
+	.name = NAME,		\
+	.txd_version = TD,	\
+	.fw_name = FW,		\
+	.jumbo_max = SZ,	\
+	.jumbo_csum_ok = B	\
+}
 
 static const struct {
 	const char *name;
 	enum rtl_tx_desc_version txd_version;
 	const char *fw_name;
+	u16 jumbo_max;
+	bool jumbo_csum_ok;
 } rtl_chip_infos[] = {
 	/* PCI devices. */
 	[RTL_GIGA_MAC_VER_01] =
-		_R("RTL8169",		RTL_TD_0, NULL),
+		_R("RTL8169",		RTL_TD_0, NULL, JUMBO_7K, true),
 	[RTL_GIGA_MAC_VER_02] =
-		_R("RTL8169s",		RTL_TD_0, NULL),
+		_R("RTL8169s",		RTL_TD_0, NULL, JUMBO_7K, true),
 	[RTL_GIGA_MAC_VER_03] =
-		_R("RTL8110s",		RTL_TD_0, NULL),
+		_R("RTL8110s",		RTL_TD_0, NULL, JUMBO_7K, true),
 	[RTL_GIGA_MAC_VER_04] =
-		_R("RTL8169sb/8110sb",	RTL_TD_0, NULL),
+		_R("RTL8169sb/8110sb",	RTL_TD_0, NULL, JUMBO_7K, true),
 	[RTL_GIGA_MAC_VER_05] =
-		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL),
+		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K, true),
 	[RTL_GIGA_MAC_VER_06] =
-		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL),
+		_R("RTL8169sc/8110sc",	RTL_TD_0, NULL, JUMBO_7K, true),
 	/* PCI-E devices. */
 	[RTL_GIGA_MAC_VER_07] =
-		_R("RTL8102e",		RTL_TD_1, NULL),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_08] =
-		_R("RTL8102e",		RTL_TD_1, NULL),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_09] =
-		_R("RTL8102e",		RTL_TD_1, NULL),
+		_R("RTL8102e",		RTL_TD_1, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_10] =
-		_R("RTL8101e",		RTL_TD_0, NULL),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_11] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL),
+		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K, false),
 	[RTL_GIGA_MAC_VER_12] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL),
+		_R("RTL8168b/8111b",	RTL_TD_0, NULL, JUMBO_4K, false),
 	[RTL_GIGA_MAC_VER_13] =
-		_R("RTL8101e",		RTL_TD_0, NULL),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_14] =
-		_R("RTL8100e",		RTL_TD_0, NULL),
+		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_15] =
-		_R("RTL8100e",		RTL_TD_0, NULL),
+		_R("RTL8100e",		RTL_TD_0, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_16] =
-		_R("RTL8101e",		RTL_TD_0, NULL),
+		_R("RTL8101e",		RTL_TD_0, NULL, JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_17] =
-		_R("RTL8168b/8111b",	RTL_TD_0, NULL),
+		_R("RTL8168b/8111b",	RTL_TD_1, NULL, JUMBO_4K, false),
 	[RTL_GIGA_MAC_VER_18] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_19] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_20] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_21] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_22] =
-		_R("RTL8168c/8111c",	RTL_TD_1, NULL),
+		_R("RTL8168c/8111c",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_23] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_24] =
-		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL),
+		_R("RTL8168cp/8111cp",	RTL_TD_1, NULL, JUMBO_6K, false),
 	[RTL_GIGA_MAC_VER_25] =
-		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_1),
+		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_1,
+							JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_26] =
-		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_2),
+		_R("RTL8168d/8111d",	RTL_TD_1, FIRMWARE_8168D_2,
+							JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_27] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_28] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_29] =
-		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1),
+		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1,
+							JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_30] =
-		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1),
+		_R("RTL8105e",		RTL_TD_1, FIRMWARE_8105E_1,
+							JUMBO_1K, true),
 	[RTL_GIGA_MAC_VER_31] =
-		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL),
+		_R("RTL8168dp/8111dp",	RTL_TD_1, NULL, JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_32] =
-		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_1),
+		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_1,
+							JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_33] =
-		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_2),
+		_R("RTL8168e/8111e",	RTL_TD_1, FIRMWARE_8168E_2,
+							JUMBO_9K, false),
 	[RTL_GIGA_MAC_VER_34] =
-		_R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3)
+		_R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3,
+							JUMBO_9K, false),
 };
 #undef _R
 
@@ -457,8 +477,12 @@ enum rtl_register_content {
 	/* Config3 register p.25 */
 	MagicPacket	= (1 << 5),	/* Wake up when receives a Magic Packet */
 	LinkUp		= (1 << 4),	/* Wake up when the cable connection is re-established */
+	Jumbo_En0	= (1 << 2),	/* 8168 only. Reserved in the 8168b */
 	Beacon_en	= (1 << 0),	/* 8168 only. Reserved in the 8168b */
 
+	/* Config4 register */
+	Jumbo_En1	= (1 << 1),	/* 8168 only. Reserved in the 8168b */
+
 	/* Config5 register p.27 */
 	BWF		= (1 << 6),	/* Accept Broadcast wakeup frame */
 	MWF		= (1 << 5),	/* Accept Multicast wakeup frame */
@@ -667,6 +691,11 @@ struct rtl8169_private {
 		void (*up)(struct rtl8169_private *);
 	} pll_power_ops;
 
+	struct jumbo_ops {
+		void (*enable)(struct rtl8169_private *);
+		void (*disable)(struct rtl8169_private *);
+	} jumbo_ops;
+
 	int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
 	int (*get_settings)(struct net_device *, struct ethtool_cmd *);
 	void (*phy_reset_enable)(struct rtl8169_private *tp);
@@ -727,6 +756,19 @@ static void rtl8169_down(struct net_device *dev);
 static void rtl8169_rx_clear(struct rtl8169_private *tp);
 static int rtl8169_poll(struct napi_struct *napi, int budget);
 
+static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
+{
+	int cap = pci_pcie_cap(pdev);
+
+	if (cap) {
+		u16 ctl;
+
+		pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl);
+		ctl = (ctl & ~PCI_EXP_DEVCTL_READRQ) | force;
+		pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl);
+	}
+}
+
 static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -1467,9 +1509,15 @@ static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 static u32 rtl8169_fix_features(struct net_device *dev, u32 features)
 {
+	struct rtl8169_private *tp = netdev_priv(dev);
+
 	if (dev->mtu > TD_MSS_MAX)
 		features &= ~NETIF_F_ALL_TSO;
 
+	if (dev->mtu > JUMBO_1K &&
+	    !rtl_chip_infos[tp->mac_version].jumbo_csum_ok)
+		features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM);
+
 	return features;
 }
 
@@ -3438,8 +3486,8 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
 	r8168_phy_power_up(tp);
 }
 
-static void rtl_pll_power_op(struct rtl8169_private *tp,
-			     void (*op)(struct rtl8169_private *))
+static void rtl_generic_op(struct rtl8169_private *tp,
+			   void (*op)(struct rtl8169_private *))
 {
 	if (op)
 		op(tp);
@@ -3447,12 +3495,12 @@ static void rtl_pll_power_op(struct rtl8169_private *tp,
 
 static void rtl_pll_power_down(struct rtl8169_private *tp)
 {
-	rtl_pll_power_op(tp, tp->pll_power_ops.down);
+	rtl_generic_op(tp, tp->pll_power_ops.down);
 }
 
 static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
-	rtl_pll_power_op(tp, tp->pll_power_ops.up);
+	rtl_generic_op(tp, tp->pll_power_ops.up);
 }
 
 static void __devinit rtl_init_pll_power_ops(struct rtl8169_private *tp)
@@ -3541,6 +3589,150 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 	tp->dirty_tx = tp->dirty_rx = tp->cur_tx = tp->cur_rx = 0;
 }
 
+static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	rtl_generic_op(tp, tp->jumbo_ops.enable);
+}
+
+static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	rtl_generic_op(tp, tp->jumbo_ops.disable);
+}
+
+static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+	RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
+	rtl_tx_performance_tweak(tp->pci_dev, 0x2 << MAX_READ_REQUEST_SHIFT);
+}
+
+static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+	RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
+	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+}
+
+static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+}
+
+static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+}
+
+static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
+
+	RTL_W8(MaxTxPacketSize, 0x3f);
+	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+	RTL_W8(Config4, RTL_R8(Config4) | 0x01);
+	pci_write_config_byte(pdev, 0x79, 0x20);
+}
+
+static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
+
+	RTL_W8(MaxTxPacketSize, 0x0c);
+	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+	RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
+	pci_write_config_byte(pdev, 0x79, 0x50);
+}
+
+static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	rtl_tx_performance_tweak(tp->pci_dev,
+		(0x2 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
+}
+
+static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	rtl_tx_performance_tweak(tp->pci_dev,
+		(0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
+}
+
+static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	r8168b_0_hw_jumbo_enable(tp);
+
+	RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+}
+
+static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	r8168b_0_hw_jumbo_disable(tp);
+
+	RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+}
+
+static void __devinit rtl_init_jumbo_ops(struct rtl8169_private *tp)
+{
+	struct jumbo_ops *ops = &tp->jumbo_ops;
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_11:
+		ops->disable	= r8168b_0_hw_jumbo_disable;
+		ops->enable	= r8168b_0_hw_jumbo_enable;
+		break;
+	case RTL_GIGA_MAC_VER_12:
+	case RTL_GIGA_MAC_VER_17:
+		ops->disable	= r8168b_1_hw_jumbo_disable;
+		ops->enable	= r8168b_1_hw_jumbo_enable;
+		break;
+	case RTL_GIGA_MAC_VER_18: /* Wild guess. Needs info from Realtek. */
+	case RTL_GIGA_MAC_VER_19:
+	case RTL_GIGA_MAC_VER_20:
+	case RTL_GIGA_MAC_VER_21: /* Wild guess. Needs info from Realtek. */
+	case RTL_GIGA_MAC_VER_22:
+	case RTL_GIGA_MAC_VER_23:
+	case RTL_GIGA_MAC_VER_24:
+	case RTL_GIGA_MAC_VER_25:
+	case RTL_GIGA_MAC_VER_26:
+		ops->disable	= r8168c_hw_jumbo_disable;
+		ops->enable	= r8168c_hw_jumbo_enable;
+		break;
+	case RTL_GIGA_MAC_VER_27:
+	case RTL_GIGA_MAC_VER_28:
+		ops->disable	= r8168dp_hw_jumbo_disable;
+		ops->enable	= r8168dp_hw_jumbo_enable;
+		break;
+	case RTL_GIGA_MAC_VER_31: /* Wild guess. Needs info from Realtek. */
+	case RTL_GIGA_MAC_VER_32:
+	case RTL_GIGA_MAC_VER_33:
+	case RTL_GIGA_MAC_VER_34:
+		ops->disable	= r8168e_hw_jumbo_disable;
+		ops->enable	= r8168e_hw_jumbo_enable;
+		break;
+
+	/*
+	 * No action needed for jumbo frames with 8169.
+	 * No jumbo for 810x at all.
+	 */
+	default:
+		ops->disable	= NULL;
+		ops->enable	= NULL;
+		break;
+	}
+}
+
 static void rtl_hw_reset(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -3685,6 +3877,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rtl_init_mdio_ops(tp);
 	rtl_init_pll_power_ops(tp);
+	rtl_init_jumbo_ops(tp);
 
 	rtl8169_print_mac_version(tp);
 
@@ -3766,6 +3959,12 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netif_info(tp, probe, dev, "%s at 0x%lx, %pM, XID %08x IRQ %d\n",
 		   rtl_chip_infos[chipset].name, dev->base_addr, dev->dev_addr,
 		   (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), dev->irq);
+	if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
+		netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
+			   "rx/tx checksumming: %s]\n",
+			   rtl_chip_infos[chipset].jumbo_max,
+			   rtl_chip_infos[chipset].jumbo_csum_ok ? "ok" : "ko");
+	}
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_28 ||
@@ -4122,19 +4321,6 @@ static void rtl_hw_start_8169(struct net_device *dev)
 	RTL_W16(IntrMask, tp->intr_event);
 }
 
-static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
-{
-	int cap = pci_pcie_cap(pdev);
-
-	if (cap) {
-		u16 ctl;
-
-		pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl);
-		ctl = (ctl & ~PCI_EXP_DEVCTL_READRQ) | force;
-		pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl);
-	}
-}
-
 static void rtl_csi_access_enable(void __iomem *ioaddr, u32 bits)
 {
 	u32 csi;
@@ -4714,9 +4900,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if (new_mtu < ETH_ZLEN || new_mtu > SafeMtu)
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	if (new_mtu < ETH_ZLEN ||
+	    new_mtu > rtl_chip_infos[tp->mac_version].jumbo_max)
 		return -EINVAL;
 
+	if (new_mtu > ETH_DATA_LEN)
+		rtl_hw_jumbo_enable(tp);
+	else
+		rtl_hw_jumbo_disable(tp);
+
 	dev->mtu = new_mtu;
 	netdev_update_features(dev);
 
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH] Add 802.1ad / QinQ support
From: David Lamparter @ 2011-07-17 21:08 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, David Lamparter
In-Reply-To: <1310936105-3494206-1-git-send-email-equinox@diac24.net>

(work in progress, patch for testing)
---
 include/linux/if_link.h |    1 +
 ip/iplink_vlan.c        |   22 ++++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 304c44f..0e6eeec 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -223,6 +223,7 @@ enum {
 	IFLA_VLAN_FLAGS,
 	IFLA_VLAN_EGRESS_QOS,
 	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
 	__IFLA_VLAN_MAX,
 };
 
diff --git a/ip/iplink_vlan.c b/ip/iplink_vlan.c
index 223feb3..f5e5a5f 100644
--- a/ip/iplink_vlan.c
+++ b/ip/iplink_vlan.c
@@ -21,7 +21,7 @@
 static void explain(void)
 {
 	fprintf(stderr,
-		"Usage: ... vlan id VLANID [ FLAG-LIST ]\n"
+		"Usage: ... vlan id VLANID [ protocol ENCAPSULATION ] [ FLAG-LIST ]\n"
 		"                          [ ingress-qos-map QOS-MAP ] [ egress-qos-map QOS-MAP ]\n"
 		"\n"
 		"VLANID := 0-4095\n"
@@ -30,6 +30,7 @@ static void explain(void)
 		"        [ loose_binding { on | off } ]\n"
 		"QOS-MAP := [ QOS-MAP ] QOS-MAPPING\n"
 		"QOS-MAPPING := FROM:TO\n"
+		"ENCAPSULATION := 802.1Q | 802.1ad | 9100 | 9200 | 9300\n"
 	);
 }
 
@@ -77,7 +78,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv,
 			  struct nlmsghdr *n)
 {
 	struct ifla_vlan_flags flags = { 0 };
-	__u16 id;
+	__u16 id, proto;
 
 	while (argc > 0) {
 		if (matches(*argv, "id") == 0) {
@@ -85,6 +86,21 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv,
 			if (get_u16(&id, *argv, 0))
 				invarg("id is invalid", *argv);
 			addattr_l(n, 1024, IFLA_VLAN_ID, &id, 2);
+		} else if (matches(*argv, "protocol") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "802.1Q") == 0)
+				proto = 0x8100;
+			else if (strcmp(*argv, "802.1ad") == 0)
+				proto = 0x88a8;
+			else if (strcmp(*argv, "9100") == 0)
+				proto = 0x9100;
+			else if (strcmp(*argv, "9200") == 0)
+				proto = 0x9200;
+			else if (strcmp(*argv, "9300") == 0)
+				proto = 0x9300;
+			else
+				invarg("protocol is invalid", *argv);
+			addattr_l(n, 1024, IFLA_VLAN_PROTOCOL, &proto, 2);
 		} else if (matches(*argv, "reorder_hdr") == 0) {
 			NEXT_ARG();
 			flags.mask |= VLAN_FLAG_REORDER_HDR;
@@ -183,6 +199,8 @@ static void vlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 	    RTA_PAYLOAD(tb[IFLA_VLAN_ID]) < sizeof(__u16))
 		return;
 
+	if (tb[IFLA_VLAN_PROTOCOL])
+		fprintf(f, "protocol 0x%04x ", *(__u16 *)RTA_DATA(tb[IFLA_VLAN_PROTOCOL]));
 	fprintf(f, "id %u ", *(__u16 *)RTA_DATA(tb[IFLA_VLAN_ID]));
 
 	if (tb[IFLA_VLAN_FLAGS]) {
-- 
1.7.5.3


^ permalink raw reply related

* Re: iproute2: Add 802.1ad / QinQ support
From: David Lamparter @ 2011-07-17 21:11 UTC (permalink / raw)
  To: David Lamparter; +Cc: netdev, Patrick McHardy
In-Reply-To: <1310936903-26251-1-git-send-email-equinox@diac24.net>

probably should've put "iproute2" in the subject line... sorry.

-David

On Sun, Jul 17, 2011 at 11:08:23PM +0200, David Lamparter wrote:
> +		} else if (matches(*argv, "protocol") == 0) {
> +			NEXT_ARG();
> +			if (strcmp(*argv, "802.1Q") == 0)
> +				proto = 0x8100;
> +			else if (strcmp(*argv, "802.1ad") == 0)
> +				proto = 0x88a8;
> +			else if (strcmp(*argv, "9100") == 0)
> +				proto = 0x9100;
> +			else if (strcmp(*argv, "9200") == 0)
> +				proto = 0x9200;
> +			else if (strcmp(*argv, "9300") == 0)
> +				proto = 0x9300;
> +			else
> +				invarg("protocol is invalid", *argv);
> +			addattr_l(n, 1024, IFLA_VLAN_PROTOCOL, &proto, 2);

^ permalink raw reply

* [PATCH 0/2] ath6kl: more neatening
From: Joe Perches @ 2011-07-17 21:38 UTC (permalink / raw)
  To: Kalle Valo; +Cc: netdev, gregkh, linux-wireless, linux-kernel, devel
In-Reply-To: <4E2349D4.4000204@qca.qualcomm.com>

> Great, thanks. I have now applied your patches to ath6kl-cleanup tree
> and they will be included in patchset v3.

Here's a couple of more.

Joe Perches (2):
  ath6kl: Remove __func__ uses from ath6kl_dbg
  ath6kl: Remove trailing unpaired close paren from ath6kl_err uses

 drivers/net/wireless/ath/ath6kl/bmi.c      |    4 +-
 drivers/net/wireless/ath/ath6kl/cfg80211.c |  101 ++++++++++++---------------
 drivers/net/wireless/ath/ath6kl/debug.c    |   20 +++---
 drivers/net/wireless/ath/ath6kl/debug.h    |   27 ++++----
 drivers/net/wireless/ath/ath6kl/htc.c      |   11 ++--
 drivers/net/wireless/ath/ath6kl/init.c     |    8 +-
 drivers/net/wireless/ath/ath6kl/main.c     |   13 ++--
 drivers/net/wireless/ath/ath6kl/sdio.c     |   12 ++--
 drivers/net/wireless/ath/ath6kl/txrx.c     |   30 ++++-----
 drivers/net/wireless/ath/ath6kl/wmi.c      |    8 +--
 10 files changed, 107 insertions(+), 127 deletions(-)

-- 
1.7.6.131.g99019

^ permalink raw reply

* [PATCH 2/2] ath6kl: Remove trailing unpaired close paren from ath6kl_err uses
From: Joe Perches @ 2011-07-17 21:38 UTC (permalink / raw)
  To: Kalle Valo, linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: devel-tBiZLqfeLfOHmIFyCCdPziST3g8Odh+X, gregkh-l3A5Bk7waGM,
	error27-Re5JQEeQqe8AvxtiuMwx3w,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, John W. Linville
In-Reply-To: <cover.1310938421.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>

Unpaired parentheses are unsightly.

Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
---
 drivers/net/wireless/ath/ath6kl/sdio.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c
index c58a107..e399b17 100644
--- a/drivers/net/wireless/ath/ath6kl/sdio.c
+++ b/drivers/net/wireless/ath/ath6kl/sdio.c
@@ -511,7 +511,7 @@ static int ath6kl_sdio_power_on(struct ath6kl_sdio *ar_sdio)
 
 	ret = sdio_enable_func(func);
 	if (ret) {
-		ath6kl_err("Unable to enable sdio func: %d)\n", ret);
+		ath6kl_err("Unable to enable sdio func: %d\n", ret);
 		sdio_release_host(func);
 		return ret;
 	}
@@ -760,7 +760,7 @@ static int ath6kl_sdio_probe(struct sdio_func *func,
 
 	ret = sdio_set_block_size(func, HIF_MBOX_BLOCK_SIZE);
 	if (ret) {
-		ath6kl_err("Set sdio block size %d failed: %d)\n",
+		ath6kl_err("Set sdio block size %d failed: %d\n",
 			   HIF_MBOX_BLOCK_SIZE, ret);
 		sdio_release_host(func);
 		goto err_off;
-- 
1.7.6.131.g99019

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 1/2] ath6kl: Remove __func__ uses from ath6kl_dbg
From: Joe Perches @ 2011-07-17 21:38 UTC (permalink / raw)
  To: Kalle Valo, linux-kernel
  Cc: devel, gregkh, error27, linux-wireless, netdev, John W. Linville
In-Reply-To: <cover.1310938421.git.joe@perches.com>

Make the ath6kl_dbg uses consistly _not_ use __func__.
Add __func__ to #define ath6kl_dbg.

Remove __func__ from single ath6kl_err.

Signed-off-by: Joe Perches <joe@perches.com>
---
 drivers/net/wireless/ath/ath6kl/bmi.c      |    4 +-
 drivers/net/wireless/ath/ath6kl/cfg80211.c |  101 ++++++++++++---------------
 drivers/net/wireless/ath/ath6kl/debug.c    |   20 +++---
 drivers/net/wireless/ath/ath6kl/debug.h    |   27 ++++----
 drivers/net/wireless/ath/ath6kl/htc.c      |   11 ++--
 drivers/net/wireless/ath/ath6kl/init.c     |    8 +-
 drivers/net/wireless/ath/ath6kl/main.c     |   13 ++--
 drivers/net/wireless/ath/ath6kl/sdio.c     |    8 +-
 drivers/net/wireless/ath/ath6kl/txrx.c     |   30 ++++-----
 drivers/net/wireless/ath/ath6kl/wmi.c      |    8 +--
 10 files changed, 105 insertions(+), 125 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c
index 8467669..22dd247 100644
--- a/drivers/net/wireless/ath/ath6kl/bmi.c
+++ b/drivers/net/wireless/ath/ath6kl/bmi.c
@@ -407,7 +407,7 @@ int ath6kl_bmi_execute(struct ath6kl *ar, u32 addr, u32 *param)
 	}
 	memset(ar->bmi.cmd_buf, 0, size);
 
-	ath6kl_dbg(ATH6KL_DBG_BMI, "bmi execute: addr: 0x%x, param: %d)\n",
+	ath6kl_dbg(ATH6KL_DBG_BMI, "bmi execute: addr: 0x%x, param: %d\n",
 		   addr, *param);
 
 	offset = 0;
@@ -575,7 +575,7 @@ int ath6kl_bmi_lz_data(struct ath6kl *ar, u8 *buf, u32 len)
 	}
 	memset(ar->bmi.cmd_buf, 0, size);
 
-	ath6kl_dbg(ATH6KL_DBG_BMI, "bmi send LZ data: len: %d)\n",
+	ath6kl_dbg(ATH6KL_DBG_BMI, "bmi send LZ data: len: %d\n",
 		   len);
 
 	len_remain = len;
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 71515bb..6c7970f 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -118,7 +118,7 @@ static struct ieee80211_supported_band ath6kl_band_5ghz = {
 static int ath6kl_set_wpa_version(struct ath6kl *ar,
 				  enum nl80211_wpa_versions wpa_version)
 {
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: %u\n", __func__, wpa_version);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "wpa version: %u\n", wpa_version);
 
 	if (!wpa_version) {
 		ar->auth_mode = NONE_AUTH;
@@ -127,7 +127,7 @@ static int ath6kl_set_wpa_version(struct ath6kl *ar,
 	} else if (wpa_version & NL80211_WPA_VERSION_2) {
 		ar->auth_mode = WPA2_AUTH;
 	} else {
-		ath6kl_err("%s: %u not supported\n", __func__, wpa_version);
+		ath6kl_err("wpa version %u not supported\n", wpa_version);
 		return -ENOTSUPP;
 	}
 
@@ -138,7 +138,7 @@ static int ath6kl_set_auth_type(struct ath6kl *ar,
 				enum nl80211_auth_type auth_type)
 {
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: 0x%x\n", __func__, auth_type);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "auth type: 0x%x\n", auth_type);
 
 	switch (auth_type) {
 	case NL80211_AUTHTYPE_OPEN_SYSTEM:
@@ -158,7 +158,7 @@ static int ath6kl_set_auth_type(struct ath6kl *ar,
 
 	default:
 		ar->dot11_auth_mode = OPEN_AUTH;
-		ath6kl_err("%s: 0x%x not spported\n", __func__, auth_type);
+		ath6kl_err("auth type 0x%x not supported\n", auth_type);
 		return -ENOTSUPP;
 	}
 
@@ -170,8 +170,8 @@ static int ath6kl_set_cipher(struct ath6kl *ar, u32 cipher, bool ucast)
 	u8 *ar_cipher = ucast ? &ar->prwise_crypto : &ar->grp_crypto;
 	u8 *ar_cipher_len = ucast ? &ar->prwise_crypto_len : &ar->grp_crpto_len;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: cipher 0x%x, ucast %u\n",
-		   __func__, cipher, ucast);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "cipher 0x%x, ucast %u\n",
+		   cipher, ucast);
 
 	switch (cipher) {
 	case 0:
@@ -205,7 +205,7 @@ static int ath6kl_set_cipher(struct ath6kl *ar, u32 cipher, bool ucast)
 
 static void ath6kl_set_key_mgmt(struct ath6kl *ar, u32 key_mgmt)
 {
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: 0x%x\n", __func__, key_mgmt);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "set key: 0x%x\n", key_mgmt);
 
 	if (key_mgmt == WLAN_AKM_SUITE_PSK) {
 		if (ar->auth_mode == WPA_AUTH)
@@ -365,10 +365,9 @@ static int ath6kl_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev,
 	ar->nw_type = ar->next_mode;
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: connect called with authmode %d dot11 auth %d"
+		   "connect called with authmode %d dot11 auth %d"
 		   " PW crypto %d PW crypto len %d GRP crypto %d"
 		   " GRP crypto len %d channel hint %u\n",
-		   __func__,
 		   ar->auth_mode, ar->dot11_auth_mode, ar->prwise_crypto,
 		   ar->prwise_crypto_len, ar->grp_crypto,
 		   ar->grp_crpto_len, ar->ch_hint);
@@ -447,7 +446,7 @@ void ath6kl_cfg80211_connect_event(struct ath6kl *ar, u16 channel,
 	if (nw_type & ADHOC_NETWORK) {
 		if (ar->wdev->iftype != NL80211_IFTYPE_ADHOC) {
 			ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-				   "%s: ath6k not in ibss mode\n", __func__);
+				   "ath6k not in ibss mode\n");
 			return;
 		}
 	}
@@ -455,7 +454,7 @@ void ath6kl_cfg80211_connect_event(struct ath6kl *ar, u16 channel,
 	if (nw_type & INFRA_NETWORK) {
 		if (ar->wdev->iftype != NL80211_IFTYPE_STATION) {
 			ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-				   "%s: ath6k not in station mode\n", __func__);
+				   "ath6k not in station mode\n");
 			return;
 		}
 	}
@@ -545,9 +544,8 @@ void ath6kl_cfg80211_connect_event(struct ath6kl *ar, u16 channel,
 	ibss_ch = ieee80211_get_channel(ar->wdev->wiphy, (int)channel);
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: inform bss with bssid %pM channel %d beacon_intvl %d capability 0x%x\n",
-		   __func__, mgmt->bssid, ibss_ch->hw_value,
-		   beacon_intvl, capability);
+		   "inform bss with bssid %pM channel %d beacon_intvl %d capability 0x%x\n",
+		   mgmt->bssid, ibss_ch->hw_value, beacon_intvl, capability);
 
 	bss = cfg80211_inform_bss_frame(ar->wdev->wiphy,
 					ibss_ch, mgmt,
@@ -580,8 +578,7 @@ static int ath6kl_cfg80211_disconnect(struct wiphy *wiphy,
 {
 	struct ath6kl *ar = (struct ath6kl *)ath6kl_priv(dev);
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: reason=%u\n", __func__,
-		   reason_code);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "disconnect reason=%u\n", reason_code);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
@@ -624,7 +621,7 @@ void ath6kl_cfg80211_disconnect_event(struct ath6kl *ar, u8 reason,
 	if (ar->nw_type & ADHOC_NETWORK) {
 		if (ar->wdev->iftype != NL80211_IFTYPE_ADHOC) {
 			ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-				   "%s: ath6k not in ibss mode\n", __func__);
+				   "ath6k not in ibss mode\n");
 			return;
 		}
 		memset(bssid, 0, ETH_ALEN);
@@ -635,7 +632,7 @@ void ath6kl_cfg80211_disconnect_event(struct ath6kl *ar, u8 reason,
 	if (ar->nw_type & INFRA_NETWORK) {
 		if (ar->wdev->iftype != NL80211_IFTYPE_STATION) {
 			ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-				   "%s: ath6k not in station mode\n", __func__);
+				   "ath6k not in station mode\n");
 			return;
 		}
 	}
@@ -767,7 +764,7 @@ static void ath6kl_cfg80211_scan_node(void *arg, struct bss *ni)
 	signal = ni->ni_snr * 100;
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: bssid %pM ch %d freq %d size %d\n", __func__,
+		   "bssid %pM ch %d freq %d size %d\n",
 		   mgmt->bssid, channel->hw_value, freq, size);
 	cfg80211_inform_bss_frame(wiphy, channel, mgmt,
 				  size, signal, GFP_KERNEL);
@@ -825,7 +822,7 @@ static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev,
 void ath6kl_cfg80211_scan_complete_event(struct ath6kl *ar, int status)
 {
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: status %d\n", __func__, status);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "scan complete status %d\n", status);
 
 	if (ar->scan_req) {
 		/* Translate data to cfg80211 mgmt format */
@@ -864,8 +861,7 @@ static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev,
 		return -EIO;
 
 	if (key_index < WMI_MIN_KEY_INDEX || key_index > WMI_MAX_KEY_INDEX) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-			   "%s: key index %d out of bounds\n", __func__,
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d out of bounds\n",
 			   key_index);
 		return -ENOENT;
 	}
@@ -914,9 +910,8 @@ static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev,
 		del_timer(&ar->disconnect_timer);
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: index %d, key_len %d, key_type 0x%x, key_usage 0x%x, seq_len %d\n",
-		   __func__, key_index, key->key_len, key_type,
-		   key_usage, key->seq_len);
+		   "index %d, key_len %d, key_type 0x%x, key_usage 0x%x, seq_len %d\n",
+		   key_index, key->key_len, key_type, key_usage, key->seq_len);
 
 	ar->def_txkey_index = key_index;
 	status = ath6kl_wmi_addkey_cmd(ar->wmi, ar->def_txkey_index,
@@ -936,21 +931,20 @@ static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev,
 {
 	struct ath6kl *ar = (struct ath6kl *)ath6kl_priv(ndev);
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "del key index %d\n", key_index);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
 
 	if (key_index < WMI_MIN_KEY_INDEX || key_index > WMI_MAX_KEY_INDEX) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-			   "%s: key index %d out of bounds\n", __func__,
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d out of bounds\n",
 			   key_index);
 		return -ENOENT;
 	}
 
 	if (!ar->keys[key_index].key_len) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-			   "%s: index %d is empty\n", __func__, key_index);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d is empty\n",
+			   key_index);
 		return 0;
 	}
 
@@ -969,14 +963,13 @@ static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev,
 	struct ath6kl_key *key = NULL;
 	struct key_params params;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d\n", key_index);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
 
 	if (key_index < WMI_MIN_KEY_INDEX || key_index > WMI_MAX_KEY_INDEX) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-			   "%s: key index %d out of bounds\n", __func__,
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d out of bounds\n",
 			   key_index);
 		return -ENOENT;
 	}
@@ -1004,21 +997,20 @@ static int ath6kl_cfg80211_set_default_key(struct wiphy *wiphy,
 	int status = 0;
 	u8 key_usage;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "default key index %d\n", key_index);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
 
 	if (key_index < WMI_MIN_KEY_INDEX || key_index > WMI_MAX_KEY_INDEX) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-			   "%s: key index %d out of bounds\n",
-			   __func__, key_index);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "key index %d out of bounds\n",
+			   key_index);
 		return -ENOENT;
 	}
 
 	if (!ar->keys[key_index].key_len) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: invalid key index %d\n",
-			   __func__, key_index);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "invalid key index %d\n",
+			   key_index);
 		return -EINVAL;
 	}
 
@@ -1045,20 +1037,20 @@ static int ath6kl_cfg80211_set_default_mgmt_key(struct wiphy *wiphy,
 {
 	struct ath6kl *ar = (struct ath6kl *)ath6kl_priv(ndev);
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "mgmt index %d\n", key_index);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: not supported\n", __func__);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "mgnt index not supported\n");
 	return -ENOTSUPP;
 }
 
 void ath6kl_cfg80211_tkip_micerr_event(struct ath6kl *ar, u8 keyid,
 				       bool ismcast)
 {
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: keyid %d, ismcast %d\n", __func__, keyid, ismcast);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "keyid %d, ismcast %d\n",
+		   keyid, ismcast);
 
 	cfg80211_michael_mic_failure(ar->net_dev, ar->bssid,
 				     (ismcast ? NL80211_KEYTYPE_GROUP :
@@ -1071,8 +1063,7 @@ static int ath6kl_cfg80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy);
 	int ret;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: changed 0x%x\n", __func__,
-		   changed);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "wiphy changed 0x%x\n", changed);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
@@ -1099,8 +1090,7 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy,
 	struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy);
 	u8 ath6kl_dbm;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type 0x%x, dbm %d\n", __func__,
-		   type, dbm);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "type 0x%x, dbm %d\n", type, dbm);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
@@ -1112,8 +1102,8 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy,
 		ar->tx_pwr = ath6kl_dbm = dbm;
 		break;
 	default:
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type 0x%x not supported\n",
-			   __func__, type);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "type 0x%x not supported\n",
+			   type);
 		return -EOPNOTSUPP;
 	}
 
@@ -1157,17 +1147,17 @@ static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy,
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct wmi_power_mode_cmd mode;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: pmgmt %d, timeout %d\n",
-		   __func__, pmgmt, timeout);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "pmgmt %d, timeout %d\n",
+		   pmgmt, timeout);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
 
 	if (pmgmt) {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: max perf\n", __func__);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "max perf\n");
 		mode.pwr_mode = REC_POWER;
 	} else {
-		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: rec power\n", __func__);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "rec power\n");
 		mode.pwr_mode = MAX_PERF_POWER;
 	}
 
@@ -1187,7 +1177,7 @@ static int ath6kl_cfg80211_change_iface(struct wiphy *wiphy,
 	struct ath6kl *ar = ath6kl_priv(ndev);
 	struct wireless_dev *wdev = ar->wdev;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type %u\n", __func__, type);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "iface type %u\n", type);
 
 	if (!ath6kl_cfg80211_ready(ar))
 		return -EIO;
@@ -1258,10 +1248,9 @@ static int ath6kl_cfg80211_join_ibss(struct wiphy *wiphy,
 	ar->nw_type = ar->next_mode;
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
-		   "%s: connect called with authmode %d dot11 auth %d"
+		   "connect called with authmode %d dot11 auth %d"
 		   " PW crypto %d PW crypto len %d GRP crypto %d"
 		   " GRP crypto len %d channel hint %u\n",
-		   __func__,
 		   ar->auth_mode, ar->dot11_auth_mode, ar->prwise_crypto,
 		   ar->prwise_crypto_len, ar->grp_crypto,
 		   ar->grp_crpto_len, ar->ch_hint);
diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index 316136c..b30d583 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -41,33 +41,33 @@ void ath6kl_dump_registers(struct ath6kl_device *dev,
 			   struct ath6kl_irq_enable_reg *irq_enable_reg)
 {
 
-	ath6kl_dbg(ATH6KL_DBG_ANY, ("<------- Register Table -------->\n"));
+	ath6kl_dbg(ATH6KL_DBG_ANY, "<------- Register Table -------->\n");
 
 	if (irq_proc_reg != NULL) {
 		ath6kl_dbg(ATH6KL_DBG_ANY,
-			"Host Int status:           0x%x\n",
-			irq_proc_reg->host_int_status);
+			   "Host Int status:           0x%x\n",
+			   irq_proc_reg->host_int_status);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "CPU Int status:            0x%x\n",
-			irq_proc_reg->cpu_int_status);
+			   irq_proc_reg->cpu_int_status);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Error Int status:          0x%x\n",
-			irq_proc_reg->error_int_status);
+			   irq_proc_reg->error_int_status);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Counter Int status:        0x%x\n",
-			irq_proc_reg->counter_int_status);
+			   irq_proc_reg->counter_int_status);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Mbox Frame:                0x%x\n",
-			irq_proc_reg->mbox_frame);
+			   irq_proc_reg->mbox_frame);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Rx Lookahead Valid:        0x%x\n",
-			irq_proc_reg->rx_lkahd_valid);
+			   irq_proc_reg->rx_lkahd_valid);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Rx Lookahead 0:            0x%x\n",
-			irq_proc_reg->rx_lkahd[0]);
+			   irq_proc_reg->rx_lkahd[0]);
 		ath6kl_dbg(ATH6KL_DBG_ANY,
 			   "Rx Lookahead 1:            0x%x\n",
-			irq_proc_reg->rx_lkahd[1]);
+			   irq_proc_reg->rx_lkahd[1]);
 
 		if (dev->ar->mbox_info.gmbox_addr != 0) {
 			/*
diff --git a/drivers/net/wireless/ath/ath6kl/debug.h b/drivers/net/wireless/ath/ath6kl/debug.h
index 2e60588..ece20c9 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.h
+++ b/drivers/net/wireless/ath/ath6kl/debug.h
@@ -42,7 +42,7 @@ extern unsigned int debug_mask;
 extern int ath6kl_printk(const char *level, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
-#define ath6kl_info(fmt, ...)				\
+#define ath6kl_info(fmt, ...)					\
 	ath6kl_printk(KERN_INFO, fmt, ##__VA_ARGS__)
 #define ath6kl_err(fmt, ...)					\
 	ath6kl_printk(KERN_ERR, fmt, ##__VA_ARGS__)
@@ -52,16 +52,17 @@ extern int ath6kl_printk(const char *level, const char *fmt, ...)
 #define AR_DBG_LVL_CHECK(mask)	(debug_mask & mask)
 
 #ifdef CONFIG_ATH6KL_DEBUG
-#define ath6kl_dbg(mask, fmt, ...)					\
-	({								\
-	 int rtn;							\
-	 if (debug_mask & mask)						\
-		rtn = ath6kl_printk(KERN_DEBUG, fmt, ##__VA_ARGS__);	\
-	 else								\
-		rtn = 0;						\
-									\
-	 rtn;								\
-	 })
+#define ath6kl_dbg(mask, fmt, ...)				\
+({								\
+	 int rtn;						\
+	 if (debug_mask & mask)					\
+		 rtn = ath6kl_printk(KERN_DEBUG, "%s: " fmt,	\
+				     __func__, ##__VA_ARGS__);	\
+	 else							\
+		 rtn = 0;					\
+								\
+	 rtn;							\
+})
 
 static inline void ath6kl_dbg_dump(enum ATH6K_DEBUG_MASK mask,
 				   const char *msg, const void *buf,
@@ -78,8 +79,8 @@ void ath6kl_dump_registers(struct ath6kl_device *dev,
 			   struct ath6kl_irq_enable_reg *irq_en_reg);
 void dump_cred_dist_stats(struct htc_target *target);
 #else
-static inline int ath6kl_dbg(enum ATH6K_DEBUG_MASK dbg_mask,
-			     const char *fmt, ...)
+static inline __attribute__ ((format (printf, 2, 3)))
+int ath6kl_dbg(enum ATH6K_DEBUG_MASK dbg_mask, const char *fmt, ...)
 {
 	return 0;
 }
diff --git a/drivers/net/wireless/ath/ath6kl/htc.c b/drivers/net/wireless/ath/ath6kl/htc.c
index 95c47bb..ab651fb 100644
--- a/drivers/net/wireless/ath/ath6kl/htc.c
+++ b/drivers/net/wireless/ath/ath6kl/htc.c
@@ -175,16 +175,15 @@ static int htc_issue_send(struct htc_target *target, struct htc_packet *packet)
 
 	send_len = packet->act_len + HTC_HDR_LENGTH;
 
-	ath6kl_dbg(ATH6KL_DBG_HTC_SEND, "%s: transmit len : %d (%s)\n",
-		   __func__, send_len, sync ? "sync" : "async");
+	ath6kl_dbg(ATH6KL_DBG_HTC_SEND, "transmit len : %d (%s)\n",
+		   send_len, sync ? "sync" : "async");
 
 	padded_len = CALC_TXRX_PADDED_LEN(target->dev, send_len);
 
 	ath6kl_dbg(ATH6KL_DBG_HTC_SEND,
-		"DevSendPacket, padded len: %d mbox:0x%X (mode:%s)\n",
-		padded_len,
-		target->dev->ar->mbox_info.htc_addr,
-		sync ? "sync" : "async");
+		   "DevSendPacket, padded len: %d mbox:0x%X (mode:%s)\n",
+		   padded_len, target->dev->ar->mbox_info.htc_addr,
+		   sync ? "sync" : "async");
 
 	if (sync) {
 		status = hif_read_write_sync(target->dev->ar,
diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c
index fe61871..a8f4452 100644
--- a/drivers/net/wireless/ath/ath6kl/init.c
+++ b/drivers/net/wireless/ath/ath6kl/init.c
@@ -1060,7 +1060,7 @@ static int ath6kl_init(struct net_device *dev)
 		goto ath6kl_init_done;
 	}
 
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: got wmi @ 0x%p.\n", __func__, ar->wmi);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "got wmi @ 0x%p\n", ar->wmi);
 
 	/*
 	 * The reason we have to wait for the target here is that the
@@ -1122,7 +1122,7 @@ static int ath6kl_init(struct net_device *dev)
 		goto err_htc_stop;
 	}
 
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: wmi is ready\n", __func__);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "wmi is ready\n");
 
 	/* communicate the wmi protocol verision to the target */
 	if ((ath6kl_set_host_app_area(ar)) != 0)
@@ -1208,8 +1208,8 @@ int ath6kl_core_init(struct ath6kl *ar)
 
 	set_bit(NETDEV_REGISTERED, &ar->flag);
 
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: name=%s dev=0x%p, ar=0x%p\n",
-			__func__, ar->net_dev->name, ar->net_dev, ar);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "name=%s dev=0x%p, ar=0x%p\n",
+		   ar->net_dev->name, ar->net_dev, ar);
 
 	return ret;
 
diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index f325a23..6ef26a3 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c
@@ -359,14 +359,12 @@ void ath6kl_stop_endpoint(struct net_device *dev, bool keep_profile,
 		ar->user_key_ctrl = 0;
 
 	} else {
-		ath6kl_dbg(ATH6KL_DBG_TRC,
-			   "%s: wmi is not ready 0x%p 0x%p\n",
-			   __func__, ar, ar->wmi);
+		ath6kl_dbg(ATH6KL_DBG_TRC, "wmi is not ready 0x%p 0x%p\n",
+			   ar, ar->wmi);
 
 		/* Shut down WMI if we have started it */
 		if (test_bit(WMI_ENABLED, &ar->flag)) {
-			ath6kl_dbg(ATH6KL_DBG_TRC,
-				   "%s: shut down wmi\n", __func__);
+			ath6kl_dbg(ATH6KL_DBG_TRC, "shut down wmi\n");
 			ath6kl_wmi_shutdown(ar->wmi);
 			clear_bit(WMI_ENABLED, &ar->flag);
 			ar->wmi = NULL;
@@ -374,7 +372,7 @@ void ath6kl_stop_endpoint(struct net_device *dev, bool keep_profile,
 	}
 
 	if (ar->htc_target) {
-		ath6kl_dbg(ATH6KL_DBG_TRC, "%s: shut down htc\n", __func__);
+		ath6kl_dbg(ATH6KL_DBG_TRC, "shut down htc\n");
 		htc_stop(ar->htc_target);
 	}
 
@@ -801,8 +799,7 @@ void ath6kl_ready_event(void *devt, u8 *datap, u32 sw_ver, u32 abi_ver)
 	struct net_device *dev = ar->net_dev;
 
 	memcpy(dev->dev_addr, datap, ETH_ALEN);
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: mac addr = %pM\n",
-		   __func__, dev->dev_addr);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "mac addr = %pM\n", dev->dev_addr);
 
 	ar->version.wlan_ver = sw_ver;
 	ar->version.abi_ver = abi_ver;
diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c
index b38732a..c58a107 100644
--- a/drivers/net/wireless/ath/ath6kl/sdio.c
+++ b/drivers/net/wireless/ath/ath6kl/sdio.c
@@ -145,7 +145,7 @@ static struct bus_request *ath6kl_sdio_alloc_busreq(struct ath6kl_sdio *ar_sdio)
 	list_del(&bus_req->list);
 
 	spin_unlock_irqrestore(&ar_sdio->lock, flag);
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: bus request 0x%p\n", __func__, bus_req);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "bus request 0x%p\n", bus_req);
 
 	return bus_req;
 }
@@ -155,7 +155,7 @@ static void ath6kl_sdio_free_bus_req(struct ath6kl_sdio *ar_sdio,
 {
 	unsigned long flag;
 
-	ath6kl_dbg(ATH6KL_DBG_TRC, "%s: bus request 0x%p\n", __func__, bus_req);
+	ath6kl_dbg(ATH6KL_DBG_TRC, "bus request 0x%p\n", bus_req);
 
 	spin_lock_irqsave(&ar_sdio->lock, flag);
 	list_add_tail(&bus_req->list, &ar_sdio->bus_req_freeq);
@@ -683,8 +683,8 @@ static int ath6kl_sdio_probe(struct sdio_func *func,
 	int count;
 
 	ath6kl_dbg(ATH6KL_DBG_TRC,
-		   "%s: func: 0x%X, vendor id: 0x%X, dev id: 0x%X, block size: 0x%X/0x%X\n",
-		   __func__, func->num, func->vendor,
+		   "func: 0x%X, vendor id: 0x%X, dev id: 0x%X, block size: 0x%X/0x%X\n",
+		   func->num, func->vendor,
 		   func->device, func->max_blksize, func->cur_blksize);
 
 	ar_sdio = kzalloc(sizeof(struct ath6kl_sdio), GFP_KERNEL);
diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c
index 615b46d..e2e3673 100644
--- a/drivers/net/wireless/ath/ath6kl/txrx.c
+++ b/drivers/net/wireless/ath/ath6kl/txrx.c
@@ -184,8 +184,7 @@ int ath6kl_control_tx(void *devt, struct sk_buff *skb,
 
 	spin_lock_bh(&ar->lock);
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_TX,
-		   "%s: skb=0x%p, len=0x%x eid =%d\n", __func__,
+	ath6kl_dbg(ATH6KL_DBG_WLAN_TX, "skb=0x%p, len=0x%x eid=%d\n",
 		   skb, skb->len, eid);
 
 	if (test_bit(WMI_CTRL_EP_FULL, &ar->flag) && (eid == ar->ctrl_ep)) {
@@ -242,8 +241,7 @@ int ath6kl_data_tx(struct sk_buff *skb, struct net_device *dev)
 	struct wmi_tx_meta_v2 meta_v2;
 	int ret;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_TX,
-		   "%s: skb=0x%p, data=0x%p, len=0x%x\n", __func__,
+	ath6kl_dbg(ATH6KL_DBG_WLAN_TX, "skb=0x%p, data=0x%p, len=0x%x\n",
 		   skb, skb->data, skb->len);
 
 	/* If target is not associated */
@@ -563,13 +561,13 @@ void ath6kl_tx_complete(void *context, struct list_head *packet_queue)
 			if (status != -ENOSPC)
 				ath6kl_err("tx error, status: 0x%x\n", status);
 			ath6kl_dbg(ATH6KL_DBG_WLAN_TX,
-				   "%s: skb=0x%p data=0x%p len=0x%x eid=%d %s\n",
-				   __func__, skb, packet->buf, packet->act_len,
+				   "skb=0x%p data=0x%p len=0x%x eid=%d %s\n",
+				   skb, packet->buf, packet->act_len,
 				   eid, "error!");
 		} else {
 			ath6kl_dbg(ATH6KL_DBG_WLAN_TX,
-				   "%s: skb=0x%p data=0x%p len=0x%x eid=%d %s\n",
-				   __func__, skb, packet->buf, packet->act_len,
+				   "skb=0x%p data=0x%p len=0x%x eid=%d %s\n",
+				   skb, packet->buf, packet->act_len,
 				   eid, "OK");
 
 			flushing = false;
@@ -680,8 +678,8 @@ void ath6kl_rx_refill(struct htc_target *target, enum htc_endpoint_id endpoint)
 	INIT_LIST_HEAD(&queue);
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_RX,
-		   "%s: providing htc with %d buffers at eid=%d\n",
-		   __func__, n_buf_refill, endpoint);
+		   "providing htc with %d buffers at eid=%d\n",
+		   n_buf_refill, endpoint);
 
 	for (rx_buf = 0; rx_buf < n_buf_refill; rx_buf++) {
 		skb = ath6kl_buf_alloc(ATH6KL_BUFFER_SIZE);
@@ -731,8 +729,7 @@ struct htc_packet *ath6kl_alloc_amsdu_rxbuf(struct htc_target *target,
 	struct list_head *pkt_pos;
 	int refill_cnt = 0, depth = 0;
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_RX, "%s: eid=%d, len:%d\n",
-		   __func__, endpoint, len);
+	ath6kl_dbg(ATH6KL_DBG_WLAN_RX, "eid=%d, len:%d\n", endpoint, len);
 
 	if ((len <= ATH6KL_BUFFER_SIZE) ||
 	    (len > ATH6KL_AMSDU_BUFFER_SIZE))
@@ -1025,9 +1022,8 @@ void ath6kl_rx(struct htc_target *target, struct htc_packet *packet)
 	u8 tid;
 
 	ath6kl_dbg(ATH6KL_DBG_WLAN_RX,
-		   "%s: ar=0x%p eid=%d, skb=0x%p, data=0x%p, len=0x%x status:%d",
-		   __func__, ar, ept, skb, packet->buf,
-		   packet->act_len, status);
+		   "ar=0x%p eid=%d, skb=0x%p, data=0x%p, len=0x%x status:%d\n",
+		   ar, ept, skb, packet->buf, packet->act_len, status);
 
 	if (status || !(skb->data + HTC_HDR_LENGTH)) {
 		ar->net_stats.rx_errors++;
@@ -1324,8 +1320,8 @@ void aggr_recv_addba_req_evt(struct ath6kl *ar, u8 tid, u16 seq_no, u8 win_sz)
 	stats = &p_aggr->stat[tid];
 
 	if (win_sz < AGGR_WIN_SZ_MIN || win_sz > AGGR_WIN_SZ_MAX)
-		ath6kl_dbg(ATH6KL_DBG_WLAN_RX, "%s: win_sz %d, tid %d\n",
-			   __func__, win_sz, tid);
+		ath6kl_dbg(ATH6KL_DBG_WLAN_RX, "win_sz %d, tid %d\n",
+			   win_sz, tid);
 
 	if (rxtid->aggr)
 		aggr_delete_tid_state(p_aggr, tid);
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index a52d7d2..29790e5 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -102,8 +102,7 @@ int ath6kl_wmi_dix_2_dot3(struct wmi *wmi, struct sk_buff *skb)
 	type = eth_hdr->h_proto;
 
 	if (!is_ethertype(be16_to_cpu(type))) {
-		ath6kl_dbg(ATH6KL_DBG_WMI,
-			"%s: pkt is already in 802.3 format\n", __func__);
+		ath6kl_dbg(ATH6KL_DBG_WMI, "pkt is already in 802.3 format\n");
 		return 0;
 	}
 
@@ -496,8 +495,7 @@ static int ath6kl_wmi_connect_event_rx(struct wmi *wmi, u8 *datap, int len)
 
 	ev = (struct wmi_connect_event *) datap;
 
-	ath6kl_dbg(ATH6KL_DBG_WMI, "%s: freq %d bssid %pM\n",
-		   __func__, ev->ch, ev->bssid);
+	ath6kl_dbg(ATH6KL_DBG_WMI, "freq %d bssid %pM\n", ev->ch, ev->bssid);
 
 	memcpy(wmi->bssid, ev->bssid, ETH_ALEN);
 
@@ -2545,7 +2543,7 @@ int ath6kl_wmi_control_rx(struct wmi *wmi, struct sk_buff *skb)
 	datap = skb->data;
 	len = skb->len;
 
-	ath6kl_dbg(ATH6KL_DBG_WMI, "%s: wmi id: %d\n", __func__, id);
+	ath6kl_dbg(ATH6KL_DBG_WMI, "wmi id: %d\n", id);
 	ath6kl_dbg_dump(ATH6KL_DBG_RAW_BYTES, "msg payload ", datap, len);
 
 	switch (id) {
-- 
1.7.6.131.g99019


^ permalink raw reply related

* Re: [PATCH 0/2] ath6kl: more neatening
From: Kalle Valo @ 2011-07-17 21:49 UTC (permalink / raw)
  To: Joe Perches; +Cc: devel, gregkh, error27, linux-kernel, linux-wireless, netdev
In-Reply-To: <cover.1310938421.git.joe@perches.com>

On 07/18/2011 12:38 AM, Joe Perches wrote:
>> Great, thanks. I have now applied your patches to ath6kl-cleanup tree
>> and they will be included in patchset v3.
> 
> Here's a couple of more.
> 
> Joe Perches (2):
>   ath6kl: Remove __func__ uses from ath6kl_dbg
>   ath6kl: Remove trailing unpaired close paren from ath6kl_err uses

Thanks again.

I just sent v3. But if I need to create v4 (I hope not) I'll include
your patches. If v3 is ok, I can resend your patches to wireless-testing
so that you don't need to followup.

Kalle

^ permalink raw reply

* Confirm Your $0.00 Copy Of "Winning In The Online Home Business Arena" ($39.97 Value
From: nick @ 2011-07-17 23:48 UTC (permalink / raw)
  To: netdev

Inside Of Your Free 8 Part DVD \\\"Winning
In The Online Home Business Arena\\\"
($39.97 Value)

Go here now:

==> https://daegansmith.infusionsoft.com/go/freedvdbootcamp/a12144

You\\\'ll Discover . . .

What\\\'s Working Now! \\\"How To Get Up To 37
Checks Per Month, Earn Upwards Of
$4,954.55 While You Sleep At Night, And
Recruit WITHOUT Ever Having To Pick Up The Phone\\\"

Here\\\'s where to go:

==> https://daegansmith.infusionsoft.com/go/freedvdbootcamp/a12144

To the top,

Nick

Pick up now while it still costs zilch,
before I start charing for it again.
Make haste - Go here Now:

==> https://daegansmith.infusionsoft.com/go/freedvdbootcamp/a12144

^ permalink raw reply

* [PATCH net-next v3 af-packet 0/2] Enhance af-packet to provide (near zero)lossless packet capture functionality.
From: Chetan Loke @ 2011-07-18  3:26 UTC (permalink / raw)
  To: davem, netdev; +Cc: Chetan Loke

Changes in v3:
1) Stripped __packed__ attribute.			(Dave Miller)
   Replaced with aligned_u64 and padding.
2) Added 'feature_request_word'.
3) Added rx_hash field to the v3-header.		(Chetan L)

Changes in v2:

1) Aligned bdqc members, pr_err to WARN, sob email      (Joe Perches)
2) Added tp_padding                                     (Eric Dumazet)
3) Nuked useless ;) white space                         (Stephen H)
4) Use __u types in headers                             (Ben Hutchings)
5) Added field for creating private area                (Chetan Loke)

This patch attempts to:
1)Improve network capture visibility by increasing packet density
2)Assist in analyzing multiple(aggregated) capture ports.

Benefits:
  B1) ~15-20% reduction in cpu-usage.
  B2) ~20% increase in packet capture rate.
  B3) ~2x  increase in packet density.
  B4) Port aggregation analysis.
  B5) Non static frame size to capture entire packet payload.

With the current af_packet->rx::mmap based approach, the element size
in the block needs to be statically configured. Nothing wrong with this
config/implementation. But the traffic profile cannot be known in advance.
And so it would be nice if that configuration wasn't static. Normally,
one would configure the element-size to be '2048' so that you can atleast
capture the entire 'MTU-size'.But if the traffic profile varies then we
would end up either i)wasting memory or ii) end up getting a sliced frame.
In other words the packet density will be much less in the first case.

Detailed description of the test-setup etc can be viewed at:
http://thread.gmane.org/gmane.linux.kernel/1158216

Chetan Loke (2):

 include/linux/if_packet.h |  125 ++++++
 net/packet/af_packet.c    |  920 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 999 insertions(+), 46 deletions(-)

-- 
1.7.5.2

^ permalink raw reply

* [PATCH net-next v3 af-packet 1/2] Enhance af-packet to provide (near zero)lossless packet capture functionality.
From: Chetan Loke @ 2011-07-18  3:26 UTC (permalink / raw)
  To: davem, netdev; +Cc: Chetan Loke
In-Reply-To: <1310959610-1688-1-git-send-email-loke.chetan@gmail.com>


Signed-off-by: Chetan Loke <loke.chetan@gmail.com>
---
 include/linux/if_packet.h |  125 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 125 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index c148606..19427d7 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -61,6 +61,17 @@ struct tpacket_stats {
 	unsigned int	tp_drops;
 };
 
+struct tpacket_stats_v3 {
+	unsigned int	tp_packets;
+	unsigned int	tp_drops;
+	unsigned int	tp_freeze_q_cnt;
+};
+
+union tpacket_stats_u {
+	struct tpacket_stats stats1;
+	struct tpacket_stats_v3 stats3;
+};
+
 struct tpacket_auxdata {
 	__u32		tp_status;
 	__u32		tp_len;
@@ -78,6 +89,7 @@ struct tpacket_auxdata {
 #define TP_STATUS_LOSING	0x4
 #define TP_STATUS_CSUMNOTREADY	0x8
 #define TP_STATUS_VLAN_VALID   0x10 /* auxdata has valid tp_vlan_tci */
+#define TP_STATUS_BLK_TMO	0x20
 
 /* Tx ring - header status */
 #define TP_STATUS_AVAILABLE	0x0
@@ -85,6 +97,9 @@ struct tpacket_auxdata {
 #define TP_STATUS_SENDING	0x2
 #define TP_STATUS_WRONG_FORMAT	0x4
 
+/* Rx ring - feature request bits */
+#define TP_FT_REQ_FILL_RXHASH	0x1
+
 struct tpacket_hdr {
 	unsigned long	tp_status;
 	unsigned int	tp_len;
@@ -111,11 +126,106 @@ struct tpacket2_hdr {
 	__u16		tp_padding;
 };
 
+struct tpacket3_hdr {
+	__u32		tp_status;
+	__u32		tp_next_offset;
+	__u32		tp_len;
+	__u32		tp_snaplen;
+	__u16		tp_mac;
+	__u16		tp_net;
+	__u32		tp_sec;
+	__u32		tp_nsec;
+	__u32		tp_rxhash;
+	__u16		tp_vlan_tci;
+	__u16		tp_padding;
+	__u32		tp_next_offset;
+};
+
+struct bd_ts {
+	unsigned int ts_sec;
+	union {
+		unsigned int ts_usec;
+		unsigned int ts_nsec;
+	};
+};
+
+struct hdr_v1 {
+	__u32	block_status;
+	__u32	num_pkts;
+	__u32	offset_to_first_pkt;
+
+	/* Number of valid bytes (including padding)
+	 * blk_len <= tp_block_size
+	 */
+	__u32	blk_len;
+
+	/*
+	 * Quite a few uses of sequence number:
+	 * 1. Make sure cache flush etc worked.
+	 *    Well, one can argue - why not use the increasing ts below?
+	 *    But look at 2. below first.
+	 * 2. When you pass around blocks to other user space decoders,
+	 *    you can see which blk[s] is[are] outstanding etc.
+	 * 3. Validate kernel code.
+	 */
+	aligned_u64	seq_num;
+
+	/*
+	 * ts_last_pkt:
+	 *
+	 * Case 1.	Block has 'N'(N >=1) packets and TMO'd(timed out)
+	 *		ts_last_pkt == 'time-stamp of last packet' and NOT the
+	 *		time when the timer fired and the block was closed.
+	 *		By providing the ts of the last packet we can absolutely
+	 *		guarantee that time-stamp wise, the first packet in the next
+	 *		block will never precede the last packet of the previous
+	 *		block.
+	 * Case 2.	Block has zero packets and TMO'd
+	 *		ts_last_pkt = time when the timer fired and the block
+	 *		was closed.
+	 * Case 3.	Block has 'N' packets and NO TMO.
+	 *		ts_last_pkt = time-stamp of the last pkt in the block.
+	 *
+	 * ts_first_pkt:
+	 *		Is always the time-stamp when the block was opened.
+	 *		Case a)	ZERO packets
+	 *			No packets to deal with but atleast you know the
+	 *			time-interval of this block.
+	 *		Case b) Non-zero packets
+	 *			Use the ts of the first packet in the block.
+	 *
+	 */
+	struct bd_ts	ts_first_pkt, ts_last_pkt;
+};
+
+union bd_header_u {
+	struct hdr_v1 h1;
+};
+
+struct block_desc {
+	__u16 version;
+	__u16 offset_to_priv;
+	__u32 rsvd1;
+	union bd_header_u hdr;
+};
+
+
+
 #define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
+#define TPACKET3_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
+
+#define BLOCK_STATUS(x)	((x)->hdr.h1.block_status)
+#define BLOCK_NUM_PKTS(x)	((x)->hdr.h1.num_pkts)
+#define BLOCK_O2FP(x)		((x)->hdr.h1.offset_to_first_pkt)
+#define BLOCK_LEN(x)		((x)->hdr.h1.blk_len)
+#define BLOCK_SNUM(x)		((x)->hdr.h1.seq_num)
+#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)
+#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
 
 enum tpacket_versions {
 	TPACKET_V1,
 	TPACKET_V2,
+	TPACKET_V3,
 };
 
 /*
@@ -138,6 +248,21 @@ struct tpacket_req {
 	unsigned int	tp_frame_nr;	/* Total number of frames */
 };
 
+struct tpacket_req3 {
+	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
+	unsigned int	tp_block_nr;	/* Number of blocks */
+	unsigned int	tp_frame_size;	/* Size of frame */
+	unsigned int	tp_frame_nr;	/* Total number of frames */
+	unsigned int	tp_retire_blk_tov; /* timeout in msecs */
+	unsigned int	tp_sizeof_priv; /* offset to private data area */
+	unsigned int	tp_feature_req_word;
+};
+
+union tpacket_req_u {
+	struct tpacket_req	req;
+	struct tpacket_req3	req3;
+};
+
 struct packet_mreq {
 	int		mr_ifindex;
 	unsigned short	mr_type;
-- 
1.7.5.2


^ permalink raw reply related

* [PATCH net-next v3 af-packet 2/2] Enhance af-packet to provide (near zero)lossless packet capture functionality.
From: Chetan Loke @ 2011-07-18  3:26 UTC (permalink / raw)
  To: davem, netdev; +Cc: Chetan Loke
In-Reply-To: <1310959610-1688-1-git-send-email-loke.chetan@gmail.com>

1) Blocks can now be configured with non-static frame format.
   Non-static frame format provides following benefits:
   1.1) Increases packet density by a factor of 2x.
   1.2) Ability to capture entire packet.
   1.3) Captures 99% 64-byte traffic as seen by the kernel.
2) Read/poll is now at a block-level rather than at packet level.
3) Added user-configurable timeout knob for timing out blocks on slow/bursty links.
4) Block level processing now allows monitoring multiple links as a single
   logical pipe.

Changes:
C1) tpacket_rcv()
    C1.1) packet_current_frame() is replaced by packet_current_rx_frame()
          The bulk of the processing is then moved in the following chain:
          packet_current_rx_frame()
            __packet_lookup_frame_in_block
              fill_curr_block()
              or
                retire_current_block
                dispatch_next_block
              or
              return NULL(queue is plugged/paused)

Signed-off-by: Chetan Loke <loke.chetan@gmail.com>
---
 net/packet/af_packet.c |  920 +++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 874 insertions(+), 46 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec..a6df380 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -40,6 +40,10 @@
  *					byte arrays at the end of sockaddr_ll
  *					and packet_mreq.
  *		Johann Baudy	:	Added TX RING.
+ *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
+ *					layer.
+ *					Copyright (C) 2011, <lokec@ccs.neu.edu>
+ *
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -161,9 +165,62 @@ struct packet_mreq_max {
 	unsigned char	mr_address[MAX_ADDR_LEN];
 };
 
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		int closing, int tx_ring);
 
+
+#define V3_ALIGNMENT	(8)
+#define ALIGN_V3(x)	(((x)+V3_ALIGNMENT-1)&~(V3_ALIGNMENT-1))
+
+#define BLK_HDR_LEN	(ALIGN_V3(sizeof(struct block_desc)))
+
+#define BLK_PLUS_PRIV(sz_of_priv) \
+	(BLK_HDR_LEN + ALIGN_V3((sz_of_priv)))
+
+struct kbdq_ft_ops {
+	int num_ops;
+	int (*fn_ptrs[1])(void *, void *);
+};
+
+/* kbdq - kernel block descriptor queue */
+struct kbdq_core {
+	struct pgv	*pkbdq;
+	unsigned int	hdrlen;
+	unsigned char	reset_pending_on_curr_blk;
+	unsigned char   delete_blk_timer;
+	unsigned short	kactive_blk_num;
+	unsigned short	blk_sizeof_priv;
+
+	/* last_kactive_blk_num:
+	 * trick to see if user-space has caught up
+	 * in order to avoid refreshing timer when every single pkt arrives.
+	 */
+	unsigned short	last_kactive_blk_num;
+
+	char		*pkblk_start;
+	char		*pkblk_end;
+	int		kblk_size;
+	unsigned int	knum_blocks;
+	uint64_t	knxt_seq_num;
+	char		*prev;
+	char		*nxt_offset;
+	struct sk_buff	*skb;
+	struct kbdq_ft_ops kfops;
+
+	atomic_t	blk_fill_in_prog;
+
+	/* Default is set to 8ms */
+#define DEFAULT_PRB_RETIRE_TOV	(8)
+
+	unsigned short  retire_blk_tov;
+	unsigned short  version;
+	unsigned long	tov_in_jiffies;
+
+	/* timer to retire an outstanding block */
+	struct timer_list retire_blk_timer;
+};
+
+#define PGV_FROM_VMALLOC 1
 struct pgv {
 	char *buffer;
 };
@@ -179,12 +236,31 @@ struct packet_ring_buffer {
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
 
+	struct kbdq_core	prb_bdqc;
 	atomic_t		pending;
 };
 
 struct packet_sock;
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
 
+static void *packet_previous_frame(struct packet_sock *po,
+		struct packet_ring_buffer *rb,
+		int status);
+static void packet_increment_head(struct packet_ring_buffer *buff);
+static int prb_curr_blk_in_use(struct kbdq_core *,
+			struct block_desc *);
+static void *prb_dispatch_next_block(struct kbdq_core *,
+			struct packet_sock *);
+static void prb_retire_current_block(struct kbdq_core *,
+		struct packet_sock *, unsigned int status);
+static int prb_queue_frozen(struct kbdq_core *);
+static void prb_open_block(struct kbdq_core *, struct block_desc *);
+static void prb_retire_rx_blk_timer_expired(unsigned long);
+static void _prb_refresh_rx_retire_blk_timer(struct kbdq_core *);
+static void prb_init_blk_timer(struct packet_sock *, struct kbdq_core *,
+				void (*func) (unsigned long));
+static void prb_fill_rxhash(struct kbdq_core *,
+			struct tpacket3_hdr *);
 static void packet_flush_mclist(struct sock *sk);
 
 struct packet_fanout;
@@ -193,6 +269,7 @@ struct packet_sock {
 	struct sock		sk;
 	struct packet_fanout	*fanout;
 	struct tpacket_stats	stats;
+	union  tpacket_stats_u	stats_u;
 	struct packet_ring_buffer	rx_ring;
 	struct packet_ring_buffer	tx_ring;
 	int			copy_thresh;
@@ -242,6 +319,15 @@ struct packet_skb_cb {
 
 #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
 
+#define GET_PBDQC_FROM_RB(x)	((struct kbdq_core *)(&(x)->prb_bdqc))
+#define GET_PBLOCK_DESC(x, bid)	\
+	((struct block_desc *)((x)->pkbdq[(bid)].buffer))
+#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
+	((struct block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
+#define GET_NEXT_PRB_BLK_NUM(x) \
+	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
+	((x)->kactive_blk_num+1) : 0)
+
 static inline struct packet_sock *pkt_sk(struct sock *sk)
 {
 	return (struct packet_sock *)sk;
@@ -325,8 +411,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 		h.h2->tp_status = status;
 		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		break;
+	case TPACKET_V3:
 	default:
-		pr_err("TPACKET version not supported\n");
+		WARN(1, "TPACKET version not supported.\n");
 		BUG();
 	}
 
@@ -351,8 +438,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
 	case TPACKET_V2:
 		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		return h.h2->tp_status;
+	case TPACKET_V3:
 	default:
-		pr_err("TPACKET version not supported\n");
+		WARN(1, "TPACKET version not supported.\n");
 		BUG();
 		return 0;
 	}
@@ -389,6 +477,644 @@ static inline void *packet_current_frame(struct packet_sock *po,
 	return packet_lookup_frame(po, rb, rb->head, status);
 }
 
+static void prb_del_retire_blk_timer(struct kbdq_core *pkc)
+{
+	del_timer_sync(&pkc->retire_blk_timer);
+}
+
+static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
+		int tx_ring,
+		struct sk_buff_head *rb_queue)
+{
+	struct kbdq_core *pkc;
+
+	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
+
+	spin_lock(&rb_queue->lock);
+	pkc->delete_blk_timer = 1;
+	spin_unlock(&rb_queue->lock);
+
+	prb_del_retire_blk_timer(pkc);
+}
+
+static void prb_init_blk_timer(struct packet_sock *po,
+		struct kbdq_core *pkc,
+		void (*func) (unsigned long))
+{
+	init_timer(&pkc->retire_blk_timer);
+	pkc->retire_blk_timer.data = (long)po;
+	pkc->retire_blk_timer.function = func;
+	pkc->retire_blk_timer.expires = jiffies;
+}
+
+static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
+{
+	struct kbdq_core *pkc;
+
+	if (tx_ring)
+		BUG();
+
+	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
+	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
+}
+
+static int prb_calc_retire_blk_tmo(struct packet_sock *po,
+				int blk_size_in_bytes)
+{
+	struct net_device *dev;
+	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+
+	dev = dev_get_by_index(sock_net(&po->sk), po->ifindex);
+	if (unlikely(dev == NULL))
+		return DEFAULT_PRB_RETIRE_TOV;
+
+	if (dev->ethtool_ops && dev->ethtool_ops->get_settings) {
+		struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, };
+
+		if (!dev->ethtool_ops->get_settings(dev, &ecmd)) {
+			switch (ecmd.speed) {
+			case SPEED_10000:
+				msec = 1;
+				div = 10000/1000;
+				break;
+			case SPEED_1000:
+				msec = 1;
+				div = 1000/1000;
+				break;
+			/*
+			 * If the link speed is so slow you don't really
+			 * need to worry about perf anyways
+			 */
+			case SPEED_100:
+			case SPEED_10:
+			default:
+				return DEFAULT_PRB_RETIRE_TOV;
+			}
+		}
+	}
+
+	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
+
+	if (div)
+		mbits /= div;
+
+	tmo = mbits * msec;
+
+	if (div)
+		return tmo+1;
+	return tmo;
+}
+
+static void init_prb_bdqc(struct packet_sock *po,
+			struct packet_ring_buffer *rb,
+			struct pgv *pg_vec,
+			union tpacket_req_u *req_u, int tx_ring)
+{
+	struct kbdq_core *p1 = &rb->prb_bdqc;
+	struct block_desc *pbd;
+
+	memset(p1, 0x0, sizeof(*p1));
+
+	if (req_u->req3.tp_feature_req_word) {
+		if (req_u->req3.tp_feature_req_word & TP_FT_REQ_FILL_RXHASH) {
+			p1->kfops.fn_ptrs[p1->kfops.num_ops] = prb_fill_rxhash;
+			p1->kfops.num_ops += 1;
+		}
+	}
+
+	p1->knxt_seq_num = 1;
+	p1->pkbdq = pg_vec;
+	pbd = (struct block_desc *)pg_vec[0].buffer;
+	p1->pkblk_start	= (char *)pg_vec[0].buffer;
+	p1->kblk_size = req_u->req3.tp_block_size;
+	p1->knum_blocks	= req_u->req3.tp_block_nr;
+	p1->hdrlen = po->tp_hdrlen;
+	p1->version = po->tp_version;
+	p1->last_kactive_blk_num = 0;
+	po->stats_u.stats3.tp_freeze_q_cnt = 0;
+	if (req_u->req3.tp_retire_blk_tov)
+		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
+	else
+		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
+						req_u->req3.tp_block_size);
+	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
+	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+	prb_setup_retire_blk_timer(po, tx_ring);
+	prb_open_block(p1, pbd);
+}
+
+/*  Do NOT update the last_blk_num first.
+ *  Assumes sk_buff_head lock is held.
+ */
+static void _prb_refresh_rx_retire_blk_timer(struct kbdq_core *pkc)
+{
+	mod_timer(&pkc->retire_blk_timer,
+			jiffies + pkc->tov_in_jiffies);
+	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
+}
+
+/*
+ * Timer logic:
+ * 1) We refresh the timer only when we open a block.
+ *    By doing this we don't waste cycles refreshing the timer
+ *	  on packet-by-packet basis.
+ *
+ * With a 1MB block-size, on a 1Gbps line, it will take
+ * i) ~8 ms to fill a block + ii) memcpy etc.
+ * In this cut we are not accounting for the memcpy time.
+ *
+ * So, if the user sets the 'tmo' to 10ms then the timer
+ * will never fire while the block is still getting filled
+ * (which is what we want). However, the user could choose
+ * to close a block early and that's fine.
+ *
+ * But when the timer does fire, we check whether or not to refresh it.
+ * Since the tmo granularity is in msecs, it is not too expensive
+ * to refresh the timer, lets say every '8' msecs.
+ * Either the user can set the 'tmo' or we can derive it based on
+ * a) line-speed and b) block-size.
+ * prb_calc_retire_blk_tmo() calculates the tmo.
+ *
+ */
+static void prb_retire_rx_blk_timer_expired(unsigned long data)
+{
+	struct packet_sock *po = (struct packet_sock *)data;
+	struct kbdq_core *pkc = &po->rx_ring.prb_bdqc;
+	unsigned int frozen;
+	struct block_desc *pbd;
+
+	spin_lock(&po->sk.sk_receive_queue.lock);
+
+	frozen = prb_queue_frozen(pkc);
+	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+	if (unlikely(pkc->delete_blk_timer))
+		goto out;
+
+	/* We only need to plug the race when the block is partially filled.
+	 * tpacket_rcv:
+	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
+	 *		copy_bits() is in progress ...
+	 *		timer fires on other cpu:
+	 *		we can't retire the current block because copy_bits
+	 *		is in progress.
+	 *
+	 */
+	if (BLOCK_NUM_PKTS(pbd)) {
+		while (atomic_read(&pkc->blk_fill_in_prog)) {
+			/* Waiting for skb_copy_bits to finish... */
+			cpu_relax();
+		}
+	}
+
+	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
+		if (!frozen) {
+			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
+			if (!prb_dispatch_next_block(pkc, po))
+				goto refresh_timer;
+			else
+				goto out;
+		} else {
+			/* Case 1. Queue was frozen because user-space was
+			 *	   lagging behind.
+			 */
+			if (prb_curr_blk_in_use(pkc, pbd)) {
+				/*
+				 * Ok, user-space is still behind.
+				 * So just refresh the timer.
+				 */
+				goto refresh_timer;
+			} else {
+			       /* Case 2. queue was frozen,user-space caught up,
+				* now the link went idle && the timer fired.
+				* We don't have a block to close.So we open this
+				* block and restart the timer.
+				* opening a block thaws the queue,restarts timer
+				* Thawing/timer-refresh is a side effect.
+				*/
+				prb_open_block(pkc, pbd);
+				goto out;
+			}
+		}
+	}
+
+refresh_timer:
+	_prb_refresh_rx_retire_blk_timer(pkc);
+
+out:
+	spin_unlock(&po->sk.sk_receive_queue.lock);
+}
+
+static inline void prb_flush_block(struct kbdq_core *pkc1,
+		struct block_desc *pbd1, __u32 status)
+{
+	/* Flush everything minus the block header */
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	u8 *start, *end;
+
+	start = (u8 *)pbd1;
+
+	/* Skip the block header(we know header WILL fit in 4K) */
+	start += PAGE_SIZE;
+
+	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
+	for (; start < end; start += PAGE_SIZE)
+		flush_dcache_page(pgv_to_page(start));
+
+	smp_wmb();
+#endif
+
+	/* Now update the block status. */
+
+	BLOCK_STATUS(pbd1) = status;
+
+	/* Flush the block header */
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	start = (u8 *)pbd1;
+	flush_dcache_page(pgv_to_page(start));
+
+	smp_wmb();
+#endif
+}
+
+/*
+ * Side effect:
+ *
+ * 1) flush the block
+ * 2) Increment active_blk_num
+ *
+ * Note:We DONT refresh the timer on purpose.
+ *	Because almost always the next block will be opened.
+ */
+static void prb_close_block(struct kbdq_core *pkc1, struct block_desc *pbd1,
+		struct packet_sock *po, unsigned int stat)
+{
+	__u32 status = TP_STATUS_USER | stat;
+
+	struct tpacket3_hdr *last_pkt;
+	struct hdr_v1 *h1 = &pbd1->hdr.h1;
+
+	if (po->stats.tp_drops)
+		status |= TP_STATUS_LOSING;
+
+	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
+	last_pkt->tp_next_offset = 0;
+
+	/* Get the ts of the last pkt */
+	if (BLOCK_NUM_PKTS(pbd1)) {
+		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
+		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
+	} else {
+		/* Ok, we tmo'd - so get the current time */
+		struct timespec ts;
+		getnstimeofday(&ts);
+		h1->ts_last_pkt.ts_sec = ts.tv_sec;
+		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
+	}
+
+	smp_wmb();
+
+	/* Flush the block */
+	prb_flush_block(pkc1, pbd1, status);
+
+	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
+}
+
+static inline void prb_thaw_queue(struct kbdq_core *pkc)
+{
+	pkc->reset_pending_on_curr_blk = 0;
+}
+
+/*
+ * Side effect of opening a block:
+ *
+ * 1) prb_queue is thawed.
+ * 2) retire_blk_timer is refreshed.
+ *
+ */
+static void prb_open_block(struct kbdq_core *pkc1, struct block_desc *pbd1)
+{
+	struct timespec ts;
+	struct hdr_v1 *h1 = &pbd1->hdr.h1;
+
+	smp_rmb();
+
+	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
+
+		/* We could have just memset this but we will lose the
+		 * flexibility of making the priv area sticky
+		 */
+		BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
+		BLOCK_NUM_PKTS(pbd1) = 0;
+		BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+		getnstimeofday(&ts);
+		h1->ts_first_pkt.ts_sec = ts.tv_sec;
+		h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
+		pkc1->pkblk_start = (char *)pbd1;
+		pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
+		BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
+		BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+		BLOCK_O2PRIV(pbd1) = (__u16)BLK_HDR_LEN;
+		pbd1->version = pkc1->version;
+		pkc1->prev = pkc1->nxt_offset;
+		pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
+		prb_thaw_queue(pkc1);
+		_prb_refresh_rx_retire_blk_timer(pkc1);
+
+		smp_wmb();
+
+		return;
+	}
+
+	WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
+		pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
+	dump_stack();
+	BUG();
+}
+
+/*
+ * Queue freeze logic:
+ * 1) Assume tp_block_nr = 8 blocks.
+ * 2) At time 't0', user opens Rx ring.
+ * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
+ * 4) user-space is either sleeping or processing block '0'.
+ * 5) tpacket_rcv is currently filling block '7', since there is no space left,
+ *    it will close block-7,loop around and try to fill block '0'.
+ *    call-flow:
+ *    __packet_lookup_frame_in_block
+ *      prb_retire_current_block()
+ *      prb_dispatch_next_block()
+ *        |->(BLOCK_STATUS == USER) evaluates to true
+ *    5.1) Since block-0 is currently in-use, we just freeze the queue.
+ * 6) Now there are two cases:
+ *    6.1) Link goes idle right after the queue is frozen.
+ *         But remember, the last open_block() refreshed the timer.
+ *         When this timer expires,it will refresh itself so that we can
+ *         re-open block-0 in near future.
+ *    6.2) Link is busy and keeps on receiving packets. This is a simple
+ *         case and __packet_lookup_frame_in_block will check if block-0
+ *         is free and can now be re-used.
+ */
+static inline void prb_freeze_queue(struct kbdq_core *pkc,
+				  struct packet_sock *po)
+{
+	pkc->reset_pending_on_curr_blk = 1;
+	po->stats_u.stats3.tp_freeze_q_cnt++;
+}
+
+#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN_V3((length)))
+
+/*
+ * If the next block is free then we will dispatch it
+ * and return a good offset.
+ * Else, we will freeze the queue.
+ * So, caller must check the return value.
+ */
+static void *prb_dispatch_next_block(struct kbdq_core *pkc,
+		struct packet_sock *po)
+{
+	struct block_desc *pbd;
+
+	smp_rmb();
+
+	/* 1. Get current block num */
+	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+	/* 2. If this block is currently in_use then freeze the queue */
+	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
+		prb_freeze_queue(pkc, po);
+		return NULL;
+	}
+
+	/*
+	 * 3.
+	 * open this block and return the offset where the first packet
+	 * needs to get stored.
+	 */
+	prb_open_block(pkc, pbd);
+	return (void *)pkc->nxt_offset;
+}
+
+static void prb_retire_current_block(struct kbdq_core *pkc,
+		struct packet_sock *po, unsigned int status)
+{
+	struct block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+	/* retire/close the current block */
+	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
+		/*
+		 * Plug the case where copy_bits() is in progress on
+		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
+		 * have space to copy the pkt in the current block and
+		 * called prb_retire_current_block()
+		 *
+		 * We don't need to worry about the TMO case because
+		 * the timer-handler already handled this case.
+		 */
+		if (!(status & TP_STATUS_BLK_TMO)) {
+			while (atomic_read(&pkc->blk_fill_in_prog)) {
+				/* Waiting for skb_copy_bits to finish... */
+				cpu_relax();
+			}
+		}
+		prb_close_block(pkc, pbd, po, status);
+		return;
+	}
+
+	WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
+	dump_stack();
+	BUG();
+}
+
+static inline int prb_curr_blk_in_use(struct kbdq_core *pkc,
+				      struct block_desc *pbd)
+{
+	return TP_STATUS_USER & BLOCK_STATUS(pbd);
+}
+
+static inline int prb_queue_frozen(struct kbdq_core *pkc)
+{
+	return pkc->reset_pending_on_curr_blk;
+}
+
+static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
+{
+	struct kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
+	atomic_dec(&pkc->blk_fill_in_prog);
+}
+
+static void prb_fill_rxhash(struct kbdq_core *pkc,
+			struct tpacket3_hdr *ppd)
+{
+	ppd->tp_rxhash = skb_get_rxhash(pkc->skb);
+}
+
+static void prb_run_all_ft_ops(struct kbdq_core *pkc,
+			struct tpacket3_hdr *ppd)
+{
+	int ops;
+	for (ops = 0; ops < pkc->kfops.num_ops; ops++)
+		pkc->kfops.fn_ptrs[ops](pkc, ppd);
+}
+
+static inline void prb_fill_curr_block(char *curr, struct kbdq_core *pkc,
+				struct block_desc *pbd,
+				unsigned int len)
+{
+	struct tpacket3_hdr *ppd;
+
+	ppd  = (struct tpacket3_hdr *)curr;
+	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
+	pkc->prev = curr;
+	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
+	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
+	BLOCK_NUM_PKTS(pbd) += 1;
+	atomic_inc(&pkc->blk_fill_in_prog);
+	prb_run_all_ft_ops(pkc, ppd);
+}
+
+/* Assumes caller has the sk->rx_queue.lock */
+static void *__packet_lookup_frame_in_block(struct packet_sock *po,
+					    struct sk_buff *skb,
+						int status,
+					    unsigned int len
+					    )
+{
+	struct kbdq_core *pkc;
+	struct block_desc *pbd;
+	char *curr, *end;
+
+	pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring));
+	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+	pkc->skb = skb;
+
+	/* Queue is frozen when user space is lagging behind */
+	if (prb_queue_frozen(pkc)) {
+		/*
+		 * Check if that last block which caused the queue to freeze,
+		 * is still in_use by user-space.
+		 */
+		if (prb_curr_blk_in_use(pkc, pbd)) {
+			/* Can't record this packet */
+			return NULL;
+		} else {
+			/*
+			 * Ok, the block was released by user-space.
+			 * Now let's open that block.
+			 * opening a block also thaws the queue.
+			 * Thawing is a side effect.
+			 */
+			prb_open_block(pkc, pbd);
+		}
+	}
+
+	smp_mb();
+	curr = pkc->nxt_offset;
+	end = (char *) ((char *)pbd + pkc->kblk_size);
+
+	/* first try the current block */
+	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
+		prb_fill_curr_block(curr, pkc, pbd, len);
+		return (void *)curr;
+	}
+
+	/* Ok, close the current block */
+	prb_retire_current_block(pkc, po, 0);
+
+	/* Now, try to dispatch the next block */
+	curr = (char *)prb_dispatch_next_block(pkc, po);
+	if (curr) {
+		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+		prb_fill_curr_block(curr, pkc, pbd, len);
+		return (void *)curr;
+	}
+
+	/*
+	 * No free blocks are available.user_space hasn't caught up yet.
+	 * Queue was just frozen and now this packet will get dropped.
+	 */
+	return NULL;
+}
+
+static inline void *packet_current_rx_frame(struct packet_sock *po,
+					    struct sk_buff *skb,
+					    int status, unsigned int len)
+{
+	char *curr = NULL;
+	switch (po->tp_version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		curr = packet_lookup_frame(po, &po->rx_ring,
+					po->rx_ring.head, status);
+		return curr;
+	case TPACKET_V3:
+		return __packet_lookup_frame_in_block(po, skb, status, len);
+	default:
+		WARN(1, "TPACKET version not supported\n");
+		BUG();
+		return 0;
+	}
+}
+
+static inline void *prb_lookup_block(struct packet_sock *po,
+				     struct packet_ring_buffer *rb,
+				     unsigned int previous,
+				     int status)
+{
+	struct kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
+	struct block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
+
+	if (status != BLOCK_STATUS(pbd))
+		return NULL;
+	return pbd;
+}
+
+static inline int prb_previous_blk_num(struct packet_ring_buffer *rb)
+{
+	unsigned int prev;
+	if (rb->prb_bdqc.kactive_blk_num)
+		prev = rb->prb_bdqc.kactive_blk_num-1;
+	else
+		prev = rb->prb_bdqc.knum_blocks-1;
+	return prev;
+}
+
+/* Assumes caller has held the rx_queue.lock */
+static inline void *__prb_previous_block(struct packet_sock *po,
+					 struct packet_ring_buffer *rb,
+					 int status)
+{
+	unsigned int previous = prb_previous_blk_num(rb);
+	return prb_lookup_block(po, rb, previous, status);
+}
+
+static inline void *packet_previous_rx_frame(struct packet_sock *po,
+					     struct packet_ring_buffer *rb,
+					     int status)
+{
+	if (po->tp_version <= TPACKET_V2)
+		return packet_previous_frame(po, rb, status);
+
+	return __prb_previous_block(po, rb, status);
+}
+
+static inline void packet_increment_rx_head(struct packet_sock *po,
+					    struct packet_ring_buffer *rb)
+{
+	switch (po->tp_version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		return packet_increment_head(rb);
+	case TPACKET_V3:
+	default:
+		WARN(1, "TPACKET version not supported.\n");
+		BUG();
+		return;
+	}
+}
+
 static inline void *packet_previous_frame(struct packet_sock *po,
 		struct packet_ring_buffer *rb,
 		int status)
@@ -982,12 +1708,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	union {
 		struct tpacket_hdr *h1;
 		struct tpacket2_hdr *h2;
+		struct tpacket3_hdr *h3;
 		void *raw;
 	} h;
 	u8 *skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
-	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
+	unsigned long status = TP_STATUS_USER;
 	unsigned short macoff, netoff, hdrlen;
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
@@ -1033,37 +1760,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			po->tp_reserve;
 		macoff = netoff - maclen;
 	}
-
-	if (macoff + snaplen > po->rx_ring.frame_size) {
-		if (po->copy_thresh &&
-		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
-		    (unsigned)sk->sk_rcvbuf) {
-			if (skb_shared(skb)) {
-				copy_skb = skb_clone(skb, GFP_ATOMIC);
-			} else {
-				copy_skb = skb_get(skb);
-				skb_head = skb->data;
+	if (po->tp_version <= TPACKET_V2) {
+		if (macoff + snaplen > po->rx_ring.frame_size) {
+			if (po->copy_thresh &&
+				atomic_read(&sk->sk_rmem_alloc) + skb->truesize
+				< (unsigned)sk->sk_rcvbuf) {
+				if (skb_shared(skb)) {
+					copy_skb = skb_clone(skb, GFP_ATOMIC);
+				} else {
+					copy_skb = skb_get(skb);
+					skb_head = skb->data;
+				}
+				if (copy_skb)
+					skb_set_owner_r(copy_skb, sk);
 			}
-			if (copy_skb)
-				skb_set_owner_r(copy_skb, sk);
+			snaplen = po->rx_ring.frame_size - macoff;
+			if ((int)snaplen < 0)
+				snaplen = 0;
 		}
-		snaplen = po->rx_ring.frame_size - macoff;
-		if ((int)snaplen < 0)
-			snaplen = 0;
 	}
-
 	spin_lock(&sk->sk_receive_queue.lock);
-	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
+	h.raw = packet_current_rx_frame(po, skb,
+					TP_STATUS_KERNEL, (macoff+snaplen));
 	if (!h.raw)
 		goto ring_is_full;
-	packet_increment_head(&po->rx_ring);
+	if (po->tp_version <= TPACKET_V2) {
+		packet_increment_rx_head(po, &po->rx_ring);
+	/*
+	 * LOSING will be reported till you read the stats,
+	 * because it's COR - Clear On Read.
+	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
+	 * at packet level.
+	 */
+		if (po->stats.tp_drops)
+			status |= TP_STATUS_LOSING;
+	}
 	po->stats.tp_packets++;
 	if (copy_skb) {
 		status |= TP_STATUS_COPY;
 		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
 	}
-	if (!po->stats.tp_drops)
-		status &= ~TP_STATUS_LOSING;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
@@ -1114,6 +1850,36 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_padding = 0;
 		hdrlen = sizeof(*h.h2);
 		break;
+	case TPACKET_V3:
+		/* tp_nxt_offset is already populated above.
+		 * So DONT clear those fields here
+		 */
+		h.h3->tp_status = status;
+		h.h3->tp_len = skb->len;
+		h.h3->tp_snaplen = snaplen;
+		h.h3->tp_mac = macoff;
+		h.h3->tp_net = netoff;
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
+			ts = ktime_to_timespec(skb->tstamp);
+		else
+			getnstimeofday(&ts);
+		h.h3->tp_sec  = ts.tv_sec;
+		h.h3->tp_nsec = ts.tv_nsec;
+		if (vlan_tx_tag_present(skb)) {
+			h.h3->tp_vlan_tci = vlan_tx_tag_get(skb);
+			h.h3->tp_status |= TP_STATUS_VLAN_VALID;
+		} else {
+			h.h3->tp_vlan_tci = 0;
+		}
+		h.h3->tp_padding = 0;
+		hdrlen = sizeof(*h.h3);
+		break;
 	default:
 		BUG();
 	}
@@ -1134,13 +1900,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	{
 		u8 *start, *end;
 
-		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
-		for (start = h.raw; start < end; start += PAGE_SIZE)
-			flush_dcache_page(pgv_to_page(start));
+		if (po->tp_version <= TPACKET_V2) {
+			end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
+				+ macoff + snaplen);
+			for (start = h.raw; start < end; start += PAGE_SIZE)
+				flush_dcache_page(pgv_to_page(start));
+		}
 		smp_wmb();
 	}
 #endif
-	__packet_set_status(po, h.raw, status);
+	if (po->tp_version <= TPACKET_V2)
+		__packet_set_status(po, h.raw, status);
+	else
+		prb_clear_blk_fill_status(&po->rx_ring);
 
 	sk->sk_data_ready(sk, 0);
 
@@ -1631,7 +2403,7 @@ static int packet_release(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct packet_sock *po;
 	struct net *net;
-	struct tpacket_req req;
+	union tpacket_req_u req_u;
 
 	if (!sk)
 		return 0;
@@ -1654,13 +2426,13 @@ static int packet_release(struct socket *sock)
 
 	packet_flush_mclist(sk);
 
-	memset(&req, 0, sizeof(req));
+	memset(&req_u, 0, sizeof(req_u));
 
 	if (po->rx_ring.pg_vec)
-		packet_set_ring(sk, &req, 1, 0);
+		packet_set_ring(sk, &req_u, 1, 0);
 
 	if (po->tx_ring.pg_vec)
-		packet_set_ring(sk, &req, 1, 1);
+		packet_set_ring(sk, &req_u, 1, 1);
 
 	fanout_release(sk);
 
@@ -2280,15 +3052,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 	case PACKET_RX_RING:
 	case PACKET_TX_RING:
 	{
-		struct tpacket_req req;
+		union tpacket_req_u req_u;
+		int len;
 
-		if (optlen < sizeof(req))
+		switch (po->tp_version) {
+		case TPACKET_V1:
+		case TPACKET_V2:
+			len = sizeof(req_u.req);
+			break;
+		case TPACKET_V3:
+		default:
+			len = sizeof(req_u.req3);
+			break;
+		}
+		if (optlen < len)
 			return -EINVAL;
 		if (pkt_sk(sk)->has_vnet_hdr)
 			return -EINVAL;
-		if (copy_from_user(&req, optval, sizeof(req)))
+		if (copy_from_user(&req_u.req, optval, len))
 			return -EFAULT;
-		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
+		return packet_set_ring(sk, &req_u, 0,
+			optname == PACKET_TX_RING);
 	}
 	case PACKET_COPY_THRESH:
 	{
@@ -2315,6 +3099,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		switch (val) {
 		case TPACKET_V1:
 		case TPACKET_V2:
+		case TPACKET_V3:
 			po->tp_version = val;
 			return 0;
 		default:
@@ -2424,6 +3209,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	struct packet_sock *po = pkt_sk(sk);
 	void *data;
 	struct tpacket_stats st;
+	union tpacket_stats_u st_u;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -2436,15 +3222,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case PACKET_STATISTICS:
-		if (len > sizeof(struct tpacket_stats))
-			len = sizeof(struct tpacket_stats);
+		if (po->tp_version == TPACKET_V3) {
+			len = sizeof(struct tpacket_stats_v3);
+		} else {
+			if (len > sizeof(struct tpacket_stats))
+				len = sizeof(struct tpacket_stats);
+		}
 		spin_lock_bh(&sk->sk_receive_queue.lock);
-		st = po->stats;
+		if (po->tp_version == TPACKET_V3) {
+			memcpy(&st_u.stats3, &po->stats,
+			sizeof(struct tpacket_stats));
+			st_u.stats3.tp_freeze_q_cnt =
+			po->stats_u.stats3.tp_freeze_q_cnt;
+			st_u.stats3.tp_packets += po->stats.tp_drops;
+			data = &st_u.stats3;
+		} else {
+			st = po->stats;
+			st.tp_packets += st.tp_drops;
+			data = &st;
+		}
 		memset(&po->stats, 0, sizeof(st));
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		st.tp_packets += st.tp_drops;
-
-		data = &st;
 		break;
 	case PACKET_AUXDATA:
 		if (len > sizeof(int))
@@ -2485,6 +3283,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		case TPACKET_V2:
 			val = sizeof(struct tpacket2_hdr);
 			break;
+		case TPACKET_V3:
+			val = sizeof(struct tpacket3_hdr);
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -2641,7 +3442,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
 
 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->rx_ring.pg_vec) {
-		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
+		if (!packet_previous_rx_frame(po, &po->rx_ring,
+			TP_STATUS_KERNEL))
 			mask |= POLLIN | POLLRDNORM;
 	}
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -2760,7 +3562,7 @@ out_free_pgvec:
 	goto out;
 }
 
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		int closing, int tx_ring)
 {
 	struct pgv *pg_vec = NULL;
@@ -2769,7 +3571,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 	struct packet_ring_buffer *rb;
 	struct sk_buff_head *rb_queue;
 	__be16 num;
-	int err;
+	int err = -EINVAL;
+	/* Added to avoid minimal code churn */
+	struct tpacket_req *req = &req_u->req;
+
+	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
+	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
+		WARN(1, "Tx-ring is not supported.\n");
+		goto out;
+	}
 
 	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
 	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -2795,6 +3605,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		case TPACKET_V2:
 			po->tp_hdrlen = TPACKET2_HDRLEN;
 			break;
+		case TPACKET_V3:
+			po->tp_hdrlen = TPACKET3_HDRLEN;
+			break;
 		}
 
 		err = -EINVAL;
@@ -2820,6 +3633,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		pg_vec = alloc_pg_vec(req, order);
 		if (unlikely(!pg_vec))
 			goto out;
+		switch (po->tp_version) {
+		case TPACKET_V3:
+		/* Transmit path is not supported. We checked
+		 * it above but just being paranoid
+		 */
+			if (!tx_ring)
+				init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
+				break;
+		default:
+			break;
+		}
 	}
 	/* Done */
 	else {
@@ -2872,7 +3696,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		register_prot_hook(sk);
 	}
 	spin_unlock(&po->bind_lock);
-
+	if (closing && (po->tp_version > TPACKET_V2)) {
+		/* Because we don't support block-based V3 on tx-ring */
+		if (!tx_ring)
+			prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
+	}
 	release_sock(sk);
 
 	if (pg_vec)
-- 
1.7.5.2


^ permalink raw reply related

* Re: [PATCH net-next v3 af-packet 2/2] Enhance af-packet to provide (near zero)lossless packet capture functionality.
From: Joe Perches @ 2011-07-18  3:46 UTC (permalink / raw)
  To: Chetan Loke; +Cc: davem, netdev
In-Reply-To: <1310959610-1688-3-git-send-email-loke.chetan@gmail.com>

On Sun, 2011-07-17 at 23:26 -0400, Chetan Loke wrote:
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
[]
> +#define V3_ALIGNMENT	(8)
> +#define ALIGN_V3(x)	(((x)+V3_ALIGNMENT-1)&~(V3_ALIGNMENT-1))

Maybe just use the kernel.h macro ALIGN()?

> +#define BLK_HDR_LEN	(ALIGN_V3(sizeof(struct block_desc)))

So this becomes:
#define BLK_HDR_LEN	ALIGN(sizeof(struct block_desc), V3_ALIGNMENT)

^ permalink raw reply

* Re: [PATCH] Fix panic in virtnet_remove
From: Krishna Kumar @ 2011-07-18  3:57 UTC (permalink / raw)
  To: mst; +Cc: netdev, shemminger, davem, Krishna Kumar

"Michael S. Tsirkin" <mst@redhat.com> wrote on 07/17/2011 03:42:15 PM:

> > modprobe -r virtio_net panics in free_netdev() as the
> > dev is already freed in the newly introduced virtnet_free
> > (commit 3fa2a1df9094).
> 
> Good catch, thanks!
> 
> > Since virtnet_remove doesn't require
> > dev after unregister,
> 
> I'm not sure that true: vi used just above the line you remove
> is I think the struct virtnet_info that is allocated
> as part of that structure.

You are right, the dev cannot be freed in the destructor.

> > I am removing the free_netdev call
> > in virtnet_remove instead of in virtnet_free (which seems
> > to be the right place to free the dev). Confirmed that
> > the panic is fixed with this patch.
> 
> This might be just because the memory isn't reused.
> Try enabling slab poisoning, I think you'll observe some problems.
> 
> Do we absolutely have to have a destructor?
> Can't we move the per cpu counter free from
> virtnet_free to virtnet_remove, and get rid of
> virtnet_free completely?

I see some other drivers doing that, e.g. xennet_remove:
	...
	free_percpu(info->stats);
	free_netdev(info->netdev);
	...

How about this patch (compile tested only)?

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 drivers/net/virtio_net.c |   10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff -ruNp org/drivers/net/virtio_net.c new/drivers/net/virtio_net.c
--- org/drivers/net/virtio_net.c	2011-07-18 09:14:02.000000000 +0530
+++ new/drivers/net/virtio_net.c	2011-07-18 09:16:35.000000000 +0530
@@ -705,14 +705,6 @@ static void virtnet_netpoll(struct net_d
 }
 #endif
 
-static void virtnet_free(struct net_device *dev)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-
-	free_percpu(vi->stats);
-	free_netdev(dev);
-}
-
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -959,7 +951,6 @@ static int virtnet_probe(struct virtio_d
 	/* Set up network device as normal. */
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
-	dev->destructor = virtnet_free;
 
 	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
 	SET_NETDEV_DEV(dev, &vdev->dev);
@@ -1122,6 +1113,7 @@ static void __devexit virtnet_remove(str
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
 
+	free_percpu(vi->stats);
 	free_netdev(vi->dev);
 }
 

^ permalink raw reply

* linux-next: manual merge of the net tree with Linus' tree
From: Stephen Rothwell @ 2011-07-18  4:41 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Ilia Kolomisnky, Gustavo F. Padovan,
	Luiz Augusto von Dentz

Hi all,

Today's linux-next merge of the net tree got a conflict in
net/bluetooth/l2cap_core.c between commit 9191e6ad897a ("Bluetooth: Fix
regression in L2CAP connection procedure") from Linus' tree and commit
e2fd318e3a92 ("Bluetooth: Fixes l2cap "command reject" reply according to
spec") from the net tree.

I fixed it up (see below) and can carry the fix as necessary.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc net/bluetooth/l2cap_core.c
index 7705e26,f7f8e2c..0000000
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@@ -620,11 -763,10 +763,11 @@@ static void l2cap_conn_start(struct l2c
  					struct sock *parent = bt_sk(sk)->parent;
  					rsp.result = cpu_to_le16(L2CAP_CR_PEND);
  					rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
 -					parent->sk_data_ready(parent, 0);
 +					if (parent)
 +						parent->sk_data_ready(parent, 0);
  
  				} else {
- 					sk->sk_state = BT_CONFIG;
+ 					l2cap_state_change(chan, BT_CONFIG);
  					rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
  					rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
  				}
@@@ -2324,10 -2523,14 +2524,13 @@@ static inline int l2cap_config_req(stru
  
  	sk = chan->sk;
  
- 	if (sk->sk_state != BT_CONFIG && sk->sk_state != BT_CONNECT2) {
- 		struct l2cap_cmd_rej rej;
 -	if ((bt_sk(sk)->defer_setup && chan->state != BT_CONNECT2) ||
 -		 (!bt_sk(sk)->defer_setup && chan->state != BT_CONFIG)) {
++	if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) {
+ 		struct l2cap_cmd_rej_cid rej;
+ 
+ 		rej.reason = cpu_to_le16(L2CAP_REJ_INVALID_CID);
+ 		rej.scid = cpu_to_le16(chan->scid);
+ 		rej.dcid = cpu_to_le16(chan->dcid);
  
- 		rej.reason = cpu_to_le16(0x0002);
  		l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
  				sizeof(rej), &rej);
  		goto unlock;
@@@ -4010,10 -4150,9 +4150,10 @@@ static int l2cap_security_cfm(struct hc
  					struct sock *parent = bt_sk(sk)->parent;
  					res = L2CAP_CR_PEND;
  					stat = L2CAP_CS_AUTHOR_PEND;
 -					parent->sk_data_ready(parent, 0);
 +					if (parent)
 +						parent->sk_data_ready(parent, 0);
  				} else {
- 					sk->sk_state = BT_CONFIG;
+ 					l2cap_state_change(chan, BT_CONFIG);
  					res = L2CAP_CR_SUCCESS;
  					stat = L2CAP_CS_NO_INFO;
  				}

^ permalink raw reply

* Re: [PATCH net-next v3 af-packet 1/2] Enhance af-packet to provide (near zero)lossless packet capture functionality.
From: Eric Dumazet @ 2011-07-18  6:38 UTC (permalink / raw)
  To: Chetan Loke; +Cc: davem, netdev
In-Reply-To: <1310959610-1688-2-git-send-email-loke.chetan@gmail.com>

Le dimanche 17 juillet 2011 à 23:26 -0400, Chetan Loke a écrit :
> Signed-off-by: Chetan Loke <loke.chetan@gmail.com>
> ---
>  include/linux/if_packet.h |  125 +++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 125 insertions(+), 0 deletions(-)

>  
> +struct tpacket3_hdr {
> +	__u32		tp_status;
> +	__u32		tp_next_offset;
> +	__u32		tp_len;
> +	__u32		tp_snaplen;
> +	__u16		tp_mac;
> +	__u16		tp_net;
> +	__u32		tp_sec;
> +	__u32		tp_nsec;
> +	__u32		tp_rxhash;
> +	__u16		tp_vlan_tci;
> +	__u16		tp_padding;
> +	__u32		tp_next_offset;
> +};

Srange, I see tp_next_offset twice in tpacket3_hdr ?

^ permalink raw reply

* [PATCH 1/2] stmmac: add memory barriers at appropriate places
From: Giuseppe CAVALLARO @ 2011-07-18  6:54 UTC (permalink / raw)
  To: netdev; +Cc: Shiraz Hashim, Giuseppe Cavallaro

From: Shiraz Hashim <shiraz.hashim@st.com>

This patch, provided by ST SPEAr developers,
has fixed a problem raised on ARM CA9 where
happened that the dma_transmission was enabled before
the dma descriptors were properly filled. To guarantee this
data memory barriers have been explicity used in the driver.

Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/stmmac_main.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index e25e44a..f3d16d85 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1045,6 +1045,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 					  len, DMA_TO_DEVICE);
 		priv->tx_skbuff[entry] = NULL;
 		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion);
+		wmb();
 		priv->hw->desc->set_tx_owner(desc);
 	}
 
@@ -1056,6 +1057,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (likely(priv->tm->enable))
 		priv->hw->desc->clear_tx_ic(desc);
 #endif
+
+	wmb();
+
 	/* To avoid raise condition */
 	priv->hw->desc->set_tx_owner(first);
 
@@ -1116,6 +1120,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 			}
 			RX_DBG(KERN_INFO "\trefill entry #%d\n", entry);
 		}
+		wmb();
 		priv->hw->desc->set_rx_owner(p + entry);
 	}
 }
-- 
1.7.4.4


^ permalink raw reply related

* [PATCH 2/2] stmmac: Allow SOCs to use Store forward mode eventhough tx_coe is 0. (V2)
From: Giuseppe CAVALLARO @ 2011-07-18  6:54 UTC (permalink / raw)
  To: netdev; +Cc: Srinivas Kandagatla, Giuseppe Cavallaro
In-Reply-To: <1310972049-827-1-git-send-email-peppe.cavallaro@st.com>

From: Srinivas Kandagatla <srinivas.kandagatla@st.com>

This patch adds new field 'force_sf_dma_mode' to plat_stmmacenet_data
struct to allow users to specify if they want to use force store forward
eventhough tx_coe is not available in hw.
without this flag stmmac driver will use cut-thru mode not use
store-forward mode.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/stmmac_main.c |    8 +++++---
 include/linux/stmmac.h           |    1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index f3d16d85..0e0134e 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -557,9 +557,11 @@ static void free_dma_desc_resources(struct stmmac_priv *priv)
  */
 static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 {
-	if (likely((priv->plat->tx_coe) && (!priv->no_csum_insertion))) {
-		/* In case of GMAC, SF mode has to be enabled
-		 * to perform the TX COE. This depends on:
+	if (likely(priv->plat->force_sf_dma_mode ||
+		((priv->plat->tx_coe) && (!priv->no_csum_insertion)))) {
+		/*
+		 * In case of GMAC, SF mode can be enabled
+		 * to perform the TX COE in HW. This depends on:
 		 * 1) TX COE if actually supported
 		 * 2) There is no bugged Jumbo frame support
 		 *    that needs to not insert csum in the TDES.
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 9529e49..05d7756 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -40,6 +40,7 @@ struct plat_stmmacenet_data {
 	int tx_coe;
 	int bugged_jumbo;
 	int pmt;
+	int force_sf_dma_mode;
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	void (*bus_setup)(void __iomem *ioaddr);
 	int (*init)(struct platform_device *pdev);
-- 
1.7.4.4


^ permalink raw reply related

* Re: [patch net-next-2.6] vlan: introduce ndo_vlan_[enable/disable]
From: Jiri Pirko @ 2011-07-18  7:13 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, davem, shemminger, eric.dumazet, greearb
In-Reply-To: <CAHXqBFJ94hgCsHF-o8FNQPqpVexy4Z3Z+wY+G4MPDLmuYCx+Jg@mail.gmail.com>

Sun, Jul 17, 2011 at 11:06:57PM CEST, mirqus@gmail.com wrote:
>W dniu 17 lipca 2011 21:44 użytkownik Jiri Pirko <jpirko@redhat.com> napisał:
>> Sun, Jul 17, 2011 at 10:36:04AM CEST, mirqus@gmail.com wrote:
>>>W dniu 17 lipca 2011 09:30 użytkownik Jiri Pirko <jpirko@redhat.com> napisał:
>>>> Sat, Jul 16, 2011 at 04:14:36PM CEST, mirqus@gmail.com wrote:
>>>>>2011/7/16 Jiri Pirko <jpirko@redhat.com>:
>>>>>> Some devices are not able to enable/disable rx/tw vlan accel separately.
>>>>>> they depend on ndo_vlan_rx_register to know if to enable of disable
>>>>>> hw accel. And since ndo_vlan_rx_register is going to die soon,
>>>>>> this must be resolved.
>>>>>>
>>>>>> One solution might be to enable accel on device start every time, even
>>>>>> if there are no vlan up on. But this would change behaviour and might
>>>>>> lead to possible regression (on older devices).
>>>>>[...]
>>>>>
>>>>>Please describe the possible regression. As I see it, there won't be
>>>>>any user visible change of behaviour - network code takes care of
>>>>>reinserting VLAN tag when necessary. If you think that disabling tag
>>>>>stripping is beneficial for cases where no VLANs are configured, it's
>>>>>better to do that in netdev_fix_features() for devices which advertise
>>>>>NETIF_F_HW_VLAN_RX in hw_features.
>>>>
>>>> Well I just wanted to preserve current behaviour which is that in many
>>>> drivers vlan accel is enabled only if some vid is registered upon the
>>>> device and it's disabled again when no vid is registered. I can see
>>>> no way to do this with current code after removing ndo_vlan_rx_register.
>>>>
>>>> I expect unexpected
>>>
>>>:-D
>>>
>>>> ... problems on old cards when vlan accel would be
>>>> enabled all the time, but maybe I'm wrong...
>>>
>>>Device has no way of knowing how the system uses VLAN tags, stripped
>>>or not. Any problems would be driver problems and since you're making
>>>it all use generic code, bugs will hit all drivers simultaneously or
>>>(preferably) won't happen at all.
>>>
>>>> One idea is for device which do not support sepatate rx/tx vlan accel
>>>> enabling/disabling they can probably use ndo_fix_features force to
>>>> enable/disable rx/tx pair together. That would resolve the situation as
>>>> well giving user possibility to turn off vlan accel in case of any issues.
>>>
>>>That is exactly the idea behind ndo_fix_features.
>
>> In netdev_fix_features add check if either one of NETIF_F_HW_VLAN_TX or
>> NETIF_F_HW_VLAN_RX is set and in that case set the other one. Of course
>> this would be done only for devices what do not support separate rx/tx
>> vlan on/off. But how to distinguish? NETIF_F_HW_VLAN_BOTH feature flag?
>
>Not in netdev_fix_features(). This case you describe should be handled
>in driver-specific

Well since the code would be the same in many drivers I was thinking
about putting it in general code...

Anyway, would you please look at following example patch and tell me if
it looks good to you?

diff --git a/drivers/net/atl1c/atl1c.h b/drivers/net/atl1c/atl1c.h
index 0f481b9..ca70e16 100644
--- a/drivers/net/atl1c/atl1c.h
+++ b/drivers/net/atl1c/atl1c.h
@@ -555,7 +555,6 @@ struct atl1c_smb {
 struct atl1c_adapter {
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
-	struct vlan_group   *vlgrp;
 	struct napi_struct  napi;
 	struct atl1c_hw        hw;
 	struct atl1c_hw_stats  hw_stats;
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 1269ba5..ad49ad0 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -411,29 +411,30 @@ static void atl1c_set_multi(struct net_device *netdev)
 	}
 }
 
-static void atl1c_vlan_rx_register(struct net_device *netdev,
-				   struct vlan_group *grp)
+static void __atl1c_vlan_mode(u32 features, u32 *mac_ctrl_data)
+{
+	/* NETIF_F_HW_VLAN_RX is always in same state as NETIF_F_HW_VLAN_TX */
+	if (features & NETIF_F_HW_VLAN_TX) {
+		/* enable VLAN tag insert/strip */
+		*mac_ctrl_data |= MAC_CTRL_RMV_VLAN;
+	} else {
+		/* disable VLAN tag insert/strip */
+		*mac_ctrl_data &= ~MAC_CTRL_RMV_VLAN;
+	}
+}
+
+static void atl1c_vlan_mode(struct net_device *netdev, u32 features)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = adapter->pdev;
 	u32 mac_ctrl_data = 0;
 
 	if (netif_msg_pktdata(adapter))
-		dev_dbg(&pdev->dev, "atl1c_vlan_rx_register\n");
+		dev_dbg(&pdev->dev, "atl1c_vlan_mode\n");
 
 	atl1c_irq_disable(adapter);
-
-	adapter->vlgrp = grp;
 	AT_READ_REG(&adapter->hw, REG_MAC_CTRL, &mac_ctrl_data);
-
-	if (grp) {
-		/* enable VLAN tag insert/strip */
-		mac_ctrl_data |= MAC_CTRL_RMV_VLAN;
-	} else {
-		/* disable VLAN tag insert/strip */
-		mac_ctrl_data &= ~MAC_CTRL_RMV_VLAN;
-	}
-
+	__atl1c_vlan_mode(features, &mac_ctrl_data);
 	AT_WRITE_REG(&adapter->hw, REG_MAC_CTRL, mac_ctrl_data);
 	atl1c_irq_enable(adapter);
 }
@@ -443,9 +444,10 @@ static void atl1c_restore_vlan(struct atl1c_adapter *adapter)
 	struct pci_dev *pdev = adapter->pdev;
 
 	if (netif_msg_pktdata(adapter))
-		dev_dbg(&pdev->dev, "atl1c_restore_vlan !");
-	atl1c_vlan_rx_register(adapter->netdev, adapter->vlgrp);
+		dev_dbg(&pdev->dev, "atl1c_restore_vlan\n");
+	atl1c_vlan_mode(adapter->netdev, adapter->netdev->features);
 }
+
 /*
  * atl1c_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -483,12 +485,38 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 
 static u32 atl1c_fix_features(struct net_device *netdev, u32 features)
 {
+	u32 changed = netdev->features ^ features;
+
+	/*
+	 * Since there is no support for separate rx/tx vlan accel
+	 * enable/disable make sure these are always set in pair.
+	 */
+	if ((changed & NETIF_F_HW_VLAN_TX && features & NETIF_F_HW_VLAN_TX) ||
+	    (changed & NETIF_F_HW_VLAN_RX && features & NETIF_F_HW_VLAN_RX))
+		features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+	else
+		features &= ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
+
 	if (netdev->mtu > MAX_TSO_FRAME_SIZE)
 		features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
 
 	return features;
 }
 
+static int atl1c_set_features(struct net_device *netdev, u32 features)
+{
+	u32 changed = netdev->features ^ features;
+
+	/*
+	 * Test for NETIF_F_HW_VLAN_TX as it's paired with NETIF_F_HW_VLAN_RX
+	 * by atl1c_fix_features.
+	 */
+	if (changed & NETIF_F_HW_VLAN_TX)
+		atl1c_vlan_mode(netdev, features);
+
+	return 0;
+}
+
 /*
  * atl1c_change_mtu - Change the Maximum Transfer Unit
  * @netdev: network interface device structure
@@ -1433,8 +1461,7 @@ static void atl1c_setup_mac_ctrl(struct atl1c_adapter *adapter)
 	mac_ctrl_data |= ((hw->preamble_len & MAC_CTRL_PRMLEN_MASK) <<
 			MAC_CTRL_PRMLEN_SHIFT);
 
-	if (adapter->vlgrp)
-		mac_ctrl_data |= MAC_CTRL_RMV_VLAN;
+	__atl1c_vlan_mode(netdev->features, &mac_ctrl_data);
 
 	mac_ctrl_data |= MAC_CTRL_BC_EN;
 	if (netdev->flags & IFF_PROMISC)
@@ -1878,14 +1905,14 @@ rrs_checked:
 		skb_put(skb, length - ETH_FCS_LEN);
 		skb->protocol = eth_type_trans(skb, netdev);
 		atl1c_rx_checksum(adapter, skb, rrs);
-		if (unlikely(adapter->vlgrp) && rrs->word3 & RRS_VLAN_INS) {
+		if (rrs->word3 & RRS_VLAN_INS) {
 			u16 vlan;
 
 			AT_TAG_TO_VLAN(rrs->vlan_tag, vlan);
 			vlan = le16_to_cpu(vlan);
-			vlan_hwaccel_receive_skb(skb, adapter->vlgrp, vlan);
-		} else
-			netif_receive_skb(skb);
+			__vlan_hwaccel_put_tag(skb, vlan);
+		}
+		netif_receive_skb(skb);
 
 		(*work_done)++;
 		count++;
@@ -2507,8 +2534,7 @@ static int atl1c_suspend(struct device *dev)
 		/* clear phy interrupt */
 		atl1c_read_phy_reg(hw, MII_ISR, &mii_intr_status_data);
 		/* Config MAC Ctrl register */
-		if (adapter->vlgrp)
-			mac_ctrl_data |= MAC_CTRL_RMV_VLAN;
+		__atl1c_vlan_mode(netdev->features, &mac_ctrl_data);
 
 		/* magic packet maybe Broadcast&multicast&Unicast frame */
 		if (wufc & AT_WUFC_MAG)
@@ -2581,14 +2607,14 @@ static const struct net_device_ops atl1c_netdev_ops = {
 	.ndo_stop		= atl1c_close,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_start_xmit		= atl1c_xmit_frame,
-	.ndo_set_mac_address 	= atl1c_set_mac_addr,
+	.ndo_set_mac_address	= atl1c_set_mac_addr,
 	.ndo_set_multicast_list = atl1c_set_multi,
 	.ndo_change_mtu		= atl1c_change_mtu,
 	.ndo_fix_features	= atl1c_fix_features,
+	.ndo_set_features	= atl1c_set_features,
 	.ndo_do_ioctl		= atl1c_ioctl,
 	.ndo_tx_timeout		= atl1c_tx_timeout,
 	.ndo_get_stats		= atl1c_get_stats,
-	.ndo_vlan_rx_register	= atl1c_vlan_rx_register,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= atl1c_netpoll,
 #endif
@@ -2608,10 +2634,10 @@ static int atl1c_init_netdev(struct net_device *netdev, struct pci_dev *pdev)
 	netdev->hw_features =	NETIF_F_SG	   |
 				NETIF_F_HW_CSUM	   |
 				NETIF_F_HW_VLAN_TX |
+				NETIF_F_HW_VLAN_RX |
 				NETIF_F_TSO	   |
 				NETIF_F_TSO6;
-	netdev->features =	netdev->hw_features |
-				NETIF_F_HW_VLAN_RX;
+	netdev->features =	netdev->hw_features;
 	return 0;
 }
 
-- 
1.7.6


^ permalink raw reply related

* Re: [PATCHv9] vhost: experimental tx zero-copy support
From: Michael S. Tsirkin @ 2011-07-18  7:16 UTC (permalink / raw)
  To: Jesper Juhl; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <alpine.LNX.2.00.1107172153030.32359@swampdragon.chaosbits.net>

On Sun, Jul 17, 2011 at 10:01:41PM +0200, Jesper Juhl wrote:
> > @@ -28,10 +29,18 @@
> >  
> >  #include "vhost.h"
> >  
> > +static int zcopytx;
> > +module_param(zcopytx, int, 0444);
> 
> Should everyone be able to read this? How about "0440" just to be 
> paranoid? or?

I find it very helpful to have the parameter visible in sysfs.
Given that:

[mst@tuck linux-2.6]$ grep module_param drivers/net/*c|grep [64]44|wc -l
14
[mst@tuck linux-2.6]$ grep module_param drivers/net/*c|grep [64]40|wc -l
0
[mst@tuck linux-2.6]$ grep module_param drivers/net/*c|grep [64]00|wc -l
7

So at least the precedent is against 0440.  What do you think?

-- 
MST

^ permalink raw reply

* [PATCH] net: filter: BPF 'JIT' compiler for PPC64
From: Matt Evans @ 2011-07-18  7:50 UTC (permalink / raw)
  To: netdev, linuxppc-dev

An implementation of a code generator for BPF programs to speed up packet
filtering on PPC64, inspired by Eric Dumazet's x86-64 version.

Filter code is generated as an ABI-compliant function in module_alloc()'d mem
with stackframe & prologue/epilogue generated if required (simple filters don't
need anything more than an li/blr).  The filter's local variables, M[], live in
registers.  Supports all BPF opcodes, although "complicated" loads from negative
packet offsets (e.g. SKF_LL_OFF) are not yet supported.

There are a couple of further optimisations left for future work; many-pass
assembly with branch-reach reduction and a register allocator to push M[]
variables into volatile registers would improve the code quality further.

This currently supports big-endian 64-bit PowerPC only (but is fairly simple
to port to PPC32 or LE!).

Enabled in the same way as x86-64:

	echo 1 > /proc/sys/net/core/bpf_jit_enable

Or, enabled with extra debug output:

	echo 2 > /proc/sys/net/core/bpf_jit_enable

Signed-off-by: Matt Evans <matt@ozlabs.org>
---

Since the RFC post, this has incorporated the bugfixes/tidies from review plus a
couple more found in further testing, plus some general/comment tidies.

 arch/powerpc/Kconfig                  |    1 +
 arch/powerpc/Makefile                 |    3 +-
 arch/powerpc/include/asm/ppc-opcode.h |   40 ++
 arch/powerpc/net/Makefile             |    4 +
 arch/powerpc/net/bpf_jit.S            |  138 +++++++
 arch/powerpc/net/bpf_jit.h            |  227 +++++++++++
 arch/powerpc/net/bpf_jit_comp.c       |  694 +++++++++++++++++++++++++++++++++
 7 files changed, 1106 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2729c66..39860fc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -134,6 +134,7 @@ config PPC
 	select GENERIC_IRQ_SHOW_LEVEL
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_BPF_JIT if PPC64
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index b7212b6..b94740f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -154,7 +154,8 @@ core-y				+= arch/powerpc/kernel/ \
 				   arch/powerpc/lib/ \
 				   arch/powerpc/sysdev/ \
 				   arch/powerpc/platforms/ \
-				   arch/powerpc/math-emu/
+				   arch/powerpc/math-emu/ \
+				   arch/powerpc/net/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
 core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
 
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e472659..e980faa 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -71,6 +71,42 @@
 #define PPC_INST_ERATSX			0x7c000126
 #define PPC_INST_ERATSX_DOT		0x7c000127
 
+/* Misc instructions for BPF compiler */
+#define PPC_INST_LD			0xe8000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_STDU			0xf8000001
+#define PPC_INST_MFLR			0x7c0802a6
+#define PPC_INST_MTLR			0x7c0803a6
+#define PPC_INST_CMPWI			0x2c000000
+#define PPC_INST_CMPDI			0x2c200000
+#define PPC_INST_CMPLW			0x7c000040
+#define PPC_INST_CMPLWI			0x28000000
+#define PPC_INST_ADDI			0x38000000
+#define PPC_INST_ADDIS			0x3c000000
+#define PPC_INST_ADD			0x7c000214
+#define PPC_INST_SUB			0x7c000050
+#define PPC_INST_BLR			0x4e800020
+#define PPC_INST_BLRL			0x4e800021
+#define PPC_INST_MULLW			0x7c0001d6
+#define PPC_INST_MULHWU			0x7c000016
+#define PPC_INST_MULLI			0x1c000000
+#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_RLWINM			0x54000000
+#define PPC_INST_RLDICR			0x78000004
+#define PPC_INST_SLW			0x7c000030
+#define PPC_INST_SRW			0x7c000430
+#define PPC_INST_AND			0x7c000038
+#define PPC_INST_ANDDOT			0x7c000039
+#define PPC_INST_OR			0x7c000378
+#define PPC_INST_ANDI			0x70000000
+#define PPC_INST_ORI			0x60000000
+#define PPC_INST_ORIS			0x64000000
+#define PPC_INST_NEG			0x7c0000d0
+#define PPC_INST_BRANCH			0x48000000
+#define PPC_INST_BRANCH_COND		0x40800000
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -83,6 +119,10 @@
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
+#define __PPC_SH(s)	__PPC_WS(s)
+#define __PPC_MB(s)	(((s) & 0x1f) << 6)
+#define __PPC_ME(s)	(((s) & 0x1f) << 1)
+#define __PPC_BI(s)	(((s) & 0x1f) << 16)
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
new file mode 100644
index 0000000..90568c3
--- /dev/null
+++ b/arch/powerpc/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/powerpc/net/bpf_jit.S b/arch/powerpc/net/bpf_jit.S
new file mode 100644
index 0000000..ff4506e
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit.S
@@ -0,0 +1,138 @@
+/* bpf_jit.S: Packet/header access helper functions
+ * for PPC64 BPF compiler.
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <asm/ppc_asm.h>
+#include "bpf_jit.h"
+
+/*
+ * All of these routines are called directly from generated code,
+ * whose register usage is:
+ *
+ * r3		skb
+ * r4,r5	A,X
+ * r6		*** address parameter to helper ***
+ * r7-r10	scratch
+ * r14		skb->data
+ * r15		skb headlen
+ * r16-31	M[]
+ */
+
+/*
+ * To consider: These helpers are so small it could be better to just
+ * generate them inline.  Inline code can do the simple headlen check
+ * then branch directly to slow_path_XXX if required.  (In fact, could
+ * load a spare GPR with the address of slow_path_generic and pass size
+ * as an argument, making the call site a mtlr, li and bllr.)
+ *
+ * Technically, the "is addr < 0" check is unnecessary & slowing down
+ * the ABS path, as it's statically checked on generation.
+ */
+	.globl	sk_load_word
+sk_load_word:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	/* Are we accessing past headlen? */
+	subi	r_scratch1, r_HL, 4
+	cmpd	r_scratch1, r_addr
+	blt	bpf_slow_path_word
+	/* Nope, just hitting the header.  cr0 here is eq or gt! */
+	lwzx	r_A, r_D, r_addr
+	/* When big endian we don't need to byteswap. */
+	blr	/* Return success, cr0 != LT */
+
+	.globl	sk_load_half
+sk_load_half:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	subi	r_scratch1, r_HL, 2
+	cmpd	r_scratch1, r_addr
+	blt	bpf_slow_path_half
+	lhzx	r_A, r_D, r_addr
+	blr
+
+	.globl	sk_load_byte
+sk_load_byte:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	cmpd	r_HL, r_addr
+	ble	bpf_slow_path_byte
+	lbzx	r_A, r_D, r_addr
+	blr
+
+/*
+ * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
+ * r_addr is the offset value, already known positive
+ */
+	.globl sk_load_byte_msh
+sk_load_byte_msh:
+	cmpd	r_HL, r_addr
+	ble	bpf_slow_path_byte_msh
+	lbzx	r_X, r_D, r_addr
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
+
+bpf_error:
+	/* Entered with cr0 = lt */
+	li	r3, 0
+	/* Generated code will 'blt epilogue', returning 0. */
+	blr
+
+/* Call out to skb_copy_bits:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define bpf_slow_path_common(SIZE)				\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*8);		\
+	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	li	r6, SIZE;					\
+	bl	skb_copy_bits;					\
+	/* R3 = 0 on success */					\
+	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpdi	r3, 0;						\
+	blt	bpf_error;	/* cr0 = LT */			\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_word:
+	bpf_slow_path_common(4)
+	/* Data value is on stack, and cr0 != LT */
+	lwz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_half:
+	bpf_slow_path_common(2)
+	lhz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_byte:
+	bpf_slow_path_common(1)
+	lbz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_byte_msh:
+	bpf_slow_path_common(1)
+	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
new file mode 100644
index 0000000..af1ab5e
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit.h
@@ -0,0 +1,227 @@
+/* bpf_jit.h: BPF JIT compiler for PPC64
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#define BPF_PPC_STACK_LOCALS	32
+#define BPF_PPC_STACK_BASIC	(48+64)
+#define BPF_PPC_STACK_SAVE	(18*8)
+#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
+				 BPF_PPC_STACK_SAVE)
+#define BPF_PPC_SLOWPATH_FRAME	(48+64)
+
+/*
+ * Generated code register usage:
+ *
+ * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
+ *
+ * skb		r3	(Entry parameter)
+ * A register	r4
+ * X register	r5
+ * addr param	r6
+ * r7-r10	scratch
+ * skb->data	r14
+ * skb headlen	r15	(skb->len - skb->data_len)
+ * m[0]		r16
+ * m[...]	...
+ * m[15]	r31
+ */
+#define r_skb		3
+#define r_ret		3
+#define r_A		4
+#define r_X		5
+#define r_addr		6
+#define r_scratch1	7
+#define r_D		14
+#define r_HL		15
+#define r_M		16
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Assembly helpers from arch/powerpc/net/bpf_jit.S:
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+
+#define FUNCTION_DESCR_SIZE	24
+
+/*
+ * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
+ * (e.g. LD, ADDI).  If the bottom 16 bits is "-ve", add another bit into the
+ * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
+ */
+#define IMM_H(i)		((uintptr_t)(i)>>16)
+#define IMM_HA(i)		(((uintptr_t)(i)>>16) +			      \
+				 (((uintptr_t)(i) & 0x8000) >> 15))
+#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
+
+#define PLANT_INSTR(d, idx, instr)					      \
+	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
+#define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
+
+#define PPC_NOP()		EMIT(PPC_INST_NOP)
+#define PPC_BLR()		EMIT(PPC_INST_BLR)
+#define PPC_BLRL()		EMIT(PPC_INST_BLRL)
+#define PPC_MTLR(r)		EMIT(PPC_INST_MTLR | __PPC_RT(r))
+#define PPC_ADDI(d, a, i)	EMIT(PPC_INST_ADDI | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | IMM_L(i))
+#define PPC_MR(d, a)		PPC_OR(d, a, a)
+#define PPC_LI(r, i)		PPC_ADDI(r, 0, i)
+#define PPC_ADDIS(d, a, i)	EMIT(PPC_INST_ADDIS |			      \
+				     __PPC_RS(d) | __PPC_RA(a) | IMM_L(i))
+#define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
+#define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | __PPC_RS(r) |	      \
+				     __PPC_RA(base) | ((i) & 0xfffc))
+
+#define PPC_LD(r, base, i)	EMIT(PPC_INST_LD | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+#define PPC_LWZ(r, base, i)	EMIT(PPC_INST_LWZ | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+#define PPC_LHZ(r, base, i)	EMIT(PPC_INST_LHZ | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+/* Convenience helpers for the above with 'far' offsets: */
+#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);     \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LD(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i);   \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LWZ(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i);   \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LHZ(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_CMPWI(a, i)		EMIT(PPC_INST_CMPWI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPDI(a, i)		EMIT(PPC_INST_CMPDI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPLWI(a, i)	EMIT(PPC_INST_CMPLWI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPLW(a, b)		EMIT(PPC_INST_CMPLW | __PPC_RA(a) | __PPC_RB(b))
+
+#define PPC_SUB(d, a, b)	EMIT(PPC_INST_SUB | __PPC_RT(d) |	      \
+				     __PPC_RB(a) | __PPC_RA(b))
+#define PPC_ADD(d, a, b)	EMIT(PPC_INST_ADD | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MUL(d, a, b)	EMIT(PPC_INST_MULLW | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MULHWU(d, a, b)	EMIT(PPC_INST_MULHWU | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MULI(d, a, i)	EMIT(PPC_INST_MULLI | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | IMM_L(i))
+#define PPC_DIVWU(d, a, b)	EMIT(PPC_INST_DIVWU | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_AND(d, a, b)	EMIT(PPC_INST_AND | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_ANDI(d, a, i)	EMIT(PPC_INST_ANDI | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_AND_DOT(d, a, b)	EMIT(PPC_INST_ANDDOT | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_OR(d, a, b)		EMIT(PPC_INST_OR | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_ORI(d, a, i)	EMIT(PPC_INST_ORI | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_ORIS(d, a, i)	EMIT(PPC_INST_ORIS | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_SLW(d, a, s)	EMIT(PPC_INST_SLW | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(s))
+#define PPC_SRW(d, a, s)	EMIT(PPC_INST_SRW | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(s))
+/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
+#define PPC_SLWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(i) |	      \
+				     __PPC_MB(0) | __PPC_ME(31-(i)))
+/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
+#define PPC_SRWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(32-(i)) |	      \
+				     __PPC_MB(i) | __PPC_ME(31))
+/* sldi = rldicr Rx, Ry, n, 63-n */
+#define PPC_SLDI(d, a, i)	EMIT(PPC_INST_RLDICR | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(i) |	      \
+				     __PPC_MB(63-(i)) | (((i) & 0x20) >> 4))
+#define PPC_NEG(d, a)		EMIT(PPC_INST_NEG | __PPC_RT(d) | __PPC_RA(a))
+
+/* Long jump; (unconditional 'branch') */
+#define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
+				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* "cond" here covers BO:BI fields. */
+#define PPC_BCC_SHORT(cond, dest)	EMIT(PPC_INST_BRANCH_COND |	      \
+					     (((cond) & 0x3ff) << 16) |	      \
+					     (((dest) - (ctx->idx * 4)) &     \
+					      0xfffc))
+#define PPC_LI32(d, i)		do { PPC_LI(d, IMM_L(i));		      \
+		if ((u32)(uintptr_t)(i) >= 32768) {			      \
+			PPC_ADDIS(d, d, IMM_HA(i));			      \
+		} } while(0)
+#define PPC_LI64(d, i)		do {					      \
+		if (!((uintptr_t)(i) & 0xffffffff00000000ULL))		      \
+			PPC_LI32(d, i);					      \
+		else {							      \
+			PPC_LIS(d, ((uintptr_t)(i) >> 48));		      \
+			if ((uintptr_t)(i) & 0x0000ffff00000000ULL)	      \
+				PPC_ORI(d, d,				      \
+					((uintptr_t)(i) >> 32) & 0xffff);     \
+			PPC_SLDI(d, d, 32);				      \
+			if ((uintptr_t)(i) & 0x00000000ffff0000ULL)	      \
+				PPC_ORIS(d, d,				      \
+					 ((uintptr_t)(i) >> 16) & 0xffff);    \
+			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
+				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
+		} } while (0);
+
+static inline bool is_nearbranch(int offset)
+{
+	return (offset < 32768) && (offset >= -32768);
+}
+
+/*
+ * The fly in the ointment of code size changing from pass to pass is
+ * avoided by padding the short branch case with a NOP.	 If code size differs
+ * with different branch reaches we will have the issue of code moving from
+ * one pass to the next and will need a few passes to converge on a stable
+ * state.
+ */
+#define PPC_BCC(cond, dest)	do {					      \
+		if (is_nearbranch((dest) - (ctx->idx * 4))) {		      \
+			PPC_BCC_SHORT(cond, dest);			      \
+			PPC_NOP();					      \
+		} else {						      \
+			/* Flip the 'T or F' bit to invert comparison */      \
+			PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4);  \
+			PPC_JMP(dest);					      \
+		} } while(0)
+
+/* To create a branch condition, select a bit of cr0... */
+#define CR0_LT		0
+#define CR0_GT		1
+#define CR0_EQ		2
+/* ...and modify BO[3] */
+#define COND_CMP_TRUE	0x100
+#define COND_CMP_FALSE	0x000
+/* Together, they make all required comparisons: */
+#define COND_GT		(CR0_GT | COND_CMP_TRUE)
+#define COND_GE		(CR0_LT | COND_CMP_FALSE)
+#define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
+#define COND_NE		(CR0_EQ | COND_CMP_FALSE)
+#define COND_LT		(CR0_LT | COND_CMP_TRUE)
+
+#define SEEN_DATAREF 0x10000 /* might call external helpers */
+#define SEEN_XREG    0x20000 /* X reg is used */
+#define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
+			      * storage */
+#define SEEN_MEM_MSK 0x0ffff
+
+struct codegen_context {
+	unsigned int seen;
+	unsigned int idx;
+	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
+};
+
+#endif
+
+#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
new file mode 100644
index 0000000..4b2a787
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -0,0 +1,694 @@
+/* bpf_jit_comp.c: BPF JIT compiler for PPC64
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include "bpf_jit.h"
+
+#ifndef __BIG_ENDIAN
+/* There are endianness assumptions herein. */
+#error "Little-endian PPC not supported in BPF compiler"
+#endif
+
+int bpf_jit_enable __read_mostly;
+
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	smp_wmb();
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
+				   struct codegen_context *ctx)
+{
+	int i;
+	const struct sock_filter *filter = fp->insns;
+
+	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
+		/* Make stackframe */
+		if (ctx->seen & SEEN_DATAREF) {
+			/* If we call any helpers (for loads), save LR */
+			EMIT(PPC_INST_MFLR | __PPC_RT(0));
+			PPC_STD(0, 1, 16);
+
+			/* Back up non-volatile regs. */
+			PPC_STD(r_D, 1, -(8*(32-r_D)));
+			PPC_STD(r_HL, 1, -(8*(32-r_HL)));
+		}
+		if (ctx->seen & SEEN_MEM) {
+			/*
+			 * Conditionally save regs r15-r31 as some will be used
+			 * for M[] data.
+			 */
+			for (i = r_M; i < (r_M+16); i++) {
+				if (ctx->seen & (1 << (i-r_M)))
+					PPC_STD(i, 1, -(8*(32-i)));
+			}
+		}
+		EMIT(PPC_INST_STDU | __PPC_RS(1) | __PPC_RA(1) |
+		     (-BPF_PPC_STACKFRAME & 0xfffc));
+	}
+
+	if (ctx->seen & SEEN_DATAREF) {
+		/*
+		 * If this filter needs to access skb data,
+		 * prepare r_D and r_HL:
+		 *  r_HL = skb->len - skb->data_len
+		 *  r_D	 = skb->data
+		 */
+		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
+							 data_len));
+		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
+		PPC_SUB(r_HL, r_HL, r_scratch1);
+		PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
+	}
+
+	if (ctx->seen & SEEN_XREG) {
+		/*
+		 * TODO: Could also detect whether first instr. sets X and
+		 * avoid this (as below, with A).
+		 */
+		PPC_LI(r_X, 0);
+	}
+
+	switch (filter[0].code) {
+	case BPF_S_RET_K:
+	case BPF_S_LD_W_LEN:
+	case BPF_S_ANC_PROTOCOL:
+	case BPF_S_ANC_IFINDEX:
+	case BPF_S_ANC_MARK:
+	case BPF_S_ANC_RXHASH:
+	case BPF_S_ANC_CPU:
+	case BPF_S_ANC_QUEUE:
+	case BPF_S_LD_W_ABS:
+	case BPF_S_LD_H_ABS:
+	case BPF_S_LD_B_ABS:
+		/* first instruction sets A register (or is RET 'constant') */
+		break;
+	default:
+		/* make sure we dont leak kernel information to user */
+		PPC_LI(r_A, 0);
+	}
+}
+
+static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
+		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
+		if (ctx->seen & SEEN_DATAREF) {
+			PPC_LD(0, 1, 16);
+			PPC_MTLR(0);
+			PPC_LD(r_D, 1, -(8*(32-r_D)));
+			PPC_LD(r_HL, 1, -(8*(32-r_HL)));
+		}
+		if (ctx->seen & SEEN_MEM) {
+			/* Restore any saved non-vol registers */
+			for (i = r_M; i < (r_M+16); i++) {
+				if (ctx->seen & (1 << (i-r_M)))
+					PPC_LD(i, 1, -(8*(32-i)));
+			}
+		}
+	}
+	/* The RETs have left a return value in R3. */
+
+	PPC_BLR();
+}
+
+/* Assemble the body code between the prologue & epilogue. */
+static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
+			      struct codegen_context *ctx,
+			      unsigned int *addrs)
+{
+	const struct sock_filter *filter = fp->insns;
+	int flen = fp->len;
+	u8 *func;
+	unsigned int true_cond;
+	int i;
+
+	/* Start of epilogue code */
+	unsigned int exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		unsigned int K = filter[i].k;
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		switch (filter[i].code) {
+			/*** ALU ops ***/
+		case BPF_S_ALU_ADD_X: /* A += X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_ADD(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_ADD_K: /* A += K; */
+			if (!K)
+				break;
+			PPC_ADDI(r_A, r_A, IMM_L(K));
+			if (K >= 32768)
+				PPC_ADDIS(r_A, r_A, IMM_HA(K));
+			break;
+		case BPF_S_ALU_SUB_X: /* A -= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SUB(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_SUB_K: /* A -= K */
+			if (!K)
+				break;
+			PPC_ADDI(r_A, r_A, IMM_L(-K));
+			if (K >= 32768)
+				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
+			break;
+		case BPF_S_ALU_MUL_X: /* A *= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_MUL(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_MUL_K: /* A *= K */
+			if (K < 32768)
+				PPC_MULI(r_A, r_A, K);
+			else {
+				PPC_LI32(r_scratch1, K);
+				PPC_MUL(r_A, r_A, r_scratch1);
+			}
+			break;
+		case BPF_S_ALU_DIV_X: /* A /= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_CMPWI(r_X, 0);
+			if (ctx->pc_ret0 != -1) {
+				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
+			} else {
+				/*
+				 * Exit, returning 0; first pass hits here
+				 * (longer worst-case code size).
+				 */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+				PPC_LI(r_ret, 0);
+				PPC_JMP(exit_addr);
+			}
+			PPC_DIVWU(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+			PPC_LI32(r_scratch1, K);
+			/* Top 32 bits of 64bit result -> A */
+			PPC_MULHWU(r_A, r_A, r_scratch1);
+			break;
+		case BPF_S_ALU_AND_X:
+			ctx->seen |= SEEN_XREG;
+			PPC_AND(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_AND_K:
+			if (!IMM_H(K))
+				PPC_ANDI(r_A, r_A, K);
+			else {
+				PPC_LI32(r_scratch1, K);
+				PPC_AND(r_A, r_A, r_scratch1);
+			}
+			break;
+		case BPF_S_ALU_OR_X:
+			ctx->seen |= SEEN_XREG;
+			PPC_OR(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_OR_K:
+			if (IMM_L(K))
+				PPC_ORI(r_A, r_A, IMM_L(K));
+			if (K >= 65536)
+				PPC_ORIS(r_A, r_A, IMM_H(K));
+			break;
+		case BPF_S_ALU_LSH_X: /* A <<= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SLW(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_LSH_K:
+			if (K == 0)
+				break;
+			else
+				PPC_SLWI(r_A, r_A, K);
+			break;
+		case BPF_S_ALU_RSH_X: /* A >>= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SRW(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_RSH_K: /* A >>= K; */
+			if (K == 0)
+				break;
+			else
+				PPC_SRWI(r_A, r_A, K);
+			break;
+		case BPF_S_ALU_NEG:
+			PPC_NEG(r_A, r_A);
+			break;
+		case BPF_S_RET_K:
+			PPC_LI32(r_ret, K);
+			if (!K) {
+				if (ctx->pc_ret0 == -1)
+					ctx->pc_ret0 = i;
+			}
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue if we've stuff to clean up.  Otherwise,
+			 * if there's nothing to tidy, just return.  If we /are/
+			 * the last instruction, we're about to fall through to
+			 * the epilogue to return.
+			 */
+			if (i != flen - 1) {
+				/*
+				 * Note: 'seen' is properly valid only on pass
+				 * #2.	Both parts of this conditional are the
+				 * same instruction size though, meaning the
+				 * first pass will still correctly determine the
+				 * code size/addresses.
+				 */
+				if (ctx->seen)
+					PPC_JMP(exit_addr);
+				else
+					PPC_BLR();
+			}
+			break;
+		case BPF_S_RET_A:
+			PPC_MR(r_ret, r_A);
+			if (i != flen - 1) {
+				if (ctx->seen)
+					PPC_JMP(exit_addr);
+				else
+					PPC_BLR();
+			}
+			break;
+		case BPF_S_MISC_TAX: /* X = A */
+			ctx->seen |= SEEN_XREG;
+			PPC_MR(r_X, r_A);
+			break;
+		case BPF_S_MISC_TXA: /* A = X */
+			ctx->seen |= SEEN_XREG;
+			PPC_MR(r_A, r_X);
+			break;
+
+			/*** Constant loads/M[] access ***/
+		case BPF_S_LD_IMM: /* A = K */
+			PPC_LI32(r_A, K);
+			break;
+		case BPF_S_LDX_IMM: /* X = K */
+			ctx->seen |= SEEN_XREG;
+			PPC_LI32(r_X, K);
+			break;
+		case BPF_S_LD_MEM: /* A = mem[K] */
+			PPC_MR(r_A, r_M + (K & 0xf));
+			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_LDX_MEM: /* X = mem[K] */
+			PPC_MR(r_X, r_M + (K & 0xf));
+			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_ST: /* mem[K] = A */
+			PPC_MR(r_M + (K & 0xf), r_A);
+			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_STX: /* mem[K] = X */
+			PPC_MR(r_M + (K & 0xf), r_X);
+			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_LD_W_LEN: /*	A = skb->len; */
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
+			break;
+		case BPF_S_LDX_W_LEN: /* X = skb->len; */
+			ctx->seen |= SEEN_XREG;
+			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
+			break;
+
+			/*** Ancillary info loads ***/
+
+			/* None of the BPF_S_ANC* codes appear to be passed by
+			 * sk_chk_filter().  The interpreter and the x86 BPF
+			 * compiler implement them so we do too -- they may be
+			 * planted in future.
+			 */
+		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  protocol) != 2);
+			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  protocol));
+			/* ntohs is a NOP with BE loads. */
+			break;
+		case BPF_S_ANC_IFINDEX:
+			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
+								dev));
+			PPC_CMPDI(r_scratch1, 0);
+			if (ctx->pc_ret0 != -1) {
+				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
+			} else {
+				/* Exit, returning 0; first pass hits here. */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+				PPC_LI(r_ret, 0);
+				PPC_JMP(exit_addr);
+			}
+			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+						  ifindex) != 4);
+			PPC_LWZ_OFFS(r_A, r_scratch1,
+				     offsetof(struct net_device, ifindex));
+			break;
+		case BPF_S_ANC_MARK:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  mark));
+			break;
+		case BPF_S_ANC_RXHASH:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  rxhash));
+			break;
+		case BPF_S_ANC_QUEUE:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  queue_mapping) != 2);
+			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  queue_mapping));
+			break;
+		case BPF_S_ANC_CPU:
+#ifdef CONFIG_SMP
+			/*
+			 * PACA ptr is r13:
+			 * raw_smp_processor_id() = local_paca->paca_index
+			 */
+			PPC_LHZ_OFFS(r_A, 13,
+				     offsetof(struct paca_struct, paca_index));
+#else
+			PPC_LI(r_A, 0);
+#endif
+			break;
+
+			/*** Absolute loads from packet header/data ***/
+		case BPF_S_LD_W_ABS:
+			func = sk_load_word;
+			goto common_load;
+		case BPF_S_LD_H_ABS:
+			func = sk_load_half;
+			goto common_load;
+		case BPF_S_LD_B_ABS:
+			func = sk_load_byte;
+		common_load:
+			/*
+			 * Load from [K].  Reference with the (negative)
+			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
+			 */
+			ctx->seen |= SEEN_DATAREF;
+			if ((int)K < 0)
+				return -ENOTSUPP;
+			PPC_LI64(r_scratch1, func);
+			PPC_MTLR(r_scratch1);
+			PPC_LI32(r_addr, K);
+			PPC_BLRL();
+			/*
+			 * Helper returns 'lt' condition on error, and an
+			 * appropriate return value in r3
+			 */
+			PPC_BCC(COND_LT, exit_addr);
+			break;
+
+			/*** Indirect loads from packet header/data ***/
+		case BPF_S_LD_W_IND:
+			func = sk_load_word;
+			goto common_load_ind;
+		case BPF_S_LD_H_IND:
+			func = sk_load_half;
+			goto common_load_ind;
+		case BPF_S_LD_B_IND:
+			func = sk_load_byte;
+		common_load_ind:
+			/*
+			 * Load from [X + K].  Negative offsets are tested for
+			 * in the helper functions, and result in a 'ret 0'.
+			 */
+			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
+			PPC_LI64(r_scratch1, func);
+			PPC_MTLR(r_scratch1);
+			PPC_ADDI(r_addr, r_X, IMM_L(K));
+			if (K >= 32768)
+				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
+			PPC_BLRL();
+			/* If error, cr0.LT set */
+			PPC_BCC(COND_LT, exit_addr);
+			break;
+
+		case BPF_S_LDX_B_MSH:
+			/*
+			 * x86 version drops packet (RET 0) when K<0, whereas
+			 * interpreter does allow K<0 (__load_pointer, special
+			 * ancillary data).
+			 */
+			ctx->seen |= SEEN_XREG;
+			func = sk_load_byte_msh;
+			goto common_load;
+			break;
+
+			/*** Jump and branches ***/
+		case BPF_S_JMP_JA:
+			if (K != 0)
+				PPC_JMP(addrs[i + 1 + K]);
+			break;
+
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_S_JMP_JSET_K:
+		case BPF_S_JMP_JSET_X:
+			true_cond = COND_NE;
+			/* Fall through */
+		cond_branch:
+			/* same targets, can avoid doing the test :) */
+			if (filter[i].jt == filter[i].jf) {
+				if (filter[i].jt > 0)
+					PPC_JMP(addrs[i + 1 + filter[i].jt]);
+				break;
+			}
+
+			switch (filter[i].code) {
+			case BPF_S_JMP_JGT_X:
+			case BPF_S_JMP_JGE_X:
+			case BPF_S_JMP_JEQ_X:
+				ctx->seen |= SEEN_XREG;
+				PPC_CMPLW(r_A, r_X);
+				break;
+			case BPF_S_JMP_JSET_X:
+				ctx->seen |= SEEN_XREG;
+				PPC_AND_DOT(r_scratch1, r_A, r_X);
+				break;
+			case BPF_S_JMP_JEQ_K:
+			case BPF_S_JMP_JGT_K:
+			case BPF_S_JMP_JGE_K:
+				if (K < 32768)
+					PPC_CMPLWI(r_A, K);
+				else {
+					PPC_LI32(r_scratch1, K);
+					PPC_CMPLW(r_A, r_scratch1);
+				}
+				break;
+			case BPF_S_JMP_JSET_K:
+				if (K < 32768)
+					/* PPC_ANDI is /only/ dot-form */
+					PPC_ANDI(r_scratch1, r_A, K);
+				else {
+					PPC_LI32(r_scratch1, K);
+					PPC_AND_DOT(r_scratch1, r_A,
+						    r_scratch1);
+				}
+				break;
+			}
+			/* Sometimes branches are constructed "backward", with
+			 * the false path being the branch and true path being
+			 * a fallthrough to the next instruction.
+			 */
+			if (filter[i].jt == 0)
+				/* Swap the sense of the branch */
+				PPC_BCC(true_cond ^ COND_CMP_TRUE,
+					addrs[i + 1 + filter[i].jf]);
+			else {
+				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
+				if (filter[i].jf != 0)
+					PPC_JMP(addrs[i + 1 + filter[i].jf]);
+			}
+			break;
+		default:
+			/* The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err("BPF filter opcode %04x (@%d) unsupported\n",
+			       filter[i].code, i);
+			return -ENOTSUPP;
+		}
+
+	}
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
+
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	unsigned int proglen;
+	unsigned int alloclen;
+	u32 *image = NULL;
+	u32 *code_base;
+	unsigned int *addrs;
+	struct codegen_context cgctx;
+	int pass;
+	int flen = fp->len;
+
+	if (!bpf_jit_enable)
+		return;
+
+	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+
+	/*
+	 * There are multiple assembly passes as the generated code will change
+	 * size as it settles down, figuring out the max branch offsets/exit
+	 * paths required.
+	 *
+	 * The range of standard conditional branches is +/- 32Kbytes.	Since
+	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
+	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
+	 * used, distinct from short branches.
+	 *
+	 * Current:
+	 *
+	 * For now, both branch types assemble to 2 words (short branches padded
+	 * with a NOP); this is less efficient, but assembly will always complete
+	 * after exactly 3 passes:
+	 *
+	 * First pass: No code buffer; Program is "faux-generated" -- no code
+	 * emitted but maximum size of output determined (and addrs[] filled
+	 * in).	 Also, we note whether we use M[], whether we use skb data, etc.
+	 * All generation choices assumed to be 'worst-case', e.g. branches all
+	 * far (2 instructions), return path code reduction not available, etc.
+	 *
+	 * Second pass: Code buffer allocated with size determined previously.
+	 * Prologue generated to support features we have seen used.  Exit paths
+	 * determined and addrs[] is filled in again, as code may be slightly
+	 * smaller as a result.
+	 *
+	 * Third pass: Code generated 'for real', and branch destinations
+	 * determined from now-accurate addrs[] map.
+	 *
+	 * Ideal:
+	 *
+	 * If we optimise this, near branches will be shorter.	On the
+	 * first assembly pass, we should err on the side of caution and
+	 * generate the biggest code.  On subsequent passes, branches will be
+	 * generated short or long and code size will reduce.  With smaller
+	 * code, more branches may fall into the short category, and code will
+	 * reduce more.
+	 *
+	 * Finally, if we see one pass generate code the same size as the
+	 * previous pass we have converged and should now generate code for
+	 * real.  Allocating at the end will also save the memory that would
+	 * otherwise be wasted by the (small) current code shrinkage.
+	 * Preferably, we should do a small number of passes (e.g. 5) and if we
+	 * haven't converged by then, get impatient and force code to generate
+	 * as-is, even if the odd branch would be left long.  The chances of a
+	 * long jump are tiny with all but the most enormous of BPF filter
+	 * inputs, so we should usually converge on the third pass.
+	 */
+
+	cgctx.idx = 0;
+	cgctx.seen = 0;
+	cgctx.pc_ret0 = -1;
+	/* Scouting faux-generate pass 0 */
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+		/* We hit something illegal or unsupported. */
+		goto out;
+
+	/*
+	 * Pretend to build prologue, given the features we've seen.  This will
+	 * update ctgtx.idx as it pretends to output instructions, then we can
+	 * calculate total size from idx.
+	 */
+	bpf_jit_build_prologue(fp, 0, &cgctx);
+	bpf_jit_build_epilogue(0, &cgctx);
+
+	proglen = cgctx.idx * 4;
+	alloclen = proglen + FUNCTION_DESCR_SIZE;
+	image = module_alloc(max_t(unsigned int, alloclen,
+				   sizeof(struct work_struct)));
+	if (!image)
+		goto out;
+
+	code_base = image + (FUNCTION_DESCR_SIZE/4);
+
+	/* Code generation passes 1-2 */
+	for (pass = 1; pass < 3; pass++) {
+		/* Now build the prologue, body code & epilogue for real. */
+		cgctx.idx = 0;
+		bpf_jit_build_prologue(fp, code_base, &cgctx);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_epilogue(code_base, &cgctx);
+
+		if (bpf_jit_enable > 1)
+			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
+				proglen - (cgctx.idx * 4), cgctx.seen);
+	}
+
+	if (bpf_jit_enable > 1)
+		pr_info("flen=%d proglen=%u pass=%d image=%p\n",
+		       flen, proglen, pass, image);
+
+	if (image) {
+		if (bpf_jit_enable > 1)
+			print_hex_dump(KERN_ERR, "JIT code: ",
+				       DUMP_PREFIX_ADDRESS,
+				       16, 1, code_base,
+				       proglen, false);
+
+		bpf_flush_icache(code_base, code_base + (proglen/4));
+		/* Function descriptor nastiness: Address + TOC */
+		((u64 *)image)[0] = (u64)code_base;
+		((u64 *)image)[1] = local_paca->kernel_toc;
+		fp->bpf_func = (void *)image;
+	}
+out:
+	kfree(addrs);
+	return;
+}
+
+static void jit_free_defer(struct work_struct *arg)
+{
+	module_free(NULL, arg);
+}
+
+/* run from softirq, we must use a work_struct to call
+ * module_free() from process context
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
+}

^ permalink raw reply related

* [PATCH 00/45] Update bna driver version to 3.0.2.0
From: Rasesh Mody @ 2011-07-18  8:19 UTC (permalink / raw)
  To: davem, netdev; +Cc: adapter_linux_open_src_team, dradovan, Rasesh Mody

Hello David,

   The following patch set adds support for Brocade-1860 Fabric Adapter code
   re-org and cleaning.

   It updates the Brocade BNA driver to v3.0.2.0.

   The driver has been compiled & tested against net-next-2.6(3.0.0-rc7)

   Note: pathces 0007 to 0009 are split for ease of review.

Thanks,
Rasesh

Rasesh Mody (45):
  bna: Checkpatch Cleanup
  bna: Update ASIC Register Definition to Support New Hardware
  bna: Change IOC Event Notification Call Back Mechanism
  bna: State Machine Fault Handling Cleanup
  bna: MSGQ Implementation
  bna: Use DEFINE_PCI_DEVICE_TABLE and Print Driver Version
  bna: Introduce ENET as New Driver and FW Interface
  bna: Tx and Rx Redesign
  bna: ENET and Tx Rx Redesign Update
  bna: Remove Obsolete Files
  bna: Brocade-1860 Fabric Adapter Enablement
  bna: Hardware Clock Setup
  bna: IOC PLL changes and init cleanup
  bna: Brocade 1860 Register and ASIC Mode Changes
  bna: Set MBOX MSIX Index to Zero
  bna: IOC PCI Init & Enable Changes
  bna: Mailbox Interface Changes and FW MBOX fix
  bna: Implement Polling Mechanism for FW Ready
  bna: HW Type Check Fix
  bna: MBOX IRQ Sync Vector Num Fix
  bna: Remove Reset Call Back
  bna: Capability Map and MFG Block Changes for New HW
  bna: Added Defines for Multi TXQ Support
  bna: Mboxq Flush When Ioc Disabled
  bna: Move FW Init to HW Init and Disable Hang Unmapped Fix
  bna: Ethfn LPU DMA Read Fix
  bna: IOC Event Name Change
  bna: Add New IOC event
  bna: Add Sub-System Device ID Info
  bna: Add HW Semaphore Unlock Logic
  bna: Configuration changes
  bna: TxRx Coalesce Settings Fix and Reorg PCI Probe Failure
  bna: Device Init Fix
  bna: Add Multiple Tx Queue Support
  bna: Change TxQ Select Logic and Interrupt Handling
  bna: Data Path and API Changes
  bna: Adpater and Port Mode Settings
  bna: HW Error Counter Fix
  bna: RX Path Changes
  bna: Add IOC MBOX Call Back to Client
  bna: Ethtool Ring Param Set changes and Add Stats Attr
  bna: PLL Init Fix and Add Stats Attributes
  bna: Dropped BUG_ONs and Rx id init fix
  bna: Header File and Unused Code Cleanup
  bna: Driver Version changed to 3.0.2.0

 drivers/net/bna/Makefile            |    5 +-
 drivers/net/bna/bfa_cee.c           |   70 +-
 drivers/net/bna/bfa_cee.h           |    3 +-
 drivers/net/bna/bfa_cs.h            |  148 ++
 drivers/net/bna/bfa_defs.h          |   77 +-
 drivers/net/bna/bfa_defs_cna.h      |    8 +-
 drivers/net/bna/bfa_defs_mfg_comm.h |   93 +-
 drivers/net/bna/bfa_defs_status.h   |  143 +-
 drivers/net/bna/bfa_ioc.c           |  712 ++++--
 drivers/net/bna/bfa_ioc.h           |  106 +-
 drivers/net/bna/bfa_ioc_ct.c        |  498 ++++-
 drivers/net/bna/bfa_msgq.c          |  669 ++++++
 drivers/net/bna/bfa_msgq.h          |  130 +
 drivers/net/bna/bfi.h               |  280 ++-
 drivers/net/bna/bfi_ctreg.h         |  646 -----
 drivers/net/bna/bfi_enet.h          |  902 +++++++
 drivers/net/bna/bfi_ll.h            |  438 ----
 drivers/net/bna/bfi_reg.h           |  452 ++++
 drivers/net/bna/bna.h               |  456 +++--
 drivers/net/bna/bna_ctrl.c          | 3077 ------------------------
 drivers/net/bna/bna_enet.c          | 2202 +++++++++++++++++
 drivers/net/bna/bna_hw.h            | 1516 ++----------
 drivers/net/bna/bna_txrx.c          | 4417 +++++++++++++++++------------------
 drivers/net/bna/bna_types.h         |  696 +++----
 drivers/net/bna/bnad.c              | 1249 ++++++----
 drivers/net/bna/bnad.h              |  157 +-
 drivers/net/bna/bnad_ethtool.c      |  481 +---
 drivers/net/bna/cna.h               |   44 +-
 include/linux/pci_ids.h             |    1 +
 29 files changed, 9824 insertions(+), 9852 deletions(-)
 create mode 100644 drivers/net/bna/bfa_cs.h
 create mode 100644 drivers/net/bna/bfa_msgq.c
 create mode 100644 drivers/net/bna/bfa_msgq.h
 delete mode 100644 drivers/net/bna/bfi_ctreg.h
 create mode 100644 drivers/net/bna/bfi_enet.h
 delete mode 100644 drivers/net/bna/bfi_ll.h
 create mode 100644 drivers/net/bna/bfi_reg.h
 delete mode 100644 drivers/net/bna/bna_ctrl.c
 create mode 100644 drivers/net/bna/bna_enet.c


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox