Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net v5 4/5] forcedeth: Improve stats counters
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, Mandeep Baines,
	David Decotigny
In-Reply-To: <cover.1320539724.git.david.decotigny@google.com>

From: Mandeep Baines <msb@google.com>

Rx byte count was off; instead use the hardware's count.  Tx packet
count was counting pre-TSO packets; instead count on-the-wire packets.
Report hardware dropped frame count as rx_fifo_errors.

- The count of transmitted packets reported by the forcedeth driver
  reflects pre-TSO (TCP Segmentation Offload) packets rather than the
  number of packets actually sent on the wire. This change fixes the
  driver to report the correct count by copying the count stored in
  the NIC hardware to the value reported by the driver.

- Count rx_drop_frame errors as rx_fifo_errors:
  We see a lot of rx_drop_frame errors if we disable the rx bottom-halves
  for too long.  Normally, rx_fifo_errors would be counted in this case.
  The rx_drop_frame error count is private to forcedeth and is not
  reported by ifconfig or sysfs.  The rx_fifo_errors count is currently
  unused in the forcedeth driver.  It is reported by ifconfig as overruns.
  This change reports rx_drop_frame errors as rx_fifo_errors.



Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 2f1eaee..0c10ff7 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1682,6 +1682,7 @@ static void nv_get_hw_stats(struct net_device *dev)
 		np->estats.tx_pause += readl(base + NvRegTxPause);
 		np->estats.rx_pause += readl(base + NvRegRxPause);
 		np->estats.rx_drop_frame += readl(base + NvRegRxDropFrame);
+		np->estats.rx_errors_total += np->estats.rx_drop_frame;
 	}
 
 	if (np->driver_data & DEV_HAS_STATISTICS_V3) {
@@ -1706,11 +1707,14 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
 		nv_get_hw_stats(dev);
 
 		/* copy to net_device stats */
+		dev->stats.tx_packets = np->estats.tx_packets;
+		dev->stats.rx_bytes = np->estats.rx_bytes;
 		dev->stats.tx_bytes = np->estats.tx_bytes;
 		dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
 		dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
 		dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
 		dev->stats.rx_over_errors = np->estats.rx_over_errors;
+		dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
 		dev->stats.rx_errors = np->estats.rx_errors_total;
 		dev->stats.tx_errors = np->estats.tx_errors_total;
 	}
-- 
1.7.3.1

^ permalink raw reply related

* [PATCH net v5 5/5] forcedeth: fix a few sparse warnings (variable shadowing)
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, David Decotigny
In-Reply-To: <cover.1320539724.git.david.decotigny@google.com>

This fixes the following sparse warnings:
drivers/net/ethernet/nvidia/forcedeth.c:2113:7: warning: symbol 'size' shadows an earlier one
drivers/net/ethernet/nvidia/forcedeth.c:2102:6: originally declared here
drivers/net/ethernet/nvidia/forcedeth.c:2155:7: warning: symbol 'size' shadows an earlier one
drivers/net/ethernet/nvidia/forcedeth.c:2102:6: originally declared here
drivers/net/ethernet/nvidia/forcedeth.c:2227:7: warning: symbol 'size' shadows an earlier one
drivers/net/ethernet/nvidia/forcedeth.c:2215:6: originally declared here
drivers/net/ethernet/nvidia/forcedeth.c:2271:7: warning: symbol 'size' shadows an earlier one
drivers/net/ethernet/nvidia/forcedeth.c:2215:6: originally declared here
drivers/net/ethernet/nvidia/forcedeth.c:2986:20: warning: symbol 'addr' shadows an earlier one
drivers/net/ethernet/nvidia/forcedeth.c:2963:6: originally declared here



Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |   34 +++++++++++++++---------------
 1 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 0c10ff7..1dca570 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2103,10 +2103,10 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
-		u32 size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		u32 frag_size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 
-		entries += (size >> NV_TX2_TSO_MAX_SHIFT) +
-			   ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+		entries += (frag_size >> NV_TX2_TSO_MAX_SHIFT) +
+			   ((frag_size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
 	}
 
 	spin_lock_irqsave(&np->lock, flags);
@@ -2145,13 +2145,13 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* setup the fragments */
 	for (i = 0; i < fragments; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		u32 size = skb_frag_size(frag);
+		u32 frag_size = skb_frag_size(frag);
 		offset = 0;
 
 		do {
 			prev_tx = put_tx;
 			prev_tx_ctx = np->put_tx_ctx;
-			bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+			bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size;
 			np->put_tx_ctx->dma = skb_frag_dma_map(
 							&np->pci_dev->dev,
 							frag, offset,
@@ -2163,12 +2163,12 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
 
 			offset += bcnt;
-			size -= bcnt;
+			frag_size -= bcnt;
 			if (unlikely(put_tx++ == np->last_tx.orig))
 				put_tx = np->first_tx.orig;
 			if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
 				np->put_tx_ctx = np->first_tx_ctx;
-		} while (size);
+		} while (frag_size);
 	}
 
 	/* set last fragment flag  */
@@ -2217,10 +2217,10 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
-		u32 size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		u32 frag_size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 
-		entries += (size >> NV_TX2_TSO_MAX_SHIFT) +
-			   ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+		entries += (frag_size >> NV_TX2_TSO_MAX_SHIFT) +
+			   ((frag_size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
 	}
 
 	spin_lock_irqsave(&np->lock, flags);
@@ -2261,13 +2261,13 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 	/* setup the fragments */
 	for (i = 0; i < fragments; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		u32 size = skb_frag_size(frag);
+		u32 frag_size = skb_frag_size(frag);
 		offset = 0;
 
 		do {
 			prev_tx = put_tx;
 			prev_tx_ctx = np->put_tx_ctx;
-			bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+			bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size;
 			np->put_tx_ctx->dma = skb_frag_dma_map(
 							&np->pci_dev->dev,
 							frag, offset,
@@ -2280,12 +2280,12 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 			put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
 
 			offset += bcnt;
-			size -= bcnt;
+			frag_size -= bcnt;
 			if (unlikely(put_tx++ == np->last_tx.ex))
 				put_tx = np->first_tx.ex;
 			if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
 				np->put_tx_ctx = np->first_tx_ctx;
-		} while (size);
+		} while (frag_size);
 	}
 
 	/* set last fragment flag  */
@@ -2933,11 +2933,11 @@ static void nv_set_multicast(struct net_device *dev)
 				struct netdev_hw_addr *ha;
 
 				netdev_for_each_mc_addr(ha, dev) {
-					unsigned char *addr = ha->addr;
+					unsigned char *hw_addr = ha->addr;
 					u32 a, b;
 
-					a = le32_to_cpu(*(__le32 *) addr);
-					b = le16_to_cpu(*(__le16 *) (&addr[4]));
+					a = le32_to_cpu(*(__le32 *) hw_addr);
+					b = le16_to_cpu(*(__le16 *) (&hw_addr[4]));
 					alwaysOn[0] &= a;
 					alwaysOff[0] &= ~a;
 					alwaysOn[1] &= b;
-- 
1.7.3.1

^ permalink raw reply related

* [PATCH net v5 3/5] forcedeth: remove unneeded stats updates
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, David Decotigny
In-Reply-To: <cover.1320539724.git.david.decotigny@google.com>

Function ndo_get_stats() updates most of the stats from hardware
registers, making the manual updates unneeded. This change removes
these manual updates. The main exception is rx_missed_errors, which
still needs a manual update.

Another exception is rx_packets, still updated manually in this commit
to make sure this patch doesn't change behavior of driver. This will
be addressed by a future patch.



Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |   35 +------------------------------
 1 files changed, 1 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index b7cf4b6..2f1eaee 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2374,16 +2374,8 @@ static int nv_tx_done(struct net_device *dev, int limit)
 		if (np->desc_ver == DESC_VER_1) {
 			if (flags & NV_TX_LASTPACKET) {
 				if (flags & NV_TX_ERROR) {
-					if (flags & NV_TX_UNDERFLOW)
-						dev->stats.tx_fifo_errors++;
-					if (flags & NV_TX_CARRIERLOST)
-						dev->stats.tx_carrier_errors++;
 					if ((flags & NV_TX_RETRYERROR) && !(flags & NV_TX_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
-					dev->stats.tx_errors++;
-				} else {
-					dev->stats.tx_packets++;
-					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
@@ -2392,16 +2384,8 @@ static int nv_tx_done(struct net_device *dev, int limit)
 		} else {
 			if (flags & NV_TX2_LASTPACKET) {
 				if (flags & NV_TX2_ERROR) {
-					if (flags & NV_TX2_UNDERFLOW)
-						dev->stats.tx_fifo_errors++;
-					if (flags & NV_TX2_CARRIERLOST)
-						dev->stats.tx_carrier_errors++;
 					if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK))
 						nv_legacybackoff_reseed(dev);
-					dev->stats.tx_errors++;
-				} else {
-					dev->stats.tx_packets++;
-					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
@@ -2434,9 +2418,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
 		nv_unmap_txskb(np, np->get_tx_ctx);
 
 		if (flags & NV_TX2_LASTPACKET) {
-			if (!(flags & NV_TX2_ERROR))
-				dev->stats.tx_packets++;
-			else {
+			if (flags & NV_TX2_ERROR) {
 				if ((flags & NV_TX2_RETRYERROR) && !(flags & NV_TX2_RETRYCOUNT_MASK)) {
 					if (np->driver_data & DEV_HAS_GEAR_MODE)
 						nv_gear_backoff_reseed(dev);
@@ -2636,7 +2618,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
 					if ((flags & NV_RX_ERROR_MASK) == NV_RX_ERROR4) {
 						len = nv_getlen(dev, skb->data, len);
 						if (len < 0) {
-							dev->stats.rx_errors++;
 							dev_kfree_skb(skb);
 							goto next_pkt;
 						}
@@ -2650,11 +2631,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
 					else {
 						if (flags & NV_RX_MISSEDFRAME)
 							dev->stats.rx_missed_errors++;
-						if (flags & NV_RX_CRCERR)
-							dev->stats.rx_crc_errors++;
-						if (flags & NV_RX_OVERFLOW)
-							dev->stats.rx_over_errors++;
-						dev->stats.rx_errors++;
 						dev_kfree_skb(skb);
 						goto next_pkt;
 					}
@@ -2670,7 +2646,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
 					if ((flags & NV_RX2_ERROR_MASK) == NV_RX2_ERROR4) {
 						len = nv_getlen(dev, skb->data, len);
 						if (len < 0) {
-							dev->stats.rx_errors++;
 							dev_kfree_skb(skb);
 							goto next_pkt;
 						}
@@ -2682,11 +2657,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
 					}
 					/* the rest are hard errors */
 					else {
-						if (flags & NV_RX2_CRCERR)
-							dev->stats.rx_crc_errors++;
-						if (flags & NV_RX2_OVERFLOW)
-							dev->stats.rx_over_errors++;
-						dev->stats.rx_errors++;
 						dev_kfree_skb(skb);
 						goto next_pkt;
 					}
@@ -2704,7 +2674,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
 		skb->protocol = eth_type_trans(skb, dev);
 		napi_gro_receive(&np->napi, skb);
 		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += len;
 next_pkt:
 		if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
 			np->get_rx.orig = np->first_rx.orig;
@@ -2787,9 +2756,7 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
 				__vlan_hwaccel_put_tag(skb, vid);
 			}
 			napi_gro_receive(&np->napi, skb);
-
 			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += len;
 		} else {
 			dev_kfree_skb(skb);
 		}
-- 
1.7.3.1

^ permalink raw reply related

* [PATCH net v5 2/5] forcedeth: Acknowledge only interrupts that are being processed
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, Mike Ditto, David Decotigny
In-Reply-To: <cover.1320539724.git.david.decotigny@google.com>

From: Mike Ditto <mditto@google.com>

This is to avoid a race, accidentally acknowledging an interrupt that
we didn't notice and won't immediately process.  This is based solely
on code inspection; it is not known if there was an actual bug here.



Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |   13 ++++++++-----
 1 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 344cb5f..b7cf4b6 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3398,7 +3398,8 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 
 	for (i = 0;; i++) {
 		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL;
-		writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus);
+		writel(events, base + NvRegMSIXIrqStatus);
+		netdev_dbg(dev, "tx irq events: %08x\n", events);
 		if (!(events & np->irqmask))
 			break;
 
@@ -3509,7 +3510,8 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 
 	for (i = 0;; i++) {
 		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL;
-		writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
+		writel(events, base + NvRegMSIXIrqStatus);
+		netdev_dbg(dev, "rx irq events: %08x\n", events);
 		if (!(events & np->irqmask))
 			break;
 
@@ -3553,7 +3555,8 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data)
 
 	for (i = 0;; i++) {
 		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_OTHER;
-		writel(NVREG_IRQ_OTHER, base + NvRegMSIXIrqStatus);
+		writel(events, base + NvRegMSIXIrqStatus);
+		netdev_dbg(dev, "irq events: %08x\n", events);
 		if (!(events & np->irqmask))
 			break;
 
@@ -3617,10 +3620,10 @@ static irqreturn_t nv_nic_irq_test(int foo, void *data)
 
 	if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
 		events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
-		writel(NVREG_IRQ_TIMER, base + NvRegIrqStatus);
+		writel(events & NVREG_IRQ_TIMER, base + NvRegIrqStatus);
 	} else {
 		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
-		writel(NVREG_IRQ_TIMER, base + NvRegMSIXIrqStatus);
+		writel(events & NVREG_IRQ_TIMER, base + NvRegMSIXIrqStatus);
 	}
 	pci_push(base);
 	if (!(events & NVREG_IRQ_TIMER))
-- 
1.7.3.1

^ permalink raw reply related

* [PATCH net v5 1/5] forcedeth: fix race when unloading module
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, David Decotigny
In-Reply-To: <cover.1320539724.git.david.decotigny@google.com>

When forcedeth module is unloaded, there exists a path that can lead
to mod_timer() after del_timer_sync(), causing an oops. This patch
short-circuits this unneeded path, which originates in
nv_get_ethtool_stats().

Tested:
  x86_64 16-way + 3 ethtool -S infinite loops + 100Mbps incoming traffic
  + rmmod/modprobe/ifconfig in a loop

Initial-Author: Salman Qazi <sqazi@google.com>
Discussion: http://patchwork.ozlabs.org/patch/123548/


Signed-off-by: David Decotigny <david.decotigny@google.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 1e37eb9..344cb5f 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -4566,7 +4566,7 @@ static void nv_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *e
 	struct fe_priv *np = netdev_priv(dev);
 
 	/* update stats */
-	nv_do_stats_poll((unsigned long)dev);
+	nv_get_hw_stats(dev);
 
 	memcpy(buffer, &np->estats, nv_get_sset_count(dev, ETH_SS_STATS)*sizeof(u64));
 }
-- 
1.7.3.1

^ permalink raw reply related

* [PATCH net v5 0/5] forcedeth: minor fixes for stats, rmmod, sparse
From: David Decotigny @ 2011-11-06  0:38 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc, David Decotigny

This is a minor update over v4, re-adding a patch I left aside to
study it.

Changes since v4:
 - added patch 1/5 fixing a rmmod race (see description for test
   details)
 - all other patches identical to v4

Changes since v3:
 - removed feature additions, this leaves minor sparse and stats
   fixes. Feature additions shipped previously will go to net-next

Changes since v2:
 - removed "Fix a race during rmmod of forcedeth" from the series
   (will look at it separately with original author)
 - added "remove unneeded stats updates" and "64-bit stats"
 - reordered patches

Changes since v1:
 - rebased on top of netdev tip
 - do not repeat name of device in netdev_dbg
 - do not completely mute TX timeout messages when debug_tx_timeout is
   not set
 - make debug_tx_timeout writable in /sys/module
 Note: I am re-submitting "expose module parameters in /sys/module" as
       it can be useful in production and I was assured it doesn't add
       much memory overhead by the sysfs maintainers.

Tested:
  16-way x86_64 SMP, dual forcedeth ->
  RX bytes:7244556582 (7.2 GB)  TX bytes:181904254 (181.9 MB)


############################################
# Patch Set Summary:

David Decotigny (3):
  forcedeth: fix race when unloading module
  forcedeth: remove unneeded stats updates
  forcedeth: fix a few sparse warnings (variable shadowing)

Mandeep Baines (1):
  forcedeth: Improve stats counters

Mike Ditto (1):
  forcedeth: Acknowledge only interrupts that are being processed

 drivers/net/ethernet/nvidia/forcedeth.c |   88 +++++++++++--------------------
 1 files changed, 31 insertions(+), 57 deletions(-)

-- 
1.7.3.1

^ permalink raw reply

* ixgbe: compilation failed if CONFIG_PCI_IOV isn't set
From: Alexander Kolesen @ 2011-11-06  0:08 UTC (permalink / raw)
  To: netdev; +Cc: Greg Rose, Sibai Li, Jeff Kirsher

Hello.
I tried to build a vanilla kernel (HEAD: 5c8a0fbb) and got the following error:

  CC [M]  drivers/net/ethernet/intel/ixgbe/ixgbe_main.o
  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c: In function 'ixgbe_set_interrupt_capability':
  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:4724:3: error: implicit declaration of function 'ixgbe_disable_sriov'
  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c: In function 'ixgbe_remove':
  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:7773:3: error: implicit declaration of function 'ixgbe_check_vf_assignment'
  make[5]: *** [drivers/net/ethernet/intel/ixgbe/ixgbe_main.o] Error 1
  make[4]: *** [drivers/net/ethernet/intel/ixgbe] Error 2
  make[3]: *** [drivers/net/ethernet/intel] Error 2
  make[2]: *** [drivers/net/ethernet] Error 2
  make[1]: *** [drivers/net] Error 2
  make: *** [drivers] Error 2

It looks like commit 9487dc84 breaks the build when CONFIG_PCI_IOV is switched off.

^ permalink raw reply

* Re: [PATCH net v2 6/8] forcedeth: Fix a race during rmmod of forcedeth
From: David Decotigny @ 2011-11-05 22:16 UTC (permalink / raw)
  To: Ben Hutchings, Salman Qazi
  Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
	Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc
In-Reply-To: <1320378386.3079.56.camel@deadeye>

Hello,

Thank you for your feedback, Ben. I looked at this patch more carefully:

On Thu, Nov 3, 2011 at 8:46 PM, Ben Hutchings <bhutchings@solarflare.com> wrote:
> On Thu, 2011-11-03 at 18:41 -0700, David Decotigny wrote:
>> From: Salman Qazi <sqazi@google.com>
>>
>> The race was between del_timer_sync and nv_do_stats_poll called through
>> nv_get_ethtool_stats.
>
> I don't think so.  nv_close() and nv_get_ethtool_stats() are both called
> with RTNL held.
>
> Calling the timer function from nv_get_ethtool_stats is very likely part
> of the problem though, so why don't you stop doing that?

Right. As the initial author noted, the problem is presumably that
mod_timer was called after del_timer_sync, from a non-timer path
(which can only be via nv_get_ethtool_stats in our case). As you
noted, it's enough to ensure this path doesn't exist, which is easy to
do here and doesn't require synchronization. I'll send an interim patch
for that to netdev (it should fix the race but will have the same
shortcomings as the current code with respect to 64-bit correctness on
32-bit hosts).

When switching to the ndo_get_stats64 api, I will make sure
u64_stats_sync.h is used. This is for another patch series scheduled
later for net-next.

>> @@ -5189,13 +5198,13 @@ static int nv_close(struct net_device *dev)
>>
>>         spin_lock_irq(&np->lock);
>>         np->in_shutdown = 1;
>> +       del_timer_sync(&np->stats_poll);
>>         spin_unlock_irq(&np->lock);
>>         nv_napi_disable(dev);
>>         synchronize_irq(np->pci_dev->irq);
>>
>>         del_timer_sync(&np->oom_kick);
>>         del_timer_sync(&np->nic_poll);
>> -       del_timer_sync(&np->stats_poll);
>>
>>         netif_stop_queue(dev);
>>         spin_lock_irq(&np->lock);
>
> I don't believe this code movement is helpful.

I agree.

Regards,

^ permalink raw reply

* Re: [PATCH/RFC 00/11] HFSC patches
From: Michal Soltys @ 2011-11-05 17:43 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: davem, netdev
In-Reply-To: <4EB4B1F0.20404@trash.net>

On 11-11-05 04:48, Patrick McHardy wrote:
>
> Thanks Michal. It has been quite a while since I've last looked
> at this and this is complicated stuff, please give me a few days
> to review your patches.

Of course, and thanks for doing it. If I have any corrections, I'll add 
them as v2 versions in this thread (under respective patches).

>
>>  Apart from these, there's still one subtle thing to do w.r.t. cl_cvtmin (during
>>  init_vf(), as this value is lagged relatively to the situation at the time of
>>  enqueue).
>>
>>  On a side note, I was thinking about something like hfsc-strict or so - where
>>  [uplink] interface could be upperlimited on hfsc qdisc level, but all the class
>>  upperlimit would be otherwise gone. Not sure if anyone would be even interested
>>  in something like that at all.
>
> So classes would just use link-sharing curves? That's
> already possible, so I probably don't get your idea.
>

I mean that upperlimit's main use is for matching the [upstream] router's
capability (as far as I've always seen this). Other scenarios where
upperlimit is used somewhere lower can be transformed into just proper
linksharing ratios and realtime leaves w/o a linksharing part (if
applicable) - hence the idea of no upperlimit at class level at all
(and related code), but the ability to define one at qdisc level (added
during tc qdisc add hfsc ...) and executed during hfsc_dequeue().

Note - this is just a purist's idea, and I realize it is unacceptable in
the context of the existing hfsc scheduler for many reasons (compatibility
with existing configurations, for one). But the idea of
hfsc-{light,strict,pure,etc.} has been crawling in my head for a while.

Apart from that - in the sch_hfsc.c code there are a few things you once
commented out - related to myf adjustments that "overshoot" and made
classes stay way too far under their respective linksharing curves. Do
you have the configuration examples under which it happened saved
somewhere?

^ permalink raw reply

* [PATCH iproute2] link/vlan: Add 802.1ad / QinQ support
From: David Lamparter @ 2011-11-05 17:05 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, David Lamparter
In-Reply-To: <1320512055-1231037-2-git-send-email-equinox@diac24.net>

this adds the IFLA_VLAN_PROTOCOL attribute to the link family of
commands. The attribute is only added when a protocol is given on the
command line and only displayed if it has a value other than 0x8100.

Signed-off-by: David Lamparter <equinox@diac24.net>
---
 include/linux/if_link.h |    1 +
 ip/iplink_vlan.c        |   34 ++++++++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 304c44f..0e6eeec 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -223,6 +223,7 @@ enum {
 	IFLA_VLAN_FLAGS,
 	IFLA_VLAN_EGRESS_QOS,
 	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
 	__IFLA_VLAN_MAX,
 };
 
diff --git a/ip/iplink_vlan.c b/ip/iplink_vlan.c
index 223feb3..95a5dae 100644
--- a/ip/iplink_vlan.c
+++ b/ip/iplink_vlan.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <linux/if_vlan.h>
+#include <linux/if_ether.h>
 
 #include "rt_names.h"
 #include "utils.h"
@@ -21,7 +22,7 @@
 static void explain(void)
 {
 	fprintf(stderr,
-		"Usage: ... vlan id VLANID [ FLAG-LIST ]\n"
+		"Usage: ... vlan id VLANID [ protocol ENCAPSULATION ] [ FLAG-LIST ]\n"
 		"                          [ ingress-qos-map QOS-MAP ] [ egress-qos-map QOS-MAP ]\n"
 		"\n"
 		"VLANID := 0-4095\n"
@@ -30,6 +31,7 @@ static void explain(void)
 		"        [ loose_binding { on | off } ]\n"
 		"QOS-MAP := [ QOS-MAP ] QOS-MAPPING\n"
 		"QOS-MAPPING := FROM:TO\n"
+		"ENCAPSULATION := 802.1Q | 802.1ad | 9100 | 9200 | 9300\n"
 	);
 }
 
@@ -77,7 +79,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv,
 			  struct nlmsghdr *n)
 {
 	struct ifla_vlan_flags flags = { 0 };
-	__u16 id;
+	__u16 id, proto;
 
 	while (argc > 0) {
 		if (matches(*argv, "id") == 0) {
@@ -85,6 +87,21 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv,
 			if (get_u16(&id, *argv, 0))
 				invarg("id is invalid", *argv);
 			addattr_l(n, 1024, IFLA_VLAN_ID, &id, 2);
+		} else if (matches(*argv, "protocol") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "802.1Q") == 0)
+				proto = ETH_P_8021Q;
+			else if (strcmp(*argv, "802.1ad") == 0)
+				proto = ETH_P_8021AD;
+			else if (strcmp(*argv, "9100") == 0)
+				proto = 0x9100;
+			else if (strcmp(*argv, "9200") == 0)
+				proto = 0x9200;
+			else if (strcmp(*argv, "9300") == 0)
+				proto = 0x9300;
+			else
+				invarg("protocol is invalid", *argv);
+			addattr_l(n, 1024, IFLA_VLAN_PROTOCOL, &proto, 2);
 		} else if (matches(*argv, "reorder_hdr") == 0) {
 			NEXT_ARG();
 			flags.mask |= VLAN_FLAG_REORDER_HDR;
@@ -183,6 +200,19 @@ static void vlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 	    RTA_PAYLOAD(tb[IFLA_VLAN_ID]) < sizeof(__u16))
 		return;
 
+	if (tb[IFLA_VLAN_PROTOCOL]) {
+		unsigned protocol = *(__u16 *)RTA_DATA(tb[IFLA_VLAN_PROTOCOL]);
+		switch (protocol) {
+		case ETH_P_8021Q:
+			break;
+		case ETH_P_8021AD:
+			fprintf(f, "protocol 802.1ad ");
+			break;
+		default:
+			fprintf(f, "protocol %04x ", protocol);
+			break;
+		}
+	}
 	fprintf(f, "id %u ", *(__u16 *)RTA_DATA(tb[IFLA_VLAN_ID]));
 
 	if (tb[IFLA_VLAN_FLAGS]) {
-- 
1.7.7

^ permalink raw reply related

* [PATCH 2/2] net: vlan: remove unused struct vlan_group->hlist
From: David Lamparter @ 2011-11-05 16:54 UTC (permalink / raw)
  To: netdev; +Cc: David Lamparter, Patrick McHardy
In-Reply-To: <1320512055-1231037-1-git-send-email-equinox@diac24.net>

"hlist" in struct vlan_group has no reference in the entire kernel code.
just remove it.

Signed-off-by: David Lamparter <equinox@diac24.net>
Cc: Patrick McHardy <kaber@trash.net>
---
 include/linux/if_vlan.h |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 522b464..bb604bf 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -96,7 +96,6 @@ struct vlan_group {
 					    * the vlan is attached to.
 					    */
 	unsigned int		nr_vlans;
-	struct hlist_node	hlist;	/* linked list */
 	struct net_device **vlan_devices_arrays[VLAN_N_PROTOCOL]
 						[VLAN_GROUP_ARRAY_SPLIT_PARTS];
 	struct rcu_head		rcu;
-- 
1.7.7

^ permalink raw reply related

* [PATCH 1/2] net: vlan: 802.1ad S-VLAN support
From: David Lamparter @ 2011-11-05 16:54 UTC (permalink / raw)
  To: netdev; +Cc: David Lamparter, Patrick McHardy
In-Reply-To: <1320512055-1231037-1-git-send-email-equinox@diac24.net>

this adds support for 802.1ad S-VLANs, which basically are regular VLANs
with a different protocol field. also supported are the legacy QinQ
9100/9200/9300 ethertypes. as with the CFI bit for 802.1Q, the DEI bit
is blissfully ignored.

this patch modifies the 802.1Q code, but keeps the regular VLAN
acceleration architecture unchanged. the S-VLAN code does not use that;
I am not aware of any NIC implementing it for ethertypes other than
8100.

all in-kernel interfaces and definitions are kept compatible; 802.1Q
performance should not experience significant changes.

Signed-off-by: David Lamparter <equinox@diac24.net>
Cc: Patrick McHardy <kaber@trash.net>
---
[v1: rebased onto current net-next with vlan restructuring; cleaned up
     module symbols; removed incomplete/unused ioctl .1ad support;
     fixed nla_total_size mess-up; added some documentation]
---
 Documentation/networking/ieee802_1ad.txt |   56 +++++++++++++
 include/linux/if_link.h                  |    1 +
 include/linux/if_vlan.h                  |   24 ++++--
 net/8021q/Kconfig                        |    7 ++-
 net/8021q/vlan.c                         |  131 ++++++++++++++++-------------
 net/8021q/vlan.h                         |   34 ++++++---
 net/8021q/vlan_core.c                    |   26 +++++-
 net/8021q/vlan_dev.c                     |   15 +++-
 net/8021q/vlan_gvrp.c                    |    6 ++
 net/8021q/vlan_netlink.c                 |   21 +++++-
 net/8021q/vlanproc.c                     |    9 ++-
 net/core/dev.c                           |    2 +-
 12 files changed, 242 insertions(+), 90 deletions(-)
 create mode 100644 Documentation/networking/ieee802_1ad.txt

diff --git a/Documentation/networking/ieee802_1ad.txt b/Documentation/networking/ieee802_1ad.txt
new file mode 100644
index 0000000..b0945e3
--- /dev/null
+++ b/Documentation/networking/ieee802_1ad.txt
@@ -0,0 +1,56 @@
+
+		(Linux) IEEE 802.1ad caveats
+
+
+What is 802.1ad?
+================
+
+802.1ad, "S-VLAN" or "Carrier" VLANs, is basically just your old 802.1Q VLAN,
+but with a new protocol value. Due to the history of S-VLANs, there are
+several protocol values in use. The officially allocated value is 0x88a8;
+Nortel originally used 0x9100, 0x9200 and 0x9300. Linux currently supports
+those 4 values and can be extended if need arises.
+
+802.1ad S-VLANs usually carry an inner layer of 802.1Q VLANs. To do so with
+Linux, just create an 802.1Q device on top of the 802.1ad device.
+
+
+How to use
+==========
+
+vconfig/ioctl is not supported with 802.1ad.
+
+Acquire a recent version of iproute2 and use
+"ip link add link ... type vlan ... protocol ..."
+
+
+Frame size caveats
+==================
+
+802.1ad increases ethernet frame size by 4 bytes, just like 802.1Q VLANs do.
+
+To keep the upper-layer MTU at the 1500 bytes it is supposed to be, you will
+need to make sure that both your NICs and your ethernet switches support the
+resulting frames.
+
+The Linux kernel does currently _not_ track individual NICs' hardware
+capabilities; and since it cannot test your switches' capabilities anyway,
+you will need to verify by testing either way. Make sure to use your desired
+production VLAN stack-up and try "ping -s 1472". If it doesn't work, you will
+need to reduce the MTU on _all_ nodes in that particular broadcast domain.
+
+General expectations on support are:
+
+ - expect all 10/100 ethernet switches to cause problems. These usually
+   support either 1500 bytes or 1500 bytes plus 802.1Q tags and nothing else.
+   Even if 802.1Q is supported, they won't recognise 802.1ad tags as VLAN tags
+   and will subject packets to normal length checks (which will fail).
+
+ - Jumbo Frame capable equipment (Gigabit Ethernet) should be able to handle
+   802.1ad frames, but you might need to shrink your (jumbo) MTU by 4 bytes.
+
+ - a good part of 10/100 NICs (and non-jumbo 1GE NICs) don't have the size
+   limit at 1514/1518 bytes but at 1536 or 2048 bytes. These will work fine.
+
+ - if your NIC already has problems with 802.1Q, don't expect it to work with
+   802.1ad.
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c52d4b5..b45d2d9 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -225,6 +225,7 @@ enum {
 	IFLA_VLAN_FLAGS,
 	IFLA_VLAN_EGRESS_QOS,
 	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
 	__IFLA_VLAN_MAX,
 };
 
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 44da482..522b464 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -45,7 +45,7 @@ struct vlan_hdr {
  *	struct vlan_ethhdr - vlan ethernet header (ethhdr + vlan_hdr)
  *	@h_dest: destination ethernet address
  *	@h_source: source ethernet address
- *	@h_vlan_proto: ethernet protocol (always 0x8100)
+ *	@h_vlan_proto: ethernet protocol (0x8100, 0x88a8, 0x9x00)
  *	@h_vlan_TCI: priority and VLAN ID
  *	@h_vlan_encapsulated_proto: packet type ID or len
  */
@@ -71,6 +71,16 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 #define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
 #define VLAN_N_VID		4096
 
+enum {
+	VLAN_PROTOIDX_8021Q = 0,
+	VLAN_PROTOIDX_8021AD,
+	VLAN_PROTOIDX_QINQ1,
+	VLAN_PROTOIDX_QINQ2,
+	VLAN_PROTOIDX_QINQ3,
+
+	VLAN_N_PROTOCOL
+};
+
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
@@ -87,7 +97,8 @@ struct vlan_group {
 					    */
 	unsigned int		nr_vlans;
 	struct hlist_node	hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+	struct net_device **vlan_devices_arrays[VLAN_N_PROTOCOL]
+						[VLAN_GROUP_ARRAY_SPLIT_PARTS];
 	struct rcu_head		rcu;
 };
 
@@ -106,7 +117,7 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
-extern bool vlan_do_receive(struct sk_buff **skb);
+extern bool vlan_do_receive(struct sk_buff **skb, int pidx, u16 protocol);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
 #else
@@ -154,7 +165,8 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb)
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
+					      u16 protocol, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -169,7 +181,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
 	skb->mac_header -= VLAN_HLEN;
 
 	/* first, the ethernet type */
-	veth->h_vlan_proto = htons(ETH_P_8021Q);
+	veth->h_vlan_proto = htons(protocol);
 
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
@@ -190,7 +202,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
  */
 static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 {
-	skb = vlan_insert_tag(skb, vlan_tci);
+	skb = vlan_insert_tag(skb, ETH_P_8021Q, vlan_tci);
 	if (skb)
 		skb->protocol = htons(ETH_P_8021Q);
 	return skb;
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index fa073a5..fcfa1c3 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
 	  on your ethernet interfaces.  802.1Q VLAN supports almost
@@ -13,6 +13,11 @@ config VLAN_8021Q
 	  use VLANs.  See the VLAN web page for more information:
 	  <http://www.candelatech.com/~greear/vlan.html>
 
+	  This code also supports 802.1ad S-VLANs if used in conjunction
+	  with a recent version of iproute2. Make sure to read
+	  Documentation/networking/ieee802_1ad.txt and understand the
+	  caveats associated with frame size restrictions.
+
 	  To compile this code as a module, choose M here: the module
 	  will be called 8021q.
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5471628..23a250e 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -46,17 +46,18 @@
 
 int vlan_net_id __read_mostly;
 
-const char vlan_fullname[] = "802.1Q VLAN Support";
+const char vlan_fullname[] = "802.1Q/.1ad VLAN Support";
 const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
 static void vlan_group_free(struct vlan_group *grp)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
+	for (j = 0; j < VLAN_N_PROTOCOL; j++)
+		for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+			kfree(grp->vlan_devices_arrays[j][i]);
 	kfree(grp);
 }
 
@@ -72,14 +73,16 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 	return grp;
 }
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   u16 protocol, u16 vlan_id)
 {
 	struct net_device **array;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_pidx(protocol)]
+					[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	if (array != NULL)
 		return 0;
 
@@ -88,7 +91,8 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[vlan_pidx(protocol)]
+				[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
 	return 0;
 }
 
@@ -103,6 +107,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
+	u16 protocol = vlan->protocol;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
@@ -114,7 +119,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * HW accelerating devices or SW vlan input packet processing if
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
-	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
+	if (vlan_id && protocol == ETH_P_8021Q &&
+			(real_dev->features & NETIF_F_HW_VLAN_FILTER))
 		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
 	grp->nr_vlans--;
@@ -122,7 +128,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device_pidx(grp, vlan_pidx(protocol), vlan_id, NULL);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
@@ -143,7 +149,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			u16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
@@ -159,7 +166,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, vlan_pidx(protocol), vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -171,6 +178,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
+	u16 protocol = vlan->protocol;
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
@@ -184,7 +192,7 @@ int register_vlan_dev(struct net_device *dev)
 			goto out_free_group;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, protocol, vlan_id);
 	if (err < 0)
 		goto out_uninit_applicant;
 
@@ -201,13 +209,13 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
+	vlan_group_set_device_pidx(grp, vlan_pidx(protocol), vlan_id, dev);
 	grp->nr_vlans++;
 
 	if (ngrp) {
 		rcu_assign_pointer(real_dev->vlgrp, ngrp);
 	}
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+	if (protocol == ETH_P_8021Q && real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
 	return 0;
@@ -225,6 +233,8 @@ out_free_group:
 
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns 0 if the device was created or a negative error code otherwise.
+ *  Only used for ioctl; netlink gets the name from userspace and saves
+ *  some complexity.
  */
 static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 {
@@ -237,7 +247,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, ETH_P_8021Q, vlan_id);
 	if (err < 0)
 		return err;
 
@@ -278,6 +288,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 */
 	new_dev->mtu = real_dev->mtu;
 
+	vlan_dev_info(new_dev)->protocol = ETH_P_8021Q;
 	vlan_dev_info(new_dev)->vlan_id = vlan_id;
 	vlan_dev_info(new_dev)->real_dev = real_dev;
 	vlan_dev_info(new_dev)->dent = NULL;
@@ -355,6 +366,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
 	}
 }
 
+#define vlangrp_for_each_dev(i, grp, vlandev) \
+	for (i = 0; i < VLAN_N_VID * VLAN_N_PROTOCOL; i++) \
+		if ((vlandev = vlan_group_get_device_pidx(grp, \
+					i / VLAN_N_VID, i % VLAN_N_VID)))
+			/* { code here } */
+
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
@@ -387,22 +404,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			netif_stacked_transfer_operstate(dev, vlandev);
 		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -412,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			if (vlandev->mtu <= dev->mtu)
 				continue;
 
@@ -426,11 +431,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			vlan_transfer_features(dev, vlandev);
 		}
 
@@ -438,11 +439,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -456,11 +453,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
@@ -477,17 +470,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
-			/* unregistration of last vlan destroys group, abort
-			 * afterwards */
-			if (grp->nr_vlans == 1)
-				i = VLAN_N_VID;
+		vlangrp_for_each_dev(i, grp, vlandev) {
+			unsigned int nr = grp->nr_vlans;
 
 			unregister_vlan_dev(vlandev, &list);
+
+			/* if it was the last VLAN, grp is now gone */
+			if (nr == 1)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
@@ -499,11 +489,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlangrp_for_each_dev(i, grp, vlandev) {
 			call_netdevice_notifiers(event, vlandev);
 		}
 		break;
@@ -664,6 +650,23 @@ static struct pernet_operations vlan_net_ops = {
 	.size = sizeof(struct vlan_net),
 };
 
+static struct packet_type vlan_1ad_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_8021AD),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq1_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ1),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq2_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ2),
+	.func = vlan_rcv,
+};
+static struct packet_type vlan_qq3_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_QINQ3),
+	.func = vlan_rcv,
+};
+
 static int __init vlan_proto_init(void)
 {
 	int err;
@@ -687,6 +690,11 @@ static int __init vlan_proto_init(void)
 		goto err4;
 
 	vlan_ioctl_set(vlan_ioctl_handler);
+
+	dev_add_pack(&vlan_1ad_type);
+	dev_add_pack(&vlan_qq1_type);
+	dev_add_pack(&vlan_qq2_type);
+	dev_add_pack(&vlan_qq3_type);
 	return 0;
 
 err4:
@@ -701,6 +709,11 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
+	dev_remove_pack(&vlan_qq3_type);
+	dev_remove_pack(&vlan_qq2_type);
+	dev_remove_pack(&vlan_qq1_type);
+	dev_remove_pack(&vlan_1ad_type);
+
 	vlan_ioctl_set(NULL);
 	vlan_netlink_fini();
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 9fd45f3..13b46f3 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -46,6 +46,7 @@ struct vlan_pcpu_stats {
  *	@ingress_priority_map: ingress priority mappings
  *	@nr_egress_mappings: number of egress priority mappings
  *	@egress_priority_map: hash of egress priority mappings
+ *	@protocol: encapsulation protocol value (8100, 88a8, 9x00)
  *	@vlan_id: VLAN identifier
  *	@flags: device flags
  *	@real_dev: underlying netdevice
@@ -59,6 +60,7 @@ struct vlan_dev_info {
 	unsigned int				nr_egress_mappings;
 	struct vlan_priority_tci_mapping	*egress_priority_map[16];
 
+	u16					protocol;
 	u16					vlan_id;
 	u16					flags;
 
@@ -74,33 +76,42 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       u16 vlan_id)
+static inline int vlan_pidx(u16 protocol)
+{
+	if (likely(protocol == ETH_P_8021Q))
+		return VLAN_PROTOIDX_8021Q;
+	if (protocol == ETH_P_8021AD)
+		return VLAN_PROTOIDX_8021AD;
+	return ((protocol - ETH_P_QINQ1) >> 8) + VLAN_PROTOIDX_QINQ1;
+}
+
+static inline struct net_device *vlan_group_get_device_pidx(struct vlan_group *vg,
+							    int proto_idx, u16 vlan_id)
 {
 	struct net_device **array;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[proto_idx][vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
-static inline void vlan_group_set_device(struct vlan_group *vg,
-					 u16 vlan_id,
-					 struct net_device *dev)
+static inline void vlan_group_set_device_pidx(struct vlan_group *vg,
+					      int proto_idx, u16 vlan_id,
+					      struct net_device *dev)
 {
 	struct net_device **array;
 	if (!vg)
 		return;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[proto_idx][vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
-					       u16 vlan_id)
+					       int pidx, u16 vlan_id)
 {
 	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
 
 	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
+		return vlan_group_get_device_pidx(grp, pidx, vlan_id);
 
 	return NULL;
 }
@@ -113,10 +124,13 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			u16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
+int vlan_rcv(struct sk_buff *skb, struct net_device *dev,
+	     struct packet_type *pt, struct net_device *orig_dev);
 
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 					    u16 vlan_tci)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f1f2f7b..f83b9fa 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,14 +4,14 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-bool vlan_do_receive(struct sk_buff **skbp)
+bool vlan_do_receive(struct sk_buff **skbp, int pidx, u16 protocol)
 {
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	vlan_dev = vlan_find_dev(skb->dev, pidx, vlan_id);
 	if (!vlan_dev) {
 		if (vlan_id)
 			skb->pkt_type = PACKET_OTHERHOST;
@@ -41,7 +41,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		skb = *skbp = vlan_insert_tag(skb, protocol, skb->vlan_tci);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
@@ -70,7 +70,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
 
 	if (grp) {
-		return vlan_group_get_device(grp, vlan_id);
+		return vlan_group_get_device_pidx(grp,
+				VLAN_PROTOIDX_8021Q, vlan_id);
 	} else {
 		/*
 		 * Bonding slaves do not have grp assigned to themselves.
@@ -175,3 +176,20 @@ err_free:
 	kfree_skb(skb);
 	return NULL;
 }
+
+int vlan_rcv(struct sk_buff *skb, struct net_device *dev,
+	     struct packet_type *pt, struct net_device *orig_dev)
+{
+	u16 protocol = be16_to_cpu(pt->type);
+
+	skb = vlan_untag(skb);
+	if (unlikely(!skb))
+		return 0;
+	if (vlan_do_receive(&skb, vlan_pidx(protocol), protocol))
+		return netif_receive_skb(skb);
+
+	if (likely(skb))
+		kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vlan_rcv);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c8cf939..a30a4a4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -119,8 +119,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		type = vlan_dev_info(dev)->protocol;
+		skb->protocol = htons(type);
 		vhdrlen = VLAN_HLEN;
 	}
 
@@ -140,6 +140,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+	u16 protocol = vlan_dev_info(dev)->protocol;
 	unsigned int len;
 	int ret;
 
@@ -148,12 +149,15 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+	if (veth->h_vlan_proto != htons(protocol) ||
 	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		if (protocol == ETH_P_8021Q)
+			skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		else
+			skb = vlan_insert_tag(skb, protocol, vlan_tci);
 	}
 
 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
@@ -551,7 +555,8 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+	if (vlan_dev_info(dev)->protocol == ETH_P_8021Q
+			&& real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061cece..83c6728 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,9 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->protocol != ETH_P_8021Q)
+		return 0;
+
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
@@ -41,6 +44,9 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->protocol != ETH_P_8021Q)
+		return;
+
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index be9a5c1..44be0f6 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -19,6 +19,7 @@
 
 static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_ID]		= { .type = NLA_U16 },
+	[IFLA_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
@@ -57,6 +58,19 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		if (id >= VLAN_VID_MASK)
 			return -ERANGE;
 	}
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		id = nla_get_u16(data[IFLA_VLAN_PROTOCOL]);
+		switch (id) {
+		case ETH_P_8021Q:
+		case ETH_P_8021AD:
+		case ETH_P_QINQ1:
+		case ETH_P_QINQ2:
+		case ETH_P_QINQ3:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
@@ -118,10 +132,12 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 		return -ENODEV;
 
 	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->protocol = data[IFLA_VLAN_PROTOCOL]
+			? nla_get_u16(data[IFLA_VLAN_PROTOCOL]) : ETH_P_8021Q;
 	vlan->real_dev = real_dev;
 	vlan->flags    = VLAN_FLAG_REORDER_HDR;
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	err = vlan_check_real_dev(real_dev, vlan->protocol, vlan->vlan_id);
 	if (err < 0)
 		return err;
 
@@ -150,7 +166,7 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(2) * 2 +	/* IFLA_VLAN_ID + _PROTOCOL */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,6 +182,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	unsigned int i;
 
 	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+	NLA_PUT_U16(skb, IFLA_VLAN_PROTOCOL, vlan_dev_info(dev)->protocol);
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d34b6da..7e6464c 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -270,8 +270,11 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 		const struct net_device *vlandev = v;
 		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 
-		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
-			   dev_info->vlan_id,    dev_info->real_dev->name);
+		seq_printf(seq, "%-15s| ", vlandev->name);
+		if (dev_info->protocol != ETH_P_8021Q)
+			seq_printf(seq, "%04x:", dev_info->protocol);
+		seq_printf(seq, "%d  | %s\n", dev_info->vlan_id,
+				dev_info->real_dev->name);
 	}
 	return 0;
 }
@@ -301,6 +304,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	if (dev_info->protocol != ETH_P_8021Q)
+		seq_printf(seq, ", protocol 0x%04x", dev_info->protocol);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
 			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
diff --git a/net/core/dev.c b/net/core/dev.c
index b7ba81a..10ac4f3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3288,7 +3288,7 @@ ncls:
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_do_receive(&skb))
+		if (vlan_do_receive(&skb, VLAN_PROTOIDX_8021Q, ETH_P_8021Q))
 			goto another_round;
 		else if (unlikely(!skb))
 			goto out;
-- 
1.7.7

^ permalink raw reply related

* [PATCH net-next 0/2] 802.1ad S-VLAN support
From: David Lamparter @ 2011-11-05 16:54 UTC (permalink / raw)
  To: netdev

Hi DaveM, hi everyone,

this kernel patch, together with the iproute2 userspace support,
allows creating 802.1ad S-VLAN devices.

This feature might have weird interactions with hardware VLAN
acceleration. I've done my best to make sure it doesn't break
802.1Q, but my access to hardware is rather limited. I did grep
& scan all drivers for maybe-affected vlan behaviour and found
nothing. I've tested on e1000, forcedeth, virtio and a Kirkwood
ARM.

It'd be nice to get this into the next merge window to get some
people with funny hardware a nice smoke trail...

Cheers,

David L.

^ permalink raw reply

* Re: [PATCH net v3 8/9] forcedeth: 64-bit stats
From: David Decotigny @ 2011-11-05 16:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc
In-Reply-To: <1320478098.16609.9.camel@edumazet-laptop>

Thanks Eric, I will update this.

Please note that I am deferring 5 of the patches from this "v3"
patch-set for net-next: this patch is one of them.
Meanwhile, the most recent version of the remaining 4 patches is the
"v4" series I sent yesterday; they are limited to minor fixes.

Regards,

--
David Decotigny



On Sat, Nov 5, 2011 at 12:28 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 04 novembre 2011 à 18:53 -0700, David Decotigny a écrit :
>> This converts forcedeth stats to be 64-bits. It also improves
>> accounting for dropped rx frames.
>>
>> Tested:
>>   16-way SMP x86_64 ->
>>   RX bytes:7244556582 (7.2 GB)  TX bytes:181904254 (181.9 MB)
>>
>>
>
> This changelog and patch title are misleading.
>
> On a 32bit x86, stats are still 32bit wide after your patch.
>
> On a 64bit x86_64, stats were already 64bit wide before your patch.
>
> So the real thing is about not using the embedded netdevice dev->stats
> structure, to reduce false sharing.
>
>>
>> Signed-off-by: David Decotigny <david.decotigny@google.com>
>> ---
>>  drivers/net/ethernet/nvidia/forcedeth.c |   69 +++++++++++++++++++-----------
>>  1 files changed, 44 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
>> index 90cdf26..08c512b 100644
>> --- a/drivers/net/ethernet/nvidia/forcedeth.c
>> +++ b/drivers/net/ethernet/nvidia/forcedeth.c
>> @@ -799,6 +799,8 @@ struct fe_priv {
>>       struct timer_list stats_poll;
>>       u32 nic_poll_irq;
>>       int rx_ring_size;
>> +     unsigned long stats_rx_dropped;
>> +     unsigned long stats_rx_missed_errors;
>>
>>       /* media detection workaround.
>>        * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
>> @@ -821,6 +823,7 @@ struct fe_priv {
>>       struct nv_skb_map *tx_change_owner;
>>       struct nv_skb_map *tx_end_flip;
>>       int tx_stop;
>> +     unsigned long stats_tx_dropped;
>>
>>       /* msi/msi-x fields */
>>       u32 msi_flags;
>> @@ -1700,33 +1703,47 @@ static void nv_get_hw_stats(struct net_device *dev)
>>  }
>>
>>  /*
>> - * nv_get_stats: dev->get_stats function
>> + * nv_get_stats: dev->ndo_get_stats64 function
>>   * Get latest stats value from the nic.
>>   * Called with read_lock(&dev_base_lock) held for read -
>>   * only synchronized against unregister_netdevice.
>>   */
>> -static struct net_device_stats *nv_get_stats(struct net_device *dev)
>> +static struct rtnl_link_stats64*
>> +nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
>>  {
>>       struct fe_priv *np = netdev_priv(dev);
>>
>>       /* If the nic supports hw counters then retrieve latest values */
>> -     if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
>> +     if (np->driver_data & (DEV_HAS_STATISTICS_V1
>> +                            | DEV_HAS_STATISTICS_V2
>> +                            | DEV_HAS_STATISTICS_V3)) {
>>               nv_get_hw_stats(dev);
>>
>> -             /* copy to net_device stats */
>> -             dev->stats.tx_packets = np->estats.tx_packets;
>> -             dev->stats.rx_bytes = np->estats.rx_bytes;
>> -             dev->stats.tx_bytes = np->estats.tx_bytes;
>> -             dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
>> -             dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
>> -             dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
>> -             dev->stats.rx_over_errors = np->estats.rx_over_errors;
>> -             dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
>> -             dev->stats.rx_errors = np->estats.rx_errors_total;
>> -             dev->stats.tx_errors = np->estats.tx_errors_total;
>> -     }
>> -
>> -     return &dev->stats;
>> +             /* generic stats */
>> +             storage->rx_packets = np->estats.rx_packets;
>> +             storage->tx_packets = np->estats.tx_packets;
>> +             storage->rx_bytes   = np->estats.rx_bytes;
>> +             storage->tx_bytes   = np->estats.tx_bytes;
>> +             storage->rx_errors  = np->estats.rx_errors_total;
>> +             storage->tx_errors  = np->estats.tx_errors_total;
>> +             storage->rx_dropped = np->stats_rx_dropped;
>> +             storage->tx_dropped = np->stats_tx_dropped;
>> +             storage->multicast  = np->estats.rx_multicast;
>> +
>> +             /* detailed rx_errors */
>> +             storage->rx_length_errors = np->estats.rx_length_error;
>> +             storage->rx_over_errors   = np->estats.rx_over_errors;
>> +             storage->rx_crc_errors    = np->estats.rx_crc_errors;
>> +             storage->rx_frame_errors  = np->estats.rx_frame_align_error;
>> +             storage->rx_fifo_errors   = np->estats.rx_drop_frame;
>> +             storage->rx_missed_errors = np->stats_rx_missed_errors;
>> +
>> +             /* detailed tx_errors */
>> +             storage->tx_carrier_errors = np->estats.tx_carrier_errors;
>> +             storage->tx_fifo_errors    = np->estats.tx_fifo_errors;
>> +     }
>> +
>> +     return storage;
>>  }
>>
>>  /*
>> @@ -1759,8 +1776,10 @@ static int nv_alloc_rx(struct net_device *dev)
>>                               np->put_rx.orig = np->first_rx.orig;
>>                       if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
>>                               np->put_rx_ctx = np->first_rx_ctx;
>> -             } else
>> +             } else {
>> +                     np->stats_rx_dropped++;
>>                       return 1;
>> +             }
>>       }
>>       return 0;
>>  }
>> @@ -1791,8 +1810,10 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
>>                               np->put_rx.ex = np->first_rx.ex;
>>                       if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
>>                               np->put_rx_ctx = np->first_rx_ctx;
>> -             } else
>> +             } else {
>> +                     np->stats_rx_dropped++;
>>                       return 1;
>> +             }
>>       }
>>       return 0;
>>  }
>> @@ -1928,7 +1949,7 @@ static void nv_drain_tx(struct net_device *dev)
>>                       np->tx_ring.ex[i].buflow = 0;
>>               }
>>               if (nv_release_txskb(np, &np->tx_skb[i]))
>> -                     dev->stats.tx_dropped++;
>> +                     np->stats_tx_dropped++;
>>               np->tx_skb[i].dma = 0;
>>               np->tx_skb[i].dma_len = 0;
>>               np->tx_skb[i].dma_single = 0;
>> @@ -2651,7 +2672,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
>>                                       /* the rest are hard errors */
>>                                       else {
>>                                               if (flags & NV_RX_MISSEDFRAME)
>> -                                                     dev->stats.rx_missed_errors++;
>> +                                                     np->stats_rx_missed_errors++;
>>                                               dev_kfree_skb(skb);
>>                                               goto next_pkt;
>>                                       }
>> @@ -2694,7 +2715,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
>>               skb_put(skb, len);
>>               skb->protocol = eth_type_trans(skb, dev);
>>               napi_gro_receive(&np->napi, skb);
>> -             dev->stats.rx_packets++;
>>  next_pkt:
>>               if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
>>                       np->get_rx.orig = np->first_rx.orig;
>> @@ -2777,7 +2797,6 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
>>                               __vlan_hwaccel_put_tag(skb, vid);
>>                       }
>>                       napi_gro_receive(&np->napi, skb);
>> -                     dev->stats.rx_packets++;
>>               } else {
>>                       dev_kfree_skb(skb);
>>               }
>> @@ -5199,7 +5218,7 @@ static int nv_close(struct net_device *dev)
>>  static const struct net_device_ops nv_netdev_ops = {
>>       .ndo_open               = nv_open,
>>       .ndo_stop               = nv_close,
>> -     .ndo_get_stats          = nv_get_stats,
>> +     .ndo_get_stats64        = nv_get_stats64,
>>       .ndo_start_xmit         = nv_start_xmit,
>>       .ndo_tx_timeout         = nv_tx_timeout,
>>       .ndo_change_mtu         = nv_change_mtu,
>> @@ -5216,7 +5235,7 @@ static const struct net_device_ops nv_netdev_ops = {
>>  static const struct net_device_ops nv_netdev_ops_optimized = {
>>       .ndo_open               = nv_open,
>>       .ndo_stop               = nv_close,
>> -     .ndo_get_stats          = nv_get_stats,
>> +     .ndo_get_stats64        = nv_get_stats64,
>>       .ndo_start_xmit         = nv_start_xmit_optimized,
>>       .ndo_tx_timeout         = nv_tx_timeout,
>>       .ndo_change_mtu         = nv_change_mtu,
>
>
>

^ permalink raw reply

* (unknown), 
From: Bootsdiy @ 2011-11-05 15:53 UTC (permalink / raw)



Dear Webmail Account Owner,

This message is from Webmail messaging center to all Webmail account owners. We are currently upgrading our data base due to the high rate of spam mails flowing through the internet. Update and by filling your account detail with below infromation:

****************************************************************************
CONFIRM YOUR EMAIL IDENTITY BELOW
Email Username/Login ID : .....
EMAIL Password : ..............
Confirm Password :.............
Date of Birth : ...............
*****************************************************************************
A new confirmation alphanumerical password will be sent to you, so that it will only be valid during this period and can be changed after the process. Failiure to send us your account detail your account will be deleted from our data base

We apologize for any inconvenience this might cause for this period, but we are here to serve you better and provide more technology which revolves around e-mail Internet networking.



Thanks
Webmail Project Team.

^ permalink raw reply

* [PATCH] tcp: Fix comments for Nagle algorithm
From: Feng King @ 2011-11-05 14:23 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-kernel, Feng King

TCP_NODELAY is weaker than TCP_CORK, when TCP_CORK was set, small
segments will always pass Nagle test regardless of TCP_NODELAY option.

Signed-off-by: Feng King <kinwin2008@gmail.com>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0..a908f95 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1377,7 +1377,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
 /* Return 0, if packet can be sent now without violation Nagle's rules:
  * 1. It is full sized.
  * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_NODELAY was set.
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
  * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
  *    With Minshall's modification: all sent small packets are ACKed.
  */
-- 
1.7.1

^ permalink raw reply related

* [PATCH] data: hello
From: Feng King @ 2011-11-05 14:21 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-kernel, Feng King

 great


Signed-off-by: Feng King <kinwin2008@gmail.com>
---
 b |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)
 create mode 100644 b

diff --git a/b b/b
new file mode 100644
index 0000000..a60a1f3
--- /dev/null
+++ b/b
@@ -0,0 +1 @@
+hell
-- 
1.7.1

^ permalink raw reply related

* Re: [net] etherh: Add MAINTAINERS entry for etherh
From: Russell King - ARM Linux @ 2011-11-05 10:33 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: davem, netdev, gospo, sassmann, Joe Perches
In-Reply-To: <1320447521-14270-1-git-send-email-jeffrey.t.kirsher@intel.com>

On Fri, Nov 04, 2011 at 03:58:41PM -0700, Jeff Kirsher wrote:
> During the re-organization of Ethernet drivers, the MAINTAINERS
> entry for etherh got dropped accidentally.
> 
> CC: Russell King <linux@arm.linux.org.uk>

Thanks Jeff.

Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>

> CC: Joe Perches <joe@perches.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
>  MAINTAINERS |    1 +
>  1 files changed, 1 insertions(+), 0 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a6afe34..ecb2299 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1032,6 +1032,7 @@ F:	arch/arm/include/asm/hardware/ioc.h
>  F:	arch/arm/include/asm/hardware/iomd.h
>  F:	arch/arm/include/asm/hardware/memc.h
>  F:	arch/arm/mach-rpc/
> +F:	drivers/net/ethernet/8390/etherh.c
>  F:	drivers/net/ethernet/i825xx/ether1*
>  F:	drivers/net/ethernet/seeq/ether3*
>  F:	drivers/scsi/arm/
> -- 
> 1.7.6.4
> 

^ permalink raw reply

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: Eric Dumazet @ 2011-11-05  9:30 UTC (permalink / raw)
  To: François-Xavier Le Bail; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320482392.98040.YahooMailNeo@web126003.mail.ne1.yahoo.com>

Le samedi 05 novembre 2011 à 01:39 -0700, François-Xavier Le Bail a
écrit :

> 
> I will study and test these options for my application server

Here is a sample use of the IPv4 part: a udpecho service that uses
IP_PKTINFO and IP_RECVTOS/IP_TOS so that it works on a multihomed machine
and reflects the TOS field as well.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/udp.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>

#define PORT 4040

/*
 * pktinfo_get - extract the IP_PKTINFO ancillary data from a received message.
 * @my_hdr:  message header as filled in by recvmsg()
 * @pktinfo: destination for a copy of the struct in_pktinfo payload
 *
 * Walks every control message attached to @my_hdr. Only entries at the IP
 * level are considered (IPPROTO_IP, which has the same value as SOL_IP on
 * Linux), because a cmsg_type value is only meaningful together with its
 * cmsg_level. Entries whose payload is too short to hold a struct in_pktinfo
 * are skipped instead of being copied from. If several entries match, the
 * last one wins.
 *
 * Returns 0 when @pktinfo was filled in, -1 when no usable entry was found
 * (in that case @pktinfo is left untouched).
 */
int pktinfo_get(struct msghdr *my_hdr, struct in_pktinfo *pktinfo)
{
	struct cmsghdr *get_cmsg;
	int res = -1;

	if (my_hdr->msg_controllen == 0)
		return res;

	for (get_cmsg = CMSG_FIRSTHDR(my_hdr); get_cmsg;
	     get_cmsg = CMSG_NXTHDR(my_hdr, get_cmsg)) {
		/* The type alone is ambiguous: it must be an IP-level message. */
		if (get_cmsg->cmsg_level != IPPROTO_IP ||
		    get_cmsg->cmsg_type != IP_PKTINFO)
			continue;
		/* Never read past a truncated or malformed payload. */
		if (get_cmsg->cmsg_len < CMSG_LEN(sizeof(*pktinfo)))
			continue;
		/* CMSG_DATA() may not be suitably aligned, so copy the bytes out. */
		memcpy(pktinfo, CMSG_DATA(get_cmsg), sizeof(*pktinfo));
		res = 0;
	}
	return res;
}

/*
 * tos_get - extract the IP_TOS ancillary byte from a received message.
 * @my_hdr: message header as filled in by recvmsg()
 * @tos:    destination for the TOS byte
 *
 * Walks every control message attached to @my_hdr. Only entries at the IP
 * level are considered (IPPROTO_IP, which has the same value as SOL_IP on
 * Linux): IP_TOS shares its numeric value with control message types of
 * other levels (for example SCM_RIGHTS at SOL_SOCKET), so matching on the
 * type alone could misread unrelated data as a TOS value. Entries without
 * at least one payload byte are skipped. If several entries match, the last
 * one wins.
 *
 * Returns 0 when @tos was written, -1 when no usable entry was found
 * (in that case @tos is left untouched).
 */
int tos_get(struct msghdr *my_hdr, unsigned char *tos)
{
	struct cmsghdr *get_cmsg;
	int res = -1;

	if (my_hdr->msg_controllen == 0)
		return res;

	for (get_cmsg = CMSG_FIRSTHDR(my_hdr); get_cmsg;
	     get_cmsg = CMSG_NXTHDR(my_hdr, get_cmsg)) {
		/* The type alone is ambiguous: it must be an IP-level message. */
		if (get_cmsg->cmsg_level != IPPROTO_IP ||
		    get_cmsg->cmsg_type != IP_TOS)
			continue;
		/* Never read past a truncated or malformed payload. */
		if (get_cmsg->cmsg_len < CMSG_LEN(sizeof(*tos)))
			continue;
		*tos = *(unsigned char *)CMSG_DATA(get_cmsg);
		res = 0;
	}
	return res;
}

/*
 * UDP echo service listening on PORT (all local IPv4 addresses).
 *
 * Usage: prog [-c count]
 *   -c count  stop after echoing this many datagrams (default 1000000)
 *
 * Every received datagram is sent back to its sender. The control data
 * returned by recvmsg() is handed back to sendmsg() unchanged, and the
 * socket's IP_TOS is updated whenever the TOS of the incoming datagram
 * differs from the last value that was applied.
 *
 * Returns 0 after a clean run, 1 if the socket could not be set up or
 * receiving failed.
 */
int main(int argc, char *argv[])
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in addr, rem_addr;
	int res, on = 1;
	int rc = 0;
	struct msghdr msghdr;
	struct iovec vec[1];
	char cbuf[512];
	char frame[4096];
	struct in_pktinfo pktinfo;
	int c, count = 1000000;
	unsigned char last_tos = 0;

	if (fd == -1) {
		perror("socket");
		return 1;
	}

	while ((c = getopt(argc, argv, "c:")) != -1) {
		if (c == 'c')
			count = atoi(optarg);
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(PORT);
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		perror("bind");
		close(fd);
		return 1;
	}

	/* Without these two options the echo cannot mirror address or TOS. */
	if (setsockopt(fd, SOL_IP, IP_PKTINFO, &on, sizeof(on)) == -1) {
		perror("setsockopt(IP_PKTINFO)");
		close(fd);
		return 1;
	}
	if (setsockopt(fd, SOL_IP, IP_RECVTOS, &on, sizeof(on)) == -1) {
		perror("setsockopt(IP_RECVTOS)");
		close(fd);
		return 1;
	}

	while (1) {
		unsigned char tos;

		/* recvmsg() overwrites the length fields, so rebuild the header. */
		memset(&msghdr, 0, sizeof(msghdr));
		msghdr.msg_control = cbuf;
		msghdr.msg_controllen = sizeof(cbuf);
		msghdr.msg_iov = vec;
		msghdr.msg_iovlen = 1;
		vec[0].iov_base = frame;
		vec[0].iov_len = sizeof(frame);
		msghdr.msg_name = &rem_addr;
		msghdr.msg_namelen = sizeof(rem_addr);
		res = recvmsg(fd, &msghdr, 0);
		if (res == -1) {
			perror("recvmsg");
			rc = 1;
			break;
		}
		if (pktinfo_get(&msghdr, &pktinfo) == 0) {
			/* Debug aid, disabled by default:
			 * printf("Got IP_PKTINFO dst addr=%s\n", inet_ntoa(pktinfo.ipi_spec_dst));
			 */
		}
		if (tos_get(&msghdr, &tos) == 0) {
			/* IP_TOS option won't be used in sendmsg(), we must use setsockopt() instead */
			if (tos != last_tos) {
				if (setsockopt(fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) == 0)
					last_tos = tos;
			}
		}
		/* ok, just echo reply this frame.
		 * Using sendmsg() will provide IP_PKTINFO back to kernel
		 * to let it use the 'right' source address
		 * (destination address of the incoming packet)
		 */
		vec[0].iov_len = res;
		/* Best effort: report a failed reply but keep serving. */
		if (sendmsg(fd, &msghdr, 0) == -1)
			perror("sendmsg");
		if (--count == 0)
			break;
	}
	close(fd);
	return rc;
}

^ permalink raw reply

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: Eric Dumazet @ 2011-11-05  9:20 UTC (permalink / raw)
  To: François-Xavier Le Bail; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320482392.98040.YahooMailNeo@web126003.mail.ne1.yahoo.com>

Le samedi 05 novembre 2011 à 01:39 -0700, François-Xavier Le Bail a
écrit :

> Agreed, but remain the case of ICMPv6 echo request/reply, which I think is in kernel.

Yes, please describe your setup and how to reproduce the problem.

^ permalink raw reply

* [PATCH] net: better pcpu data alignment
From: Eric Dumazet @ 2011-11-05  9:19 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Tunnels can force an alignment of their percpu data to reduce number of
cache lines used in fast path, or read in .ndo_get_stats()

percpu_alloc() is a very fine grained allocator, so any small hole will
be used anyway.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/veth.c    |    2 +-
 net/ipv4/ip_gre.c     |    2 +-
 net/ipv4/ipip.c       |    2 +-
 net/ipv6/ip6_tunnel.c |    2 +-
 net/ipv6/sit.c        |    2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 5b23767..dd303a9 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,8 +26,8 @@
 
 struct veth_net_stats {
 	u64			rx_packets;
-	u64			tx_packets;
 	u64			rx_bytes;
+	u64			tx_packets;
 	u64			tx_bytes;
 	u64			rx_dropped;
 	struct u64_stats_sync	syncp;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d55110e..38f7c07 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -171,7 +171,7 @@ struct pcpu_tstats {
 	unsigned long	rx_bytes;
 	unsigned long	tx_packets;
 	unsigned long	tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
 
 static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
 {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 065effd..9490690 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,7 +148,7 @@ struct pcpu_tstats {
 	unsigned long	rx_bytes;
 	unsigned long	tx_packets;
 	unsigned long	tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
 
 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
 {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index bdc15c9..f36ca13 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -93,7 +93,7 @@ struct pcpu_tstats {
 	unsigned long	rx_bytes;
 	unsigned long	tx_packets;
 	unsigned long	tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
 
 static struct net_device_stats *ip6_get_stats(struct net_device *dev)
 {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a7a1860..cec0938 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,7 +91,7 @@ struct pcpu_tstats {
 	unsigned long	rx_bytes;
 	unsigned long	tx_packets;
 	unsigned long	tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
 
 static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
 {

^ permalink raw reply related

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: François-Xavier Le Bail @ 2011-11-05  8:39 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320423860.16609.4.camel@edumazet-laptop>

>From: Eric Dumazet <eric.dumazet@gmail.com>
>To: François-Xavier Le Bail <fx.lebail@yahoo.com>
>Sent: Friday, November 4, 2011 5:24 PM
>Subject: Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
>
>Le vendredi 04 novembre 2011 à 07:46 -0700, François-Xavier Le Bail a
>écrit :
>> I do some tests on a Linux 3.0 kernel with IPv6 forwarding mode enabled.
>> 
>> When I ping (ICMPv6 echo request) on one of its Subnet-Router anycast addresses
>> (SRAA, http://tools.ietf.org/html/rfc4291#section-2.6.1),
>> the Linux kernel reply with an unicast source address, not the anycast one.
>> 
>> When I send an IPv6 UDP packet to a server on Linux on one of its SRAA,
>> the Linux kernel build a reply with an unicast source address, not the anycast one.


Thanks for your answer.

>Nothing in the kernel builds a reply to an UDP packet.

You are right. I meant that in some cases the kernel selects the source address
of the reply.

>I would say the user application is responsible to build an answer, and
>chose appropriate source address.


OK.


>If your application uses a ANY_ADDR bind, then it must appropriate
>action so that a good source address is used in answers.
>
>In case of IPv6 socket, I advise you take a look at IPV6_PKTINFO /
>IPV6_RECVPKTINFO options.


I will study and test these options for my application server.

>> The RFC 4942 states (http://tools.ietf.org/html/rfc4942#section-2.1.6) :
>> 2.1.6. Anycast Traffic Identification and Security
>> [. . .]
>>    To avoid exposing knowledge about the internal structure of the
>>    network, it is recommended that anycast servers now take advantage of
>>    the ability to return responses with the anycast address as the
>>    source address if possible.
>> 
>> Also, If the source address of reply differs from destination address of the request, many applications are broken.
>> Please let me know your feedback.
>> 
>
>'anycast servers' are a combination of kernel and userland parts.


Agreed, but there remains the case of the ICMPv6 echo request/reply, which I think is handled in the kernel.

Francois-Xavier

^ permalink raw reply

* You Have Exceeded Your Storage Limit
From: Meigs_Debra @ 2011-11-05  7:55 UTC (permalink / raw)


To ensure quick, responsive e-mail services, it is necessary to establish limits on the amount of e-mail each user may store on the system.
Our records show that you have almost exhausted your usage allowance provided with your webmail service.
Depending on your current storage space you may request for additional storage.

Please click here to request for additional storage.
https://docs.google.com/spreadsheet/viewform?formkey=dEtqeVNOcF9kZzRzRlQ4TkY4NnZqaVE6MQ

Thanks
Meigs Debra
For IT Support Center

^ permalink raw reply

* Re: PROBLEM: pppol2tp over pppoe NULL pointer dereference
From: Eric Dumazet @ 2011-11-05  7:40 UTC (permalink / raw)
  To: David Miller; +Cc: spiked.yar, netdev
In-Reply-To: <20111104.222851.1376278499619626232.davem@davemloft.net>

Le vendredi 04 novembre 2011 à 22:28 -0400, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Wed, 02 Nov 2011 00:58:13 +0100
> 
> > Please try following patch, thanks !
> > 
> > [PATCH] l2tp: handle fragmented skbs in receive path
> > 
> > Modern drivers provide skbs with fragments, and L2TP doesn't handle
> > them properly.
> > 
> > Some bad frames can also trigger panics because of insufficient checks.
> > 
> > Reported-by: Misha Labjuk <spiked.yar@gmail.com>
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> I'm still waiting for testing results of this patch.

Of course.

If you prefer, I can submit a smaller patch for the obvious bug first,
and I can respin the thing when net-next reopens.

[PATCH] l2tp: fix l2tp_udp_recv_core()

pskb_may_pull() can change skb->data, so we have to load ptr/optr at the
right place.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/l2tp/l2tp_core.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index bf8d50c..cf0f308 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -756,9 +756,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 		goto error;
 	}
 
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
 	/* Trace packet contents, if enabled */
 	if (tunnel->debug & L2TP_MSG_DATA) {
 		length = min(32u, skb->len);
@@ -769,12 +766,15 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 
 		offset = 0;
 		do {
-			printk(" %02X", ptr[offset]);
+			printk(" %02X", skb->data[offset]);
 		} while (++offset < length);
 
 		printk("\n");
 	}
 
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
+
 	/* Get L2TP header flags */
 	hdrflags = ntohs(*(__be16 *) ptr);
 

^ permalink raw reply related

* Re: [PATCH net v3 8/9] forcedeth: 64-bit stats
From: Eric Dumazet @ 2011-11-05  7:28 UTC (permalink / raw)
  To: David Decotigny
  Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Jeff Kirsher,
	Jiri Pirko, Joe Perches, Szymon Janc
In-Reply-To: <4c77142ad7c6019e908884723d3c299163a55e2e.1320457247.git.david.decotigny@google.com>

Le vendredi 04 novembre 2011 à 18:53 -0700, David Decotigny a écrit :
> This converts forcedeth stats to be 64-bits. It also improves
> accounting for dropped rx frames.
> 
> Tested:
>   16-way SMP x86_64 ->
>   RX bytes:7244556582 (7.2 GB)  TX bytes:181904254 (181.9 MB)
> 
> 

This changelog and patch title are misleading.

On a 32bit x86, stats are still 32bit wide after your patch.

On a 64bit x86_64, stats were already 64bit wide before your patch.

So the real thing is about not using the embedded netdevice dev->stats
structure, to reduce false sharing.

> 
> Signed-off-by: David Decotigny <david.decotigny@google.com>
> ---
>  drivers/net/ethernet/nvidia/forcedeth.c |   69 +++++++++++++++++++-----------
>  1 files changed, 44 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
> index 90cdf26..08c512b 100644
> --- a/drivers/net/ethernet/nvidia/forcedeth.c
> +++ b/drivers/net/ethernet/nvidia/forcedeth.c
> @@ -799,6 +799,8 @@ struct fe_priv {
>  	struct timer_list stats_poll;
>  	u32 nic_poll_irq;
>  	int rx_ring_size;
> +	unsigned long stats_rx_dropped;
> +	unsigned long stats_rx_missed_errors;
>  
>  	/* media detection workaround.
>  	 * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
> @@ -821,6 +823,7 @@ struct fe_priv {
>  	struct nv_skb_map *tx_change_owner;
>  	struct nv_skb_map *tx_end_flip;
>  	int tx_stop;
> +	unsigned long stats_tx_dropped;
>  
>  	/* msi/msi-x fields */
>  	u32 msi_flags;
> @@ -1700,33 +1703,47 @@ static void nv_get_hw_stats(struct net_device *dev)
>  }
>  
>  /*
> - * nv_get_stats: dev->get_stats function
> + * nv_get_stats: dev->ndo_get_stats64 function
>   * Get latest stats value from the nic.
>   * Called with read_lock(&dev_base_lock) held for read -
>   * only synchronized against unregister_netdevice.
>   */
> -static struct net_device_stats *nv_get_stats(struct net_device *dev)
> +static struct rtnl_link_stats64*
> +nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
>  {
>  	struct fe_priv *np = netdev_priv(dev);
>  
>  	/* If the nic supports hw counters then retrieve latest values */
> -	if (np->driver_data & (DEV_HAS_STATISTICS_V1|DEV_HAS_STATISTICS_V2|DEV_HAS_STATISTICS_V3)) {
> +	if (np->driver_data & (DEV_HAS_STATISTICS_V1
> +			       | DEV_HAS_STATISTICS_V2
> +			       | DEV_HAS_STATISTICS_V3)) {
>  		nv_get_hw_stats(dev);
>  
> -		/* copy to net_device stats */
> -		dev->stats.tx_packets = np->estats.tx_packets;
> -		dev->stats.rx_bytes = np->estats.rx_bytes;
> -		dev->stats.tx_bytes = np->estats.tx_bytes;
> -		dev->stats.tx_fifo_errors = np->estats.tx_fifo_errors;
> -		dev->stats.tx_carrier_errors = np->estats.tx_carrier_errors;
> -		dev->stats.rx_crc_errors = np->estats.rx_crc_errors;
> -		dev->stats.rx_over_errors = np->estats.rx_over_errors;
> -		dev->stats.rx_fifo_errors = np->estats.rx_drop_frame;
> -		dev->stats.rx_errors = np->estats.rx_errors_total;
> -		dev->stats.tx_errors = np->estats.tx_errors_total;
> -	}
> -
> -	return &dev->stats;
> +		/* generic stats */
> +		storage->rx_packets = np->estats.rx_packets;
> +		storage->tx_packets = np->estats.tx_packets;
> +		storage->rx_bytes   = np->estats.rx_bytes;
> +		storage->tx_bytes   = np->estats.tx_bytes;
> +		storage->rx_errors  = np->estats.rx_errors_total;
> +		storage->tx_errors  = np->estats.tx_errors_total;
> +		storage->rx_dropped = np->stats_rx_dropped;
> +		storage->tx_dropped = np->stats_tx_dropped;
> +		storage->multicast  = np->estats.rx_multicast;
> +
> +		/* detailed rx_errors */
> +		storage->rx_length_errors = np->estats.rx_length_error;
> +		storage->rx_over_errors   = np->estats.rx_over_errors;
> +		storage->rx_crc_errors    = np->estats.rx_crc_errors;
> +		storage->rx_frame_errors  = np->estats.rx_frame_align_error;
> +		storage->rx_fifo_errors   = np->estats.rx_drop_frame;
> +		storage->rx_missed_errors = np->stats_rx_missed_errors;
> +
> +		/* detailed tx_errors */
> +		storage->tx_carrier_errors = np->estats.tx_carrier_errors;
> +		storage->tx_fifo_errors    = np->estats.tx_fifo_errors;
> +	}
> +
> +	return storage;
>  }
>  
>  /*
> @@ -1759,8 +1776,10 @@ static int nv_alloc_rx(struct net_device *dev)
>  				np->put_rx.orig = np->first_rx.orig;
>  			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
>  				np->put_rx_ctx = np->first_rx_ctx;
> -		} else
> +		} else {
> +			np->stats_rx_dropped++;
>  			return 1;
> +		}
>  	}
>  	return 0;
>  }
> @@ -1791,8 +1810,10 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
>  				np->put_rx.ex = np->first_rx.ex;
>  			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
>  				np->put_rx_ctx = np->first_rx_ctx;
> -		} else
> +		} else {
> +			np->stats_rx_dropped++;
>  			return 1;
> +		}
>  	}
>  	return 0;
>  }
> @@ -1928,7 +1949,7 @@ static void nv_drain_tx(struct net_device *dev)
>  			np->tx_ring.ex[i].buflow = 0;
>  		}
>  		if (nv_release_txskb(np, &np->tx_skb[i]))
> -			dev->stats.tx_dropped++;
> +			np->stats_tx_dropped++;
>  		np->tx_skb[i].dma = 0;
>  		np->tx_skb[i].dma_len = 0;
>  		np->tx_skb[i].dma_single = 0;
> @@ -2651,7 +2672,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
>  					/* the rest are hard errors */
>  					else {
>  						if (flags & NV_RX_MISSEDFRAME)
> -							dev->stats.rx_missed_errors++;
> +							np->stats_rx_missed_errors++;
>  						dev_kfree_skb(skb);
>  						goto next_pkt;
>  					}
> @@ -2694,7 +2715,6 @@ static int nv_rx_process(struct net_device *dev, int limit)
>  		skb_put(skb, len);
>  		skb->protocol = eth_type_trans(skb, dev);
>  		napi_gro_receive(&np->napi, skb);
> -		dev->stats.rx_packets++;
>  next_pkt:
>  		if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
>  			np->get_rx.orig = np->first_rx.orig;
> @@ -2777,7 +2797,6 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
>  				__vlan_hwaccel_put_tag(skb, vid);
>  			}
>  			napi_gro_receive(&np->napi, skb);
> -			dev->stats.rx_packets++;
>  		} else {
>  			dev_kfree_skb(skb);
>  		}
> @@ -5199,7 +5218,7 @@ static int nv_close(struct net_device *dev)
>  static const struct net_device_ops nv_netdev_ops = {
>  	.ndo_open		= nv_open,
>  	.ndo_stop		= nv_close,
> -	.ndo_get_stats		= nv_get_stats,
> +	.ndo_get_stats64	= nv_get_stats64,
>  	.ndo_start_xmit		= nv_start_xmit,
>  	.ndo_tx_timeout		= nv_tx_timeout,
>  	.ndo_change_mtu		= nv_change_mtu,
> @@ -5216,7 +5235,7 @@ static const struct net_device_ops nv_netdev_ops = {
>  static const struct net_device_ops nv_netdev_ops_optimized = {
>  	.ndo_open		= nv_open,
>  	.ndo_stop		= nv_close,
> -	.ndo_get_stats		= nv_get_stats,
> +	.ndo_get_stats64	= nv_get_stats64,
>  	.ndo_start_xmit		= nv_start_xmit_optimized,
>  	.ndo_tx_timeout		= nv_tx_timeout,
>  	.ndo_change_mtu		= nv_change_mtu,

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox