Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH 2.6.35-rc6] net-next: Add multiqueue support to vmxnet3 driver
From: Stephen Hemminger @ 2010-10-13 21:57 UTC (permalink / raw)
  To: Shreyas Bhatewara; +Cc: netdev, pv-drivers, linux-kernel
In-Reply-To: <alpine.LRH.2.00.1010131445080.25989@sbhatewara-dev1.eng.vmware.com>

On Wed, 13 Oct 2010 14:47:05 -0700 (PDT)
Shreyas Bhatewara <sbhatewara@vmware.com> wrote:

> #ifdef VMXNET3_RSS
> +static unsigned int num_rss_entries;
> +#define VMXNET3_MAX_DEVICES 10
> +
> +static int rss_ind_table[VMXNET3_MAX_DEVICES *
> +			 VMXNET3_RSS_IND_TABLE_SIZE + 1] = {
> +	[0 ... VMXNET3_MAX_DEVICES * VMXNET3_RSS_IND_TABLE_SIZE] = -1 };
> +#endif
> +static int num_tqs[VMXNET3_MAX_DEVICES + 1] = {
> +	[0 ... VMXNET3_MAX_DEVICES] = 1 };
> +static int num_rqs[VMXNET3_MAX_DEVICES + 1] = {
> +	[0 ... VMXNET3_MAX_DEVICES] = 1 };
> +static int share_tx_intr[VMXNET3_MAX_DEVICES + 1] = {
> +	[0 ... VMXNET3_MAX_DEVICES] = 0 };
> +static int buddy_intr[VMXNET3_MAX_DEVICES + 1] = {
> +	[0 ... VMXNET3_MAX_DEVICES] = 1 };
> +
> +static unsigned int num_adapters;
> +module_param_array(share_tx_intr, int, &num_adapters, 0400);
> +MODULE_PARM_DESC(share_tx_intr, "Share one IRQ among all tx queue completions. "
> +		 "Comma separated list of 1s and 0s - one for each NIC. "
> +		 "1 to share, 0 to not, default is 0");
> +module_param_array(buddy_intr, int, &num_adapters, 0400);
> +MODULE_PARM_DESC(buddy_intr, "Share one IRQ among corresponding tx and rx "
> +		 "queues. Comma separated list of 1s and 0s - one for each "
> +		 "NIC. 1 to share, 0 to not, default is 1");
> +module_param_array(num_tqs, int, &num_adapters, 0400);
> +MODULE_PARM_DESC(num_tqs, "Number of transmit queues in each adapter. Comma "
> +		 "separated list of integers. Setting this to 0 makes number"
> +		 " of queues same as number of CPUs. Default is 1.");
> +
> +#ifdef VMXNET3_RSS
> +module_param_array(rss_ind_table, int, &num_rss_entries, 0400);
> +MODULE_PARM_DESC(rss_ind_table, "RSS Indirection table. Number of entries "
> +		 "per NIC should be 32. Each integer in a comma separated list"
> +		 " is an rx queue number starting with 0. Repeat the same for"
> +		 " all NICs.");
> +module_param_array(num_rqs, int, &num_adapters, 0400);
> +MODULE_PARM_DESC(num_rqs, "Number of receive queues in each adapter. Comma "
> +		 " separated list of integers. Setting this to 0 makes number"
> +		 " of queues same as number of CPUs. Default is 1.");

Module parameters are not right for this. They lead to a different API
for interacting with each vendor's driver. Is there another, better API?
Does it have to be this tweakable in a production environment?

-- 

^ permalink raw reply

* [PATCH 2.6.35-rc6] net-next: Add multiqueue support to vmxnet3 driver
From: Shreyas Bhatewara @ 2010-10-13 21:47 UTC (permalink / raw)
  To: netdev, pv-drivers, linux-kernel
In-Reply-To: <alpine.LRH.2.00.1009290104130.464@sbhatewara-dev1.eng.vmware.com>


Add multiqueue support to vmxnet3 driver

This change adds multiqueue, and thus receive-side scaling (RSS), support
to the vmxnet3 device driver. The number of rx queues is limited to 1 in
cases where
- MSI-X is not configured (the driver falls back to MSI or INTx), or
- one MSI-X vector is not available per rx queue

By default, 1 tx and 1 rx queue will be initialized. Module parameters can
be used to configure tx and rx up to a maximum of 8 queues each.

Signed-off-by: Shreyas Bhatewara <sbhatewara@vmware.com>

---

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index e04dc10..48058fc 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -44,6 +44,50 @@ MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
 
 static atomic_t devices_found;
 
+#ifdef VMXNET3_RSS
+static unsigned int num_rss_entries;
+#define VMXNET3_MAX_DEVICES 10
+
+static int rss_ind_table[VMXNET3_MAX_DEVICES *
+			 VMXNET3_RSS_IND_TABLE_SIZE + 1] = {
+	[0 ... VMXNET3_MAX_DEVICES * VMXNET3_RSS_IND_TABLE_SIZE] = -1 };
+#endif
+static int num_tqs[VMXNET3_MAX_DEVICES + 1] = {
+	[0 ... VMXNET3_MAX_DEVICES] = 1 };
+static int num_rqs[VMXNET3_MAX_DEVICES + 1] = {
+	[0 ... VMXNET3_MAX_DEVICES] = 1 };
+static int share_tx_intr[VMXNET3_MAX_DEVICES + 1] = {
+	[0 ... VMXNET3_MAX_DEVICES] = 0 };
+static int buddy_intr[VMXNET3_MAX_DEVICES + 1] = {
+	[0 ... VMXNET3_MAX_DEVICES] = 1 };
+
+static unsigned int num_adapters;
+module_param_array(share_tx_intr, int, &num_adapters, 0400);
+MODULE_PARM_DESC(share_tx_intr, "Share one IRQ among all tx queue completions. "
+		 "Comma separated list of 1s and 0s - one for each NIC. "
+		 "1 to share, 0 to not, default is 0");
+module_param_array(buddy_intr, int, &num_adapters, 0400);
+MODULE_PARM_DESC(buddy_intr, "Share one IRQ among corresponding tx and rx "
+		 "queues. Comma separated list of 1s and 0s - one for each "
+		 "NIC. 1 to share, 0 to not, default is 1");
+module_param_array(num_tqs, int, &num_adapters, 0400);
+MODULE_PARM_DESC(num_tqs, "Number of transmit queues in each adapter. Comma "
+		 "separated list of integers. Setting this to 0 makes number"
+		 " of queues same as number of CPUs. Default is 1.");
+
+#ifdef VMXNET3_RSS
+module_param_array(rss_ind_table, int, &num_rss_entries, 0400);
+MODULE_PARM_DESC(rss_ind_table, "RSS Indirection table. Number of entries "
+		 "per NIC should be 32. Each integer in a comma separated list"
+		 " is an rx queue number starting with 0. Repeat the same for"
+		 " all NICs.");
+module_param_array(num_rqs, int, &num_adapters, 0400);
+MODULE_PARM_DESC(num_rqs, "Number of receive queues in each adapter. Comma "
+		 " separated list of integers. Setting this to 0 makes number"
+		 " of queues same as number of CPUs. Default is 1.");
+
+#endif /* VMXNET3_RSS */
+
 
 /*
  *    Enable/Disable the given intr
@@ -107,7 +151,7 @@ static void
 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
 	tq->stopped = false;
-	netif_start_queue(adapter->netdev);
+	netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
 }
 
 
@@ -115,7 +159,7 @@ static void
 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
 	tq->stopped = false;
-	netif_wake_queue(adapter->netdev);
+	netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 }
 
 
@@ -124,7 +168,7 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
 	tq->stopped = true;
 	tq->num_stop++;
-	netif_stop_queue(adapter->netdev);
+	netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 }
 
 
@@ -135,6 +179,7 @@ static void
 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 {
 	u32 ret;
+	int i;
 
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
 	ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
@@ -145,22 +190,28 @@ vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 		if (!netif_carrier_ok(adapter->netdev))
 			netif_carrier_on(adapter->netdev);
 
-		if (affectTxQueue)
-			vmxnet3_tq_start(&adapter->tx_queue, adapter);
+		if (affectTxQueue) {
+			for (i = 0; i < adapter->num_tx_queues; i++)
+				vmxnet3_tq_start(&adapter->tx_queue[i],
+						 adapter);
+		}
 	} else {
 		printk(KERN_INFO "%s: NIC Link is Down\n",
 		       adapter->netdev->name);
 		if (netif_carrier_ok(adapter->netdev))
 			netif_carrier_off(adapter->netdev);
 
-		if (affectTxQueue)
-			vmxnet3_tq_stop(&adapter->tx_queue, adapter);
+		if (affectTxQueue) {
+			for (i = 0; i < adapter->num_tx_queues; i++)
+				vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
+		}
 	}
 }
 
 static void
 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 {
+	int i;
 	u32 events = le32_to_cpu(adapter->shared->ecr);
 	if (!events)
 		return;
@@ -176,16 +227,18 @@ vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 				       VMXNET3_CMD_GET_QUEUE_STATUS);
 
-		if (adapter->tqd_start->status.stopped) {
-			printk(KERN_ERR "%s: tq error 0x%x\n",
-			       adapter->netdev->name,
-			       le32_to_cpu(adapter->tqd_start->status.error));
-		}
-		if (adapter->rqd_start->status.stopped) {
-			printk(KERN_ERR "%s: rq error 0x%x\n",
-			       adapter->netdev->name,
-			       adapter->rqd_start->status.error);
-		}
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			if (adapter->tqd_start[i].status.stopped)
+				dev_dbg(&adapter->netdev->dev,
+					"%s: tq[%d] error 0x%x\n",
+					adapter->netdev->name, i, le32_to_cpu(
+					adapter->tqd_start[i].status.error));
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			if (adapter->rqd_start[i].status.stopped)
+				dev_dbg(&adapter->netdev->dev,
+					"%s: rq[%d] error 0x%x\n",
+					adapter->netdev->name, i,
+					adapter->rqd_start[i].status.error);
 
 		schedule_work(&adapter->work);
 	}
@@ -410,7 +463,7 @@ vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 }
 
 
-void
+static void
 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
 		   struct vmxnet3_adapter *adapter)
 {
@@ -518,6 +571,14 @@ err:
 	return -ENOMEM;
 }
 
+static void
+vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
+}
 
 /*
  *    starting from ring->next2fill, allocate rx buffers for the given ring
@@ -621,27 +682,14 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 	skb_shinfo(skb)->nr_frags++;
 }
 
-
-/*
- * Free any pages which were attached to the frags of the spare skb.  This can
- * happen when the spare skb is attached to the rx ring to prevent starvation,
- * but there was no issue with page allocation.
- */
-
-static void
-vmxnet3_rx_spare_skb_free_frags(struct vmxnet3_adapter *adapter)
+/* Destroy all tx queues */
+void
+vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
 {
-	struct sk_buff *skb = adapter->rx_queue.spare_skb;
 	int i;
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
-		BUG_ON(frag->page != 0);
-		put_page(frag->page);
-		frag->page = 0;
-		frag->size = 0;
-	}
-	skb_shinfo(skb)->nr_frags = 0;
-	skb->data_len = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
 }
 
 
@@ -760,6 +808,17 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 }
 
 
+/* Init all tx queues */
+static void
+vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
+}
+
+
 /*
  *    parse and copy relevant protocol headers:
  *      For a tso pkt, relevant headers are L2/3/4 including options
@@ -1028,8 +1087,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	if (le32_to_cpu(tq->shared->txNumDeferred) >=
 					le32_to_cpu(tq->shared->txThreshold)) {
 		tq->shared->txNumDeferred = 0;
-		VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD,
-				       tq->tx_ring.next2fill);
+		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_TXPROD +
+				       tq->qid * 8), tq->tx_ring.next2fill);
 	}
 
 	return NETDEV_TX_OK;
@@ -1048,7 +1107,10 @@ vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 
-	return vmxnet3_tq_xmit(skb, &adapter->tx_queue, adapter, netdev);
+		BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
+		return vmxnet3_tq_xmit(skb,
+				       &adapter->tx_queue[skb->queue_mapping],
+				       adapter, netdev);
 }
 
 
@@ -1100,10 +1162,7 @@ vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
 	 * desc for the pkt
 	 */
 	if (ctx->skb) {
-		if (ctx->skb == rq->spare_skb)
-			vmxnet3_rx_spare_skb_free_frags(adapter);
-		else
-			dev_kfree_skb_irq(ctx->skb);
+		dev_kfree_skb_irq(ctx->skb);
 	}
 
 	ctx->skb = NULL;
@@ -1138,9 +1197,9 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 			break;
 		}
 		num_rxd++;
-
+		BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
 		idx = rcd->rxdIdx;
-		ring_idx = rcd->rqID == rq->qid ? 0 : 1;
+		ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
 		vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
 				  &rxCmdDesc);
 		rbi = rq->buf_info[ring_idx] + idx;
@@ -1202,12 +1261,6 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 
 		skb = ctx->skb;
 		if (rcd->eop) {
-			if (skb == rq->spare_skb) {
-				rq->stats.drop_total++;
-				vmxnet3_rx_spare_skb_free_frags(adapter);
-				ctx->skb = NULL;
-				goto rcd_done;
-			}
 			skb->len += skb->data_len;
 			skb->truesize += skb->data_len;
 
@@ -1292,17 +1345,18 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
 					rq->rx_ring[ring_idx].next2comp = 0;
 		rq->uncommitted[ring_idx] = 0;
 	}
+	rq->comp_ring.gen = VMXNET3_INIT_GEN;
+	rq->comp_ring.next2proc = 0;
+}
 
-	/* free starvation prevention skb if allocated */
-	if (rq->spare_skb) {
-		vmxnet3_rx_spare_skb_free_frags(adapter);
-		dev_kfree_skb(rq->spare_skb);
-		rq->spare_skb = NULL;
-	}
 
+static void
+vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
+{
+	int i;
 
-	rq->comp_ring.gen = VMXNET3_INIT_GEN;
-	rq->comp_ring.next2proc = 0;
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
 }
 
 
@@ -1406,6 +1460,25 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
 
 
 static int
+vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
+		if (unlikely(err)) {
+			dev_err(&adapter->netdev->dev, "%s: failed to "
+				"initialize rx queue%i\n",
+				adapter->netdev->name, i);
+			break;
+		}
+	}
+	return err;
+
+}
+
+
+static int
 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
 {
 	int i;
@@ -1453,33 +1526,177 @@ err:
 
 
 static int
+vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
+		if (unlikely(err)) {
+			dev_err(&adapter->netdev->dev,
+				"%s: failed to create rx queue%i\n",
+				adapter->netdev->name, i);
+			goto err_out;
+		}
+	}
+	return err;
+err_out:
+	vmxnet3_rq_destroy_all(adapter);
+	return err;
+
+}
+
+/* Multiple queue aware polling function for tx and rx */
+
+static int
 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
 {
+	int rcd_done = 0, i;
 	if (unlikely(adapter->shared->ecr))
 		vmxnet3_process_events(adapter);
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
 
-	vmxnet3_tq_tx_complete(&adapter->tx_queue, adapter);
-	return vmxnet3_rq_rx_complete(&adapter->rx_queue, adapter, budget);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
+						   adapter, budget);
+	return rcd_done;
 }
 
 
 static int
 vmxnet3_poll(struct napi_struct *napi, int budget)
 {
-	struct vmxnet3_adapter *adapter = container_of(napi,
-					  struct vmxnet3_adapter, napi);
+	struct vmxnet3_rx_queue *rx_queue = container_of(napi,
+					  struct vmxnet3_rx_queue, napi);
+	int rxd_done;
+
+	rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
+
+	if (rxd_done < budget) {
+		napi_complete(napi);
+		vmxnet3_enable_all_intrs(rx_queue->adapter);
+	}
+	return rxd_done;
+}
+
+/*
+ * NAPI polling function for MSI-X mode with multiple Rx queues
+ * Returns the # of the NAPI credit consumed (# of rx descriptors processed)
+ */
+
+static int
+vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
+{
+	struct vmxnet3_rx_queue *rq = container_of(napi,
+						struct vmxnet3_rx_queue, napi);
+	struct vmxnet3_adapter *adapter = rq->adapter;
 	int rxd_done;
 
-	rxd_done = vmxnet3_do_poll(adapter, budget);
+	/* When sharing interrupt with corresponding tx queue, process
+	 * tx completions in that queue as well
+	 */
+	if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
+		struct vmxnet3_tx_queue *tq =
+				&adapter->tx_queue[rq - adapter->rx_queue];
+		vmxnet3_tq_tx_complete(tq, adapter);
+	}
+
+	rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
 
 	if (rxd_done < budget) {
 		napi_complete(napi);
-		vmxnet3_enable_intr(adapter, 0);
+		vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
 	}
 	return rxd_done;
 }
 
 
+#ifdef CONFIG_PCI_MSI
+
+/*
+ * Handle completion interrupts on tx queues
+ * Returns whether or not the intr is handled
+ */
+
+static irqreturn_t
+vmxnet3_msix_tx(int irq, void *data)
+{
+	struct vmxnet3_tx_queue *tq = data;
+	struct vmxnet3_adapter *adapter = tq->adapter;
+
+	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+		vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
+
+	/* Handle the case where only one irq is allocate for all tx queues */
+	if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
+		int i;
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
+			vmxnet3_tq_tx_complete(txq, adapter);
+		}
+	} else {
+		vmxnet3_tq_tx_complete(tq, adapter);
+	}
+	vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
+
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * Handle completion interrupts on rx queues. Returns whether or not the
+ * intr is handled
+ */
+
+static irqreturn_t
+vmxnet3_msix_rx(int irq, void *data)
+{
+	struct vmxnet3_rx_queue *rq = data;
+	struct vmxnet3_adapter *adapter = rq->adapter;
+
+	/* disable intr if needed */
+	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
+	napi_schedule(&rq->napi);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * vmxnet3_msix_event --
+ *
+ *    vmxnet3 msix event intr handler
+ *
+ * Result:
+ *    whether or not the intr is handled
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static irqreturn_t
+vmxnet3_msix_event(int irq, void *data)
+{
+	struct net_device *dev = data;
+	struct vmxnet3_adapter *adapter = netdev_priv(dev);
+
+	/* disable intr if needed */
+	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+		vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
+
+	if (adapter->shared->ecr)
+		vmxnet3_process_events(adapter);
+
+	vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
+
+	return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_PCI_MSI  */
+
+
 /* Interrupt handler for vmxnet3  */
 static irqreturn_t
 vmxnet3_intr(int irq, void *dev_id)
@@ -1487,7 +1704,7 @@ vmxnet3_intr(int irq, void *dev_id)
 	struct net_device *dev = dev_id;
 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
 
-	if (unlikely(adapter->intr.type == VMXNET3_IT_INTX)) {
+	if (adapter->intr.type == VMXNET3_IT_INTX) {
 		u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
 		if (unlikely(icr == 0))
 			/* not ours */
@@ -1497,77 +1714,136 @@ vmxnet3_intr(int irq, void *dev_id)
 
 	/* disable intr if needed */
 	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
-		vmxnet3_disable_intr(adapter, 0);
+		vmxnet3_disable_all_intrs(adapter);
 
-	napi_schedule(&adapter->napi);
+	napi_schedule(&adapter->rx_queue[0].napi);
 
 	return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 
-
 /* netpoll callback. */
 static void
 vmxnet3_netpoll(struct net_device *netdev)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	int irq;
 
-#ifdef CONFIG_PCI_MSI
-	if (adapter->intr.type == VMXNET3_IT_MSIX)
-		irq = adapter->intr.msix_entries[0].vector;
-	else
-#endif
-		irq = adapter->pdev->irq;
+	if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+		vmxnet3_disable_all_intrs(adapter);
+
+	vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
+	vmxnet3_enable_all_intrs(adapter);
 
-	disable_irq(irq);
-	vmxnet3_intr(irq, netdev);
-	enable_irq(irq);
 }
-#endif
+#endif	/* CONFIG_NET_POLL_CONTROLLER */
 
 static int
 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
 {
-	int err;
+	struct vmxnet3_intr *intr = &adapter->intr;
+	int err = 0, i;
+	int vector = 0;
 
 #ifdef CONFIG_PCI_MSI
 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
-		/* we only use 1 MSI-X vector */
-		err = request_irq(adapter->intr.msix_entries[0].vector,
-				  vmxnet3_intr, 0, adapter->netdev->name,
-				  adapter->netdev);
-	} else if (adapter->intr.type == VMXNET3_IT_MSI) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			sprintf(adapter->tx_queue[i].name, "%s:v%d-%s",
+				adapter->netdev->name, vector, "Tx");
+			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
+				err = request_irq(
+					      intr->msix_entries[vector].vector,
+					      vmxnet3_msix_tx, 0,
+					      adapter->tx_queue[i].name,
+					      &adapter->tx_queue[i]);
+			if (err) {
+				dev_err(&adapter->netdev->dev,
+					"Failed to request irq for MSIX, %s, "
+					"error %d\n",
+					adapter->tx_queue[i].name, err);
+				return err;
+			}
+
+			/* Handle the case where only 1 MSIx was allocated for
+			 * all tx queues */
+			if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
+				for (; i < adapter->num_tx_queues; i++)
+					adapter->tx_queue[i].comp_ring.intr_idx
+								= vector;
+				vector++;
+				break;
+			} else {
+				adapter->tx_queue[i].comp_ring.intr_idx
+								= vector++;
+			}
+		}
+		if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
+			vector = 0;
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			sprintf(adapter->rx_queue[i].name, "%s:v%d-%s",
+				adapter->netdev->name, vector, "Rx");
+			err = request_irq(intr->msix_entries[vector].vector,
+					  vmxnet3_msix_rx, 0,
+					  adapter->rx_queue[i].name,
+					  &(adapter->rx_queue[i]));
+			if (err) {
+				printk(KERN_ERR "Failed to request irq for MSIX"
+				       ", %s, error %d\n",
+				       adapter->rx_queue[i].name, err);
+				return err;
+			}
+
+			adapter->rx_queue[i].comp_ring.intr_idx = vector++;
+		}
+
+		sprintf(intr->event_msi_vector_name, "%s:v%d-event",
+			adapter->netdev->name, vector);
+		err = request_irq(intr->msix_entries[vector].vector,
+				  vmxnet3_msix_event, 0,
+				  intr->event_msi_vector_name, adapter->netdev);
+		intr->event_intr_idx = vector;
+
+	} else if (intr->type == VMXNET3_IT_MSI) {
+		adapter->num_rx_queues = 1;
 		err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
 				  adapter->netdev->name, adapter->netdev);
-	} else
+	} else {
 #endif
-	{
+		adapter->num_rx_queues = 1;
 		err = request_irq(adapter->pdev->irq, vmxnet3_intr,
 				  IRQF_SHARED, adapter->netdev->name,
 				  adapter->netdev);
+#ifdef CONFIG_PCI_MSI
 	}
-
-	if (err)
+#endif
+	intr->num_intrs = vector + 1;
+	if (err) {
 		printk(KERN_ERR "Failed to request irq %s (intr type:%d), error"
-		       ":%d\n", adapter->netdev->name, adapter->intr.type, err);
+		       ":%d\n", adapter->netdev->name, intr->type, err);
+	} else {
+		/* Number of rx queues will not change after this */
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+			rq->qid = i;
+			rq->qid2 = i + adapter->num_rx_queues;
+		}
 
 
-	if (!err) {
-		int i;
-		/* init our intr settings */
-		for (i = 0; i < adapter->intr.num_intrs; i++)
-			adapter->intr.mod_levels[i] = UPT1_IML_ADAPTIVE;
 
-		/* next setup intr index for all intr sources */
-		adapter->tx_queue.comp_ring.intr_idx = 0;
-		adapter->rx_queue.comp_ring.intr_idx = 0;
-		adapter->intr.event_intr_idx = 0;
+		/* init our intr settings */
+		for (i = 0; i < intr->num_intrs; i++)
+			intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
+		if (adapter->intr.type != VMXNET3_IT_MSIX) {
+			adapter->intr.event_intr_idx = 0;
+			for (i = 0; i < adapter->num_tx_queues; i++)
+				adapter->tx_queue[i].comp_ring.intr_idx = 0;
+			adapter->rx_queue[0].comp_ring.intr_idx = 0;
+		}
 
 		printk(KERN_INFO "%s: intr type %u, mode %u, %u vectors "
-		       "allocated\n", adapter->netdev->name, adapter->intr.type,
-		       adapter->intr.mask_mode, adapter->intr.num_intrs);
+		       "allocated\n", adapter->netdev->name, intr->type,
+		       intr->mask_mode, intr->num_intrs);
 	}
 
 	return err;
@@ -1577,18 +1853,32 @@ vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
 static void
 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
 {
-	BUG_ON(adapter->intr.type == VMXNET3_IT_AUTO ||
-	       adapter->intr.num_intrs <= 0);
+	struct vmxnet3_intr *intr = &adapter->intr;
+	BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
 
-	switch (adapter->intr.type) {
+	switch (intr->type) {
 #ifdef CONFIG_PCI_MSI
 	case VMXNET3_IT_MSIX:
 	{
-		int i;
+		int i, vector = 0;
+
+		if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
+			for (i = 0; i < adapter->num_tx_queues; i++) {
+				free_irq(intr->msix_entries[vector++].vector,
+					 &(adapter->tx_queue[i]));
+				if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
+					break;
+			}
+		}
 
-		for (i = 0; i < adapter->intr.num_intrs; i++)
-			free_irq(adapter->intr.msix_entries[i].vector,
-				 adapter->netdev);
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			free_irq(intr->msix_entries[vector++].vector,
+				 &(adapter->rx_queue[i]));
+		}
+
+		free_irq(intr->msix_entries[vector].vector,
+			 adapter->netdev);
+		BUG_ON(vector >= intr->num_intrs);
 		break;
 	}
 #endif
@@ -1801,6 +2091,15 @@ vmxnet3_set_mc(struct net_device *netdev)
 	kfree(new_table);
 }
 
+void
+vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
+}
+
 
 /*
  *   Set up driver_shared based on settings in adapter.
@@ -1848,40 +2147,87 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
 	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
 	devRead->misc.queueDescLen = cpu_to_le32(
-				     sizeof(struct Vmxnet3_TxQueueDesc) +
-				     sizeof(struct Vmxnet3_RxQueueDesc));
+		adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
+		adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
 
 	/* tx queue settings */
-	BUG_ON(adapter->tx_queue.tx_ring.base == NULL);
-
-	devRead->misc.numTxQueues = 1;
-	tqc = &adapter->tqd_start->conf;
-	tqc->txRingBasePA   = cpu_to_le64(adapter->tx_queue.tx_ring.basePA);
-	tqc->dataRingBasePA = cpu_to_le64(adapter->tx_queue.data_ring.basePA);
-	tqc->compRingBasePA = cpu_to_le64(adapter->tx_queue.comp_ring.basePA);
-	tqc->ddPA           = cpu_to_le64(virt_to_phys(
-						adapter->tx_queue.buf_info));
-	tqc->txRingSize     = cpu_to_le32(adapter->tx_queue.tx_ring.size);
-	tqc->dataRingSize   = cpu_to_le32(adapter->tx_queue.data_ring.size);
-	tqc->compRingSize   = cpu_to_le32(adapter->tx_queue.comp_ring.size);
-	tqc->ddLen          = cpu_to_le32(sizeof(struct vmxnet3_tx_buf_info) *
-			      tqc->txRingSize);
-	tqc->intrIdx        = adapter->tx_queue.comp_ring.intr_idx;
+	devRead->misc.numTxQueues =  adapter->num_tx_queues;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
+		BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
+		tqc = &adapter->tqd_start[i].conf;
+		tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
+		tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
+		tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
+		tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
+		tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
+		tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
+		tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
+		tqc->ddLen          = cpu_to_le32(
+					sizeof(struct vmxnet3_tx_buf_info) *
+					tqc->txRingSize);
+		tqc->intrIdx        = tq->comp_ring.intr_idx;
+	}
 
 	/* rx queue settings */
-	devRead->misc.numRxQueues = 1;
-	rqc = &adapter->rqd_start->conf;
-	rqc->rxRingBasePA[0] = cpu_to_le64(adapter->rx_queue.rx_ring[0].basePA);
-	rqc->rxRingBasePA[1] = cpu_to_le64(adapter->rx_queue.rx_ring[1].basePA);
-	rqc->compRingBasePA  = cpu_to_le64(adapter->rx_queue.comp_ring.basePA);
-	rqc->ddPA            = cpu_to_le64(virt_to_phys(
-						adapter->rx_queue.buf_info));
-	rqc->rxRingSize[0]   = cpu_to_le32(adapter->rx_queue.rx_ring[0].size);
-	rqc->rxRingSize[1]   = cpu_to_le32(adapter->rx_queue.rx_ring[1].size);
-	rqc->compRingSize    = cpu_to_le32(adapter->rx_queue.comp_ring.size);
-	rqc->ddLen           = cpu_to_le32(sizeof(struct vmxnet3_rx_buf_info) *
-			       (rqc->rxRingSize[0] + rqc->rxRingSize[1]));
-	rqc->intrIdx         = adapter->rx_queue.comp_ring.intr_idx;
+	devRead->misc.numRxQueues = adapter->num_rx_queues;
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[i];
+		rqc = &adapter->rqd_start[i].conf;
+		rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
+		rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
+		rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
+		rqc->ddPA            = cpu_to_le64(virt_to_phys(
+							rq->buf_info));
+		rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
+		rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
+		rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
+		rqc->ddLen           = cpu_to_le32(
+					sizeof(struct vmxnet3_rx_buf_info) *
+					(rqc->rxRingSize[0] +
+					 rqc->rxRingSize[1]));
+		rqc->intrIdx         = rq->comp_ring.intr_idx;
+	}
+
+#ifdef VMXNET3_RSS
+	memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
+
+	if (adapter->rss) {
+		struct UPT1_RSSConf *rssConf = adapter->rss_conf;
+		devRead->misc.uptFeatures |= UPT1_F_RSS;
+		devRead->misc.numRxQueues = adapter->num_rx_queues;
+		rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
+				    UPT1_RSS_HASH_TYPE_IPV4 |
+				    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
+				    UPT1_RSS_HASH_TYPE_IPV6;
+		rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
+		rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
+		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
+		get_random_bytes(&rssConf->hashKey[0], rssConf->hashKeySize);
+		if (num_rss_entries >= adapter->dev_number *
+				       VMXNET3_RSS_IND_TABLE_SIZE) {
+			int j = (adapter->dev_number) *
+				VMXNET3_RSS_IND_TABLE_SIZE;
+			for (i = 0; i < rssConf->indTableSize; i++, j++) {
+				if (rss_ind_table[j] >= 0 &&
+				    rss_ind_table[j] < adapter->num_rx_queues)
+					rssConf->indTable[i] = rss_ind_table[j];
+				else
+					rssConf->indTable[i] = i %
+							adapter->num_rx_queues;
+			}
+		} else {
+			for (i = 0; i < rssConf->indTableSize; i++)
+				rssConf->indTable[i] = i %
+							adapter->num_rx_queues;
+		}
+
+		devRead->rssConfDesc.confVer = 1;
+		devRead->rssConfDesc.confLen = sizeof(*rssConf);
+		devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
+	}
+
+#endif /* VMXNET3_RSS */
 
 	/* intr settings */
 	devRead->intrConf.autoMask = adapter->intr.mask_mode ==
@@ -1903,18 +2249,18 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 int
 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 {
-	int err;
+	int err, i;
 	u32 ret;
 
-	dev_dbg(&adapter->netdev->dev,
-		"%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes"
-		" %u %u %u\n", adapter->netdev->name, adapter->skb_buf_size,
-		adapter->rx_buf_per_pkt, adapter->tx_queue.tx_ring.size,
-		adapter->rx_queue.rx_ring[0].size,
-		adapter->rx_queue.rx_ring[1].size);
-
-	vmxnet3_tq_init(&adapter->tx_queue, adapter);
-	err = vmxnet3_rq_init(&adapter->rx_queue, adapter);
+	dev_dbg(&adapter->netdev->dev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
+		" ring sizes %u %u %u\n", adapter->netdev->name,
+		adapter->skb_buf_size, adapter->rx_buf_per_pkt,
+		adapter->tx_queue[0].tx_ring.size,
+		adapter->rx_queue[0].rx_ring[0].size,
+		adapter->rx_queue[0].rx_ring[1].size);
+
+	vmxnet3_tq_init_all(adapter);
+	err = vmxnet3_rq_init_all(adapter);
 	if (err) {
 		printk(KERN_ERR "Failed to init rx queue for %s: error %d\n",
 		       adapter->netdev->name, err);
@@ -1944,10 +2290,15 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 		err = -EINVAL;
 		goto activate_err;
 	}
-	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD,
-			       adapter->rx_queue.rx_ring[0].next2fill);
-	VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD2,
-			       adapter->rx_queue.rx_ring[1].next2fill);
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD +
+				(i * VMXNET3_REG_ALIGN)),
+				adapter->rx_queue[i].rx_ring[0].next2fill);
+		VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
+				(i * VMXNET3_REG_ALIGN)),
+				adapter->rx_queue[i].rx_ring[1].next2fill);
+	}
 
 	/* Apply the rx filter settins last. */
 	vmxnet3_set_mc(adapter->netdev);
@@ -1957,8 +2308,8 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 	 * tx queue if the link is up.
 	 */
 	vmxnet3_check_link(adapter, true);
-
-	napi_enable(&adapter->napi);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		napi_enable(&adapter->rx_queue[i].napi);
 	vmxnet3_enable_all_intrs(adapter);
 	clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
 	return 0;
@@ -1970,7 +2321,7 @@ activate_err:
 irq_err:
 rq_err:
 	/* free up buffers we allocated */
-	vmxnet3_rq_cleanup(&adapter->rx_queue, adapter);
+	vmxnet3_rq_cleanup_all(adapter);
 	return err;
 }
 
@@ -1985,6 +2336,7 @@ vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
 int
 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
 {
+	int i;
 	if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
 		return 0;
 
@@ -1993,13 +2345,14 @@ vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
 			       VMXNET3_CMD_QUIESCE_DEV);
 	vmxnet3_disable_all_intrs(adapter);
 
-	napi_disable(&adapter->napi);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		napi_disable(&adapter->rx_queue[i].napi);
 	netif_tx_disable(adapter->netdev);
 	adapter->link_speed = 0;
 	netif_carrier_off(adapter->netdev);
 
-	vmxnet3_tq_cleanup(&adapter->tx_queue, adapter);
-	vmxnet3_rq_cleanup(&adapter->rx_queue, adapter);
+	vmxnet3_tq_cleanup_all(adapter);
+	vmxnet3_rq_cleanup_all(adapter);
 	vmxnet3_free_irqs(adapter);
 	return 0;
 }
@@ -2121,7 +2474,9 @@ vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
 static void
 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
 {
-	size_t sz;
+	size_t sz, i, ring0_size, ring1_size, comp_size;
+	struct vmxnet3_rx_queue	*rq = &adapter->rx_queue[0];
+
 
 	if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
 				    VMXNET3_MAX_ETH_HDR_SIZE) {
@@ -2143,11 +2498,19 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
 	 * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
 	 */
 	sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
-	adapter->rx_queue.rx_ring[0].size = (adapter->rx_queue.rx_ring[0].size +
-					     sz - 1) / sz * sz;
-	adapter->rx_queue.rx_ring[0].size = min_t(u32,
-					    adapter->rx_queue.rx_ring[0].size,
-					    VMXNET3_RX_RING_MAX_SIZE / sz * sz);
+	ring0_size = adapter->rx_queue[0].rx_ring[0].size;
+	ring0_size = (ring0_size + sz - 1) / sz * sz;
+	ring0_size = min_t(u32, rq->rx_ring[0].size, VMXNET3_RX_RING_MAX_SIZE /
+			   sz * sz);
+	ring1_size = adapter->rx_queue[0].rx_ring[1].size;
+	comp_size = ring0_size + ring1_size;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		rq = &adapter->rx_queue[i];
+		rq->rx_ring[0].size = ring0_size;
+		rq->rx_ring[1].size = ring1_size;
+		rq->comp_ring.size = comp_size;
+	}
 }
 
 
@@ -2155,29 +2518,53 @@ int
 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
 		      u32 rx_ring_size, u32 rx_ring2_size)
 {
-	int err;
-
-	adapter->tx_queue.tx_ring.size   = tx_ring_size;
-	adapter->tx_queue.data_ring.size = tx_ring_size;
-	adapter->tx_queue.comp_ring.size = tx_ring_size;
-	adapter->tx_queue.shared = &adapter->tqd_start->ctrl;
-	adapter->tx_queue.stopped = true;
-	err = vmxnet3_tq_create(&adapter->tx_queue, adapter);
-	if (err)
-		return err;
+	int err = 0, i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct vmxnet3_tx_queue	*tq = &adapter->tx_queue[i];
+		tq->tx_ring.size   = tx_ring_size;
+		tq->data_ring.size = tx_ring_size;
+		tq->comp_ring.size = tx_ring_size;
+		tq->shared = &adapter->tqd_start[i].ctrl;
+		tq->stopped = true;
+		tq->adapter = adapter;
+		tq->qid = i;
+		err = vmxnet3_tq_create(tq, adapter);
+		/*
+		 * Too late to change num_tx_queues. We cannot make do with
+		 * fewer queues than what we asked for
+		 */
+		if (err)
+			goto queue_err;
+	}
 
-	adapter->rx_queue.rx_ring[0].size = rx_ring_size;
-	adapter->rx_queue.rx_ring[1].size = rx_ring2_size;
+	adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
+	adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
 	vmxnet3_adjust_rx_ring_size(adapter);
-	adapter->rx_queue.comp_ring.size  = adapter->rx_queue.rx_ring[0].size +
-					    adapter->rx_queue.rx_ring[1].size;
-	adapter->rx_queue.qid  = 0;
-	adapter->rx_queue.qid2 = 1;
-	adapter->rx_queue.shared = &adapter->rqd_start->ctrl;
-	err = vmxnet3_rq_create(&adapter->rx_queue, adapter);
-	if (err)
-		vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
-
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+		/* qid and qid2 for rx queues will be assigned later when num
+		 * of rx queues is finalized after allocating intrs */
+		rq->shared = &adapter->rqd_start[i].ctrl;
+		rq->adapter = adapter;
+		err = vmxnet3_rq_create(rq, adapter);
+		if (err) {
+			if (i == 0) {
+				printk(KERN_ERR "Could not allocate any rx"
+				       "queues. Aborting.\n");
+				goto queue_err;
+			} else {
+				printk(KERN_INFO "Number of rx queues changed "
+				       "to : %d.\n", i);
+				adapter->num_rx_queues = i;
+				err = 0;
+				break;
+			}
+		}
+	}
+	return err;
+queue_err:
+	vmxnet3_tq_destroy_all(adapter);
 	return err;
 }
 
@@ -2185,11 +2572,12 @@ static int
 vmxnet3_open(struct net_device *netdev)
 {
 	struct vmxnet3_adapter *adapter;
-	int err;
+	int err, i;
 
 	adapter = netdev_priv(netdev);
 
-	spin_lock_init(&adapter->tx_queue.tx_lock);
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		spin_lock_init(&adapter->tx_queue[i].tx_lock);
 
 	err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
 				    VMXNET3_DEF_RX_RING_SIZE,
@@ -2204,8 +2592,8 @@ vmxnet3_open(struct net_device *netdev)
 	return 0;
 
 activate_err:
-	vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
-	vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
+	vmxnet3_rq_destroy_all(adapter);
+	vmxnet3_tq_destroy_all(adapter);
 queue_err:
 	return err;
 }
@@ -2225,8 +2613,8 @@ vmxnet3_close(struct net_device *netdev)
 
 	vmxnet3_quiesce_dev(adapter);
 
-	vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
-	vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
+	vmxnet3_rq_destroy_all(adapter);
+	vmxnet3_tq_destroy_all(adapter);
 
 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
 
@@ -2238,6 +2626,8 @@ vmxnet3_close(struct net_device *netdev)
 void
 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
 {
+	int i;
+
 	/*
 	 * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
 	 * vmxnet3_close() will deadlock.
@@ -2245,7 +2635,8 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter)
 	BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
 
 	/* we need to enable NAPI, otherwise dev_close will deadlock */
-	napi_enable(&adapter->napi);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		napi_enable(&adapter->rx_queue[i].napi);
 	dev_close(adapter->netdev);
 }
 
@@ -2276,14 +2667,11 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
 		vmxnet3_reset_dev(adapter);
 
 		/* we need to re-create the rx queue based on the new mtu */
-		vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
+		vmxnet3_rq_destroy_all(adapter);
 		vmxnet3_adjust_rx_ring_size(adapter);
-		adapter->rx_queue.comp_ring.size  =
-					adapter->rx_queue.rx_ring[0].size +
-					adapter->rx_queue.rx_ring[1].size;
-		err = vmxnet3_rq_create(&adapter->rx_queue, adapter);
+		err = vmxnet3_rq_create_all(adapter);
 		if (err) {
-			printk(KERN_ERR "%s: failed to re-create rx queue,"
+			printk(KERN_ERR "%s: failed to re-create rx queues,"
 				" error %d. Closing it.\n", netdev->name, err);
 			goto out;
 		}
@@ -2348,6 +2736,55 @@ vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
 	mac[5] = (tmp >> 8) & 0xff;
 }
 
+#ifdef CONFIG_PCI_MSI
+
+/*
+ * Enable MSIx vectors.
+ * Returns :
+ *	0 on successful enabling of required vectors,
+ *	VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
+ *	 could be enabled.
+ *	number of vectors which can be enabled otherwise (this number is smaller
+ *	 than VMXNET3_LINUX_MIN_MSIX_VECT)
+ */
+
+static int
+vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
+			     int vectors)
+{
+	int err = 0, vector_threshold;
+	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
+
+	while (vectors >= vector_threshold) {
+		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
+				      vectors);
+		if (!err) {
+			adapter->intr.num_intrs = vectors;
+			return 0;
+		} else if (err < 0) {
+			printk(KERN_ERR "Failed to enable MSI-X for %s, error"
+			       " %d\n",	adapter->netdev->name, err);
+			vectors = 0;
+		} else if (err < vector_threshold) {
+			break;
+		} else {
+			/* If fails to enable required number of MSI-x vectors
+			 * try enabling 3 of them. One each for rx, tx and event
+			 */
+			vectors = vector_threshold;
+			printk(KERN_ERR "Failed to enable %d MSI-X for %s, try"
+			       " %d instead\n", vectors, adapter->netdev->name,
+			       vector_threshold);
+		}
+	}
+
+	printk(KERN_INFO "Number of MSI-X interrupts which can be allocatedi"
+	       " are lower than min threshold required.\n");
+	return err;
+}
+
+
+#endif /* CONFIG_PCI_MSI */
 
 static void
 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
@@ -2367,16 +2804,47 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
 
 #ifdef CONFIG_PCI_MSI
 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
-		int err;
-
-		adapter->intr.msix_entries[0].entry = 0;
-		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
-				      VMXNET3_LINUX_MAX_MSIX_VECT);
-		if (!err) {
-			adapter->intr.num_intrs = 1;
-			adapter->intr.type = VMXNET3_IT_MSIX;
+		int vector, err = 0;
+
+		adapter->intr.num_intrs = (adapter->share_intr ==
+					   VMXNET3_INTR_TXSHARE) ? 1 :
+					   adapter->num_tx_queues;
+		adapter->intr.num_intrs += (adapter->share_intr ==
+					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
+					   adapter->num_rx_queues;
+		adapter->intr.num_intrs += 1;		/* for link event */
+
+		adapter->intr.num_intrs = (adapter->intr.num_intrs >
+					   VMXNET3_LINUX_MIN_MSIX_VECT
+					   ? adapter->intr.num_intrs :
+					   VMXNET3_LINUX_MIN_MSIX_VECT);
+
+		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
+			adapter->intr.msix_entries[vector].entry = vector;
+
+		err = vmxnet3_acquire_msix_vectors(adapter,
+						   adapter->intr.num_intrs);
+		/* If we cannot allocate one MSIx vector per queue
+		 * then limit the number of rx queues to 1
+		 */
+		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
+			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
+			    || adapter->num_rx_queues != 2) {
+				adapter->share_intr = VMXNET3_INTR_TXSHARE;
+				printk(KERN_ERR "Number of rx queues : 1\n");
+				adapter->num_rx_queues = 1;
+				adapter->intr.num_intrs =
+						VMXNET3_LINUX_MIN_MSIX_VECT;
+			}
 			return;
 		}
+		if (!err)
+			return;
+
+		/* If we cannot allocate MSIx vectors use only one rx queue */
+		printk(KERN_INFO "Failed to enable MSI-X for %s, error %d."
+		       "#rx queues : 1, try MSI\n", adapter->netdev->name, err);
+
 		adapter->intr.type = VMXNET3_IT_MSI;
 	}
 
@@ -2384,12 +2852,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
 		int err;
 		err = pci_enable_msi(adapter->pdev);
 		if (!err) {
+			adapter->num_rx_queues = 1;
 			adapter->intr.num_intrs = 1;
 			return;
 		}
 	}
 #endif /* CONFIG_PCI_MSI */
 
+	adapter->num_rx_queues = 1;
+	printk(KERN_INFO "Using INTx interrupt, #Rx queues: 1.\n");
 	adapter->intr.type = VMXNET3_IT_INTX;
 
 	/* INT-X related setting */
@@ -2417,6 +2888,7 @@ vmxnet3_tx_timeout(struct net_device *netdev)
 
 	printk(KERN_ERR "%s: tx hang\n", adapter->netdev->name);
 	schedule_work(&adapter->work);
+	netif_wake_queue(adapter->netdev);
 }
 
 
@@ -2473,8 +2945,32 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 	struct net_device *netdev;
 	struct vmxnet3_adapter *adapter;
 	u8 mac[ETH_ALEN];
+	int size;
+	int num_tx_queues = num_tqs[atomic_read(&devices_found)];
+	int num_rx_queues = num_rqs[atomic_read(&devices_found)];
+
+#ifdef VMXNET3_RSS
+	if (num_rx_queues <= 0)
+		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+				    (int)num_online_cpus());
+	else
+		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+				    num_rx_queues);
+#else
+	num_rx_queues = 1;
+#endif
+
+	if (num_tx_queues <= 0)
+		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
+				    (int)num_online_cpus());
+	else
+		num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
+				    num_tx_queues);
+	netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
+				   num_tx_queues);
+	printk(KERN_INFO "# of Tx queues : %d, # of Rx queues : %d\n",
+	       num_tx_queues, num_rx_queues);
 
-	netdev = alloc_etherdev(sizeof(struct vmxnet3_adapter));
 	if (!netdev) {
 		printk(KERN_ERR "Failed to alloc ethernet device for adapter "
 			"%s\n",	pci_name(pdev));
@@ -2496,9 +2992,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		goto err_alloc_shared;
 	}
 
-	adapter->tqd_start = pci_alloc_consistent(adapter->pdev,
-			     sizeof(struct Vmxnet3_TxQueueDesc) +
-			     sizeof(struct Vmxnet3_RxQueueDesc),
+	adapter->num_rx_queues = num_rx_queues;
+	adapter->num_tx_queues = num_tx_queues;
+
+	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+	size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
+	adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
 			     &adapter->queue_desc_pa);
 
 	if (!adapter->tqd_start) {
@@ -2507,8 +3006,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		err = -ENOMEM;
 		goto err_alloc_queue_desc;
 	}
-	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start
-							    + 1);
+	adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
+							adapter->num_tx_queues);
 
 	adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
 	if (adapter->pm_conf == NULL) {
@@ -2518,6 +3017,17 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		goto err_alloc_pm;
 	}
 
+#ifdef VMXNET3_RSS
+
+	adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
+	if (adapter->rss_conf == NULL) {
+		printk(KERN_ERR "Failed to allocate memory for %s\n",
+		       pci_name(pdev));
+		err = -ENOMEM;
+		goto err_alloc_rss;
+	}
+#endif /* VMXNET3_RSS */
+
 	err = vmxnet3_alloc_pci_resources(adapter, &dma64);
 	if (err < 0)
 		goto err_alloc_pci;
@@ -2545,8 +3055,32 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 	vmxnet3_declare_features(adapter, dma64);
 
 	adapter->dev_number = atomic_read(&devices_found);
+
+	/*
+	 * Sharing intr between corresponding tx and rx queues gets priority
+	 * over all tx queues sharing an intr. Also, to use buddy interrupts
+	 * number of tx queues should be same as number of rx queues.
+	 */
+	if (share_tx_intr[adapter->dev_number] == 1)
+		adapter->share_intr = VMXNET3_INTR_TXSHARE;
+	else if (buddy_intr[adapter->dev_number] == 1 &&
+		 adapter->num_tx_queues == adapter->num_rx_queues)
+		adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
+	else
+		adapter->share_intr = VMXNET3_INTR_DONTSHARE;
+
 	vmxnet3_alloc_intr_resources(adapter);
 
+#ifdef VMXNET3_RSS
+	if (adapter->num_rx_queues > 1 &&
+	    adapter->intr.type == VMXNET3_IT_MSIX) {
+		adapter->rss = true;
+		printk(KERN_INFO "RSS is enabled.\n");
+	} else {
+		adapter->rss = false;
+	}
+#endif
+
 	vmxnet3_read_mac_addr(adapter, mac);
 	memcpy(netdev->dev_addr,  mac, netdev->addr_len);
 
@@ -2556,7 +3090,18 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 
 	INIT_WORK(&adapter->work, vmxnet3_reset_work);
 
-	netif_napi_add(netdev, &adapter->napi, vmxnet3_poll, 64);
+	if (adapter->intr.type == VMXNET3_IT_MSIX) {
+		int i;
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			netif_napi_add(adapter->netdev,
+				       &adapter->rx_queue[i].napi,
+				       vmxnet3_poll_rx_only, 64);
+		}
+	} else {
+		netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
+			       vmxnet3_poll, 64);
+	}
+
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	err = register_netdev(netdev);
 
@@ -2576,11 +3121,14 @@ err_register:
 err_ver:
 	vmxnet3_free_pci_resources(adapter);
 err_alloc_pci:
+#ifdef VMXNET3_RSS
+	kfree(adapter->rss_conf);
+err_alloc_rss:
+#endif
 	kfree(adapter->pm_conf);
 err_alloc_pm:
-	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) +
-			    sizeof(struct Vmxnet3_RxQueueDesc),
-			    adapter->tqd_start, adapter->queue_desc_pa);
+	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
+			    adapter->queue_desc_pa);
 err_alloc_queue_desc:
 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
 			    adapter->shared, adapter->shared_pa);
@@ -2596,6 +3144,19 @@ vmxnet3_remove_device(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	int size = 0;
+	int num_rx_queues = num_rqs[adapter->dev_number];
+
+#ifdef VMXNET3_RSS
+	if (num_rx_queues <= 0)
+		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+				    (int)num_online_cpus());
+	else
+		num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+				    num_rx_queues);
+#else
+	num_rx_queues = 1;
+#endif
 
 	flush_scheduled_work();
 
@@ -2603,10 +3164,15 @@ vmxnet3_remove_device(struct pci_dev *pdev)
 
 	vmxnet3_free_intr_resources(adapter);
 	vmxnet3_free_pci_resources(adapter);
+#ifdef VMXNET3_RSS
+	kfree(adapter->rss_conf);
+#endif
 	kfree(adapter->pm_conf);
-	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) +
-			    sizeof(struct Vmxnet3_RxQueueDesc),
-			    adapter->tqd_start, adapter->queue_desc_pa);
+
+	size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+	size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
+	pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
+			    adapter->queue_desc_pa);
 	pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
 			    adapter->shared, adapter->shared_pa);
 	free_netdev(netdev);
@@ -2637,7 +3203,7 @@ vmxnet3_suspend(struct device *device)
 	vmxnet3_free_intr_resources(adapter);
 
 	netif_device_detach(netdev);
-	netif_stop_queue(netdev);
+	netif_tx_stop_all_queues(netdev);
 
 	/* Create wake-up filters. */
 	pmConf = adapter->pm_conf;
@@ -2782,6 +3348,7 @@ vmxnet3_init_module(void)
 {
 	printk(KERN_INFO "%s - version %s\n", VMXNET3_DRIVER_DESC,
 		VMXNET3_DRIVER_VERSION_REPORT);
+	atomic_set(&devices_found, 0);
 	return pci_register_driver(&vmxnet3_driver);
 }
 
@@ -2800,3 +3367,5 @@ MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
+
+
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 7e4b5a8..c429793 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -153,44 +153,42 @@ vmxnet3_get_stats(struct net_device *netdev)
 	struct UPT1_TxStats *devTxStats;
 	struct UPT1_RxStats *devRxStats;
 	struct net_device_stats *net_stats = &netdev->stats;
+	int i;
 
 	adapter = netdev_priv(netdev);
 
 	/* Collect the dev stats into the shared area */
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
 
-	/* Assuming that we have a single queue device */
-	devTxStats = &adapter->tqd_start->stats;
-	devRxStats = &adapter->rqd_start->stats;
-
-	/* Get access to the driver stats per queue */
-	drvTxStats = &adapter->tx_queue.stats;
-	drvRxStats = &adapter->rx_queue.stats;
-
 	memset(net_stats, 0, sizeof(*net_stats));
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		devTxStats = &adapter->tqd_start[i].stats;
+		drvTxStats = &adapter->tx_queue[i].stats;
+		net_stats->tx_packets += devTxStats->ucastPktsTxOK +
+					devTxStats->mcastPktsTxOK +
+					devTxStats->bcastPktsTxOK;
+		net_stats->tx_bytes += devTxStats->ucastBytesTxOK +
+				      devTxStats->mcastBytesTxOK +
+				      devTxStats->bcastBytesTxOK;
+		net_stats->tx_errors += devTxStats->pktsTxError;
+		net_stats->tx_dropped += drvTxStats->drop_total;
+	}
 
-	net_stats->rx_packets = devRxStats->ucastPktsRxOK +
-				devRxStats->mcastPktsRxOK +
-				devRxStats->bcastPktsRxOK;
-
-	net_stats->tx_packets = devTxStats->ucastPktsTxOK +
-				devTxStats->mcastPktsTxOK +
-				devTxStats->bcastPktsTxOK;
-
-	net_stats->rx_bytes = devRxStats->ucastBytesRxOK +
-			      devRxStats->mcastBytesRxOK +
-			      devRxStats->bcastBytesRxOK;
-
-	net_stats->tx_bytes = devTxStats->ucastBytesTxOK +
-			      devTxStats->mcastBytesTxOK +
-			      devTxStats->bcastBytesTxOK;
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		devRxStats = &adapter->rqd_start[i].stats;
+		drvRxStats = &adapter->rx_queue[i].stats;
+		net_stats->rx_packets += devRxStats->ucastPktsRxOK +
+					devRxStats->mcastPktsRxOK +
+					devRxStats->bcastPktsRxOK;
 
-	net_stats->rx_errors = devRxStats->pktsRxError;
-	net_stats->tx_errors = devTxStats->pktsTxError;
-	net_stats->rx_dropped = drvRxStats->drop_total;
-	net_stats->tx_dropped = drvTxStats->drop_total;
-	net_stats->multicast =  devRxStats->mcastPktsRxOK;
+		net_stats->rx_bytes += devRxStats->ucastBytesRxOK +
+				      devRxStats->mcastBytesRxOK +
+				      devRxStats->bcastBytesRxOK;
 
+		net_stats->rx_errors += devRxStats->pktsRxError;
+		net_stats->rx_dropped += drvRxStats->drop_total;
+		net_stats->multicast +=  devRxStats->mcastPktsRxOK;
+	}
 	return net_stats;
 }
 
@@ -309,24 +307,26 @@ vmxnet3_get_ethtool_stats(struct net_device *netdev,
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	u8 *base;
 	int i;
+	int j = 0;
 
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
 
 	/* this does assume each counter is 64-bit wide */
+/* TODO change this for multiple queues */
 
-	base = (u8 *)&adapter->tqd_start->stats;
+	base = (u8 *)&adapter->tqd_start[j].stats;
 	for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++)
 		*buf++ = *(u64 *)(base + vmxnet3_tq_dev_stats[i].offset);
 
-	base = (u8 *)&adapter->tx_queue.stats;
+	base = (u8 *)&adapter->tx_queue[j].stats;
 	for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats); i++)
 		*buf++ = *(u64 *)(base + vmxnet3_tq_driver_stats[i].offset);
 
-	base = (u8 *)&adapter->rqd_start->stats;
+	base = (u8 *)&adapter->rqd_start[j].stats;
 	for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++)
 		*buf++ = *(u64 *)(base + vmxnet3_rq_dev_stats[i].offset);
 
-	base = (u8 *)&adapter->rx_queue.stats;
+	base = (u8 *)&adapter->rx_queue[j].stats;
 	for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats); i++)
 		*buf++ = *(u64 *)(base + vmxnet3_rq_driver_stats[i].offset);
 
@@ -341,6 +341,7 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	u32 *buf = p;
+	int i = 0;
 
 	memset(p, 0, vmxnet3_get_regs_len(netdev));
 
@@ -349,28 +350,29 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
 	/* Update vmxnet3_get_regs_len if we want to dump more registers */
 
 	/* make each ring use multiple of 16 bytes */
-	buf[0] = adapter->tx_queue.tx_ring.next2fill;
-	buf[1] = adapter->tx_queue.tx_ring.next2comp;
-	buf[2] = adapter->tx_queue.tx_ring.gen;
+/* TODO change this for multiple queues */
+	buf[0] = adapter->tx_queue[i].tx_ring.next2fill;
+	buf[1] = adapter->tx_queue[i].tx_ring.next2comp;
+	buf[2] = adapter->tx_queue[i].tx_ring.gen;
 	buf[3] = 0;
 
-	buf[4] = adapter->tx_queue.comp_ring.next2proc;
-	buf[5] = adapter->tx_queue.comp_ring.gen;
-	buf[6] = adapter->tx_queue.stopped;
+	buf[4] = adapter->tx_queue[i].comp_ring.next2proc;
+	buf[5] = adapter->tx_queue[i].comp_ring.gen;
+	buf[6] = adapter->tx_queue[i].stopped;
 	buf[7] = 0;
 
-	buf[8] = adapter->rx_queue.rx_ring[0].next2fill;
-	buf[9] = adapter->rx_queue.rx_ring[0].next2comp;
-	buf[10] = adapter->rx_queue.rx_ring[0].gen;
+	buf[8] = adapter->rx_queue[i].rx_ring[0].next2fill;
+	buf[9] = adapter->rx_queue[i].rx_ring[0].next2comp;
+	buf[10] = adapter->rx_queue[i].rx_ring[0].gen;
 	buf[11] = 0;
 
-	buf[12] = adapter->rx_queue.rx_ring[1].next2fill;
-	buf[13] = adapter->rx_queue.rx_ring[1].next2comp;
-	buf[14] = adapter->rx_queue.rx_ring[1].gen;
+	buf[12] = adapter->rx_queue[i].rx_ring[1].next2fill;
+	buf[13] = adapter->rx_queue[i].rx_ring[1].next2comp;
+	buf[14] = adapter->rx_queue[i].rx_ring[1].gen;
 	buf[15] = 0;
 
-	buf[16] = adapter->rx_queue.comp_ring.next2proc;
-	buf[17] = adapter->rx_queue.comp_ring.gen;
+	buf[16] = adapter->rx_queue[i].comp_ring.next2proc;
+	buf[17] = adapter->rx_queue[i].comp_ring.gen;
 	buf[18] = 0;
 	buf[19] = 0;
 }
@@ -437,8 +439,10 @@ vmxnet3_get_ringparam(struct net_device *netdev,
 	param->rx_mini_max_pending = 0;
 	param->rx_jumbo_max_pending = 0;
 
-	param->rx_pending = adapter->rx_queue.rx_ring[0].size;
-	param->tx_pending = adapter->tx_queue.tx_ring.size;
+	param->rx_pending = adapter->rx_queue[0].rx_ring[0].size *
+			    adapter->num_rx_queues;
+	param->tx_pending = adapter->tx_queue[0].tx_ring.size *
+			    adapter->num_tx_queues;
 	param->rx_mini_pending = 0;
 	param->rx_jumbo_pending = 0;
 }
@@ -482,8 +486,8 @@ vmxnet3_set_ringparam(struct net_device *netdev,
 							   sz) != 0)
 		return -EINVAL;
 
-	if (new_tx_ring_size == adapter->tx_queue.tx_ring.size &&
-			new_rx_ring_size == adapter->rx_queue.rx_ring[0].size) {
+	if (new_tx_ring_size == adapter->tx_queue[0].tx_ring.size &&
+	    new_rx_ring_size == adapter->rx_queue[0].rx_ring[0].size) {
 		return 0;
 	}
 
@@ -500,11 +504,12 @@ vmxnet3_set_ringparam(struct net_device *netdev,
 
 		/* recreate the rx queue and the tx queue based on the
 		 * new sizes */
-		vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
-		vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
+		vmxnet3_tq_destroy_all(adapter);
+		vmxnet3_rq_destroy_all(adapter);
 
 		err = vmxnet3_create_queues(adapter, new_tx_ring_size,
 			new_rx_ring_size, VMXNET3_DEF_RX_RING_SIZE);
+
 		if (err) {
 			/* failed, most likely because of OOM, try default
 			 * size */
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index f4ec597..c7f8332 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -68,11 +68,15 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.0.14.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.0.16.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01000E00
+#define VMXNET3_DRIVER_VERSION_NUM      0x01001000
 
+#if defined(CONFIG_PCI_MSI)
+	/* RSS only makes sense if MSI-X is supported. */
+	#define VMXNET3_RSS
+#endif
 
 /*
  * Capabilities
@@ -225,16 +229,19 @@ struct vmxnet3_tx_ctx {
 };
 
 struct vmxnet3_tx_queue {
+	char			name[IFNAMSIZ+8]; /* To identify interrupt */
+	struct vmxnet3_adapter		*adapter;
 	spinlock_t                      tx_lock;
 	struct vmxnet3_cmd_ring         tx_ring;
-	struct vmxnet3_tx_buf_info     *buf_info;
+	struct vmxnet3_tx_buf_info      *buf_info;
 	struct vmxnet3_tx_data_ring     data_ring;
 	struct vmxnet3_comp_ring        comp_ring;
-	struct Vmxnet3_TxQueueCtrl            *shared;
+	struct Vmxnet3_TxQueueCtrl      *shared;
 	struct vmxnet3_tq_driver_stats  stats;
 	bool                            stopped;
 	int                             num_stop;  /* # of times the queue is
 						    * stopped */
+	int				qid;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 enum vmxnet3_rx_buf_type {
@@ -266,6 +273,9 @@ struct vmxnet3_rq_driver_stats {
 };
 
 struct vmxnet3_rx_queue {
+	char			name[IFNAMSIZ + 8]; /* To identify interrupt */
+	struct vmxnet3_adapter	  *adapter;
+	struct napi_struct        napi;
 	struct vmxnet3_cmd_ring   rx_ring[2];
 	struct vmxnet3_comp_ring  comp_ring;
 	struct vmxnet3_rx_ctx     rx_ctx;
@@ -279,7 +289,16 @@ struct vmxnet3_rx_queue {
 	struct sk_buff			*spare_skb;      /* starvation skb */
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
-#define VMXNET3_LINUX_MAX_MSIX_VECT     1
+#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
+#define VMXNET3_DEVICE_MAX_RX_QUEUES 8   /* Keep this value as a power of 2 */
+
+/* Should be less than UPT1_RSS_MAX_IND_TABLE_SIZE */
+#define VMXNET3_RSS_IND_TABLE_SIZE (VMXNET3_DEVICE_MAX_RX_QUEUES * 4)
+
+#define VMXNET3_LINUX_MAX_MSIX_VECT     (VMXNET3_DEVICE_MAX_TX_QUEUES + \
+					 VMXNET3_DEVICE_MAX_RX_QUEUES + 1)
+#define VMXNET3_LINUX_MIN_MSIX_VECT     3    /* 1 for each : tx, rx and event */
+
 
 struct vmxnet3_intr {
 	enum vmxnet3_intr_mask_mode  mask_mode;
@@ -287,28 +306,32 @@ struct vmxnet3_intr {
 	u8  num_intrs;			/* # of intr vectors */
 	u8  event_intr_idx;		/* idx of the intr vector for event */
 	u8  mod_levels[VMXNET3_LINUX_MAX_MSIX_VECT]; /* moderation level */
+	char	event_msi_vector_name[IFNAMSIZ+11];
 #ifdef CONFIG_PCI_MSI
 	struct msix_entry msix_entries[VMXNET3_LINUX_MAX_MSIX_VECT];
 #endif
 };
 
+/* Interrupt sharing schemes, share_intr */
+#define VMXNET3_INTR_DONTSHARE 0     /* each queue has its own irq */
+#define VMXNET3_INTR_TXSHARE 1	     /* All tx queues share one irq */
+#define VMXNET3_INTR_BUDDYSHARE 2    /* Corresponding tx,rx queues share irq */
+
 #define VMXNET3_STATE_BIT_RESETTING   0
 #define VMXNET3_STATE_BIT_QUIESCED    1
-struct vmxnet3_adapter {
-	struct vmxnet3_tx_queue         tx_queue;
-	struct vmxnet3_rx_queue         rx_queue;
-	struct napi_struct              napi;
-	struct vlan_group              *vlan_grp;
-
-	struct vmxnet3_intr             intr;
-
-	struct Vmxnet3_DriverShared    *shared;
-	struct Vmxnet3_PMConf          *pm_conf;
-	struct Vmxnet3_TxQueueDesc     *tqd_start;     /* first tx queue desc */
-	struct Vmxnet3_RxQueueDesc     *rqd_start;     /* first rx queue desc */
-	struct net_device              *netdev;
-	struct pci_dev                 *pdev;
 
+struct vmxnet3_adapter {
+	struct vmxnet3_tx_queue		tx_queue[VMXNET3_DEVICE_MAX_TX_QUEUES];
+	struct vmxnet3_rx_queue		rx_queue[VMXNET3_DEVICE_MAX_RX_QUEUES];
+	struct vlan_group		*vlan_grp;
+	struct vmxnet3_intr		intr;
+	struct Vmxnet3_DriverShared	*shared;
+	struct Vmxnet3_PMConf		*pm_conf;
+	struct Vmxnet3_TxQueueDesc	*tqd_start;     /* all tx queue desc */
+	struct Vmxnet3_RxQueueDesc	*rqd_start;	/* all rx queue desc */
+	struct net_device		*netdev;
+	struct net_device_stats		net_stats;
+	struct pci_dev			*pdev;
 	u8				*hw_addr0; /* for BAR 0 */
 	u8				*hw_addr1; /* for BAR 1 */
 
@@ -316,6 +339,12 @@ struct vmxnet3_adapter {
 	bool				rxcsum;
 	bool				lro;
 	bool				jumbo_frame;
+#ifdef VMXNET3_RSS
+	struct UPT1_RSSConf		*rss_conf;
+	bool				rss;
+#endif
+	u32				num_rx_queues;
+	u32				num_tx_queues;
 
 	/* rx buffer related */
 	unsigned			skb_buf_size;
@@ -335,6 +364,7 @@ struct vmxnet3_adapter {
 	unsigned long  state;    /* VMXNET3_STATE_BIT_xxx */
 
 	int dev_number;
+	int share_intr;
 };
 
 #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val)  \
@@ -378,12 +408,10 @@ void
 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter);
 
 void
-vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
-		   struct vmxnet3_adapter *adapter);
+vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter);
 
 void
-vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
-		   struct vmxnet3_adapter *adapter);
+vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter);
 
 int
 vmxnet3_create_queues(struct vmxnet3_adapter *adapter,


^ permalink raw reply related

* Re: [PATCH net-next] net:  allocate skbs on local node
From: Christoph Lameter @ 2010-10-13 21:43 UTC (permalink / raw)
  To: David Rientjes
  Cc: Pekka Enberg, Andrew Morton, Eric Dumazet, David Miller, netdev,
	Michael Chan, Eilon Greenstein, Christoph Hellwig, LKML,
	Nick Piggin
In-Reply-To: <alpine.DEB.2.00.1010131342310.15185@chino.kir.corp.google.com>

On Wed, 13 Oct 2010, David Rientjes wrote:

> > Basically have slab.c with the basic functions and then slab_queueing.c
> > and slab_noqueue.c for SLAB/SLUB with the particulars of the allocation
> > strategy?
> >
>
> I was going to mention that as an idea, but I thought storing the metadata
> for certain debugging features might differ between the two allocators so
> substantially that it would be even more convoluted and difficult to
> maintain?

We could have some callbacks to store allocator specific metadata?

^ permalink raw reply

* Re: [PATCH] Documentation: Update Phonet doc for Pipe controller changes
From: David Miller @ 2010-10-13 21:41 UTC (permalink / raw)
  To: remi
  Cc: kumar.sanghvi, remi.denis-courmont, netdev, linus.walleij,
	gulshan.karmani, sudeep.divakaran
In-Reply-To: <c0eb9fcfec2efac3da8ce45c4dbd23cb@chewa.net>

From: Rémi Denis-Courmont <remi@remlab.net>
Date: Wed, 13 Oct 2010 09:19:51 +0200

> 
> 
> 
> On Wed, 13 Oct 2010 11:47:25 +0530, Kumar A Sanghvi
> <kumar.sanghvi@stericsson.com> wrote:
>> From: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
>> 
>> Updates to Phonet doc for Pipe controller 'connect' socket
>> implementation and changes related to socket options.
>> 
>> Signed-off-by: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
> 
> Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Applied.

^ permalink raw reply

* Re: [PATCH V2] Phonet: 'connect' socket implementation for Pipe controller
From: David Miller @ 2010-10-13 21:41 UTC (permalink / raw)
  To: remi
  Cc: kumar.sanghvi, remi.denis-courmont, netdev, linus.walleij,
	gulshan.karmani, sudeep.divakaran
In-Reply-To: <ec51036a9c6fbbb550fa5ef7ae7f13e3@chewa.net>

From: Rémi Denis-Courmont <remi@remlab.net>
Date: Wed, 13 Oct 2010 09:18:51 +0200

>> Signed-off-by: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
> 
> Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Applied.

^ permalink raw reply

* Re: [PATCH] b44: fix resume, request_irq after hw reset
From: James Hogan @ 2010-10-13 21:39 UTC (permalink / raw)
  To: David Miller
  Cc: zambrano, jpirko, fujita.tomonori, hauke, Larry.Finger, netdev,
	linux-kernel
In-Reply-To: <20101013.094659.226765041.davem@davemloft.net>

On Wednesday 13 October 2010 17:46:59 David Miller wrote:
> From: James Hogan <james@albanarts.com>
> Date: Tue, 12 Oct 2010 00:22:12 +0100
> 
> > @@ -2309,6 +2303,12 @@ static int b44_resume(struct ssb_device *sdev)
> > 
> >  	netif_device_attach(bp->dev);
> >  	spin_unlock_irq(&bp->lock);
> > 
> > +	rc = request_irq(dev->irq, b44_interrupt, IRQF_SHARED, dev->name, dev);
> > +	if (rc) {
> > +		netdev_err(dev, "request_irq failed\n");
> > +		return rc;
> > +	}
> > +
> > 
> >  	b44_enable_ints(bp);
> >  	netif_wake_queue(dev);
> 
> Since you've moved the request_irq() down, you'll need to adjust
> the error handling so that it undoes side effects made by this
> function up until this point.
> 
> F.e. netif_device_attach() has to be undone for one thing.
> 
> Next, b44_init_rings() allocates memory that you must now free.
> 
> Etc. etc. etc.
> 
> This change is not so simple. :-)

Very good point!

Does the ssb_bus_powerup need undoing as well? I'm guessing not since it
wasn't undone before.

How's the patch (at the bottom) looking? It does some better error handling
and defers netif_device_attach(bp->dev) until after the irq is obtained.

I just noticed I actually get the following in my log after resume, so it
appears something's going wrong even without request_irq failing. Any idea
what could be causing the padding to be overwritten? (My experience of both
net drivers and DMA is non-existent.) If the net device is closed before
suspend, this happens on open instead of resume.

Thanks
James

=============================================================================
BUG kmalloc_dma-2048: Padding overwritten. 
0xffff88000003fda8-0xffff88000003fdff
-----------------------------------------------------------------------------

INFO: Slab 0xffffea0000000c40 objects=15 used=1 fp=0xffff880000038000 
flags=0x40c1
Pid: 21848, comm: bash Not tainted 2.6.36-rc7-custom+ #18
Call Trace:
 [<ffffffff8111582d>] slab_err+0xaa/0xcc
 [<ffffffff81006666>] ? xen_set_pud+0x18/0x49
 [<ffffffff811171f4>] ? unfreeze_slab+0x53/0xb0
 [<ffffffff8111756c>] ? get_partial_node+0x20/0x79
 [<ffffffff81115cbd>] slab_pad_check+0xd2/0x124
 [<ffffffff81115da4>] check_slab+0x95/0x9c
 [<ffffffff811178eb>] __slab_alloc+0x326/0x42a
 [<ffffffff813d9096>] ? __netdev_alloc_skb+0x34/0x52
 [<ffffffff812467c6>] ? should_fail+0x91/0xf3
 [<ffffffff81119abb>] __kmalloc_node_track_caller+0x115/0x193
 [<ffffffff813d9096>] ? __netdev_alloc_skb+0x34/0x52
 [<ffffffff813d8076>] __alloc_skb+0x83/0x141
 [<ffffffff813d9096>] __netdev_alloc_skb+0x34/0x52
 [<ffffffffa030f6cf>] b44_alloc_rx_skb+0xf9/0x247 [b44]
 [<ffffffffa03b8000>] ? ssb_device_resume+0x0/0x36 [ssb]
 [<ffffffffa030f8b0>] b44_init_rings+0x93/0xa8 [b44]
 [<ffffffffa030fab3>] b44_resume+0x86/0x142 [b44]
 [<ffffffffa03b8030>] ssb_device_resume+0x30/0x36 [ssb]
 [<ffffffff813031df>] legacy_resume+0x24/0x5c
 [<ffffffff81303b9e>] device_resume+0xcd/0x1ba
 [<ffffffff81303dc3>] dpm_resume_end+0x138/0x3d8
 [<ffffffff8108db83>] suspend_devices_and_enter+0x1ba/0x203
 [<ffffffff8108dcb3>] enter_state+0xe7/0x12e
 [<ffffffff8108d3a5>] state_store+0xb6/0xd3
 [<ffffffff81235877>] kobj_attr_store+0x17/0x19
 [<ffffffff81183223>] sysfs_write_file+0x108/0x144
 [<ffffffff81126330>] vfs_write+0xae/0x10a
 [<ffffffff8112644f>] sys_write+0x4d/0x74
 [<ffffffff81009c32>] system_call_fastpath+0x16/0x1b
 Padding 0xffff88000003fa38:  5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 
ZZZZZZZZZZZZZZZZ
  <snip>
 Padding 0xffff88000003fd98:  5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 
ZZZZZZZZZZZZZZZZ
 Padding 0xffff88000003fda8:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
................
 Padding 0xffff88000003fdb8:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
................
 Padding 0xffff88000003fdc8:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
................
 Padding 0xffff88000003fdd8:  00 00 00 00 5a 5a 5a 5a 00 00 00 00 00 00 00 00 
....ZZZZ........
 Padding 0xffff88000003fde8:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
................
 Padding 0xffff88000003fdf8:  64 2a 8b 00 00 00 00 00                         
d*......        




---
 drivers/net/b44.c |   12 +++++++++---
 1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 1e620e2..5fd251c 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -2296,16 +2296,22 @@ static int b44_resume(struct ssb_device *sdev)
 	if (!netif_running(dev))
 		return 0;
 
+	spin_lock_irq(&bp->lock);
+	b44_init_rings(bp);
+	b44_init_hw(bp, B44_FULL_RESET);
+	spin_unlock_irq(&bp->lock);
+
 	rc = request_irq(dev->irq, b44_interrupt, IRQF_SHARED, dev->name, dev);
 	if (rc) {
 		netdev_err(dev, "request_irq failed\n");
+		spin_lock_irq(&bp->lock);
+		b44_halt(bp);
+		b44_free_rings(bp);
+		spin_unlock_irq(&bp->lock);
 		return rc;
 	}
 
 	spin_lock_irq(&bp->lock);
-
-	b44_init_rings(bp);
-	b44_init_hw(bp, B44_FULL_RESET);
 	netif_device_attach(bp->dev);
 	spin_unlock_irq(&bp->lock);
 
-- 
1.7.2.3


^ permalink raw reply related

* Re: bnx2 is spamming dmesg
From: David Miller @ 2010-10-13 21:37 UTC (permalink / raw)
  To: kmannth; +Cc: netdev, mchan
In-Reply-To: <1287005434.8004.32.camel@keith-laptop>

From: Keith Mannthey <kmannth@us.ibm.com>
Date: Wed, 13 Oct 2010 14:30:34 -0700

>   For the last couple of months (since at least 2.6.35 somewhere) I have
> been seeing 

It's not the bnx2 driver, it's the intel-iommu driver.

This is coming from a pr_debug() statement, so turn off
CONFIG_PCI_DEBUG to get rid of the "DEBUG" define that
gets added to the kernel build command line which causes
that line to print the message unconditionally.


^ permalink raw reply

* Re: bnx2 is spamming dmesg
From: Eric Dumazet @ 2010-10-13 21:36 UTC (permalink / raw)
  To: Keith Mannthey; +Cc: netdev, mchan
In-Reply-To: <1287005434.8004.32.camel@keith-laptop>

Le mercredi 13 octobre 2010 à 14:30 -0700, Keith Mannthey a écrit :
> Hello All,
>   For the last couple of months (since at least 2.6.35 somewhere) I have
> been seeing 
> 
> "
> ....
> [698612.423825] Device 0000:0b:00.0 unmapping: pfn ffc74-ffc74
> [698612.423831] Device 0000:0b:00.0 unmapping: pfn ff96d-ff96d
> [698612.639262] Device 0000:0b:00.0 unmapping: pfn ffc74-ffc74
> [698612.639269] Device 0000:0b:00.0 unmapping: pfn ff96d-ff96d
> ....
> "
> in my dmesg output.  
> 
> "
>  lspci -s 0000:0b:00.0
> 0b:00.0 Ethernet controller: Broadcom Corporation NetXtreme II BCM5709
> Gigabit Ethernet (rev 20)
> "
> 
> The bnx2 driver is loaded on my system.  I am presently on 2.6.36-rc3
> kernels but like I mentioned I have been seeing this for a while.
> 
> 
> Any ideas? I would rather not have this endless message in my kernel
> logs. 
> 

drivers/pci/intel-iommu.c line 2737

pr_debug("Device %s unmapping: pfn %lx-%lx\n",
	pci_name(pdev), start_pfn, last_pfn);

So you asked for/activated some debugging, because pr_debug() is not
enabled by default...




^ permalink raw reply

* bnx2 is spamming dmesg
From: Keith Mannthey @ 2010-10-13 21:30 UTC (permalink / raw)
  To: netdev; +Cc: mchan

Hello All,
  For the last couple of months (since at least 2.6.35 somewhere) I have
been seeing 

"
....
[698612.423825] Device 0000:0b:00.0 unmapping: pfn ffc74-ffc74
[698612.423831] Device 0000:0b:00.0 unmapping: pfn ff96d-ff96d
[698612.639262] Device 0000:0b:00.0 unmapping: pfn ffc74-ffc74
[698612.639269] Device 0000:0b:00.0 unmapping: pfn ff96d-ff96d
....
"
in my dmesg output.  

"
 lspci -s 0000:0b:00.0
0b:00.0 Ethernet controller: Broadcom Corporation NetXtreme II BCM5709
Gigabit Ethernet (rev 20)
"

The bnx2 driver is loaded on my system.  I am presently on 2.6.36-rc3
kernels but like I mentioned I have been seeing this for a while.


Any ideas? I would rather not have this endless message in my kernel
logs. 


Thanks,
  Keith Mannthey 
  IBM LTC Local Filesystems  





^ permalink raw reply

* Re: [PATCH net-next 5/5] tipc: clean out all instances of #if 0'd unused code
From: David Miller @ 2010-10-13 21:28 UTC (permalink / raw)
  To: paul.gortmaker; +Cc: nhorman, netdev, allan.stephens
In-Reply-To: <4CB5E79B.4060507@windriver.com>

From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 13 Oct 2010 13:08:43 -0400

> This cleanup patch (patch #5) doesn't explicitly depend on
> the other 4 bearer related patches, so it can be applied
> at whatever time is most convenient for Dave.

I've added patch #5 to net-next-2.6, thanks.

^ permalink raw reply

* Re: [PATCH] ehea: Fix a checksum issue on the receive path
From: David Miller @ 2010-10-13 21:25 UTC (permalink / raw)
  To: leitao; +Cc: shemminger, eric.dumazet, netdev, fubar
In-Reply-To: <4CB5BCD6.4000906@linux.vnet.ibm.com>

From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Wed, 13 Oct 2010 11:06:14 -0300

> Just to clarify, this patch that started this thread is not invalidated
> by this "problem". So, I'd like to see this patch committed on your
> tree. Does it make sense?

Yep, I've added the fix to net-2.6, thanks.

^ permalink raw reply

* Re: [RFC PATCH 2/7] vlan: Centralize handling of hardware acceleration.
From: Eric Dumazet @ 2010-10-13 21:12 UTC (permalink / raw)
  To: Jesse Gross; +Cc: davem, netdev
In-Reply-To: <1287000177-7126-3-git-send-email-jesse@nicira.com>

Le mercredi 13 octobre 2010 à 13:02 -0700, Jesse Gross a écrit :
> Currently each driver that is capable of vlan hardware acceleration
> must be aware of the vlan groups that are configured and then pass
> the stripped tag to a specialized receive function.  This is
> different from other types of hardware offload in that it places a
> significant amount of knowledge in the driver itself rather than keeping
> it in the networking core.
> 
> This makes vlan offloading function more similarly to other forms
> of offloading (such as checksum offloading or TSO) by doing the
> following:
> * On receive, stripped vlans are passed directly to the network
> core, without attempting to check for vlan groups or reconstructing
> the header if no group
> * vlans are made less special by folding the logic into the main
> receive routines
> * On transmit, the device layer will add the vlan header in software
> if the hardware doesn't support it, instead of spreading that logic
> out in upper layers, such as bonding.
> 
> There are a number of advantages to this:
> * Fixes all bugs with drivers incorrectly dropping vlan headers at once.
> * Avoids having to disable VLAN acceleration when in promiscuous mode
> (good for bridging since it always puts devices in promiscuous mode).
> * Keeps VLAN tag separate until given to ultimate consumer, which
> avoids needing to do header reconstruction as in tg3 unless absolutely
> necessary.
> * Consolidates common code in core networking.
> 
> Signed-off-by: Jesse Gross <jesse@nicira.com>


Hi Jesse !

Very nice and exciting code consolidation, but please read on :)

> ---
>  include/linux/if_vlan.h         |   27 ++++++++-
>  include/linux/netdevice.h       |   12 +++-
>  net/8021q/vlan.c                |  102 ++++++++-----------------------
>  net/8021q/vlan.h                |   17 -----
>  net/8021q/vlan_core.c           |  125 +++++++++------------------------------
>  net/8021q/vlan_dev.c            |    2 +-
>  net/bridge/netfilter/ebt_vlan.c |    4 +-
>  net/core/dev.c                  |   42 ++++++++++++--
>  8 files changed, 129 insertions(+), 202 deletions(-)
> 
> diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
> index a523207..e21028b 100644
> --- a/include/linux/if_vlan.h
> +++ b/include/linux/if_vlan.h
> @@ -68,6 +68,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
>  #define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
>  #define VLAN_TAG_PRESENT	VLAN_CFI_MASK
>  #define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
> +#define VLAN_N_VID		4096
>  

This should be a patch on its own (change VLAN_GROUP_ARRAY_LEN to
VLAN_N_VID), because this patch is too big.

Please try not to change too many things at once; you remove many
temporary variables, and this only makes review very time-consuming.

>  /* found in socket.c */
>  extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
> @@ -76,7 +77,7 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
>   * depends on completely exhausting the VLAN identifier space.  Thus
>   * it gives constant time look-up, but in many cases it wastes memory.
>   */
> -#define VLAN_GROUP_ARRAY_LEN          4096
> +#define VLAN_GROUP_ARRAY_LEN          VLAN_N_VID
>  #define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
>  #define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS)
>  
> @@ -114,12 +115,24 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
>  #define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
>  
>  #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
> +/* Must be invoked with rcu_read_lock or with RTNL. */
> +static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
> +					       u16 vlan_id)
> +{
> +	struct vlan_group *grp = rcu_dereference(real_dev->vlgrp);
> +

This rcu_dereference() doesn't match the comment.

You might want rcu_dereference_rtnl() instead and use CONFIG_PROVE_RCU

> +	if (grp)
> +		return vlan_group_get_device(grp, vlan_id);
> +
> +	return NULL;
> +}
> +
>  extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
>  extern u16 vlan_dev_vlan_id(const struct net_device *dev);
>  
>  extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
>  			     u16 vlan_tci, int polling);
> -extern void vlan_hwaccel_do_receive(struct sk_buff *skb);
> +extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
>  extern gro_result_t
>  vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
>  		 unsigned int vlan_tci, struct sk_buff *skb);
> @@ -128,6 +141,12 @@ vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
>  	       unsigned int vlan_tci);
>  
>  #else
> +static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
> +					       u16 vlan_id)
> +{
> +	return NULL;
> +}
> +
>  static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
>  {
>  	BUG();
> @@ -147,8 +166,10 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
>  	return NET_XMIT_SUCCESS;
>  }
>  
> -static inline void vlan_hwaccel_do_receive(struct sk_buff *skb)
> +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb)
>  {
> +	BUG();
> +	return 0;
>  }
>  
>  static inline gro_result_t
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 14fbb04..ef4bbcb 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -942,7 +942,10 @@ struct net_device {
>  
> 
>  	/* Protocol specific pointers */
> -	
> +
> +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
> +	struct vlan_group	*vlgrp;		/* VLAN group */
> +#endif
>  #ifdef CONFIG_NET_DSA
>  	void			*dsa_ptr;	/* dsa specific data */
>  #endif
> @@ -2248,8 +2251,13 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
>  
>  static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
>  {
> +	int features = dev->features;
> +
> +	if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci)
> +		features &= dev->vlan_features;
> +
>  	return skb_is_gso(skb) &&
> -	       (!skb_gso_ok(skb, dev->features) ||
> +	       (!skb_gso_ok(skb, features) ||
>  		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));


Maybe reorder the tests for the common case, avoiding some unneeded
computations if !skb_is_gso()

	if (skb_is_gso(skb)) {
		int features = dev->features;

		if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci)
			features &= dev->vlan_features;
		
		return !skb_gso_ok(skb, features) ||
			skb->ip_summed != CHECKSUM_PARTIAL;

	}
	return 0;

>  }
>  
> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
> index 25c2133..77634b9 100644
> --- a/net/8021q/vlan.c
> +++ b/net/8021q/vlan.c
> @@ -44,9 +44,6 @@
>  
>  int vlan_net_id __read_mostly;
>  
> -/* Our listing of VLAN group(s) */
> -static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
> -
>  const char vlan_fullname[] = "802.1Q VLAN Support";
>  const char vlan_version[] = DRV_VERSION;
>  static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
> @@ -59,40 +56,6 @@ static struct packet_type vlan_packet_type __read_mostly = {
>  
>  /* End of global variables definitions. */
>  
> -static inline unsigned int vlan_grp_hashfn(unsigned int idx)
> -{
> -	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
> -}
> -
> -/* Must be invoked with RCU read lock (no preempt) */
> -static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
> -{
> -	struct vlan_group *grp;
> -	struct hlist_node *n;
> -	int hash = vlan_grp_hashfn(real_dev->ifindex);
> -
> -	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
> -		if (grp->real_dev == real_dev)
> -			return grp;
> -	}
> -
> -	return NULL;
> -}
> -
> -/*  Find the protocol handler.  Assumes VID < VLAN_VID_MASK.
> - *
> - * Must be invoked with RCU read lock (no preempt)
> - */
> -struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
> -{
> -	struct vlan_group *grp = __vlan_find_group(real_dev);
> -
> -	if (grp)
> -		return vlan_group_get_device(grp, vlan_id);
> -
> -	return NULL;
> -}
> -
>  static void vlan_group_free(struct vlan_group *grp)
>  {
>  	int i;
> @@ -111,8 +74,6 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
>  		return NULL;
>  
>  	grp->real_dev = real_dev;
> -	hlist_add_head_rcu(&grp->hlist,
> -			&vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
>  	return grp;
>  }
>  
> @@ -146,13 +107,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
>  	struct vlan_dev_info *vlan = vlan_dev_info(dev);
>  	struct net_device *real_dev = vlan->real_dev;
>  	const struct net_device_ops *ops = real_dev->netdev_ops;
> -	struct vlan_group *grp;
>  	u16 vlan_id = vlan->vlan_id;
>  
>  	ASSERT_RTNL();
> -
> -	grp = __vlan_find_group(real_dev);
> -	BUG_ON(!grp);
> +	BUG_ON(!real_dev->vlgrp);
>  
>  	/* Take it out of our own structures, but be sure to interlock with
>  	 * HW accelerating devices or SW vlan input packet processing if
> @@ -161,25 +119,26 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
>  	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
>  		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
>  
> -	grp->nr_vlans--;
> +	real_dev->vlgrp->nr_vlans--;
>  
> -	vlan_group_set_device(grp, vlan_id, NULL);
> -	if (!grp->killall)
> +	vlan_group_set_device(real_dev->vlgrp, vlan_id, NULL);
> +	if (!real_dev->vlgrp->killall)
>  		synchronize_net();
>  
>  	unregister_netdevice_queue(dev, head);
>  
>  	/* If the group is now empty, kill off the group. */
> -	if (grp->nr_vlans == 0) {
> -		vlan_gvrp_uninit_applicant(real_dev);
> +	if (real_dev->vlgrp->nr_vlans == 0) {
> +		struct vlan_group *vlgrp = real_dev->vlgrp;
>  
> -		if (real_dev->features & NETIF_F_HW_VLAN_RX)
> +		rcu_assign_pointer(real_dev->vlgrp, NULL);
> +		if (ops->ndo_vlan_rx_register)
>  			ops->ndo_vlan_rx_register(real_dev, NULL);
>  
> -		hlist_del_rcu(&grp->hlist);
> +		vlan_gvrp_uninit_applicant(real_dev);
>  
>  		/* Free the group, after all cpu's are done. */
> -		call_rcu(&grp->rcu, vlan_rcu_free);
> +		call_rcu(&vlgrp->rcu, vlan_rcu_free);
>  	}
>  
>  	/* Get rid of the vlan's reference to real_dev */
> @@ -196,18 +155,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
>  		return -EOPNOTSUPP;
>  	}
>  
> -	if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) {
> -		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
> -		return -EOPNOTSUPP;
> -	}
> -
>  	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
>  	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
>  		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
>  		return -EOPNOTSUPP;
>  	}
>  
> -	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
> +	if (vlan_find_dev(real_dev, vlan_id) != NULL)
>  		return -EEXIST;
>  
>  	return 0;
> @@ -222,7 +176,7 @@ int register_vlan_dev(struct net_device *dev)
>  	struct vlan_group *grp, *ngrp = NULL;
>  	int err;
>  
> -	grp = __vlan_find_group(real_dev);
> +	grp = real_dev->vlgrp;
>  	if (!grp) {
>  		ngrp = grp = vlan_group_alloc(real_dev);
>  		if (!grp)
> @@ -252,8 +206,11 @@ int register_vlan_dev(struct net_device *dev)
>  	vlan_group_set_device(grp, vlan_id, dev);
>  	grp->nr_vlans++;
>  
> -	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
> -		ops->ndo_vlan_rx_register(real_dev, ngrp);
> +	if (ngrp) {
> +		if (ops->ndo_vlan_rx_register)
> +			ops->ndo_vlan_rx_register(real_dev, ngrp);
> +		rcu_assign_pointer(real_dev->vlgrp, ngrp);
> +	}
>  	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
>  		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
>  
> @@ -264,7 +221,6 @@ out_uninit_applicant:
>  		vlan_gvrp_uninit_applicant(real_dev);
>  out_free_group:
>  	if (ngrp) {
> -		hlist_del_rcu(&ngrp->hlist);
>  		/* Free the group, after all cpu's are done. */
>  		call_rcu(&ngrp->rcu, vlan_rcu_free);
>  	}
> @@ -428,7 +384,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
>  	}
>  
> -	grp = __vlan_find_group(dev);
> +	grp = dev->vlgrp;
>  	if (!grp)
>  		goto out;
>  
> @@ -439,7 +395,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  	switch (event) {
>  	case NETDEV_CHANGE:
>  		/* Propagate real device state to vlan devices */
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {
>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -450,7 +406,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  
>  	case NETDEV_CHANGEADDR:
>  		/* Adjust unicast filters on underlying device */
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {
>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -464,7 +420,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  		break;
>  
>  	case NETDEV_CHANGEMTU:
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {
>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -478,7 +434,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  
>  	case NETDEV_FEAT_CHANGE:
>  		/* Propagate device features to underlying device */
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {

cleanup patch please


>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -490,7 +446,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  
>  	case NETDEV_DOWN:
>  		/* Put all VLANs for this dev in the down state too.  */
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {

cleanup patch please

>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -508,7 +464,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  
>  	case NETDEV_UP:
>  		/* Put all VLANs for this dev in the up state too.  */
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {

cleanup patch please

>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -532,7 +488,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  		/* Delete all VLANs for this dev. */
>  		grp->killall = 1;
>  
> -		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
> +		for (i = 0; i < VLAN_N_VID; i++) {

cleanup patch please

>  			vlandev = vlan_group_get_device(grp, i);
>  			if (!vlandev)
>  				continue;
> @@ -540,7 +496,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>  			/* unregistration of last vlan destroys group, abort
>  			 * afterwards */
>  			if (grp->nr_vlans == 1)
> -				i = VLAN_GROUP_ARRAY_LEN;
> +				i = VLAN_N_VID;
>  
>  			unregister_vlan_dev(vlandev, &list);
>  		}
> @@ -746,8 +702,6 @@ err0:
>  
>  static void __exit vlan_cleanup_module(void)
>  {
> -	unsigned int i;
> -
>  	vlan_ioctl_set(NULL);
>  	vlan_netlink_fini();
>  
> @@ -755,10 +709,6 @@ static void __exit vlan_cleanup_module(void)
>  
>  	dev_remove_pack(&vlan_packet_type);
>  
> -	/* This table must be empty if there are no module references left. */
> -	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
> -		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
> -
>  	unregister_pernet_subsys(&vlan_net_ops);
>  	rcu_barrier(); /* Wait for completion of call_rcu()'s */
>  
> diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
> index 8d9503a..db01b31 100644
> --- a/net/8021q/vlan.h
> +++ b/net/8021q/vlan.h
> @@ -72,23 +72,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
>  	return netdev_priv(dev);
>  }
>  
> -#define VLAN_GRP_HASH_SHIFT	5
> -#define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
> -#define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
> -
> -/*  Find a VLAN device by the MAC address of its Ethernet device, and
> - *  it's VLAN ID.  The default configuration is to have VLAN's scope
> - *  to be box-wide, so the MAC will be ignored.  The mac will only be
> - *  looked at if we are configured to have a separate set of VLANs per
> - *  each MAC addressable interface.  Note that this latter option does
> - *  NOT follow the spec for VLANs, but may be useful for doing very
> - *  large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
> - *
> - *  Must be invoked with rcu_read_lock (ie preempt disabled)
> - *  or with RTNL.
> - */
> -struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
> -
>  /* found in vlan_dev.c */
>  int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
>  		  struct packet_type *ptype, struct net_device *orig_dev);
> diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
> index dee727c..df90412 100644
> --- a/net/8021q/vlan_core.c
> +++ b/net/8021q/vlan_core.c
> @@ -4,54 +4,33 @@
>  #include <linux/netpoll.h>
>  #include "vlan.h"
>  
> -/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
> -int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
> -		      u16 vlan_tci, int polling)
> +int vlan_hwaccel_do_receive(struct sk_buff *skb)
>  {
> +	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
>  	struct net_device *vlan_dev;
> -	u16 vlan_id;
> -
> -	if (netpoll_rx(skb))
> -		return NET_RX_DROP;
> -
> -	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
> -		skb->deliver_no_wcard = 1;
> -
> -	skb->skb_iif = skb->dev->ifindex;
> -	__vlan_hwaccel_put_tag(skb, vlan_tci);
> -	vlan_id = vlan_tci & VLAN_VID_MASK;
> -	vlan_dev = vlan_group_get_device(grp, vlan_id);
> +	struct vlan_rx_stats *rx_stats;
>  
> -	if (vlan_dev)
> -		skb->dev = vlan_dev;
> -	else if (vlan_id) {
> -		if (!(skb->dev->flags & IFF_PROMISC))
> -			goto drop;
> -		skb->pkt_type = PACKET_OTHERHOST;
> +	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
> +	if (!vlan_dev) {
> +		if (vlan_id)
> +			skb->pkt_type = PACKET_OTHERHOST;
> +		return NET_RX_SUCCESS;
>  	}
>  
> -	return polling ? netif_receive_skb(skb) : netif_rx(skb);
> -
> -drop:
> -	atomic_long_inc(&skb->dev->rx_dropped);
> -	dev_kfree_skb_any(skb);
> -	return NET_RX_DROP;
> -}
> -EXPORT_SYMBOL(__vlan_hwaccel_rx);
> -
> -void vlan_hwaccel_do_receive(struct sk_buff *skb)
> -{
> -	struct net_device *dev = skb->dev;

This temporary variable was nice for better code readability.

> -	struct vlan_rx_stats     *rx_stats;
> +	if (netpoll_receive_skb(skb))
> +		return NET_RX_DROP;
>  
> -	skb->dev = vlan_dev_real_dev(dev);

>  	netif_nit_deliver(skb);
Strange that you don't change netif_nit_deliver()?

>  
> -	skb->dev = dev;
> -	skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
> +	skb->skb_iif = skb->dev->ifindex;
> +	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
> +		skb->deliver_no_wcard = 1;
> +
> +	skb->dev = vlan_dev;
> +	skb->priority = vlan_get_ingress_priority(skb->dev, skb->vlan_tci);
>  	skb->vlan_tci = 0;
>  
> -	rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
> +	rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);

vlan_dev here, instead of skb->dev ?

>  
>  	u64_stats_update_begin(&rx_stats->syncp);
>  	rx_stats->rx_packets++;
> @@ -68,11 +47,13 @@ void vlan_hwaccel_do_receive(struct sk_buff *skb)
>  		 * This allows the VLAN to have a different MAC than the
>  		 * underlying device, and still route correctly. */
>  		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
> -					dev->dev_addr))
> +					skb->dev->dev_addr))

All these skb->dev->... dereferences are really hard to understand.

>  			skb->pkt_type = PACKET_HOST;
>  		break;
>  	}
>  	u64_stats_update_end(&rx_stats->syncp);
> +
> +	return NET_RX_SUCCESS;
>  }
>  
>  struct net_device *vlan_dev_real_dev(const struct net_device *dev)
> @@ -87,75 +68,27 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
>  }
>  EXPORT_SYMBOL(vlan_dev_vlan_id);
>  
> -static gro_result_t
> -vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
> -		unsigned int vlan_tci, struct sk_buff *skb)
> +/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
> +int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
> +		      u16 vlan_tci, int polling)
>  {
> -	struct sk_buff *p;
> -	struct net_device *vlan_dev;
> -	u16 vlan_id;
> -
> -	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
> -		skb->deliver_no_wcard = 1;
> -
> -	skb->skb_iif = skb->dev->ifindex;
>  	__vlan_hwaccel_put_tag(skb, vlan_tci);
> -	vlan_id = vlan_tci & VLAN_VID_MASK;
> -	vlan_dev = vlan_group_get_device(grp, vlan_id);
> -
> -	if (vlan_dev)
> -		skb->dev = vlan_dev;
> -	else if (vlan_id) {
> -		if (!(skb->dev->flags & IFF_PROMISC))
> -			goto drop;
> -		skb->pkt_type = PACKET_OTHERHOST;
> -	}
> -
> -	for (p = napi->gro_list; p; p = p->next) {
> -		unsigned long diffs;
> -
> -		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
> -		diffs |= compare_ether_header(skb_mac_header(p),
> -					      skb_gro_mac_header(skb));
> -		NAPI_GRO_CB(p)->same_flow = !diffs;
> -		NAPI_GRO_CB(p)->flush = 0;
> -	}
> -
> -	return dev_gro_receive(napi, skb);
> -
> -drop:
> -	atomic_long_inc(&skb->dev->rx_dropped);
> -	return GRO_DROP;
> +	return polling ? netif_receive_skb(skb) : netif_rx(skb);
>  }
> +EXPORT_SYMBOL(__vlan_hwaccel_rx);
>  
>  gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
>  			      unsigned int vlan_tci, struct sk_buff *skb)
>  {
> -	if (netpoll_rx_on(skb))
> -		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
> -			? GRO_DROP : GRO_NORMAL;
> -
> -	skb_gro_reset_offset(skb);
> -
> -	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
> +	__vlan_hwaccel_put_tag(skb, vlan_tci);
> +	return napi_gro_receive(napi, skb);
>  }
>  EXPORT_SYMBOL(vlan_gro_receive);
>  
>  gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
>  			    unsigned int vlan_tci)
>  {
> -	struct sk_buff *skb = napi_frags_skb(napi);
> -
> -	if (!skb)
> -		return GRO_DROP;
> -
> -	if (netpoll_rx_on(skb)) {
> -		skb->protocol = eth_type_trans(skb, skb->dev);
> -		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
> -			? GRO_DROP : GRO_NORMAL;
> -	}
> -
> -	return napi_frags_finish(napi, skb,
> -				 vlan_gro_common(napi, grp, vlan_tci, skb));
> +	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
> +	return napi_gro_frags(napi);
>  }
>  EXPORT_SYMBOL(vlan_gro_frags);
> diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
> index f54251e..14e3d1f 100644
> --- a/net/8021q/vlan_dev.c
> +++ b/net/8021q/vlan_dev.c
> @@ -158,7 +158,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
>  	vlan_id = vlan_tci & VLAN_VID_MASK;
>  
>  	rcu_read_lock();
> -	vlan_dev = __find_vlan_dev(dev, vlan_id);
> +	vlan_dev = vlan_find_dev(dev, vlan_id);
>  
>  	/* If the VLAN device is defined, we use it.
>  	 * If not, and the VID is 0, it is a 802.1p packet (not
> diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
> index a39d92d..e724720 100644
> --- a/net/bridge/netfilter/ebt_vlan.c
> +++ b/net/bridge/netfilter/ebt_vlan.c
> @@ -119,10 +119,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
>  	 * 0 - The null VLAN ID.
>  	 * 1 - The default Port VID (PVID)
>  	 * 0x0FFF - Reserved for implementation use.
> -	 * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
> +	 * if_vlan.h: VLAN_N_VID 4096. */
>  	if (GET_BITMASK(EBT_VLAN_ID)) {
>  		if (!!info->id) { /* if id!=0 => check vid range */
> -			if (info->id > VLAN_GROUP_ARRAY_LEN) {
> +			if (info->id > VLAN_N_VID) {
>  				pr_debug("id %d is out of range (1-4096)\n",
>  					 info->id);
>  				return -EINVAL;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 04972a4..9586aff 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1692,7 +1692,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
>  
>  static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
>  {
> -	if (can_checksum_protocol(dev->features, skb->protocol))
> +	int features = dev->features;
> +
> +	if (vlan_tx_tag_present(skb))
> +		features &= dev->vlan_features;
> +
> +	if (can_checksum_protocol(features, skb->protocol))
>  		return true;
>  
>  	if (skb->protocol == htons(ETH_P_8021Q)) {
> @@ -1791,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
>  	__be16 type = skb->protocol;
>  	int err;
>  
> +	if (type == htons(ETH_P_8021Q)) {
> +		struct vlan_ethhdr *veh;
> +
> +		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
> +			return ERR_PTR(-EINVAL);
> +
> +		veh = (struct vlan_ethhdr *)skb->data;
> +		type = veh->h_vlan_encapsulated_proto;
> +	}
> +
>  	skb_reset_mac_header(skb);
>  	skb->mac_len = skb->network_header - skb->mac_header;
>  	__skb_pull(skb, skb->mac_len);
> @@ -1962,9 +1977,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
>  static inline int skb_needs_linearize(struct sk_buff *skb,
>  				      struct net_device *dev)
>  {
> +	int features = dev->features;
> +
> +	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
> +		features &= dev->vlan_features;
> +
>  	return skb_is_nonlinear(skb) &&
> -	       ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
> -	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
> +	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
> +	        (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
>  					      illegal_highdma(dev, skb))));
>  }
>  
> @@ -1987,6 +2007,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  
>  		skb_orphan_try(skb);
>  
> +		if (vlan_tx_tag_present(skb) &&
> +		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
> +			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
> +			if (unlikely(!skb))
> +				goto out;
> +
> +			skb->vlan_tci = 0;
> +		}
> +
>  		if (netif_needs_gso(dev, skb)) {
>  			if (unlikely(dev_gso_segment(skb)))
>  				goto out_kfree_skb;
> @@ -2048,6 +2077,7 @@ out_kfree_gso_skb:
>  		skb->destructor = DEV_GSO_CB(skb)->destructor;
>  out_kfree_skb:
>  	kfree_skb(skb);
> +out:
>  	return rc;
>  }
>  
> @@ -2893,8 +2923,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
>  	if (!netdev_tstamp_prequeue)
>  		net_timestamp_check(skb);
>  
> -	if (vlan_tx_tag_present(skb))
> -		vlan_hwaccel_do_receive(skb);
> +	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
> +		return NET_RX_DROP;
>  
>  	/* if we've gotten here through NAPI, check netpoll */
>  	if (netpoll_receive_skb(skb))
> @@ -3232,6 +3262,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>  		unsigned long diffs;
>  
>  		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
> +		diffs |= p->vlan_tci ^ skb->vlan_tci;
>  		diffs |= compare_ether_header(skb_mac_header(p),
>  					      skb_gro_mac_header(skb));
>  		NAPI_GRO_CB(p)->same_flow = !diffs;
> @@ -3291,6 +3322,7 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
>  {
>  	__skb_pull(skb, skb_headlen(skb));
>  	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
> +	skb->vlan_tci = 0;
>  
>  	napi->skb = skb;
>  }


I believe this stuff is a great idea, but you should take more time to
make your patches more understandable.

Given 2.6.36 is about to be released, and Netfilter Workshop 2010 begins
in a few days, there is no hurry, because there is no chance we will add
so many fundamental changes for at least three weeks.

I believe this patch (2/7) should be split into small units, maybe 3 or 4
different patches.

Thanks



^ permalink raw reply

* Re: [PATCH net-next] net:  allocate skbs on local node
From: David Rientjes @ 2010-10-13 20:48 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Pekka Enberg, Andrew Morton, Eric Dumazet, David Miller, netdev,
	Michael Chan, Eilon Greenstein, Christoph Hellwig, LKML,
	Nick Piggin
In-Reply-To: <alpine.DEB.2.00.1010131056180.29099@router.home>

On Wed, 13 Oct 2010, Christoph Lameter wrote:

> > Will you be adding the extensive slub debugging to slab then?  It would be
> > a shame to lose it because one allocator is chosen over another for
> > performance reasons and then we need to recompile to debug issues as they
> > arise.
> 
> Well, basically we would copy SLUB to SLAB and apply the unification
> patches to SLAB instead of SLUB. We first have to make sure that the unified patches
> have the same performance as SLAB.
> 

I see, so all of the development will be done in Pekka's tree on mm/slub.c 
and then when we can see no performance regression compared to the slab 
baseline, merge it into Linus' tree as mm/slab.c.  I'm not exactly sure 
how that set of diffs being sent to Linus would look.

Are the changes to slub in the unification patchset so intrusive that it 
wouldn't be possible to isolate many of the features under #ifdef or 
boot-time options in a single, truly unified, allocator?  It seems like a 
shame that we'll have two allocators where the base is the same and much 
of the debugging code is the same.

> It may be much better to isolate the debug features and general bootstrap
> from the particulars of the allocation strategy of either SLUB or SLAB.
> That way a common code base exists and it would be easier to add different
> allocation strategies.
> 
> Basically have slab.c with the basic functions and then slab_queueing.c
> and slab_noqueue.c for SLAB/SLUB with the particulars of the allocation
> strategy?
> 

I was going to mention that as an idea, but I thought storing the metadata 
for certain debugging features might differ between the two allocators so 
substantially that it would be even more convoluted and difficult to 
maintain.

^ permalink raw reply

* [PATCH] secmark: do not return early if there was no error
From: Eric Paris @ 2010-10-13 20:21 UTC (permalink / raw)
  To: netfilter-devel, netfilter, coreteam, netdev
  Cc: kaber, davem, jengelh, paul.moore, jmorris

Commit 4a5a5c73 attempted to pass decent error messages back to userspace for
netfilter errors.  In xt_SECMARK.c however the patch screwed up and returned
on 0 (aka no error) early and didn't finish setting up secmark.  This results
in a kernel BUG if you use SECMARK.

------------[ cut here ]------------
kernel BUG at net/netfilter/xt_SECMARK.c:38!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/devices/system/cpu/cpu2/cache/index2/shared_cpu_map
CPU 0
Modules linked in: xt_SECMARK iptable_mangle nfs lockd fscache nfs_acl
auth_rpcgss sunrpc ip6t_REJECT nf_conntrack_ipv6 ip6table_filter ip6_tables
uinput virtio_net virtio_balloon i2c_piix4 i2c_core joydev microcode ipv6
virtio_blk virtio_pci virtio_ring virtio [last unloaded: speedstep_lib]

Pid: 0, comm: swapper Not tainted 2.6.36-0.8.rc2.git0.fc15.x86_64 #1 /KVM
RIP: 0010:[<ffffffffa022117d>]  [<ffffffffa022117d>] secmark_tg+0x17/0x2e [xt_SECMARK]
RSP: 0018:ffff880003e03a40  EFLAGS: 00010202
RAX: ffff88001f3074b0 RBX: ffff88001f3073f0 RCX: ffff88001f307490
RDX: ffff88001f307401 RSI: ffff880003e03b30 RDI: ffff88001f18e500
RBP: ffff880003e03a40 R08: 0000000000000002 R09: ffff880003e03a10
R10: ffff880003fd2ad8 R11: ffffffff00000001 R12: ffff88001a85d498
R13: ffffe8ffff808240 R14: ffff88001ac133ae R15: ffff88001f18e500
FS:  0000000000000000(0000) GS:ffff880003e00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 000000000073b130 CR3: 000000000fdc0000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffffffff81a00000, task
ffffffff81a4b020)
Stack:
ffff880003e03b90 ffffffff814599ff 0000000000003a18 0000000000000000
ffff880003e03b70 ffffffffffffffb8 0000000000000000 ffffffff82a39d60
ffff880003e03a90 ffffffff8140db60 ffff880003e03ae0 ffffffff8140f2c0
Call Trace:
 <IRQ>
[<ffffffff814599ff>] ipt_do_table+0x58a/0x6e2
[<ffffffff8140db60>] ? rcu_read_unlock+0x21/0x23
[<ffffffff8140f2c0>] ? nf_conntrack_find_get+0xb4/0xc7
[<ffffffffa021b182>] iptable_mangle_hook+0x10a/0x120 [iptable_mangle]
[<ffffffff8140c226>] nf_iterate+0x46/0x89
[<ffffffff8141d2e8>] ? ip_rcv_finish+0x0/0x3c6
[<ffffffff8140c2e1>] nf_hook_slow+0x78/0xe3
[<ffffffff8141d2e8>] ? ip_rcv_finish+0x0/0x3c6
[<ffffffff81472f06>] ? run_filter+0x0/0xc0
[<ffffffff813e6802>] ? dev_seq_stop+0x8/0x10
[<ffffffff8141d2e8>] ? ip_rcv_finish+0x0/0x3c6
[<ffffffff8141d9a9>] NF_HOOK.clone.6+0x46/0x58
[<ffffffff8141dd93>] ip_rcv+0x21f/0x24c
[<ffffffff813e7d43>] __netif_receive_skb+0x3e0/0x40a
[<ffffffff813e8834>] netif_receive_skb+0x6c/0x73
[<ffffffffa00c954e>] virtnet_poll+0x55b/0x6cb [virtio_net]
[<ffffffff8107fb92>] ? lock_release+0x19a/0x1a6
[<ffffffff813e9bc4>] net_rx_action+0xb1/0x1e3
[<ffffffff8107d64b>] ? print_lock_contention_bug+0x1b/0xd5
[<ffffffff8100ac1c>] ? call_softirq+0x1c/0x30
[<ffffffff8105752a>] __do_softirq+0xfa/0x1cf
[<ffffffff8107fb92>] ? lock_release+0x19a/0x1a6
[<ffffffff8100ac1c>] call_softirq+0x1c/0x30
[<ffffffff8100c3d9>] do_softirq+0x4b/0xa2
[<ffffffff810576d0>] irq_exit+0x4a/0x8c
[<ffffffff814a198d>] do_IRQ+0x9d/0xb4
[<ffffffff8149b813>] ret_from_intr+0x0/0x16
 <EOI>
[<ffffffff81010faf>] ? default_idle+0x3c/0x61
[<ffffffff8102c7b1>] ? native_safe_halt+0xb/0xd
[<ffffffff810800c0>] ? trace_hardirqs_on+0xd/0xf
[<ffffffff81010fb4>] default_idle+0x41/0x61
[<ffffffff8100830b>] cpu_idle+0xb3/0x10f
[<ffffffff814824c3>] rest_init+0xb7/0xbe
[<ffffffff8148240c>] ? rest_init+0x0/0xbe
[<ffffffff81d76c50>] start_kernel+0x412/0x41d
[<ffffffff81d762c6>] x86_64_start_reservations+0xb1/0xb5
[<ffffffff81d763c2>] x86_64_start_kernel+0xf8/0x107
Code: 41 8a 04 24 88 05 1c 05 00 00 5a 89 d8 5b 41 5c 41 5d c9 c3 55 48 89 e5
0f 1f 44 00 00 48 8b 46 08 8a 10 3a 15 fd 04 00 00 74 02 <0f> 0b fe ca 75 0e
8b 40 04 89 87 b4 00 00 00 83 c8 ff c9 c3 0f
RIP  [<ffffffffa022117d>] secmark_tg+0x17/0x2e [xt_SECMARK]
RSP <ffff880003e03a40>
---[ end trace 9aa5d06a71143e74 ]---

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---

 net/netfilter/xt_SECMARK.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 23b2d6c..364ad16 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -101,7 +101,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
 	switch (info->mode) {
 	case SECMARK_MODE_SEL:
 		err = checkentry_selinux(info);
-		if (err <= 0)
+		if (err)
 			return err;
 		break;
 


^ permalink raw reply related

* [RFC PATCH 7/7] vlan: Remove accleration legacy functions.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

This removes the explicit vlan acceleration functions that acted
as shims in favor of the main receive functions that can now
handle vlans.

Signed-off-by: Jesse Gross <jesse@nicira.com>
--
This patch can only be applied once all drivers that use vlan acceleration
have been converted over to the new model.
---
 include/linux/if_vlan.h   |   66 +++-----------------------------------------
 include/linux/netdevice.h |    8 -----
 net/8021q/vlan.c          |    8 +-----
 net/8021q/vlan_core.c     |   25 -----------------
 4 files changed, 6 insertions(+), 101 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e21028b..c6952af 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -73,13 +73,16 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
+#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
+#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 /* if this changes, algorithm will have to be reworked because this
  * depends on completely exhausting the VLAN identifier space.  Thus
  * it gives constant time look-up, but in many cases it wastes memory.
  */
-#define VLAN_GROUP_ARRAY_LEN          VLAN_N_VID
 #define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
-#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
 
 struct vlan_group {
 	struct net_device	*real_dev; /* The ethernet(like) device
@@ -111,10 +114,6 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
-#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
-#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
-
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
 					       u16 vlan_id)
@@ -130,15 +129,7 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
-extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-			     u16 vlan_tci, int polling);
 extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
-extern gro_result_t
-vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-		 unsigned int vlan_tci, struct sk_buff *skb);
-extern gro_result_t
-vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-	       unsigned int vlan_tci);
 
 #else
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
@@ -159,61 +150,14 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
 	return 0;
 }
 
-static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-				    u16 vlan_tci, int polling)
-{
-	BUG();
-	return NET_XMIT_SUCCESS;
-}
-
 static inline int vlan_hwaccel_do_receive(struct sk_buff *skb)
 {
 	BUG();
 	return 0;
 }
-
-static inline gro_result_t
-vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-		 unsigned int vlan_tci, struct sk_buff *skb)
-{
-	return GRO_DROP;
-}
-
-static inline gro_result_t
-vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-	       unsigned int vlan_tci)
-{
-	return GRO_DROP;
-}
 #endif
 
 /**
- * vlan_hwaccel_rx - netif_rx wrapper for VLAN RX acceleration
- * @skb: buffer
- * @grp: vlan group
- * @vlan_tci: VLAN TCI as received from the card
- */
-static inline int vlan_hwaccel_rx(struct sk_buff *skb,
-				  struct vlan_group *grp,
-				  u16 vlan_tci)
-{
-	return __vlan_hwaccel_rx(skb, grp, vlan_tci, 0);
-}
-
-/**
- * vlan_hwaccel_receive_skb - netif_receive_skb wrapper for VLAN RX acceleration
- * @skb: buffer
- * @grp: vlan group
- * @vlan_tci: VLAN TCI as received from the card
- */
-static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
-					   struct vlan_group *grp,
-					   u16 vlan_tci)
-{
-	return __vlan_hwaccel_rx(skb, grp, vlan_tci, 1);
-}
-
-/**
  * __vlan_put_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_tci: VLAN TCI to insert
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef4bbcb..0444994 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -682,12 +682,6 @@ struct netdev_rx_queue {
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
- * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp);
- *	If device support VLAN receive accleration
- *	(ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called
- *	when vlan groups for the device changes.  Note: grp is NULL
- *	if no vlan's groups are being used.
- *
  * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
  *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
  *	this function is called when a VLAN id is registered.
@@ -739,8 +733,6 @@ struct net_device_ops {
 						     struct rtnl_link_stats64 *storage);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
-	void			(*ndo_vlan_rx_register)(struct net_device *dev,
-						        struct vlan_group *grp);
 	void			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
 						       unsigned short vid);
 	void			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 77634b9..5b26ca9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -132,9 +132,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 		struct vlan_group *vlgrp = real_dev->vlgrp;
 
 		rcu_assign_pointer(real_dev->vlgrp, NULL);
-		if (ops->ndo_vlan_rx_register)
-			ops->ndo_vlan_rx_register(real_dev, NULL);
-
 		vlan_gvrp_uninit_applicant(real_dev);
 
 		/* Free the group, after all cpu's are done. */
@@ -206,11 +203,8 @@ int register_vlan_dev(struct net_device *dev)
 	vlan_group_set_device(grp, vlan_id, dev);
 	grp->nr_vlans++;
 
-	if (ngrp) {
-		if (ops->ndo_vlan_rx_register)
-			ops->ndo_vlan_rx_register(real_dev, ngrp);
+	if (ngrp)
 		rcu_assign_pointer(real_dev->vlgrp, ngrp);
-	}
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index df90412..ac8fcc5 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -67,28 +67,3 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
 	return vlan_dev_info(dev)->vlan_id;
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
-
-/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      u16 vlan_tci, int polling)
-{
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	return polling ? netif_receive_skb(skb) : netif_rx(skb);
-}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
-
-gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
-			      unsigned int vlan_tci, struct sk_buff *skb)
-{
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	return napi_gro_receive(napi, skb);
-}
-EXPORT_SYMBOL(vlan_gro_receive);
-
-gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-			    unsigned int vlan_tci)
-{
-	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
-	return napi_gro_frags(napi);
-}
-EXPORT_SYMBOL(vlan_gro_frags);
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 6/7] bonding: Update bonding for new vlan model.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

It is no longer necessary to register vlan groups, so update bonding
to not do that on its slaves.  Although the new vlan acceleration
model allows additional flexibility, bonding continues to require vlan
devices since it needs additional system state to handle ARP/IGMP.  This
also removes fallback code for non-vlan accelerated slaves since core
networking now handles that.

Signed-off-by: Jesse Gross <jesse@nicira.com>
--
This patch can only be applied once all drivers that use vlan acceleration
have been converted over to the new model.
---
 drivers/net/bonding/bond_alb.c  |    8 +--
 drivers/net/bonding/bond_ipv6.c |    5 +-
 drivers/net/bonding/bond_main.c |  134 ++++++++-------------------------------
 drivers/net/bonding/bonding.h   |    1 -
 4 files changed, 30 insertions(+), 118 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 26bb118..c911456 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -685,10 +685,8 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 			client_info->ntt = 0;
 		}
 
-		if (bond->vlgrp) {
-			if (!vlan_get_tag(skb, &client_info->vlan_id))
-				client_info->tag = 1;
-		}
+		if (!vlan_get_tag(skb, &client_info->vlan_id))
+			client_info->tag = 1;
 
 		if (!client_info->assigned) {
 			u32 prev_tbl_head = bond_info->rx_hashtbl_head;
@@ -907,7 +905,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		skb->priority = TC_PRIO_CONTROL;
 		skb->dev = slave->dev;
 
-		if (bond->vlgrp) {
+		if (!list_empty(&bond->vlan_list)) {
 			struct vlan_entry *vlan;
 
 			vlan = bond_next_vlan(bond,
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 121b073..c276b5a 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -178,10 +178,7 @@ static int bond_inet6addr_event(struct notifier_block *this,
 		}
 
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-			if (!bond->vlgrp)
-				continue;
-			vlan_dev = vlan_group_get_device(bond->vlgrp,
-							 vlan->vlan_id);
+			vlan_dev = vlan_find_dev(bond->dev, vlan->vlan_id);
 			if (vlan_dev == event_dev) {
 				switch (event) {
 				case NETDEV_UP:
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7703d35..5600946 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -414,36 +414,11 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
  * @bond: bond device that got this skb for tx.
  * @skb: hw accel VLAN tagged skb to transmit
  * @slave_dev: slave that is supposed to xmit this skbuff
- *
- * When the bond gets an skb to transmit that is
- * already hardware accelerated VLAN tagged, and it
- * needs to relay this skb to a slave that is not
- * hw accel capable, the skb needs to be "unaccelerated",
- * i.e. strip the hwaccel tag and re-insert it as part
- * of the payload.
  */
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 			struct net_device *slave_dev)
 {
-	unsigned short uninitialized_var(vlan_id);
-
-	/* Test vlan_list not vlgrp to catch and handle 802.1p tags */
-	if (!list_empty(&bond->vlan_list) &&
-	    !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
-	    vlan_get_tag(skb, &vlan_id) == 0) {
-		skb->dev = slave_dev;
-		skb = vlan_put_tag(skb, vlan_id);
-		if (!skb) {
-			/* vlan_put_tag() frees the skb in case of error,
-			 * so return success here so the calling functions
-			 * won't attempt to free is again.
-			 */
-			return 0;
-		}
-	} else {
-		skb->dev = slave_dev;
-	}
-
+	skb->dev = slave_dev;
 	skb->priority = 1;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
@@ -462,8 +437,8 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 }
 
 /*
- * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
- * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
+ * In the following 2 functions, bond_vlan_rx_add_vid and
+ * bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
  * lock because:
  * a. This operation is performed in IOCTL context,
  * b. The operation is protected by the RTNL semaphore in the 8021q code,
@@ -480,33 +455,6 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 */
 
 /**
- * bond_vlan_rx_register - Propagates registration to slaves
- * @bond_dev: bonding net device that got called
- * @grp: vlan group being registered
- */
-static void bond_vlan_rx_register(struct net_device *bond_dev,
-				  struct vlan_group *grp)
-{
-	struct bonding *bond = netdev_priv(bond_dev);
-	struct slave *slave;
-	int i;
-
-	write_lock(&bond->lock);
-	bond->vlgrp = grp;
-	write_unlock(&bond->lock);
-
-	bond_for_each_slave(bond, slave, i) {
-		struct net_device *slave_dev = slave->dev;
-		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-		if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
-		    slave_ops->ndo_vlan_rx_register) {
-			slave_ops->ndo_vlan_rx_register(slave_dev, grp);
-		}
-	}
-}
-
-/**
  * bond_vlan_rx_add_vid - Propagates adding an id to slaves
  * @bond_dev: bonding net device that got called
  * @vid: vlan id being added
@@ -543,7 +491,6 @@ static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
-	struct net_device *vlan_dev;
 	int i, res;
 
 	bond_for_each_slave(bond, slave, i) {
@@ -551,14 +498,8 @@ static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 
 		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-		    slave_ops->ndo_vlan_rx_kill_vid) {
-			/* Save and then restore vlan_dev in the grp array,
-			 * since the slave's driver might clear it.
-			 */
-			vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
+		    slave_ops->ndo_vlan_rx_kill_vid)
 			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
-			vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
-		}
 	}
 
 	res = bond_del_vlan(bond, vid);
@@ -573,13 +514,6 @@ static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *sla
 	struct vlan_entry *vlan;
 	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 
-	if (!bond->vlgrp)
-		return;
-
-	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
-	    slave_ops->ndo_vlan_rx_register)
-		slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp);
-
 	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
 	    !(slave_ops->ndo_vlan_rx_add_vid))
 		return;
@@ -593,30 +527,17 @@ static void bond_del_vlans_from_slave(struct bonding *bond,
 {
 	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 	struct vlan_entry *vlan;
-	struct net_device *vlan_dev;
-
-	if (!bond->vlgrp)
-		return;
 
 	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
 	    !(slave_ops->ndo_vlan_rx_kill_vid))
-		goto unreg;
+		return;
 
 	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 		if (!vlan->vlan_id)
 			continue;
-		/* Save and then restore vlan_dev in the grp array,
-		 * since the slave's driver might clear it.
-		 */
-		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+
 		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
-		vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
 	}
-
-unreg:
-	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) &&
-	    slave_ops->ndo_vlan_rx_register)
-		slave_ops->ndo_vlan_rx_register(slave_dev, NULL);
 }
 
 /*------------------------------- Link status -------------------------------*/
@@ -894,23 +815,22 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	struct vlan_entry *vlan;
 
 	read_lock(&bond->lock);
+	rcu_read_lock();
 
 	/* rejoin all groups on bond device */
 	__bond_resend_igmp_join_requests(bond->dev);
 
 	/* rejoin all groups on vlan devices */
-	if (bond->vlgrp) {
-		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-			vlan_dev = vlan_group_get_device(bond->vlgrp,
-							 vlan->vlan_id);
-			if (vlan_dev)
-				__bond_resend_igmp_join_requests(vlan_dev);
-		}
+	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+		vlan_dev = vlan_find_dev(bond->dev, vlan->vlan_id);
+		if (vlan_dev)
+			__bond_resend_igmp_join_requests(vlan_dev);
 	}
 
 	if (--bond->igmp_retrans > 0)
 		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
 
+	rcu_read_unlock();
 	read_unlock(&bond->lock);
 }
 
@@ -1476,7 +1396,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	/* no need to lock since we're protected by rtnl_lock */
 	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
 		pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name);
-		if (bond->vlgrp) {
+		if (!list_empty(&bond->vlan_list)) {
 			pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
 			       bond_dev->name, slave_dev->name, bond_dev->name);
 			return -EPERM;
@@ -1975,9 +1895,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 		 */
 		memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
 
-		if (!bond->vlgrp) {
-			bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
-		} else {
+		if (!list_empty(&bond->vlan_list)) {
 			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
 				   bond_dev->name, bond_dev->name);
 			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
@@ -2167,9 +2085,7 @@ static int bond_release_all(struct net_device *bond_dev)
 	 */
 	memset(bond_dev->dev_addr, 0, bond_dev->addr_len);
 
-	if (!bond->vlgrp) {
-		bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
-	} else {
+	if (!list_empty(&bond->vlan_list)) {
 		pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
 			   bond_dev->name, bond_dev->name);
 		pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
@@ -2601,11 +2517,13 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 	struct flowi fl;
 	struct rtable *rt;
 
+	rcu_read_lock();
+
 	for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
 		if (!targets[i])
 			break;
 		pr_debug("basa: target %x\n", targets[i]);
-		if (!bond->vlgrp) {
+		if (list_empty(&bond->vlan_list)) {
 			pr_debug("basa: empty vlan: arp_send\n");
 			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
 				      bond->master_ip, 0);
@@ -2643,7 +2561,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 
 		vlan_id = 0;
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+			vlan_dev = vlan_find_dev(bond->dev, vlan->vlan_id);
 			if (vlan_dev == rt->dst.dev) {
 				vlan_id = vlan->vlan_id;
 				pr_debug("basa: vlan match on %s %d\n",
@@ -2666,6 +2584,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		}
 		ip_rt_put(rt);
 	}
+
+	rcu_read_unlock();
 }
 
 /*
@@ -2694,16 +2614,17 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
 				bond->master_ip, 0);
 	}
 
-	if (!bond->vlgrp)
-		return;
+	rcu_read_lock();
 
 	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+		vlan_dev = vlan_find_dev(bond->dev, vlan->vlan_id);
 		if (vlan->vlan_ip) {
 			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
 				      vlan->vlan_ip, vlan->vlan_id);
 		}
 	}
+
+	rcu_read_unlock();
 }
 
 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
@@ -3653,9 +3574,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
 		}
 
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-			if (!bond->vlgrp)
-				continue;
-			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+			vlan_dev = vlan_find_dev(bond->dev, vlan->vlan_id);
 			if (vlan_dev == event_dev) {
 				switch (event) {
 				case NETDEV_UP:
@@ -4656,7 +4575,6 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_change_mtu		= bond_change_mtu,
 	.ndo_set_mac_address 	= bond_set_mac_address,
 	.ndo_neigh_setup	= bond_neigh_setup,
-	.ndo_vlan_rx_register	= bond_vlan_rx_register,
 	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c15f213..999746b 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -218,7 +218,6 @@ struct bonding {
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;
 	struct   list_head vlan_list;
-	struct   vlan_group *vlgrp;
 	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 4/7] ixgbe: Update ixgbe to use new vlan accleration.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, Peter Waskiewicz, Emil Tantilov, Jeff Kirsher
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

Make the ixgbe driver use the new vlan acceleration model.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com>
CC: Emil Tantilov <emil.s.tantilov@intel.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h      |    4 ++-
 drivers/net/ixgbe/ixgbe_main.c |   60 ++++++++++------------------------------
 2 files changed, 18 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index a8c47b0..5e38de7 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -28,11 +28,13 @@
 #ifndef _IXGBE_H_
 #define _IXGBE_H_
 
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/cpumask.h>
 #include <linux/aer.h>
+#include <linux/if_vlan.h>
 
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
@@ -287,7 +289,7 @@ struct ixgbe_q_vector {
 /* board specific private data structure */
 struct ixgbe_adapter {
 	struct timer_list watchdog_timer;
-	struct vlan_group *vlgrp;
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	u16 bd_number;
 	struct work_struct reset_task;
 	struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 95dbf60..58b3363 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -954,17 +954,13 @@ static void ixgbe_receive_skb(struct ixgbe_q_vector *q_vector,
 	bool is_vlan = (status & IXGBE_RXD_STAT_VP);
 	u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan);
 
-	if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) {
-		if (adapter->vlgrp && is_vlan && (tag & VLAN_VID_MASK))
-			vlan_gro_receive(napi, adapter->vlgrp, tag, skb);
-		else
-			napi_gro_receive(napi, skb);
-	} else {
-		if (adapter->vlgrp && is_vlan && (tag & VLAN_VID_MASK))
-			vlan_hwaccel_rx(skb, adapter->vlgrp, tag);
-		else
-			netif_rx(skb);
-	}
+	if (is_vlan && (tag & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(skb, tag);
+
+	if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL))
+		napi_gro_receive(napi, skb);
+	else
+		netif_rx(skb);
 }
 
 /**
@@ -3065,6 +3061,7 @@ static void ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 
 	/* add VID to filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, true);
+	set_bit(vid, adapter->active_vlans);
 }
 
 static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
@@ -3073,16 +3070,9 @@ static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	struct ixgbe_hw *hw = &adapter->hw;
 	int pool_ndx = adapter->num_vfs;
 
-	if (!test_bit(__IXGBE_DOWN, &adapter->state))
-		ixgbe_irq_disable(adapter);
-
-	vlan_group_set_device(adapter->vlgrp, vid, NULL);
-
-	if (!test_bit(__IXGBE_DOWN, &adapter->state))
-		ixgbe_irq_enable(adapter, true, true);
-
 	/* remove VID from filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, false);
+	clear_bit(vid, adapter->active_vlans);
 }
 
 /**
@@ -3157,14 +3147,9 @@ static void ixgbe_vlan_filter_enable(struct ixgbe_adapter *adapter)
 	}
 }
 
-static void ixgbe_vlan_rx_register(struct net_device *netdev,
-				   struct vlan_group *grp)
+static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
 {
-	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
-	if (!test_bit(__IXGBE_DOWN, &adapter->state))
-		ixgbe_irq_disable(adapter);
-	adapter->vlgrp = grp;
+	u16 vid;
 
 	/*
 	 * For a DCB driver, always enable VLAN tag stripping so we can
@@ -3173,24 +3158,10 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev,
 	 */
 	ixgbe_vlan_filter_enable(adapter);
 
-	ixgbe_vlan_rx_add_vid(netdev, 0);
-
-	if (!test_bit(__IXGBE_DOWN, &adapter->state))
-		ixgbe_irq_enable(adapter, true, true);
-}
+	ixgbe_vlan_rx_add_vid(adapter->netdev, 0);
 
-static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
-{
-	ixgbe_vlan_rx_register(adapter->netdev, adapter->vlgrp);
-
-	if (adapter->vlgrp) {
-		u16 vid;
-		for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
-			if (!vlan_group_get_device(adapter->vlgrp, vid))
-				continue;
-			ixgbe_vlan_rx_add_vid(adapter->netdev, vid);
-		}
-	}
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		ixgbe_vlan_rx_add_vid(adapter->netdev, vid);
 }
 
 /**
@@ -6312,7 +6283,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct net_device *netdev
 	int count = 0;
 	unsigned int f;
 
-	if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
+	if (vlan_tx_tag_present(skb)) {
 		tx_flags |= vlan_tx_tag_get(skb);
 		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
 			tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
@@ -6570,7 +6541,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_set_mac_address	= ixgbe_set_mac,
 	.ndo_change_mtu		= ixgbe_change_mtu,
 	.ndo_tx_timeout		= ixgbe_tx_timeout,
-	.ndo_vlan_rx_register	= ixgbe_vlan_rx_register,
 	.ndo_vlan_rx_add_vid	= ixgbe_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= ixgbe_vlan_rx_kill_vid,
 	.ndo_do_ioctl		= ixgbe_ioctl,
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 5/7] lro: Remove explicit vlan support.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

Using the new vlan acceleration model, LRO no longer needs to be
explicitly passed the vlan information because it is contained in
the skb.  Since all LRO did was pass the vlan through, this removes
that knowledge.

Signed-off-by: Jesse Gross <jesse@nicira.com>
--
This patch can only be applied once all drivers that use LRO and vlan acceleration
have been converted over to the new model.
---
 include/linux/inet_lro.h |   20 ------------
 net/ipv4/inet_lro.c      |   74 +++++++---------------------------------------
 2 files changed, 11 insertions(+), 83 deletions(-)

diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h
index c4335fa..667281a 100644
--- a/include/linux/inet_lro.h
+++ b/include/linux/inet_lro.h
@@ -50,7 +50,6 @@ struct net_lro_desc {
 	struct skb_frag_struct *next_frag;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	struct vlan_group *vgrp;
 	__wsum  data_csum;
 	__be32 tcp_rcv_tsecr;
 	__be32 tcp_rcv_tsval;
@@ -60,9 +59,7 @@ struct net_lro_desc {
 	u16 ip_tot_len;
 	u16 tcp_saw_tstamp; 		/* timestamps enabled */
 	__be16 tcp_window;
-	u16 vlan_tag;
 	int pkt_aggr_cnt;		/* counts aggregated packets */
-	int vlan_packet;
 	int mss;
 	int active;
 };
@@ -137,16 +134,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     void *priv);
 
 /*
- * Processes a SKB with VLAN HW acceleration support
- */
-
-void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
-				  struct sk_buff *skb,
-				  struct vlan_group *vgrp,
-				  u16 vlan_tag,
-				  void *priv);
-
-/*
  * Processes a fragment list
  *
  * This functions aggregate fragments and generate SKBs do pass
@@ -165,13 +152,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 		       struct skb_frag_struct *frags,
 		       int len, int true_size, void *priv, __wsum sum);
 
-void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
-				    struct skb_frag_struct *frags,
-				    int len, int true_size,
-				    struct vlan_group *vgrp,
-				    u16 vlan_tag,
-				    void *priv, __wsum sum);
-
 /*
  * Forward all aggregated SKBs held by lro_mgr to network stack
  */
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 47038cb..8945a1d 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 }
 
 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
-			  struct iphdr *iph, struct tcphdr *tcph,
-			  u16 vlan_tag, struct vlan_group *vgrp)
+			  struct iphdr *iph, struct tcphdr *tcph)
 {
 	int nr_frags;
 	__be32 *ptr;
@@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	}
 
 	lro_desc->mss = tcp_data_len;
-	lro_desc->vgrp = vgrp;
-	lro_desc->vlan_tag = vlan_tag;
 	lro_desc->active = 1;
 
 	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
@@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
 
 	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
 
-	if (lro_desc->vgrp) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(lro_desc->parent,
-						 lro_desc->vgrp,
-						 lro_desc->vlan_tag);
-		else
-			vlan_hwaccel_rx(lro_desc->parent,
-					lro_desc->vgrp,
-					lro_desc->vlan_tag);
-
-	} else {
-		if (lro_mgr->features & LRO_F_NAPI)
-			netif_receive_skb(lro_desc->parent);
-		else
-			netif_rx(lro_desc->parent);
-	}
+	if (lro_mgr->features & LRO_F_NAPI)
+		netif_receive_skb(lro_desc->parent);
+	else
+		netif_rx(lro_desc->parent);
 
 	LRO_INC_STATS(lro_mgr, flushed);
 	lro_clear_desc(lro_desc);
 }
 
 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
-			  struct vlan_group *vgrp, u16 vlan_tag, void *priv)
+			  void *priv)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 			goto out;
 
 		skb->ip_summed = lro_mgr->ip_summed_aggr;
-		lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
+		lro_init_desc(lro_desc, skb, iph, tcph);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return 0;
 	}
@@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
 static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 					  struct skb_frag_struct *frags,
 					  int len, int true_size,
-					  struct vlan_group *vgrp,
-					  u16 vlan_tag, void *priv, __wsum sum)
+					  void *priv, __wsum sum)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
 				+ IP_HDR_LEN(iph));
 
-		lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
+		lro_init_desc(lro_desc, skb, iph, tcph);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return NULL;
 	}
@@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     struct sk_buff *skb,
 		     void *priv)
 {
-	if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
+	if (__lro_proc_skb(lro_mgr, skb, priv)) {
 		if (lro_mgr->features & LRO_F_NAPI)
 			netif_receive_skb(skb);
 		else
@@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
-void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
-				  struct sk_buff *skb,
-				  struct vlan_group *vgrp,
-				  u16 vlan_tag,
-				  void *priv)
-{
-	if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-		else
-			vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-	}
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
-
 void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 		       struct skb_frag_struct *frags,
 		       int len, int true_size, void *priv, __wsum sum)
 {
 	struct sk_buff *skb;
 
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
-				 priv, sum);
+	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
 	if (!skb)
 		return;
 
@@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_frags);
 
-void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
-				    struct skb_frag_struct *frags,
-				    int len, int true_size,
-				    struct vlan_group *vgrp,
-				    u16 vlan_tag, void *priv, __wsum sum)
-{
-	struct sk_buff *skb;
-
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
-				 vlan_tag, priv, sum);
-	if (!skb)
-		return;
-
-	if (lro_mgr->features & LRO_F_NAPI)
-		vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-	else
-		vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
-
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 2/7] vlan: Centralize handling of hardware acceleration.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

Currently each driver that is capable of vlan hardware acceleration
must be aware of the vlan groups that are configured and then pass
the stripped tag to a specialized receive function.  This is
different from other types of hardware offload in that it places a
significant amount of knowledge in the driver itself rather than keeping
it in the networking core.

This makes vlan offloading function more like other forms
of offloading (such as checksum offloading or TSO) by doing the
following:
* On receive, stripped vlans are passed directly to the network
core, without attempting to check for vlan groups or reconstructing
the header if no group exists
* vlans are made less special by folding the logic into the main
receive routines
* On transmit, the device layer will add the vlan header in software
if the hardware doesn't support it, instead of spreading that logic
out in upper layers, such as bonding.

There are a number of advantages to this:
* Fixes all bugs with drivers incorrectly dropping vlan headers at once.
* Avoids having to disable VLAN acceleration when in promiscuous mode
(good for bridging since it always puts devices in promiscuous mode).
* Keeps VLAN tag separate until given to ultimate consumer, which
avoids needing to do header reconstruction as in tg3 unless absolutely
necessary.
* Consolidates common code in core networking.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/if_vlan.h         |   27 ++++++++-
 include/linux/netdevice.h       |   12 +++-
 net/8021q/vlan.c                |  102 ++++++++-----------------------
 net/8021q/vlan.h                |   17 -----
 net/8021q/vlan_core.c           |  125 +++++++++------------------------------
 net/8021q/vlan_dev.c            |    2 +-
 net/bridge/netfilter/ebt_vlan.c |    4 +-
 net/core/dev.c                  |   42 ++++++++++++--
 8 files changed, 129 insertions(+), 202 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a523207..e21028b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -68,6 +68,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 #define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
 #define VLAN_TAG_PRESENT	VLAN_CFI_MASK
 #define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
+#define VLAN_N_VID		4096
 
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
@@ -76,7 +77,7 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
  * depends on completely exhausting the VLAN identifier space.  Thus
  * it gives constant time look-up, but in many cases it wastes memory.
  */
-#define VLAN_GROUP_ARRAY_LEN          4096
+#define VLAN_GROUP_ARRAY_LEN          VLAN_N_VID
 #define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
 #define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS)
 
@@ -114,12 +115,24 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 #define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+/* Must be invoked with rcu_read_lock or with RTNL. */
+static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
+					       u16 vlan_id)
+{
+	struct vlan_group *grp = rcu_dereference(real_dev->vlgrp);
+
+	if (grp)
+		return vlan_group_get_device(grp, vlan_id);
+
+	return NULL;
+}
+
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     u16 vlan_tci, int polling);
-extern void vlan_hwaccel_do_receive(struct sk_buff *skb);
+extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
 extern gro_result_t
 vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		 unsigned int vlan_tci, struct sk_buff *skb);
@@ -128,6 +141,12 @@ vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	       unsigned int vlan_tci);
 
 #else
+static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
+					       u16 vlan_id)
+{
+	return NULL;
+}
+
 static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
 	BUG();
@@ -147,8 +166,10 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 	return NET_XMIT_SUCCESS;
 }
 
-static inline void vlan_hwaccel_do_receive(struct sk_buff *skb)
+static inline int vlan_hwaccel_do_receive(struct sk_buff *skb)
 {
+	BUG();
+	return 0;
 }
 
 static inline gro_result_t
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 14fbb04..ef4bbcb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -942,7 +942,10 @@ struct net_device {
 
 
 	/* Protocol specific pointers */
-	
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	struct vlan_group	*vlgrp;		/* VLAN group */
+#endif
 #ifdef CONFIG_NET_DSA
 	void			*dsa_ptr;	/* dsa specific data */
 #endif
@@ -2248,8 +2251,13 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
+	int features = dev->features;
+
+	if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci)
+		features &= dev->vlan_features;
+
 	return skb_is_gso(skb) &&
-	       (!skb_gso_ok(skb, dev->features) ||
+	       (!skb_gso_ok(skb, features) ||
 		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 25c2133..77634b9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -44,9 +44,6 @@
 
 int vlan_net_id __read_mostly;
 
-/* Our listing of VLAN group(s) */
-static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-
 const char vlan_fullname[] = "802.1Q VLAN Support";
 const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
@@ -59,40 +56,6 @@ static struct packet_type vlan_packet_type __read_mostly = {
 
 /* End of global variables definitions. */
 
-static inline unsigned int vlan_grp_hashfn(unsigned int idx)
-{
-	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
-}
-
-/* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-	struct hlist_node *n;
-	int hash = vlan_grp_hashfn(real_dev->ifindex);
-
-	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
-		if (grp->real_dev == real_dev)
-			return grp;
-	}
-
-	return NULL;
-}
-
-/*  Find the protocol handler.  Assumes VID < VLAN_VID_MASK.
- *
- * Must be invoked with RCU read lock (no preempt)
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
-{
-	struct vlan_group *grp = __vlan_find_group(real_dev);
-
-	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
-
-	return NULL;
-}
-
 static void vlan_group_free(struct vlan_group *grp)
 {
 	int i;
@@ -111,8 +74,6 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 		return NULL;
 
 	grp->real_dev = real_dev;
-	hlist_add_head_rcu(&grp->hlist,
-			&vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
 	return grp;
 }
 
@@ -146,13 +107,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
-	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
-
-	grp = __vlan_find_group(real_dev);
-	BUG_ON(!grp);
+	BUG_ON(!real_dev->vlgrp);
 
 	/* Take it out of our own structures, but be sure to interlock with
 	 * HW accelerating devices or SW vlan input packet processing if
@@ -161,25 +119,26 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
 		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
-	grp->nr_vlans--;
+	real_dev->vlgrp->nr_vlans--;
 
-	vlan_group_set_device(grp, vlan_id, NULL);
-	if (!grp->killall)
+	vlan_group_set_device(real_dev->vlgrp, vlan_id, NULL);
+	if (!real_dev->vlgrp->killall)
 		synchronize_net();
 
 	unregister_netdevice_queue(dev, head);
 
 	/* If the group is now empty, kill off the group. */
-	if (grp->nr_vlans == 0) {
-		vlan_gvrp_uninit_applicant(real_dev);
+	if (real_dev->vlgrp->nr_vlans == 0) {
+		struct vlan_group *vlgrp = real_dev->vlgrp;
 
-		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+		rcu_assign_pointer(real_dev->vlgrp, NULL);
+		if (ops->ndo_vlan_rx_register)
 			ops->ndo_vlan_rx_register(real_dev, NULL);
 
-		hlist_del_rcu(&grp->hlist);
+		vlan_gvrp_uninit_applicant(real_dev);
 
 		/* Free the group, after all cpu's are done. */
-		call_rcu(&grp->rcu, vlan_rcu_free);
+		call_rcu(&vlgrp->rcu, vlan_rcu_free);
 	}
 
 	/* Get rid of the vlan's reference to real_dev */
@@ -196,18 +155,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) {
-		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
-		return -EOPNOTSUPP;
-	}
-
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
 		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
-	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -222,7 +176,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
-	grp = __vlan_find_group(real_dev);
+	grp = real_dev->vlgrp;
 	if (!grp) {
 		ngrp = grp = vlan_group_alloc(real_dev);
 		if (!grp)
@@ -252,8 +206,11 @@ int register_vlan_dev(struct net_device *dev)
 	vlan_group_set_device(grp, vlan_id, dev);
 	grp->nr_vlans++;
 
-	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
-		ops->ndo_vlan_rx_register(real_dev, ngrp);
+	if (ngrp) {
+		if (ops->ndo_vlan_rx_register)
+			ops->ndo_vlan_rx_register(real_dev, ngrp);
+		rcu_assign_pointer(real_dev->vlgrp, ngrp);
+	}
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
@@ -264,7 +221,6 @@ out_uninit_applicant:
 		vlan_gvrp_uninit_applicant(real_dev);
 out_free_group:
 	if (ngrp) {
-		hlist_del_rcu(&ngrp->hlist);
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&ngrp->rcu, vlan_rcu_free);
 	}
@@ -428,7 +384,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
 	}
 
-	grp = __vlan_find_group(dev);
+	grp = dev->vlgrp;
 	if (!grp)
 		goto out;
 
@@ -439,7 +395,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -450,7 +406,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -464,7 +420,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -478,7 +434,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -490,7 +446,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -508,7 +464,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -532,7 +488,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		/* Delete all VLANs for this dev. */
 		grp->killall = 1;
 
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -540,7 +496,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			/* unregistration of last vlan destroys group, abort
 			 * afterwards */
 			if (grp->nr_vlans == 1)
-				i = VLAN_GROUP_ARRAY_LEN;
+				i = VLAN_N_VID;
 
 			unregister_vlan_dev(vlandev, &list);
 		}
@@ -746,8 +702,6 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
-	unsigned int i;
-
 	vlan_ioctl_set(NULL);
 	vlan_netlink_fini();
 
@@ -755,10 +709,6 @@ static void __exit vlan_cleanup_module(void)
 
 	dev_remove_pack(&vlan_packet_type);
 
-	/* This table must be empty if there are no module references left. */
-	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
-		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503a..db01b31 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -72,23 +72,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
-#define VLAN_GRP_HASH_SHIFT	5
-#define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
-#define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
-
-/*  Find a VLAN device by the MAC address of its Ethernet device, and
- *  it's VLAN ID.  The default configuration is to have VLAN's scope
- *  to be box-wide, so the MAC will be ignored.  The mac will only be
- *  looked at if we are configured to have a separate set of VLANs per
- *  each MAC addressable interface.  Note that this latter option does
- *  NOT follow the spec for VLANs, but may be useful for doing very
- *  large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
- *
- *  Must be invoked with rcu_read_lock (ie preempt disabled)
- *  or with RTNL.
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
-
 /* found in vlan_dev.c */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index dee727c..df90412 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,54 +4,33 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      u16 vlan_tci, int polling)
+int vlan_hwaccel_do_receive(struct sk_buff *skb)
 {
+	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (netpoll_rx(skb))
-		return NET_RX_DROP;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
-
-	skb->skb_iif = skb->dev->ifindex;
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
+	struct vlan_rx_stats *rx_stats;
 
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
+	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	if (!vlan_dev) {
+		if (vlan_id)
+			skb->pkt_type = PACKET_OTHERHOST;
+		return NET_RX_SUCCESS;
 	}
 
-	return polling ? netif_receive_skb(skb) : netif_rx(skb);
-
-drop:
-	atomic_long_inc(&skb->dev->rx_dropped);
-	dev_kfree_skb_any(skb);
-	return NET_RX_DROP;
-}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
-
-void vlan_hwaccel_do_receive(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	struct vlan_rx_stats     *rx_stats;
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
 
-	skb->dev = vlan_dev_real_dev(dev);
 	netif_nit_deliver(skb);
 
-	skb->dev = dev;
-	skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
+	skb->skb_iif = skb->dev->ifindex;
+	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
+		skb->deliver_no_wcard = 1;
+
+	skb->dev = vlan_dev;
+	skb->priority = vlan_get_ingress_priority(skb->dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -68,11 +47,13 @@ void vlan_hwaccel_do_receive(struct sk_buff *skb)
 		 * This allows the VLAN to have a different MAC than the
 		 * underlying device, and still route correctly. */
 		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					dev->dev_addr))
+					skb->dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
 		break;
 	}
 	u64_stats_update_end(&rx_stats->syncp);
+
+	return NET_RX_SUCCESS;
 }
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -87,75 +68,27 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
-static gro_result_t
-vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
-		unsigned int vlan_tci, struct sk_buff *skb)
+/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+		      u16 vlan_tci, int polling)
 {
-	struct sk_buff *p;
-	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
-
-	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
-
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
-	}
-
-	for (p = napi->gro_list; p; p = p->next) {
-		unsigned long diffs;
-
-		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-		diffs |= compare_ether_header(skb_mac_header(p),
-					      skb_gro_mac_header(skb));
-		NAPI_GRO_CB(p)->same_flow = !diffs;
-		NAPI_GRO_CB(p)->flush = 0;
-	}
-
-	return dev_gro_receive(napi, skb);
-
-drop:
-	atomic_long_inc(&skb->dev->rx_dropped);
-	return GRO_DROP;
+	return polling ? netif_receive_skb(skb) : netif_rx(skb);
 }
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
 
 gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 			      unsigned int vlan_tci, struct sk_buff *skb)
 {
-	if (netpoll_rx_on(skb))
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-
-	skb_gro_reset_offset(skb);
-
-	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
+	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	return napi_gro_receive(napi, skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
 gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 			    unsigned int vlan_tci)
 {
-	struct sk_buff *skb = napi_frags_skb(napi);
-
-	if (!skb)
-		return GRO_DROP;
-
-	if (netpoll_rx_on(skb)) {
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-	}
-
-	return napi_frags_finish(napi, skb,
-				 vlan_gro_common(napi, grp, vlan_tci, skb));
+	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
+	return napi_gro_frags(napi);
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f54251e..14e3d1f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -158,7 +158,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	vlan_id = vlan_tci & VLAN_VID_MASK;
 
 	rcu_read_lock();
-	vlan_dev = __find_vlan_dev(dev, vlan_id);
+	vlan_dev = vlan_find_dev(dev, vlan_id);
 
 	/* If the VLAN device is defined, we use it.
 	 * If not, and the VID is 0, it is a 802.1p packet (not
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index a39d92d..e724720 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -119,10 +119,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 	 * 0 - The null VLAN ID.
 	 * 1 - The default Port VID (PVID)
 	 * 0x0FFF - Reserved for implementation use.
-	 * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
+	 * if_vlan.h: VLAN_N_VID 4096. */
 	if (GET_BITMASK(EBT_VLAN_ID)) {
 		if (!!info->id) { /* if id!=0 => check vid range */
-			if (info->id > VLAN_GROUP_ARRAY_LEN) {
+			if (info->id > VLAN_N_VID) {
 				pr_debug("id %d is out of range (1-4096)\n",
 					 info->id);
 				return -EINVAL;
diff --git a/net/core/dev.c b/net/core/dev.c
index 04972a4..9586aff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1692,7 +1692,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 {
-	if (can_checksum_protocol(dev->features, skb->protocol))
+	int features = dev->features;
+
+	if (vlan_tx_tag_present(skb))
+		features &= dev->vlan_features;
+
+	if (can_checksum_protocol(features, skb->protocol))
 		return true;
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -1791,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	__be16 type = skb->protocol;
 	int err;
 
+	if (type == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh;
+
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return ERR_PTR(-EINVAL);
+
+		veh = (struct vlan_ethhdr *)skb->data;
+		type = veh->h_vlan_encapsulated_proto;
+	}
+
 	skb_reset_mac_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
@@ -1962,9 +1977,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
+	int features = dev->features;
+
+	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+		features &= dev->vlan_features;
+
 	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+	        (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
 					      illegal_highdma(dev, skb))));
 }
 
@@ -1987,6 +2007,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_orphan_try(skb);
 
+		if (vlan_tx_tag_present(skb) &&
+		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				goto out;
+
+			skb->vlan_tci = 0;
+		}
+
 		if (netif_needs_gso(dev, skb)) {
 			if (unlikely(dev_gso_segment(skb)))
 				goto out_kfree_skb;
@@ -2048,6 +2077,7 @@ out_kfree_gso_skb:
 		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
+out:
 	return rc;
 }
 
@@ -2893,8 +2923,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!netdev_tstamp_prequeue)
 		net_timestamp_check(skb);
 
-	if (vlan_tx_tag_present(skb))
-		vlan_hwaccel_do_receive(skb);
+	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
+		return NET_RX_DROP;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
@@ -3232,6 +3262,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		unsigned long diffs;
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		diffs |= compare_ether_header(skb_mac_header(p),
 					      skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->same_flow = !diffs;
@@ -3291,6 +3322,7 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
 	__skb_pull(skb, skb_headlen(skb));
 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+	skb->vlan_tci = 0;
 
 	napi->skb = skb;
 }
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 3/7] bnx2: Update bnx2 to use new vlan accleration.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, Michael Chan
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

Make the bnx2 driver use the new vlan acceleration model.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Michael Chan <mchan@broadcom.com>
---
 drivers/net/bnx2.c |   75 +++------------------------------------------------
 drivers/net/bnx2.h |    4 ---
 2 files changed, 5 insertions(+), 74 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index ae894bc..2d306f4 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -37,9 +37,6 @@
 #include <linux/ethtool.h>
 #include <linux/mii.h>
 #include <linux/if_vlan.h>
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-#define BCM_VLAN 1
-#endif
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/checksum.h>
@@ -3087,8 +3084,6 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 		struct sw_bd *rx_buf, *next_rx_buf;
 		struct sk_buff *skb;
 		dma_addr_t dma_addr;
-		u16 vtag = 0;
-		int hw_vlan __maybe_unused = 0;
 
 		sw_ring_cons = RX_RING_IDX(sw_cons);
 		sw_ring_prod = RX_RING_IDX(sw_prod);
@@ -3168,23 +3163,8 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 			goto next_rx;
 
 		if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) &&
-		    !(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG)) {
-			vtag = rx_hdr->l2_fhdr_vlan_tag;
-#ifdef BCM_VLAN
-			if (bp->vlgrp)
-				hw_vlan = 1;
-			else
-#endif
-			{
-				struct vlan_ethhdr *ve = (struct vlan_ethhdr *)
-					__skb_push(skb, 4);
-
-				memmove(ve, skb->data + 4, ETH_ALEN * 2);
-				ve->h_vlan_proto = htons(ETH_P_8021Q);
-				ve->h_vlan_TCI = htons(vtag);
-				len += 4;
-			}
-		}
+		    !(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG))
+			__vlan_hwaccel_put_tag(skb, rx_hdr->l2_fhdr_vlan_tag);
 
 		skb->protocol = eth_type_trans(skb, bp->dev);
 
@@ -3211,14 +3191,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 			skb->rxhash = rx_hdr->l2_fhdr_hash;
 
 		skb_record_rx_queue(skb, bnapi - &bp->bnx2_napi[0]);
-
-#ifdef BCM_VLAN
-		if (hw_vlan)
-			vlan_gro_receive(&bnapi->napi, bp->vlgrp, vtag, skb);
-		else
-#endif
-			napi_gro_receive(&bnapi->napi, skb);
-
+		napi_gro_receive(&bnapi->napi, skb);
 		rx_pkt++;
 
 next_rx:
@@ -3533,13 +3506,6 @@ bnx2_set_rx_mode(struct net_device *dev)
 	rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS |
 				  BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG);
 	sort_mode = 1 | BNX2_RPM_SORT_USER0_BC_EN;
-#ifdef BCM_VLAN
-	if (!bp->vlgrp && (bp->flags & BNX2_FLAG_CAN_KEEP_VLAN))
-		rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG;
-#else
-	if (bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)
-		rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG;
-#endif
 	if (dev->flags & IFF_PROMISC) {
 		/* Promiscuous mode. */
 		rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS;
@@ -6365,29 +6331,6 @@ bnx2_tx_timeout(struct net_device *dev)
 	schedule_work(&bp->reset_task);
 }
 
-#ifdef BCM_VLAN
-/* Called with rtnl_lock */
-static void
-bnx2_vlan_rx_register(struct net_device *dev, struct vlan_group *vlgrp)
-{
-	struct bnx2 *bp = netdev_priv(dev);
-
-	if (netif_running(dev))
-		bnx2_netif_stop(bp, false);
-
-	bp->vlgrp = vlgrp;
-
-	if (!netif_running(dev))
-		return;
-
-	bnx2_set_rx_mode(dev);
-	if (bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)
-		bnx2_fw_sync(bp, BNX2_DRV_MSG_CODE_KEEP_VLAN_UPDATE, 0, 1);
-
-	bnx2_netif_start(bp, false);
-}
-#endif
-
 /* Called with netif_tx_lock.
  * bnx2_tx_int() runs without netif_tx_lock unless it needs to call
  * netif_wake_queue().
@@ -6428,12 +6371,11 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM;
 	}
 
-#ifdef BCM_VLAN
-	if (bp->vlgrp && vlan_tx_tag_present(skb)) {
+	if (vlan_tx_tag_present(skb)) {
 		vlan_tag_flags |=
 			(TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16));
 	}
-#endif
+
 	if ((mss = skb_shinfo(skb)->gso_size)) {
 		u32 tcp_opt_len;
 		struct iphdr *iph;
@@ -8318,9 +8260,6 @@ static const struct net_device_ops bnx2_netdev_ops = {
 	.ndo_set_mac_address	= bnx2_change_mac_addr,
 	.ndo_change_mtu		= bnx2_change_mtu,
 	.ndo_tx_timeout		= bnx2_tx_timeout,
-#ifdef BCM_VLAN
-	.ndo_vlan_rx_register	= bnx2_vlan_rx_register,
-#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= poll_bnx2,
 #endif
@@ -8328,9 +8267,7 @@ static const struct net_device_ops bnx2_netdev_ops = {
 
 static void inline vlan_features_add(struct net_device *dev, unsigned long flags)
 {
-#ifdef BCM_VLAN
 	dev->vlan_features |= flags;
-#endif
 }
 
 static int __devinit
@@ -8379,9 +8316,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev->features |= NETIF_F_IPV6_CSUM;
 		vlan_features_add(dev, NETIF_F_IPV6_CSUM);
 	}
-#ifdef BCM_VLAN
 	dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
-#endif
 	dev->features |= NETIF_F_TSO | NETIF_F_TSO_ECN;
 	vlan_features_add(dev, NETIF_F_TSO | NETIF_F_TSO_ECN);
 	if (CHIP_NUM(bp) == CHIP_NUM_5709) {
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index efdfbc2..4f44db6 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6742,10 +6742,6 @@ struct bnx2 {
 
 	struct bnx2_napi	bnx2_napi[BNX2_MAX_MSIX_VEC];
 
-#ifdef BCM_VLAN
-	struct			vlan_group *vlgrp;
-#endif
-
 	u32			rx_buf_use_size;	/* useable size */
 	u32			rx_buf_size;		/* with alignment */
 	u32			rx_copy_thresh;
-- 
1.7.0.4


^ permalink raw reply related

* [RFC PATCH 0/7] Move vlan acceleration into networking core.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev

Hardware vlan acceleration behaves fairly differently from other types of
offloading, which limits its usefulness.  This patch series aims to bring
it more in line with other common forms of acceleration, such as checksum
offloading and TSO.  In doing this it eliminates common driver bugs, increases
flexibility, and improves performance, while reducing the number of lines of
code.

The first four patches can be applied immediately, while the last three need
to wait until all drivers that support vlan acceleration are updated.  If
people agree that this patch set makes sense I will go ahead and switch over
the dozen or so drivers that would need to change.

Jesse Gross (7):
  ebtables: Allow filtering of hardware accelerated vlan frames.
  vlan: Centralize handling of hardware acceleration.
  bnx2: Update bnx2 to use new vlan accleration.
  ixgbe: Update ixgbe to use new vlan accleration.
  lro: Remove explicit vlan support.
  bonding: Update bonding for new vlan model.
  vlan: Remove accleration legacy functions.

 drivers/net/bnx2.c              |   75 ++--------------------
 drivers/net/bnx2.h              |    4 -
 drivers/net/bonding/bond_alb.c  |    8 +--
 drivers/net/bonding/bond_ipv6.c |    5 +-
 drivers/net/bonding/bond_main.c |  134 ++++++++-------------------------------
 drivers/net/bonding/bonding.h   |    1 -
 drivers/net/ixgbe/ixgbe.h       |    4 +-
 drivers/net/ixgbe/ixgbe_main.c  |   60 +++++-------------
 include/linux/if_vlan.h         |   87 ++++++++------------------
 include/linux/inet_lro.h        |   20 ------
 include/linux/netdevice.h       |   20 +++---
 net/8021q/vlan.c                |  100 +++++++-----------------------
 net/8021q/vlan.h                |   17 -----
 net/8021q/vlan_core.c           |  132 ++++++--------------------------------
 net/8021q/vlan_dev.c            |    2 +-
 net/bridge/br_netfilter.c       |   16 +++--
 net/bridge/netfilter/ebt_vlan.c |   42 +++++++-----
 net/bridge/netfilter/ebtables.c |   15 +++-
 net/core/dev.c                  |   42 +++++++++++--
 net/ipv4/inet_lro.c             |   74 +++------------------
 20 files changed, 225 insertions(+), 633 deletions(-)


^ permalink raw reply

* [RFC PATCH 1/7] ebtables: Allow filtering of hardware accelerated vlan frames.
From: Jesse Gross @ 2010-10-13 20:02 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1287000177-7126-1-git-send-email-jesse@nicira.com>

An upcoming commit will allow packets with hardware vlan acceleration
information to be passed through more parts of the network stack, including
packets trunked through the bridge.  This adds support for matching and
filtering those packets through ebtables.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/bridge/br_netfilter.c       |   16 +++++++++-------
 net/bridge/netfilter/ebt_vlan.c |   38 +++++++++++++++++++++++---------------
 net/bridge/netfilter/ebtables.c |   15 +++++++++++----
 3 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 7f9ce96..d6a4fec 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -64,22 +64,24 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
 
 static inline __be16 vlan_proto(const struct sk_buff *skb)
 {
-	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+	if (skb->protocol == htons(ETH_P_8021Q))
+		return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+	else if (vlan_tx_tag_present(skb))
+		return skb->protocol;
+	else
+		return 0;
 }
 
 #define IS_VLAN_IP(skb) \
-	(skb->protocol == htons(ETH_P_8021Q) && \
-	 vlan_proto(skb) == htons(ETH_P_IP) && 	\
+	(vlan_proto(skb) == htons(ETH_P_IP) && 	\
 	 brnf_filter_vlan_tagged)
 
 #define IS_VLAN_IPV6(skb) \
-	(skb->protocol == htons(ETH_P_8021Q) && \
-	 vlan_proto(skb) == htons(ETH_P_IPV6) &&\
+	(vlan_proto(skb) == htons(ETH_P_IPV6) &&\
 	 brnf_filter_vlan_tagged)
 
 #define IS_VLAN_ARP(skb) \
-	(skb->protocol == htons(ETH_P_8021Q) &&	\
-	 vlan_proto(skb) == htons(ETH_P_ARP) &&	\
+	(vlan_proto(skb) == htons(ETH_P_ARP) &&	\
 	 brnf_filter_vlan_tagged)
 
 static inline __be16 pppoe_proto(const struct sk_buff *skb)
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 87b53b3..a39d92d 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -39,8 +39,6 @@ static bool
 ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_vlan_info *info = par->matchinfo;
-	const struct vlan_hdr *fp;
-	struct vlan_hdr _frame;
 
 	unsigned short TCI;	/* Whole TCI, given from parsed frame */
 	unsigned short id;	/* VLAN ID, given from frame TCI */
@@ -48,21 +46,31 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	/* VLAN encapsulated Type/Length field, given from orig frame */
 	__be16 encap;
 
-	fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
-	if (fp == NULL)
-		return false;
-
-	/* Tag Control Information (TCI) consists of the following elements:
-	 * - User_priority. The user_priority field is three bits in length,
-	 * interpreted as a binary number.
-	 * - Canonical Format Indicator (CFI). The Canonical Format Indicator
-	 * (CFI) is a single bit flag value. Currently ignored.
-	 * - VLAN Identifier (VID). The VID is encoded as
-	 * an unsigned binary number. */
-	TCI = ntohs(fp->h_vlan_TCI);
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		const struct vlan_hdr *fp;
+		struct vlan_hdr _frame;
+
+		fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
+		if (fp == NULL)
+			return false;
+
+		/* Tag Control Information (TCI) consists of the following elements:
+		 * - User_priority. The user_priority field is three bits in length,
+		 * interpreted as a binary number.
+		 * - Canonical Format Indicator (CFI). The Canonical Format Indicator
+		 * (CFI) is a single bit flag value. Currently ignored.
+		 * - VLAN Identifier (VID). The VID is encoded as
+		 * an unsigned binary number. */
+		TCI = ntohs(fp->h_vlan_TCI);
+
+		encap = fp->h_vlan_encapsulated_proto;
+	} else {
+		TCI = vlan_tx_tag_get(skb);
+		encap = skb->protocol;
+	}
+
 	id = TCI & VLAN_VID_MASK;
 	prio = (TCI >> 13) & 0x7;
-	encap = fp->h_vlan_encapsulated_proto;
 
 	/* Checking VLAN Identifier (VID) */
 	if (GET_BITMASK(EBT_VLAN_ID))
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcc102e..a1dcf83 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -124,16 +124,23 @@ ebt_dev_check(const char *entry, const struct net_device *device)
 #define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
 /* process standard matches */
 static inline int
-ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
+ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
                 const struct net_device *in, const struct net_device *out)
 {
+	const struct ethhdr *h = eth_hdr(skb);
+	__be16 ethproto;
 	int verdict, i;
 
+	if (vlan_tx_tag_present(skb))
+		ethproto = htons(ETH_P_8021Q);
+	else
+		ethproto = h->h_proto;
+
 	if (e->bitmask & EBT_802_3) {
-		if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO))
+		if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
 			return 1;
 	} else if (!(e->bitmask & EBT_NOPROTO) &&
-	   FWINV2(e->ethproto != h->h_proto, EBT_IPROTO))
+	   FWINV2(e->ethproto != ethproto, EBT_IPROTO))
 		return 1;
 
 	if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
@@ -213,7 +220,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	base = private->entries;
 	i = 0;
 	while (i < nentries) {
-		if (ebt_basic_match(point, eth_hdr(skb), in, out))
+		if (ebt_basic_match(point, skb, in, out))
 			goto letscontinue;
 
 		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
-- 
1.7.0.4


^ permalink raw reply related

* Re: [PATCH -next] sundance: Add initial ethtool stats support
From: Eric Dumazet @ 2010-10-13 19:44 UTC (permalink / raw)
  To: Denis Kirjanov; +Cc: netdev, David Miller, Ben Hutchings, Jeff Garzik
In-Reply-To: <4CB59049.6060405@kernel.org>

Le mercredi 13 octobre 2010 à 14:56 +0400, Denis Kirjanov a écrit :
> On 10/13/2010 02:36 PM, Eric Dumazet wrote:
> > Le mercredi 13 octobre 2010 à 14:28 +0400, Denis Kirjanov a écrit :
> > 
> >> +		u64 tx_defered;
> >> +		u64 tx_defered_excessive;
> > 
> [PATCH -next v5] sundance: Add ethtool stats support
> 
> Add ethtool stats support.
> 
> Signed-off-by: Denis Kirjanov <dkirjanov@kernel.org>
> ---

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>



^ permalink raw reply

* Re: [Bugme-new] [Bug 20292] New: unable to handle kernel NULL pointer dereference in skb_dequeue
From: Andrew Morton @ 2010-10-13 19:33 UTC (permalink / raw)
  To: netdev; +Cc: bugzilla-daemon, bugme-daemon, Michal Ostrowski, gvs
In-Reply-To: <bug-20292-10286@https.bugzilla.kernel.org/>


(switched to email.  Please respond via emailed reply-to-all, not via the
bugzilla web interface).

On Wed, 13 Oct 2010 19:24:53 GMT
bugzilla-daemon@bugzilla.kernel.org wrote:

> https://bugzilla.kernel.org/show_bug.cgi?id=20292
> 
>            Summary: unable to handle kernel NULL pointer dereference in
>                     skb_dequeue
>            Product: Networking
>            Version: 2.5
>     Kernel Version: 2.6.36-rc7

Thanks.  Do you know if this is a regression?  Did it work OK on 2.6.35?

>           Platform: All
>         OS/Version: Linux
>               Tree: Mainline
>             Status: NEW
>           Severity: blocking
>           Priority: P1
>          Component: Other
>         AssignedTo: acme@ghostprotocols.net
>         ReportedBy: gvs@zemos.net
>         Regression: No
> 
> 
> Created an attachment (id=33512)
>  --> (https://bugzilla.kernel.org/attachment.cgi?id=33512)
> Kernel config (gzipped)
> 
> I was trying to get pppoe working (the 'pon' command seemed to hang) and then
> this happened:
> 
> Oct 13 20:57:07 bes kernel: BUG: unable to handle kernel NULL pointer
> dereference at (null)
> Oct 13 20:57:07 bes kernel: IP: [<c1241674>] skb_dequeue+0x24/0x40
> Oct 13 20:57:07 bes kernel: *pde = 00000000
> Oct 13 20:57:07 bes kernel: Oops: 0002 [#1]
> Oct 13 20:57:07 bes kernel: last sysfs file:
> /sys/devices/virtual/net/ppp0/uevent
> Oct 13 20:57:07 bes kernel: Modules linked in: xt_TCPMSS xt_tcpmss xt_tcpudp
> iptable_mangle pppoe pppox ppp_generic slhc cpufreq_conservative
> cpufreq_userspace cpufreq_powersave fuse ipt_MASQUERADE iptable_nat nf_nat
> nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter ip_tables
> x_tables loop sd_mod usb_storage usblp i2c_viapro uhci_hcd fan ehci_hcd button
> i2c_core
> Oct 13 20:57:07 bes kernel:
> Oct 13 20:57:07 bes kernel: Pid: 5495, comm: pppd Not tainted 2.6.36-rc7 #12
> VX800 /VX800
> Oct 13 20:57:07 bes kernel: EIP: 0060:[<c1241674>] EFLAGS: 00010046 CPU: 0
> Oct 13 20:57:07 bes kernel: EIP is at skb_dequeue+0x24/0x40
> Oct 13 20:57:07 bes kernel: EAX: 00000000 EBX: 00000202 ECX: f6ba4cc0 EDX:
> 00000000
> Oct 13 20:57:07 bes kernel: ESI: f6c93bc0 EDI: f6adfee4 EBP: f6ade000 ESP:
> f6adfe68
> Oct 13 20:57:07 bes kernel: DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
> Oct 13 20:57:07 bes kernel: Process pppd (pid: 5495, ti=f6ade000 task=f70f2200
> task.ti=f6ade000)
> Oct 13 20:57:07 bes kernel: Stack:
> Oct 13 20:57:07 bes kernel: f68836c4 c1243a94 f68836c0 f866825b 00000000
> f72e4a00 f72e4a00 f86761cb
> Oct 13 20:57:07 bes kernel: <0> f72e4a00 f8683c97 c143ea14 ffffffea c12ba92d
> 00000286 f68f7d7c f6adfee4
> Oct 13 20:57:07 bes kernel: <0> f68f7bfc 00000286 00000000 00000000 00000000
> f68f7b9c f6adff68 f6adff64
> Oct 13 20:57:07 bes kernel: Call Trace:
> Oct 13 20:57:07 bes kernel: [<c1243a94>] ? skb_queue_purge+0x14/0x30
> Oct 13 20:57:07 bes kernel: [<f866825b>] ? ppp_destroy_channel+0x1b/0x50
> [ppp_generic]
> Oct 13 20:57:07 bes kernel: [<f86761cb>] ? pppox_unbind_sock+0x1b/0x24 [pppox]
> Oct 13 20:57:07 bes kernel: [<f8683c97>] ? pppoe_connect+0x87/0x4b0 [pppoe]
> Oct 13 20:57:07 bes kernel: [<c12ba92d>] ? schedule_timeout+0xfd/0x150
> Oct 13 20:57:07 bes kernel: [<c123d4d4>] ? sys_connect+0x84/0xd0
> Oct 13 20:57:07 bes kernel: [<c10ba730>] ? do_lock_file_wait+0x30/0xf0
> Oct 13 20:57:07 bes kernel: [<c10ba9f9>] ? fcntl_setlk+0x59/0x1b0
> Oct 13 20:57:07 bes kernel: [<c123e5a4>] ? sys_socketcall+0x294/0x2c0
> Oct 13 20:57:07 bes kernel: [<c1002a10>] ? sysenter_do_call+0x12/0x26
> Oct 13 20:57:07 bes kernel: Code: 81 a8 00 00 00 5b c3 53 9c 5b fa 8b 08 39 c8
> 74 25 85 c9 74 1b 83 68 08 01 8b 11 8b 41 04 c7 01 00 00 00 00 c7 41 04 00 00
> 00 00 <89> 10 89 42 04 53 9d 89 c8 5b c3 31 c9 eb f6 8d b6 00 00 00 00
> Oct 13 20:57:07 bes kernel: EIP: [<c1241674>] skb_dequeue+0x24/0x40 SS:ESP
> 0068:f6adfe68
> Oct 13 20:57:07 bes kernel: CR2: 0000000000000000
> Oct 13 20:57:07 bes kernel: ---[ end trace 4914adf67d1ace25 ]---
> 
> Oct 13 20:57:30 bes kernel: BUG: unable to handle kernel NULL pointer
> dereference at (null)
> Oct 13 20:57:30 bes kernel: IP: [<c1241674>] skb_dequeue+0x24/0x40
> Oct 13 20:57:30 bes kernel: *pde = 00000000
> Oct 13 20:57:30 bes kernel: Oops: 0002 [#2]
> Oct 13 20:57:30 bes kernel: last sysfs file:
> /sys/devices/virtual/net/ppp0/uevent
> Oct 13 20:57:30 bes kernel: Modules linked in: xt_TCPMSS xt_tcpmss xt_tcpudp
> iptable_mangle pppoe pppox ppp_generic slhc cpufreq_conservative
> cpufreq_userspace cpufreq_powersave fuse ipt_MASQUERADE iptable_nat nf_nat
> nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter ip_tables
> x_tables loop sd_mod usb_storage usblp i2c_viapro uhci_hcd fan ehci_hcd button
> i2c_core
> Oct 13 20:57:30 bes kernel:
> Oct 13 20:57:30 bes kernel: Pid: 5445, comm: pppd Tainted: G      D    
> 2.6.36-rc7 #12 VX800 /VX800
> Oct 13 20:57:30 bes kernel: EIP: 0060:[<c1241674>] EFLAGS: 00010046 CPU: 0
> Oct 13 20:57:30 bes kernel: EIP is at skb_dequeue+0x24/0x40
> Oct 13 20:57:30 bes kernel: EAX: 00000000 EBX: 00000202 ECX: f6ae7200 EDX:
> 00000000
> Oct 13 20:57:30 bes kernel: ESI: f6c99080 EDI: f7161ee4 EBP: f7160000 ESP:
> f7161e68
> Oct 13 20:57:30 bes kernel: DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
> Oct 13 20:57:30 bes kernel: Process pppd (pid: 5445, ti=f7160000 task=f7107280
> task.ti=f7160000)
> Oct 13 20:57:30 bes kernel: Stack:
> Oct 13 20:57:30 bes kernel: f6883344 c1243a94 f6883340 f866825b 00000000
> f72e4c00 f72e4c00 f86761cb
> Oct 13 20:57:30 bes kernel: <0> f72e4c00 f8683c97 c143ea14 ffffffea c12ba92d
> 00000286 f68f73bc f7161ee4
> Oct 13 20:57:30 bes kernel: <0> f68f753c 00000286 00000000 00000000 00000000
> f68f759c f7161f68 f7161f64
> Oct 13 20:57:30 bes kernel: [<c1243a94>] ? skb_queue_purge+0x14/0x30
> Oct 13 20:57:30 bes kernel: [<f866825b>] ? ppp_destroy_channel+0x1b/0x50
> [ppp_generic]
> Oct 13 20:57:30 bes kernel: [<f86761cb>] ? pppox_unbind_sock+0x1b/0x24 [pppox]
> Oct 13 20:57:30 bes kernel: [<f8683c97>] ? pppoe_connect+0x87/0x4b0 [pppoe]
> Oct 13 20:57:30 bes kernel: [<c12ba92d>] ? schedule_timeout+0xfd/0x150
> Oct 13 20:57:30 bes kernel: [<c123d4d4>] ? sys_connect+0x84/0xd0
> Oct 13 20:57:30 bes kernel: [<c10ba730>] ? do_lock_file_wait+0x30/0xf0
> Oct 13 20:57:30 bes kernel: [<c10ba9f9>] ? fcntl_setlk+0x59/0x1b0
> Oct 13 20:57:30 bes kernel: [<c123e5a4>] ? sys_socketcall+0x294/0x2c0
> Oct 13 20:57:30 bes kernel: [<c1002a10>] ? sysenter_do_call+0x12/0x26
> Oct 13 20:57:30 bes kernel: Code: 81 a8 00 00 00 5b c3 53 9c 5b fa 8b 08 39 c8
> 74 25 85 c9 74 1b 83 68 08 01 8b 11 8b 41 04 c7 01 00 00 00 00 c7 41 04 00 00
> 00 00 <89> 10 89 42 04 53 9d 89 c8 5b c3 31 c9 eb f6 8d b6 00 00 00 00
> Oct 13 20:57:30 bes kernel: EIP: [<c1241674>] skb_dequeue+0x24/0x40 SS:ESP
> 0068:f7161e68
> Oct 13 20:57:30 bes kernel: CR2: 0000000000000000
> Oct 13 20:57:30 bes kernel: ---[ end trace 4914adf67d1ace26 ]---
> Oct 13 20:57:30 bes kernel: ------------[ cut here ]------------
> Oct 13 20:57:30 bes kernel: WARNING: at kernel/softirq.c:143
> local_bh_enable+0x60/0x90()
> Oct 13 20:57:30 bes kernel: Hardware name: VX800
> Oct 13 20:57:30 bes kernel: Modules linked in: xt_TCPMSS xt_tcpmss xt_tcpudp
> iptable_mangle pppoe pppox ppp_generic slhc cpufreq_conservative
> cpufreq_userspace cpufreq_powersave fuse ipt_MASQUERADE iptable_nat nf_nat
> nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter ip_tables
> x_tables loop sd_mod usb_storage usblp i2c_viapro uhci_hcd fan ehci_hcd button
> i2c_core
> Oct 13 20:57:30 bes kernel: Pid: 5445, comm: pppd Tainted: G      D    
> 2.6.36-rc7 #12
> Oct 13 20:57:30 bes kernel: Call Trace:
> Oct 13 20:57:30 bes kernel: [<c1028640>] ? local_bh_enable+0x60/0x90
> Oct 13 20:57:30 bes kernel: [<c1028640>] ? local_bh_enable+0x60/0x90
> Oct 13 20:57:30 bes kernel: [<c1023a1e>] ? warn_slowpath_common+0x7e/0xc0
> Oct 13 20:57:30 bes kernel: [<c1028640>] ? local_bh_enable+0x60/0x90
> Oct 13 20:57:30 bes kernel: [<c1023a7b>] ? warn_slowpath_null+0x1b/0x20
> Oct 13 20:57:30 bes kernel: [<c1028640>] ? local_bh_enable+0x60/0x90
> Oct 13 20:57:30 bes kernel: [<c12a5bb5>] ? unix_release_sock+0x45/0x1f0
> Oct 13 20:57:30 bes kernel: [<c123dc4a>] ? sock_release+0x1a/0x80
> Oct 13 20:57:30 bes kernel: [<c123dcbf>] ? sock_close+0xf/0x30
> Oct 13 20:57:30 bes kernel: [<c1089cd9>] ? fput+0xb9/0x200
> Oct 13 20:57:30 bes kernel: [<c1086f7e>] ? filp_close+0x3e/0x70
> Oct 13 20:57:30 bes kernel: [<c10254b2>] ? put_files_struct+0x62/0xb0
> Oct 13 20:57:30 bes kernel: [<c1026c47>] ? do_exit+0x567/0x630
> Oct 13 20:57:30 bes kernel: [<c12ba037>] ? printk+0x17/0x20
> Oct 13 20:57:30 bes kernel: [<c1005477>] ? oops_end+0x87/0x90
> Oct 13 20:57:30 bes kernel: [<c12ba037>] ? printk+0x17/0x20
> Oct 13 20:57:30 bes kernel: [<c10194a2>] ? no_context+0xc2/0x160
> Oct 13 20:57:30 bes kernel: [<c10195a5>] ? __bad_area_nosemaphore+0x65/0x180
> Oct 13 20:57:30 bes kernel: [<c1249a3b>] ? dev_txq_stats_fold+0x8b/0xf0
> Oct 13 20:57:30 bes kernel: [<c117dc80>] ? __nla_reserve+0x40/0x60
> Oct 13 20:57:30 bes kernel: [<c1255c33>] ? rtnl_fill_ifinfo+0x413/0x8d0
> Oct 13 20:57:30 bes kernel: [<c101971a>] ? bad_area+0x3a/0x50
> Oct 13 20:57:30 bes kernel: [<c1019b8e>] ? do_page_fault+0x33e/0x390
> Oct 13 20:57:30 bes kernel: [<c101e6ab>] ? wakeup_preempt_entity+0x3b/0xa0
> Oct 13 20:57:30 bes kernel: [<c101e79a>] ? check_preempt_wakeup+0x8a/0xe0
> Oct 13 20:57:30 bes kernel: [<c1097675>] ? pollwake+0x65/0x80
> Oct 13 20:57:30 bes kernel: [<c1021170>] ? default_wake_function+0x0/0x10
> Oct 13 20:57:30 bes kernel: [<c1019850>] ? do_page_fault+0x0/0x390
> Oct 13 20:57:30 bes kernel: [<c12bbcf0>] ? error_code+0x58/0x60
> Oct 13 20:57:30 bes kernel: [<c1019850>] ? do_page_fault+0x0/0x390
> Oct 13 20:57:30 bes kernel: [<c1241674>] ? skb_dequeue+0x24/0x40
> Oct 13 20:57:30 bes kernel: [<c1243a94>] ? skb_queue_purge+0x14/0x30
> Oct 13 20:57:30 bes kernel: [<f866825b>] ? ppp_destroy_channel+0x1b/0x50
> [ppp_generic]
> Oct 13 20:57:30 bes kernel: [<f86761cb>] ? pppox_unbind_sock+0x1b/0x24 [pppox]
> Oct 13 20:57:30 bes kernel: [<f8683c97>] ? pppoe_connect+0x87/0x4b0 [pppoe]
> Oct 13 20:57:30 bes kernel: [<c12ba92d>] ? schedule_timeout+0xfd/0x150
> Oct 13 20:57:30 bes kernel: [<c123d4d4>] ? sys_connect+0x84/0xd0
> Oct 13 20:57:30 bes kernel: [<c10ba730>] ? do_lock_file_wait+0x30/0xf0
> Oct 13 20:57:30 bes kernel: [<c10ba9f9>] ? fcntl_setlk+0x59/0x1b0
> Oct 13 20:57:30 bes kernel: [<c123e5a4>] ? sys_socketcall+0x294/0x2c0
> Oct 13 20:57:30 bes kernel: [<c1002a10>] ? sysenter_do_call+0x12/0x26
> Oct 13 20:57:30 bes kernel: ---[ end trace 4914adf67d1ace27 ]---
> 
> Some other information:
> /proc/version:
> Linux version 2.6.36-rc7 (root@bes) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #12
> Sun Oct 10 21:12:58 CEST 2010
> 
> ver_linux:
> Linux bes 2.6.36-rc7 #12 Sun Oct 10 21:12:58 CEST 2010 i686 GNU/Linux
> 
> Gnu C                  4.4.5
> Gnu make               3.81
> binutils               2.20.1
> util-linux             2.17.2
> mount                  support
> module-init-tools      3.12
> e2fsprogs              1.41.12
> PPP                    2.4.5
> Linux C Library        2.11.2
> Dynamic linker (ldd)   2.11.2
> Procps                 3.2.8
> Net-tools              1.60
> Console-tools          0.2.3
> Sh-utils               8.5
> Modules Loaded         cpufreq_conservative cpufreq_userspace cpufreq_powersave
> fuse ppp_generic slhc ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4
> nf_defrag_ipv4 xt_state nf_conntrack iptable_filter ip_tables x_tables loop
> sd_mod usb_storage usblp i2c_viapro uhci_hcd fan i2c_core ehci_hcd button
> 
> cpuinfo:
> processor       : 0
> vendor_id       : CentaurHauls
> cpu family      : 6
> model           : 13
> model name      : VIA Eden Processor 1600MHz
> stepping        : 0
> cpu MHz         : 800.000
> cache size      : 128 KB
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 1
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge cmov pat
> clflush acpi mmx fxsr sse sse2 tm nx pni est tm2 xtpr rng rng_en ace ace_en
> ace2 ace2_en phe phe_en pmm pmm_en
> bogomips        : 1599.76
> clflush size    : 64
> cache_alignment : 64
> address sizes   : 36 bits physical, 32 bits virtual
> power management:
> 
> If anything else is needed I'd be happy to assist.
> 


^ permalink raw reply

* [PATCH] via-velocity: forced 1000 Mbps mode support.
From: Francois Romieu @ 2010-10-13 19:26 UTC (permalink / raw)
  To: David Lv
  Cc: netdev, DavidLv, ShirleyHu, AndersMa, David S. Miller,
	Seguier Regis
In-Reply-To: <AANLkTinn8UQfOoPgf=13jfvD5Ld6p4GhT7jpyYmXKT9X@mail.gmail.com>

Full duplex only. Half duplex 1000 Mbps is not supported.

Signed-off-by: David Lv <DavidLv@viatech.com.cn>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Seguier Regis <rseguier@e-teleport.net>

---
 David (Lv), your mail agent apparently wrapped some of the patch lines that
 exceed 80 columns, which made the patch unusable. I fixed it.

 drivers/net/via-velocity.c |   82 ++++++++++++++++++++++++++++++++++++++++---
 drivers/net/via-velocity.h |    5 ++-
 2 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index f534123..b21a3d9 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -312,13 +312,14 @@ VELOCITY_PARAM(flow_control, "Enable flow control ability");

 #define MED_LNK_DEF 0
 #define MED_LNK_MIN 0
-#define MED_LNK_MAX 4
+#define MED_LNK_MAX 5
 /* speed_duplex[] is used for setting the speed and duplex mode of NIC.
    0: indicate autonegotiation for both speed and duplex mode
    1: indicate 100Mbps half duplex mode
    2: indicate 100Mbps full duplex mode
    3: indicate 10Mbps half duplex mode
    4: indicate 10Mbps full duplex mode
+   5: indicate 1000Mbps full duplex mode

    Note:
    if EEPROM have been set to the force mode, this option is ignored
@@ -617,6 +618,9 @@ static u32 velocity_get_opt_media_mode(struct velocity_info *vptr)
 	case SPD_DPX_10_HALF:
 		status = VELOCITY_SPEED_10;
 		break;
+	case SPD_DPX_1000_FULL:
+		status = VELOCITY_SPEED_1000 | VELOCITY_DUPLEX_FULL;
+		break;
 	}
 	vptr->mii_status = status;
 	return status;
@@ -922,6 +926,7 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 		/* enable AUTO-NEGO mode */
 		mii_set_auto_on(vptr);
 	} else {
+		u16 CTRL1000;
 		u16 ANAR;
 		u8 CHIPGCR;

@@ -936,7 +941,11 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 		BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);

 		CHIPGCR = readb(&regs->CHIPGCR);
-		CHIPGCR &= ~CHIPGCR_FCGMII;
+
+		if (mii_status & VELOCITY_SPEED_1000)
+			CHIPGCR |= CHIPGCR_FCGMII;
+		else
+			CHIPGCR &= ~CHIPGCR_FCGMII;

 		if (mii_status & VELOCITY_DUPLEX_FULL) {
 			CHIPGCR |= CHIPGCR_FCFDX;
@@ -952,7 +961,13 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 				BYTE_REG_BITS_ON(TCR_TB2BDIS, &regs->TCR);
 		}

-		MII_REG_BITS_OFF(ADVERTISE_1000FULL | ADVERTISE_1000HALF, MII_CTRL1000, vptr->mac_regs);
+		velocity_mii_read(vptr->mac_regs, MII_CTRL1000, &CTRL1000);
+		CTRL1000 &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
+		if ((mii_status & VELOCITY_SPEED_1000) &&
+		    (mii_status & VELOCITY_DUPLEX_FULL)) {
+			CTRL1000 |= ADVERTISE_1000FULL;
+		}
+		velocity_mii_write(vptr->mac_regs, MII_CTRL1000, CTRL1000);

 		if (!(mii_status & VELOCITY_DUPLEX_FULL) && (mii_status & VELOCITY_SPEED_10))
 			BYTE_REG_BITS_OFF(TESTCFG_HBDIS, &regs->TESTCFG);
@@ -967,7 +982,7 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 				ANAR |= ADVERTISE_100FULL;
 			else
 				ANAR |= ADVERTISE_100HALF;
-		} else {
+		} else if (mii_status & VELOCITY_SPEED_10) {
 			if (mii_status & VELOCITY_DUPLEX_FULL)
 				ANAR |= ADVERTISE_10FULL;
 			else
@@ -1013,6 +1028,9 @@ static void velocity_print_link_status(struct velocity_info *vptr)
 	} else {
 		VELOCITY_PRT(MSG_LEVEL_INFO, KERN_NOTICE "%s: Link forced", vptr->dev->name);
 		switch (vptr->options.spd_dpx) {
+		case SPD_DPX_1000_FULL:
+			VELOCITY_PRT(MSG_LEVEL_INFO, " speed 1000M bps full duplex\n");
+			break;
 		case SPD_DPX_100_HALF:
 			VELOCITY_PRT(MSG_LEVEL_INFO, " speed 100M bps half duplex\n");
 			break;
@@ -3170,6 +3188,37 @@ static int velocity_get_settings(struct net_device *dev, struct ethtool_cmd *cmd
 			SUPPORTED_100baseT_Full |
 			SUPPORTED_1000baseT_Half |
 			SUPPORTED_1000baseT_Full;
+
+	cmd->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
+	if (vptr->options.spd_dpx == SPD_DPX_AUTO) {
+		cmd->advertising |=
+			ADVERTISED_10baseT_Half |
+			ADVERTISED_10baseT_Full |
+			ADVERTISED_100baseT_Half |
+			ADVERTISED_100baseT_Full |
+			ADVERTISED_1000baseT_Half |
+			ADVERTISED_1000baseT_Full;
+	} else {
+		switch (vptr->options.spd_dpx) {
+		case SPD_DPX_1000_FULL:
+			cmd->advertising |= ADVERTISED_1000baseT_Full;
+			break;
+		case SPD_DPX_100_HALF:
+			cmd->advertising |= ADVERTISED_100baseT_Half;
+			break;
+		case SPD_DPX_100_FULL:
+			cmd->advertising |= ADVERTISED_100baseT_Full;
+			break;
+		case SPD_DPX_10_HALF:
+			cmd->advertising |= ADVERTISED_10baseT_Half;
+			break;
+		case SPD_DPX_10_FULL:
+			cmd->advertising |= ADVERTISED_10baseT_Full;
+			break;
+		default:
+			break;
+		}
+	}
 	if (status & VELOCITY_SPEED_1000)
 		cmd->speed = SPEED_1000;
 	else if (status & VELOCITY_SPEED_100)
@@ -3200,14 +3249,35 @@ static int velocity_set_settings(struct net_device *dev, struct ethtool_cmd *cmd
 	curr_status &= (~VELOCITY_LINK_FAIL);

 	new_status |= ((cmd->autoneg) ? VELOCITY_AUTONEG_ENABLE : 0);
+	new_status |= ((cmd->speed == SPEED_1000) ? VELOCITY_SPEED_1000 : 0);
 	new_status |= ((cmd->speed == SPEED_100) ? VELOCITY_SPEED_100 : 0);
 	new_status |= ((cmd->speed == SPEED_10) ? VELOCITY_SPEED_10 : 0);
 	new_status |= ((cmd->duplex == DUPLEX_FULL) ? VELOCITY_DUPLEX_FULL : 0);

-	if ((new_status & VELOCITY_AUTONEG_ENABLE) && (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE)))
+	if ((new_status & VELOCITY_AUTONEG_ENABLE) &&
+	    (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE))) {
 		ret = -EINVAL;
-	else
+	} else {
+		enum speed_opt spd_dpx;
+
+		if (new_status & VELOCITY_AUTONEG_ENABLE)
+			spd_dpx = SPD_DPX_AUTO;
+		else if ((new_status & VELOCITY_SPEED_1000) &&
+			 (new_status & VELOCITY_DUPLEX_FULL)) {
+			spd_dpx = SPD_DPX_1000_FULL;
+		} else if (new_status & VELOCITY_SPEED_100)
+			spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+				SPD_DPX_100_FULL : SPD_DPX_100_HALF;
+		else if (new_status & VELOCITY_SPEED_10)
+			spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+				SPD_DPX_10_FULL : SPD_DPX_10_HALF;
+		else
+			return -EOPNOTSUPP;
+
+		vptr->options.spd_dpx = spd_dpx;
+
 		velocity_set_media_mode(vptr, new_status);
+	}

 	return ret;
 }
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index f7b33ae..df55f6c 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -848,7 +848,7 @@ enum  velocity_owner {
  *	Bits in CHIPGCR register
  */

-#define CHIPGCR_FCGMII      0x80
+#define CHIPGCR_FCGMII      0x80	/* enable GMII mode */
 #define CHIPGCR_FCFDX       0x40
 #define CHIPGCR_FCRESV      0x20
 #define CHIPGCR_FCMODE      0x10
@@ -1390,7 +1390,8 @@ enum speed_opt {
 	SPD_DPX_100_HALF = 1,
 	SPD_DPX_100_FULL = 2,
 	SPD_DPX_10_HALF = 3,
-	SPD_DPX_10_FULL = 4
+	SPD_DPX_10_FULL = 4,
+	SPD_DPX_1000_FULL = 5
 };

 enum velocity_init_type {
--
1.7.2.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox