Netdev List
 help / color / mirror / Atom feed
* [net-next 5/5] igb: Update igb version to 4.1.2
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1354860695-27039-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 60a2380..06513d9 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -60,8 +60,8 @@
 #include "igb.h"
 
 #define MAJ 4
-#define MIN 0
-#define BUILD 17
+#define MIN 1
+#define BUILD 2
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
 char igb_driver_name[] = "igb";
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 4/5] igb: release already assigned MSI-X interrupts if setup fails
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Stefan Assmann, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1354860695-27039-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Stefan Assmann <sassmann@kpanic.de>

During MSI-X setup the system might run out of vectors. If this happens the
already assigned vectors for this NIC should be freed before trying the
disable MSI-X. Failing to do so results in the following oops.

kernel BUG at drivers/pci/msi.c:341!
[...]
Call Trace:
 [<ffffffff8128f39d>] pci_disable_msix+0x3d/0x60
 [<ffffffffa037d1ce>] igb_reset_interrupt_capability+0x27/0x5c [igb]
 [<ffffffffa037d229>] igb_clear_interrupt_scheme+0x26/0x2d [igb]
 [<ffffffffa0384268>] igb_request_irq+0x73/0x297 [igb]
 [<ffffffffa0384554>] __igb_open+0xc8/0x223 [igb]
 [<ffffffffa0384815>] igb_open+0x13/0x15 [igb]
 [<ffffffff8144592f>] __dev_open+0xbf/0x120
 [<ffffffff81443e51>] __dev_change_flags+0xa1/0x180
 [<ffffffff81445828>] dev_change_flags+0x28/0x70
 [<ffffffff814af537>] devinet_ioctl+0x5b7/0x620
 [<ffffffff814b01c8>] inet_ioctl+0x88/0xa0
 [<ffffffff8142e8a0>] sock_do_ioctl+0x30/0x70
 [<ffffffff8142ecf2>] sock_ioctl+0x72/0x270
 [<ffffffff8118062c>] do_vfs_ioctl+0x8c/0x340
 [<ffffffff81180981>] sys_ioctl+0xa1/0xb0
 [<ffffffff815161a9>] system_call_fastpath+0x16/0x1b
Code: 48 89 df e8 1f 40 ed ff 4d 39 e6 49 8b 45 10 75 b6 48 83 c4 18 5b 41 5c 41 5d 41 5e 41 5f c9 c3 48 8b 7b 20 e8 3e 91 db ff eb ae <0f> 0b eb fe 0f 1f 84 00 00 00 00 00 55 48 89 e5 0f 1f 44 00 00
RIP  [<ffffffff8128e144>] free_msi_irqs+0x124/0x130
 RSP <ffff880037503bd8>

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 615b68c..60a2380 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -832,17 +832,18 @@ static int igb_request_msix(struct igb_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct e1000_hw *hw = &adapter->hw;
-	int i, err = 0, vector = 0;
+	int i, err = 0, vector = 0, free_vector = 0;
 
 	err = request_irq(adapter->msix_entries[vector].vector,
 	                  igb_msix_other, 0, netdev->name, adapter);
 	if (err)
-		goto out;
-	vector++;
+		goto err_out;
 
 	for (i = 0; i < adapter->num_q_vectors; i++) {
 		struct igb_q_vector *q_vector = adapter->q_vector[i];
 
+		vector++;
+
 		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
 
 		if (q_vector->rx.ring && q_vector->tx.ring)
@@ -861,13 +862,22 @@ static int igb_request_msix(struct igb_adapter *adapter)
 		                  igb_msix_ring, 0, q_vector->name,
 		                  q_vector);
 		if (err)
-			goto out;
-		vector++;
+			goto err_free;
 	}
 
 	igb_configure_msix(adapter);
 	return 0;
-out:
+
+err_free:
+	/* free already assigned IRQs */
+	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+	vector--;
+	for (i = 0; i < vector; i++) {
+		free_irq(adapter->msix_entries[free_vector++].vector,
+			 adapter->q_vector[i]);
+	}
+err_out:
 	return err;
 }
 
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 3/5] igb: remove duplicate code for fallback interrupt initialization
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Stefan Assmann, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1354860695-27039-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Stefan Assmann <sassmann@kpanic.de>

Given a small change to igb_init_interrupt_scheme() the function fits
igb_request_irq() for MSI/legacy interrupts initialization as well, instead of
duplicating most of its code there.

Also adding a missing igb_configure() to igb_request_irq() for MSI fallback
to work properly.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index de4ebb3..615b68c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -122,6 +122,7 @@ static void igb_remove(struct pci_dev *pdev);
 static int igb_sw_init(struct igb_adapter *);
 static int igb_open(struct net_device *);
 static int igb_close(struct net_device *);
+static void igb_configure(struct igb_adapter *);
 static void igb_configure_tx(struct igb_adapter *);
 static void igb_configure_rx(struct igb_adapter *);
 static void igb_clean_all_tx_rings(struct igb_adapter *);
@@ -948,11 +949,14 @@ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
  * Attempt to configure interrupts using the best available
  * capabilities of the hardware and kernel.
  **/
-static void igb_set_interrupt_capability(struct igb_adapter *adapter)
+static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
 {
 	int err;
 	int numvecs, i;
 
+	if (!msix)
+		goto msi_only;
+
 	/* Number of supported queues. */
 	adapter->num_rx_queues = adapter->rss_queues;
 	if (adapter->vfs_allocated_count)
@@ -1199,12 +1203,12 @@ err_out:
  *
  * This function initializes the interrupts and allocates all of the queues.
  **/
-static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
+static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	int err;
 
-	igb_set_interrupt_capability(adapter);
+	igb_set_interrupt_capability(adapter, msix);
 
 	err = igb_alloc_q_vectors(adapter);
 	if (err) {
@@ -1240,20 +1244,15 @@ static int igb_request_irq(struct igb_adapter *adapter)
 		/* fall back to MSI */
 		igb_free_all_tx_resources(adapter);
 		igb_free_all_rx_resources(adapter);
+
 		igb_clear_interrupt_scheme(adapter);
-		if (!pci_enable_msi(pdev))
-			adapter->flags |= IGB_FLAG_HAS_MSI;
-		adapter->num_tx_queues = 1;
-		adapter->num_rx_queues = 1;
-		adapter->num_q_vectors = 1;
-		err = igb_alloc_q_vectors(adapter);
-		if (err) {
-			dev_err(&pdev->dev,
-			        "Unable to allocate memory for vectors\n");
+		err = igb_init_interrupt_scheme(adapter, false);
+		if (err)
 			goto request_done;
-		}
+
 		igb_setup_all_tx_resources(adapter);
 		igb_setup_all_rx_resources(adapter);
+		igb_configure(adapter);
 	}
 
 	igb_assign_vector(adapter->q_vector[0], 0);
@@ -2444,7 +2443,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
 				GFP_ATOMIC);
 
 	/* This call may decrease the number of queues */
-	if (igb_init_interrupt_scheme(adapter)) {
+	if (igb_init_interrupt_scheme(adapter, true)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
 		return -ENOMEM;
 	}
@@ -6818,7 +6817,7 @@ static int igb_resume(struct device *dev)
 	pci_enable_wake(pdev, PCI_D3hot, 0);
 	pci_enable_wake(pdev, PCI_D3cold, 0);
 
-	if (igb_init_interrupt_scheme(adapter)) {
+	if (igb_init_interrupt_scheme(adapter, true)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
 		return -ENOMEM;
 	}
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 2/5] ixgbe: check whether thermal sensor is enabled.
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1354860695-27039-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The X540's internal thermal sensor should not be enabled for all devices, but
only those devices which enable it in the NVM image. It is expected that
actively cooled devices will have it enabled, but passively cooled devices might
not want it enabled. This is due to passively cooled devices operating very near
the thermal threshold, sometimes within the margin of error of the thermal
sensor. Thus these devices may not be good candidates for using the thermal
sensor.

This patch uses the enabled bit in the FWSM register to check whether we should
be enabling the thermal sensor, and only sets the THERMAL_SENSOR_CAPABLE flag
for those devices which have it enabled.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 ++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b7bc22b..fb165b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4467,6 +4467,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
 	unsigned int rss;
+	u32 fwsm;
 #ifdef CONFIG_IXGBE_DCB
 	int j;
 	struct tc_configuration *tc;
@@ -4490,7 +4491,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
 		adapter->max_q_vectors = MAX_Q_VECTORS_82598;
 		break;
 	case ixgbe_mac_X540:
-		adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
+		if (fwsm & IXGBE_FWSM_TS_ENABLED)
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
 	case ixgbe_mac_82599EB:
 		adapter->max_q_vectors = MAX_Q_VECTORS_82599;
 		adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index cab302a..9cd8a13 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1954,6 +1954,8 @@ enum {
 #define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000
 #define IXGBE_MRQC_L3L4TXSWEN            0x00008000
 
+#define IXGBE_FWSM_TS_ENABLED	0x1
+
 /* Queue Drop Enable */
 #define IXGBE_QDE_ENABLE     0x00000001
 #define IXGBE_QDE_IDX_MASK   0x00007F00
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 1/5] ixgbe: Use is_valid_ether_addr
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Joe Perches, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1354860695-27039-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Joe Perches <joe@perches.com>

Use the normal kernel test instead of a module specific one.

Signed-off-by: Joe Perches <joe@perches.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 26 +------------------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   |  9 ---------
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |  2 +-
 6 files changed, 4 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index e75f5a4..1073aea 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1078,7 +1078,7 @@ mac_reset_top:
 	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
 
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
-	if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
 		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 5af1eeb..5e68afd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1783,29 +1783,6 @@ s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_validate_mac_addr - Validate MAC address
- *  @mac_addr: pointer to MAC address.
- *
- *  Tests a MAC address to ensure it is a valid Individual Address
- **/
-s32 ixgbe_validate_mac_addr(u8 *mac_addr)
-{
-	s32 status = 0;
-
-	/* Make sure it is not a multicast address */
-	if (IXGBE_IS_MULTICAST(mac_addr))
-		status = IXGBE_ERR_INVALID_MAC_ADDR;
-	/* Not a broadcast address */
-	else if (IXGBE_IS_BROADCAST(mac_addr))
-		status = IXGBE_ERR_INVALID_MAC_ADDR;
-	/* Reject the zero address */
-	else if (is_zero_ether_addr(mac_addr))
-		status = IXGBE_ERR_INVALID_MAC_ADDR;
-
-	return status;
-}
-
-/**
  *  ixgbe_set_rar_generic - Set Rx address register
  *  @hw: pointer to hardware structure
  *  @index: Receive address register to write
@@ -1909,8 +1886,7 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
 	 * to the permanent address.
 	 * Otherwise, use the permanent address from the eeprom.
 	 */
-	if (ixgbe_validate_mac_addr(hw->mac.addr) ==
-	    IXGBE_ERR_INVALID_MAC_ADDR) {
+	if (!is_valid_ether_addr(hw->mac.addr)) {
 		/* Get the MAC address from the RAR0 for later reference */
 		hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 1b65b6c..f7a0970 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -81,7 +81,6 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
 s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw);
 void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
 
-s32 ixgbe_validate_mac_addr(u8 *mac_addr);
 s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
 void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
 s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2fa16de..b7bc22b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7444,7 +7444,7 @@ static int ixgbe_probe(struct pci_dev *pdev,
 	memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
 	memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
 
-	if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
+	if (!is_valid_ether_addr(netdev->perm_addr)) {
 		e_dev_err("invalid MAC address\n");
 		err = -EIO;
 		goto err_sw_init;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 21915e2..cab302a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1834,15 +1834,6 @@ enum {
 /* Number of 100 microseconds we wait for PCI Express master disable */
 #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800
 
-/* Check whether address is multicast.  This is little-endian specific check.*/
-#define IXGBE_IS_MULTICAST(Address) \
-                (bool)(((u8 *)(Address))[0] & ((u8)0x01))
-
-/* Check whether an address is broadcast. */
-#define IXGBE_IS_BROADCAST(Address)                      \
-                ((((u8 *)(Address))[0] == ((u8)0xff)) && \
-                (((u8 *)(Address))[1] == ((u8)0xff)))
-
 /* RAH */
 #define IXGBE_RAH_VIND_MASK     0x003C0000
 #define IXGBE_RAH_VIND_SHIFT    18
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index de4da52..c73b929 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -152,7 +152,7 @@ mac_reset_top:
 	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
 
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
-	if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
 		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
-- 
1.7.11.7

^ permalink raw reply related

* [net-next 0/5][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2012-12-07  6:11 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

This series contains updates to igb and ixgbe.

The following are changes since commit b93196dc5af7729ff7cc50d3d322ab1a364aa14f:
  net: fix some compiler warning in net/core/neighbour.c
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next master

Carolyn Wyborny (1):
  igb: Update igb version to 4.1.2

Jacob Keller (1):
  ixgbe: check whether thermal sensor is enabled.

Joe Perches (1):
  ixgbe: Use is_valid_ether_addr

Stefan Assmann (2):
  igb: remove duplicate code for fallback interrupt initialization
  igb: release already assigned MSI-X interrupts if setup fails

 drivers/net/ethernet/intel/igb/igb_main.c       | 55 ++++++++++++++-----------
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 26 +-----------
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |  7 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   | 11 +----
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |  2 +-
 7 files changed, 42 insertions(+), 62 deletions(-)

-- 
1.7.11.7

^ permalink raw reply

* Re: [net-next PATCH] tun: correctly report an error in tun_flow_init()
From: Jason Wang @ 2012-12-07  5:51 UTC (permalink / raw)
  To: Paul Moore; +Cc: netdev
In-Reply-To: <20121206154838.6073.17102.stgit@localhost>

On Thursday, December 06, 2012 10:48:38 AM Paul Moore wrote:
> On error, the error code from tun_flow_init() is lost inside
> tun_set_iff(), this patch fixes this by assigning the tun_flow_init()
> error code to the "err" variable which is returned by
> the tun_flow_init() function on error.
> 
> Signed-off-by: Paul Moore <pmoore@redhat.com>

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks Paul.
> ---
>  drivers/net/tun.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index a1b2389..14a0454 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1591,7 +1591,8 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr)
> 
>  		tun_net_init(dev);
> 
> -		if (tun_flow_init(tun))
> +		err = tun_flow_init(tun);
> +		if (err < 0)
>  			goto err_free_dev;
> 
>  		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH v2 3/3] tun: fix LSM/SELinux labeling of tun/tap devices
From: Jason Wang @ 2012-12-07  5:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Paul Moore, netdev, linux-security-module, selinux
In-Reply-To: <20121206205716.GA6576@redhat.com>

On Thursday, December 06, 2012 10:57:16 PM Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2012 at 11:56:45AM -0500, Paul Moore wrote:
> > The SETQUEUE/tun_socket:create_queue permissions do not yet exist in any
> > released SELinux policy as we are just now adding them with this patchset.
> > With current policies loaded into a kernel with this patchset applied the
> > SETQUEUE/tun_socket:create_queue permission would be treated according to
> > the policy's unknown permission setting.
> 
> OK I think we need to rethink what we are doing here: what you sent
> addresses the problem as stated but I think we mis-stated it.  Let me
> try to restate the problem: it is not just selinux problem.  Let's
> assume qemu wants to use tun, I (libvirt) don't want to run it as root.
> 
> 1. TUNSETIFF: I can open tun, attach an fd and pass it to qemu.
> Now, qemu does not invoke TUNSETIFF so it can run without
> kernel priveledges.
> 
> 2. TUNSETQUEUE - I can open tun and attach a queue but this
> is not what is needed since this automatically switches
> to multiqueue mode - we want to change number of queues
> on the fly.
> So qemu needs to be allowed to run TUNSETQUEUE.
> Since this checks tun_not_capable(tun) we would need
> to give qemu these priveledges, and we want to avoid this
> (I can go into why if it's not obvious).
> 
> How can we slove this?
> I don't see a way without extending the interface.
> Here's a simple way to extend it: pass a flag to TUNSETQUEUE
> that enables/disables TX on this queue.
> If TX is disabled, ignore this queue for flow steering decisions.
> Allow TUNSETQUEUE for a non priveledged user if it
> it already bound to the currect tun and only changes this flag.
> 
> 
> Now I open tun and SETQUEUE with TX disabled flag. Pass it to qemu.
> qemu calls SETQUEUE with TX enabled flag.

So the check of CAP_NET_ADMIN is bypassed in the situation.  And new selinux 
policy is then needed for this new flag.

The only concern with this is whether it could be treated as a kind of host 
network device configuration and need CAP_NET_ADMIN capability. (But it looks 
the only method that we could let qemu change the queue used by tun).
> 
> Jason? Want to try implementing and see what people think?

Sure, will draft a path based on this.

^ permalink raw reply

* Re: [RFC V3 1/2] I'dcfg80211: Move the definition of struct mac_address up
From: Joe Perches @ 2012-12-07  5:38 UTC (permalink / raw)
  To: Vasanthakumar Thiagarajan
  Cc: linville-2XuSBdqkA4R54TAoqtyWWQ, johannes-cdvu00un1VgdHxzADdlk8Q,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, netdev
In-Reply-To: <1354853850-10537-1-git-send-email-vthiagar-A+ZNKFmMK5xy9aJCnZT0Uw@public.gmane.org>

On Fri, 2012-12-07 at 09:47 +0530, Vasanthakumar Thiagarajan wrote:
> struct mac_address will be used by ACL related configuration ops.

I'd prefer that any new code _not_ use struct mac_address
but use u8 addr[ETH_ALEN] instead.

That's the most common style in the kernel.

$ git grep -E "\bu8\s+\w+\s*\[\s*ETH_ALEN\s*\]" | wc -l
749
$ git grep -E "\bstruct\s+mac_address\b" | wc -l
13

There are also uses of char and unsigned char

$ git grep -E "\bchar\s+\w+\s*\[\s*ETH_ALEN\s*\]" | wc -l
121

Some of those are ancient, but it'd be good to
be consistent throughout the kernel.

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH v2 3/3] tun: fix LSM/SELinux labeling of tun/tap devices
From: Jason Wang @ 2012-12-07  5:29 UTC (permalink / raw)
  To: Paul Moore; +Cc: netdev, linux-security-module, selinux, mst
In-Reply-To: <10346047.LWlqqejLmO@sifl>

On Thursday, December 06, 2012 10:36:11 AM Paul Moore wrote:
> On Thursday, December 06, 2012 06:29:54 PM Jason Wang wrote:
> > On Wednesday, December 05, 2012 03:26:19 PM Paul Moore wrote:
> > > This patch corrects some problems with LSM/SELinux that were introduced
> > > with the multiqueue patchset.  The problem stems from the fact that the
> > > multiqueue work changed the relationship between the tun device and its
> > > associated socket; before the socket persisted for the life of the
> > > device, however after the multiqueue changes the socket only persisted
> > > for the life of the userspace connection (fd open).  For non-persistent
> > > devices this is not an issue, but for persistent devices this can cause
> > > the tun device to lose its SELinux label.
> > > 
> > > We correct this problem by adding an opaque LSM security blob to the
> > > tun device struct which allows us to have the LSM security state, e.g.
> > > SELinux labeling information, persist for the lifetime of the tun
> > > device.  In the process we tweak the LSM hooks to work with this new
> > > approach to TUN device/socket labeling and introduce a new LSM hook,
> > > security_tun_dev_create_queue(), to approve requests to create a new
> > > TUN queue via TUNSETQUEUE.
> > > 
> > > The SELinux code has been adjusted to match the new LSM hooks, the
> > > other LSMs do not make use of the LSM TUN controls.  This patch makes
> > > use of the recently added "tun_socket:create_queue" permission to
> > > restrict access to the TUNSETQUEUE operation.  On older SELinux
> > > policies which do not define the "tun_socket:create_queue" permission
> > > the access control decision for TUNSETQUEUE will be handled according
> > > to the SELinux policy's unknown permission setting.
> > > 
> > > Signed-off-by: Paul Moore <pmoore@redhat.com>
> 
> ...
> 
> > > @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct
> > > sock
> > > *sk) * cause confusion to the TUN user that had no idea network labeling
> > > *
> > > protocols were being used */
> > > 
> > > -	/* see the comments in selinux_tun_dev_create() about why we ...
> > > -
> > > -	sksec->sid = current_sid();
> > > +	sksec->sid = tunsec->sid;
> > 
> > Since both tun_set_iff() and tun_set_queue() would call this. I wonder
> > when
> > it is called by tun_set_queue() we need some checking just like what we
> > done in v1, otherwise it's unconditionally in TUNSETQUEUE. Or we can add
> > them in selinux_tun_dev_create_queue()?
> 
> In all the cases that call tun_attach() we have a new socket which needs to
> be labeled based on the tun->security label, yes?  That is what the

Yes.
> security_tun_dev_attach() code does, there is no need for access control at
> this point as the operation has already been authorized by either
> security_tun_dev_create() (new device), security_tun_dev_create_queue() (new
> queue), or security_tun_dev_open() (opening persistent device).
> 
> I think we are all set, or am I missing something?
> 

Looks fine, thanks for the explanation.
> > >  	sksec->sclass = SECCLASS_TUN_SOCKET;
> > > 
> > > +
> > > +	return 0;
> > > 
> > >  }

^ permalink raw reply

* Re: [PATCH] net: core: fix unused variable sparse warning
From: David Miller @ 2012-12-07  3:25 UTC (permalink / raw)
  To: dinggnu; +Cc: ebiederm, edumazet, xemul, netdev, linux-kernel
In-Reply-To: <20121207001545.GE2510@gmail.com>

From: Cong Ding <dinggnu@gmail.com>
Date: Fri, 7 Dec 2012 00:15:47 +0000

> On Fri, Dec 07, 2012 at 12:06:44AM +0000, Cong Ding wrote:
>> the variables zero and unres_qlen_max are only used when CONFIG_SYSCTL is
>> defined, otherwise it causes the following sparse warning when we turn on
>> CONFIG_SYSCTL.
> sorry for disturbing again, the sparse warning is
> net/core/neighbour.c:65:12: warning: ‘zero’ defined but not used [-Wunused-variable]
> net/core/neighbour.c:66:12: warning: ‘unres_qlen_max’ defined but not used [-Wunused-variable]
> 
> it happens if we turn off CONFIG_SYSCTL.

A patch for this has already in my tree for nearly a whole day.

^ permalink raw reply

* [PATCH net-next v4] bridge: export multicast database via netlink
From: Cong Wang @ 2012-12-07  3:23 UTC (permalink / raw)
  To: netdev
  Cc: bridge, Cong Wang, Herbert Xu, Stephen Hemminger, David S. Miller,
	Thomas Graf, Jesper Dangaard Brouer

From: Cong Wang <amwang@redhat.com>

V4: remove some useless #include
    some coding style fix

V3: drop debugging printk's
    update selinux perm table as well

V2: drop patch 1/2, export ifindex directly
    Redesign netlink attributes
    Improve netlink seq check
    Handle IPv6 addr as well

This patch exports bridge multicast database via netlink
message type RTM_GETMDB. Similar to fdb, but currently bridge-specific.
We may need to support modify multicast database too (RTM_{ADD,DEL}MDB).

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
    
---
 include/uapi/linux/if_bridge.h |   55 ++++++++++++++
 include/uapi/linux/rtnetlink.h |    3 +
 net/bridge/Makefile            |    2 +-
 net/bridge/br_mdb.c            |  157 ++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_multicast.c      |    2 +
 net/bridge/br_private.h        |    2 +
 security/selinux/nlmsgtab.c    |    1 +
 7 files changed, 221 insertions(+), 1 deletions(-)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index b388579..9a0f6ff 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -116,4 +116,59 @@ enum {
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ *     [MDBA_MDB_ENTRY] = {
+ *         [MDBA_MDB_ENTRY_INFO]
+ *     }
+ * }
+ * [MDBA_ROUTER] = {
+ *    [MDBA_ROUTER_PORT]
+ * }
+ */
+enum {
+	MDBA_UNSPEC,
+	MDBA_MDB,
+	MDBA_ROUTER,
+	__MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+	MDBA_MDB_UNSPEC,
+	MDBA_MDB_ENTRY,
+	__MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
+enum {
+	MDBA_MDB_ENTRY_UNSPEC,
+	MDBA_MDB_ENTRY_INFO,
+	__MDBA_MDB_ENTRY_MAX,
+};
+#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1)
+
+enum {
+	MDBA_ROUTER_UNSPEC,
+	MDBA_ROUTER_PORT,
+	__MDBA_ROUTER_MAX,
+};
+#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1)
+
+struct br_port_msg {
+	__u32 ifindex;
+};
+
+struct br_mdb_entry {
+	__u32 ifindex;
+	struct {
+		union {
+			__be32	ip4;
+			struct in6_addr ip6;
+		} u;
+		__be16		proto;
+	} addr;
+};
+
 #endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 33d29ce..354a1e7 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -125,6 +125,9 @@ enum {
 	RTM_GETNETCONF = 82,
 #define RTM_GETNETCONF RTM_GETNETCONF
 
+	RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index d0359ea..e859098 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
 
 bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
 
-bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o
+bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
new file mode 100644
index 0000000..8bbd357
--- /dev/null
+++ b/net/bridge/br_mdb.c
@@ -0,0 +1,157 @@
+#include <linux/err.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#endif
+
+#include "br_private.h"
+
+static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+			       struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_port *p;
+	struct hlist_node *n;
+	struct nlattr *nest;
+
+	if (!br->multicast_router || hlist_empty(&br->router_list))
+		return 0;
+
+	nest = nla_nest_start(skb, MDBA_ROUTER);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
+		if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
+			goto fail;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+fail:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+			    struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_mdb_htable *mdb;
+	struct nlattr *nest, *nest2;
+	int i, err = 0;
+	int idx = 0, s_idx = cb->args[1];
+
+	if (br->multicast_disabled)
+		return 0;
+
+	mdb = rcu_dereference(br->mdb);
+	if (!mdb)
+		return 0;
+
+	nest = nla_nest_start(skb, MDBA_MDB);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	for (i = 0; i < mdb->max; i++) {
+		struct hlist_node *h;
+		struct net_bridge_mdb_entry *mp;
+		struct net_bridge_port_group *p, **pp;
+		struct net_bridge_port *port;
+
+		hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
+			if (idx < s_idx)
+				goto skip;
+
+			nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
+			if (nest2 == NULL) {
+				err = -EMSGSIZE;
+				goto out;
+			}
+
+			for (pp = &mp->ports;
+			     (p = rcu_dereference(*pp)) != NULL;
+			      pp = &p->next) {
+				port = p->port;
+				if (port) {
+					struct br_mdb_entry e;
+					e.ifindex = port->dev->ifindex;
+					e.addr.u.ip4 = p->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+					e.addr.u.ip6 = p->addr.u.ip6;
+#endif
+					e.addr.proto = p->addr.proto;
+					if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
+						nla_nest_cancel(skb, nest2);
+						err = -EMSGSIZE;
+						goto out;
+					}
+				}
+			}
+			nla_nest_end(skb, nest2);
+		skip:
+			idx++;
+		}
+	}
+
+out:
+	cb->args[1] = idx;
+	cb->args[2] = mdb->seq;
+	nla_nest_end(skb, nest);
+	return err;
+}
+
+static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net_device *dev;
+	struct net *net = sock_net(skb->sk);
+	struct nlmsghdr *nlh;
+	int idx = 0, s_idx;
+
+	s_idx = cb->args[0];
+
+	rcu_read_lock();
+
+	for_each_netdev_rcu(net, dev) {
+		if (dev->priv_flags & IFF_EBRIDGE) {
+			struct br_port_msg *bpm;
+
+			if (idx < s_idx)
+				goto skip;
+
+			nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+					cb->nlh->nlmsg_seq, RTM_GETMDB,
+					sizeof(*bpm), NLM_F_MULTI);
+			if (nlh == NULL)
+				break;
+
+			bpm = nlmsg_data(nlh);
+			bpm->ifindex = dev->ifindex;
+			if (br_mdb_fill_info(skb, cb, dev) < 0)
+				goto out;
+			if (br_rports_fill_info(skb, cb, dev) < 0)
+				goto out;
+
+			nlmsg_end(skb, nlh);
+		skip:
+			idx++;
+		}
+	}
+
+out:
+	cb->seq = cb->args[2];
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+void br_mdb_init(void)
+{
+	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
+}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a2a7a1a..14b26b1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -322,6 +322,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
 
 	mdb->size = old ? old->size : 0;
 	mdb->ver = old ? old->ver ^ 1 : 0;
+	mdb->seq = old ? (old->seq + 1) : 0;
 
 	if (!old || elasticity)
 		get_random_bytes(&mdb->secret, sizeof(mdb->secret));
@@ -1605,6 +1606,7 @@ void br_multicast_init(struct net_bridge *br)
 		    br_multicast_querier_expired, (unsigned long)br);
 	setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
 		    (unsigned long)br);
+	br_mdb_init();
 }
 
 void br_multicast_open(struct net_bridge *br)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cd86222..ddc74bf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -105,6 +105,7 @@ struct net_bridge_mdb_htable
 	u32				max;
 	u32				secret;
 	u32				ver;
+	u32				seq;
 };
 
 struct net_bridge_port
@@ -433,6 +434,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p,
 extern int br_multicast_toggle(struct net_bridge *br, unsigned long val);
 extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
 extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+extern void br_mdb_init(void);
 
 static inline bool br_multicast_is_router(struct net_bridge *br)
 {
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index d309e7f..163aaa7 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -67,6 +67,7 @@ static struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_GETADDRLABEL,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_GETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_SETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETMDB,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static struct nlmsg_perm nlmsg_tcpdiag_perms[] =

^ permalink raw reply related

* [PATCH v4] iproute2: add mdb sub-command to bridge
From: Cong Wang @ 2012-12-07  3:23 UTC (permalink / raw)
  To: netdev
  Cc: Cong Wang, bridge, Herbert Xu, Jesper Dangaard Brouer,
	Thomas Graf, Stephen Hemminger, David S. Miller
In-Reply-To: <1354850623-31652-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>

V4: fix filter_dev
    remove some useless #include

V3: improve the output, display router info only for -d
    fix router parsing code

V2: sync with the kernel patch
    handle IPv6 addr
    a few cleanup

Sample output:

	# ./bridge/bridge mdb
	bridge br0:
	port eth0, group 224.8.8.9
	port eth1, group 224.8.8.8

	# ./bridge/bridge -d mdb
	bridge br0:
	port eth0, group 224.8.8.9
	port eth1, group 224.8.8.8
	router ports: eth0

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 bridge/Makefile    |    2 +-
 bridge/br_common.h |    3 +-
 bridge/bridge.c    |    1 +
 bridge/mdb.c       |  173 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/bridge/Makefile b/bridge/Makefile
index 9a6743e..67aceb4 100644
--- a/bridge/Makefile
+++ b/bridge/Makefile
@@ -1,4 +1,4 @@
-BROBJ = bridge.o fdb.o monitor.o link.o
+BROBJ = bridge.o fdb.o monitor.o link.o mdb.o
 
 include ../Config
 
diff --git a/bridge/br_common.h b/bridge/br_common.h
index 718ecb9..892fb76 100644
--- a/bridge/br_common.h
+++ b/bridge/br_common.h
@@ -5,10 +5,11 @@ extern int print_fdb(const struct sockaddr_nl *who,
 		     struct nlmsghdr *n, void *arg);
 
 extern int do_fdb(int argc, char **argv);
+extern int do_mdb(int argc, char **argv);
 extern int do_monitor(int argc, char **argv);
 
 extern int preferred_family;
 extern int show_stats;
-extern int show_detail;
+extern int show_details;
 extern int timestamp;
 extern struct rtnl_handle rth;
diff --git a/bridge/bridge.c b/bridge/bridge.c
index e2c33b0..1fcd365 100644
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@@ -43,6 +43,7 @@ static const struct cmd {
 	int (*func)(int argc, char **argv);
 } cmds[] = {
 	{ "fdb", 	do_fdb },
+	{ "mdb", 	do_mdb },
 	{ "monitor",	do_monitor },
 	{ "help",	do_help },
 	{ 0 }
diff --git a/bridge/mdb.c b/bridge/mdb.c
new file mode 100644
index 0000000..8cf5d2b
--- /dev/null
+++ b/bridge/mdb.c
@@ -0,0 +1,173 @@
+/*
+ * Get mdb table with netlink
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+#include <linux/if_ether.h>
+#include <string.h>
+#include <arpa/inet.h>
+
+#include "libnetlink.h"
+#include "br_common.h"
+#include "rt_names.h"
+#include "utils.h"
+
+#ifndef MDBA_RTA
+#define MDBA_RTA(r) \
+	((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
+#endif
+
+int filter_index;
+
+static void usage(void)
+{
+	fprintf(stderr, "       bridge mdb {show} [ dev DEV ]\n");
+	exit(-1);
+}
+
+static void br_print_router_ports(FILE *f, struct rtattr *attr)
+{
+	uint32_t *port_ifindex;
+	struct rtattr *i;
+	int rem;
+
+	rem = RTA_PAYLOAD(attr);
+	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+		port_ifindex = RTA_DATA(i);
+		fprintf(f, "%s ", ll_index_to_name(*port_ifindex));
+	}
+
+	fprintf(f, "\n");
+}
+
+static void print_mdb_entry(FILE *f, struct br_mdb_entry *e)
+{
+	SPRINT_BUF(abuf);
+
+	if (e->addr.proto == htons(ETH_P_IP))
+		fprintf(f, "port %s, group %s\n", ll_index_to_name(e->ifindex),
+			inet_ntop(AF_INET, &e->addr.u.ip4, abuf, sizeof(abuf)));
+	else
+		fprintf(f, "port %s, group %s\n", ll_index_to_name(e->ifindex),
+			inet_ntop(AF_INET6, &e->addr.u.ip6, abuf, sizeof(abuf)));
+}
+
+static void br_print_mdb_entry(FILE *f, struct rtattr *attr)
+{
+	struct rtattr *i;
+	int rem;
+	struct br_mdb_entry *e;
+
+	rem = RTA_PAYLOAD(attr);
+	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+		e = RTA_DATA(i);
+		print_mdb_entry(f, e);
+	}
+}
+
+int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+	FILE *fp = arg;
+	struct br_port_msg *r = NLMSG_DATA(n);
+	int len = n->nlmsg_len;
+	struct rtattr * tb[MDBA_MAX+1];
+
+	if (n->nlmsg_type != RTM_GETMDB) {
+		fprintf(stderr, "Not RTM_GETMDB: %08x %08x %08x\n",
+			n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+
+		return 0;
+	}
+
+	len -= NLMSG_LENGTH(sizeof(*r));
+	if (len < 0) {
+		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+		return -1;
+	}
+
+	if (filter_index && filter_index != r->ifindex)
+		return 0;
+
+	if (r->ifindex)
+		fprintf(fp, "bridge %s:\n", ll_index_to_name(r->ifindex));
+
+	parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+
+	if (tb[MDBA_MDB]) {
+		struct rtattr *i;
+		int rem = RTA_PAYLOAD(tb[MDBA_MDB]);
+
+		for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
+			br_print_mdb_entry(fp, i);
+	}
+
+	if (tb[MDBA_ROUTER]) {
+		if (show_details) {
+			fprintf(fp, "router ports: ");
+			br_print_router_ports(fp, tb[MDBA_ROUTER]);
+		}
+	}
+
+	return 0;
+}
+
+static int mdb_show(int argc, char **argv)
+{
+	char *filter_dev = NULL;
+
+	while (argc > 0) {
+		if (strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			if (filter_dev)
+				duparg("dev", *argv);
+			filter_dev = *argv;
+		}
+		argc--; argv++;
+	}
+
+	if (filter_dev) {
+		filter_index = if_nametoindex(filter_dev);
+		if (filter_index == 0) {
+			fprintf(stderr, "Cannot find device \"%s\"\n",
+				filter_dev);
+			return -1;
+		}
+	}
+
+	if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
+		perror("Cannot send dump request");
+		exit(1);
+	}
+
+	if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		exit(1);
+	}
+
+	return 0;
+}
+
+int do_mdb(int argc, char **argv)
+{
+	ll_init_map(&rth);
+
+	if (argc > 0) {
+		if (matches(*argv, "show") == 0 ||
+		    matches(*argv, "lst") == 0 ||
+		    matches(*argv, "list") == 0)
+			return mdb_show(argc-1, argv+1);
+		if (matches(*argv, "help") == 0)
+			usage();
+	} else
+		return mdb_show(0, NULL);
+
+	fprintf(stderr, "Command \"%s\" is unknown, try \"bridge mdb help\".\n", *argv);
+	exit(-1);
+}

^ permalink raw reply related

* Re: [PATCHv5] virtio-spec: virtio network device RFS support
From: Rusty Russell @ 2012-12-07  3:14 UTC (permalink / raw)
  To: Ben Hutchings, Michael S. Tsirkin; +Cc: netdev, kvm, virtualization
In-Reply-To: <1354831403.2828.63.camel@bwh-desktop.uk.solarflarecom.com>

Ben Hutchings <bhutchings@solarflare.com> writes:
>  If you want a name for the whole set of
> features involved, I don't see any better name than 'multiqueue'/'MQ'.

OK, let's go back to multiqueue then, and perhaps refer to the current
receive steering as 'automatic'.

Cheers,
Rusty.

^ permalink raw reply

* (unknown), 
From: Allen and Violet Large @ 2012-12-07  2:47 UTC (permalink / raw)



-- 
My wife Violet and I Allen Large won $11.3 million in a lottery 6-49
in July and we have decided to donate the sum of $900,000USD to you as
part of our own charity project.  Contact us through our personal and
private email for more details (allen_andvioletlarge@yahoo.ie)

^ permalink raw reply

* Re: [PATCH net-next 2/2] net: doc: add default value for neighbour parameters
From: Shan Wei @ 2012-12-07  2:27 UTC (permalink / raw)
  To: Ben Hutchings, David Miller; +Cc: Eric Dumazet, NetDev
In-Reply-To: <1354825223.2828.17.camel@bwh-desktop.uk.solarflarecom.com>

Ben Hutchings said, at 2012/12/7 4:20:
> 'Secluded' is a fine word but your dictionary should have said that it
> is used to describe places, not events.  You're quite right that this is
> not silent, though.  So a word like 'hidden', 'quiet', 'non-obvious' or
> 'unexpected' would be more appropriate.

You are right,good suggestion :-).
Thanks for your review.

-------------------------
[PATCH net-next] net: doc : use more suitable word 'unexpected' to replace 'secluded'

 'secluded' is used to describe places, not suitable here.

Suggested-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Shan Wei <davidshan@tencent.com>
---
 Documentation/networking/ip-sysctl.txt |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 0462a71..1b830ca 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -44,7 +44,7 @@ neigh/default/unres_qlen - INTEGER
 	unresolved address by other network layers.
 	(deprecated in linux 3.3) : use unres_qlen_bytes instead.
 	Prior to linux 3.3, the default value is 3 which may cause
-	secluded packet loss. The current default value is calculated
+	unexpected packet loss. The current default value is calculated
 	according to default value of unres_qlen_bytes and true size of
 	packet.
 	Default: 31
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH] net: core: fix unused variable sparse warning
From: Cong Wang @ 2012-12-07  2:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev
In-Reply-To: <1354838913-20084-1-git-send-email-dinggnu@gmail.com>

On Fri, 07 Dec 2012 at 00:06 GMT, Cong Ding <dinggnu@gmail.com> wrote:
> the variables zero and unres_qlen_max are only used when CONFIG_SYSCTL is
> defined, otherwise it causes the following sparse warning when we turn on
> CONFIG_SYSCTL.
>

commit b93196dc5af7729ff7cc50d3d322ab1a364aa14f
Author: Cong Wang <amwang@redhat.com>
Date:   Thu Dec 6 10:04:04 2012 +0800

    net: fix some compiler warning in net/core/neighbour.c
        
    net/core/neighbour.c:65:12: warning: 'zero' defined but
not used [-Wunused-variable]
    net/core/neighbour.c:66:12: warning: 'unres_qlen_max' defined but
not used [-Wunused-variable]
    
    These variables are only used when CONFIG_SYSCTL is defined,
    so move them under #ifdef CONFIG_SYSCTL.
	        
    Reported-by: Fengguang Wu <fengguang.wu@intel.com>
    Signed-off-by: Cong Wang <amwang@redhat.com>
    Acked-by: Shan Wei <davidshan@tencent.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* [PATCH] vxlan: Add capability of Rx checksum offload for inner packet
From: Joseph Gasparakis @ 2012-12-07  2:05 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Joseph Gasparakis, netdev, linux-kernel, alexander.h.duyck,
	peter.p.waskiewicz.jr

This patch adds capability in vxlan to identify received
checksummed inner packets and signal them to the upper layers of
the stack. The driver needs to set the skb->encapsulation bit
and also set the skb->ip_summed to CHECKSUM_UNNECESSARY.

Signed-off-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
---
 drivers/net/vxlan.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 88b31f2..cf0231d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -607,7 +607,12 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	__skb_tunnel_rx(skb, vxlan->dev);
 	skb_reset_network_header(skb);
-	skb->ip_summed = CHECKSUM_NONE;
+
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation ||
+	    !(vxlan->dev->features & NETIF_F_RXCSUM))
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->encapsulation = 0;
 
 	err = IP_ECN_decapsulate(oip, skb);
 	if (unlikely(err)) {
@@ -1175,7 +1180,9 @@ static void vxlan_setup(struct net_device *dev)
 	dev->features	|= NETIF_F_LLTX;
 	dev->features	|= NETIF_F_NETNS_LOCAL;
 	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
-	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM;
+	dev->features   |= NETIF_F_RXCSUM;
+
+	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
 
 	spin_lock_init(&vxlan->hash_lock);
-- 
1.7.6.4

^ permalink raw reply related

* Re: vlan tagged packets and libpcap breakage
From: Michael Richardson @ 2012-12-07  1:59 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: netdev, Francesco Ruggeri, tcpdump-workers
In-Reply-To: <87y5had4gi.fsf@xmission.com>


>>>>> "Eric" == Eric W Biederman <ebiederm@xmission.com> writes:
    Eric> It is a bit odd that you are indenting with spaces instead of tabs
    Eric> in a file that is indented with tab.  Again libpcap isn't my code so I
    Eric> don't care but someone else might.

tabs are hysterical raisens.

Send two patches
1) tab->spaces
2) your patch.

github preferred :-)
but, I'll process this patch as is.

-- 
]       He who is tired of Weird Al is tired of life!           |  firewalls  [
]   Michael Richardson, Sandelman Software Works, Ottawa, ON    |net architect[
] mcr@sandelman.ottawa.on.ca http://www.sandelman.ottawa.on.ca/ |device driver[
   Kyoto Plus: watch the video <http://www.youtube.com/watch?v=kzx1ycLXQSE>
	               then sign the petition. 
_______________________________________________
tcpdump-workers mailing list
tcpdump-workers@lists.tcpdump.org
https://lists.sandelman.ca/mailman/listinfo/tcpdump-workers

^ permalink raw reply

* [RFC PATCH v3 4/4] ixgbe: Adding tx encapsulation capability
From: Joseph Gasparakis @ 2012-12-07  1:56 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Joseph Gasparakis, netdev, linux-kernel, dmitry, saeed.bishara,
	Alexander Duyck
In-Reply-To: <1354845419-22483-1-git-send-email-joseph.gasparakis@intel.com>

This patch allows ixgbe to recognize encapsulated packets and do the tx
checksum offload in hardware. This patch is only for demonstration
purposes and should not be applied.

Signed-off-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   46 ++++++++++++++++++++-----
 1 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 484bbed..17ebfe0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5966,17 +5966,42 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 			if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW))
 				return;
 		}
+		vlan_macip_lens |= skb_network_offset(skb)
+				   << IXGBE_ADVTXD_MACLEN_SHIFT;
 	} else {
 		u8 l4_hdr = 0;
-		switch (first->protocol) {
-		case __constant_htons(ETH_P_IP):
-			vlan_macip_lens |= skb_network_header_len(skb);
+		union {
+			struct iphdr *ipv4;
+			struct ipv6hdr *ipv6;
+			u8 *raw;
+		} network_hdr;
+		union {
+			struct tcphdr *tcphdr;
+			u8 *raw;
+		} transport_hdr;
+
+		if (skb->encapsulation) {
+			network_hdr.raw = skb_inner_network_header(skb);
+			transport_hdr.raw = skb_inner_transport_header(skb);
+			vlan_macip_lens |= skb_inner_network_offset(skb) <<
+					   IXGBE_ADVTXD_MACLEN_SHIFT;
+		} else {
+			network_hdr.raw = skb_network_header(skb);
+			transport_hdr.raw = skb_transport_header(skb);
+			vlan_macip_lens |= skb_network_offset(skb) <<
+					   IXGBE_ADVTXD_MACLEN_SHIFT;
+		}
+
+		/* use first 4 bits to determine IP version */
+		switch (network_hdr.ipv4->version) {
+		case 4:
+			vlan_macip_lens |= transport_hdr.raw - network_hdr.raw;
 			type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
-			l4_hdr = ip_hdr(skb)->protocol;
+			l4_hdr = network_hdr.ipv4->protocol;
 			break;
-		case __constant_htons(ETH_P_IPV6):
-			vlan_macip_lens |= skb_network_header_len(skb);
-			l4_hdr = ipv6_hdr(skb)->nexthdr;
+		case 6:
+			vlan_macip_lens |= transport_hdr.raw - network_hdr.raw;
+			l4_hdr = network_hdr.ipv6->nexthdr;
 			break;
 		default:
 			if (unlikely(net_ratelimit())) {
@@ -5990,7 +6015,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 		switch (l4_hdr) {
 		case IPPROTO_TCP:
 			type_tucmd |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
-			mss_l4len_idx = tcp_hdrlen(skb) <<
+			mss_l4len_idx = (transport_hdr.tcphdr->doff * 4) <<
 					IXGBE_ADVTXD_L4LEN_SHIFT;
 			break;
 		case IPPROTO_SCTP:
@@ -6016,7 +6041,6 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 	}
 
 	/* vlan_macip_lens: MACLEN, VLAN tag */
-	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
 	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0,
@@ -7377,6 +7401,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	netdev->hw_features = netdev->features;
 
+	netdev->hw_enc_features = NETIF_F_IP_CSUM |
+				  NETIF_F_IPV6_CSUM |
+				  NETIF_F_SG;
+
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
-- 
1.7.6.4

^ permalink raw reply related

* [PATCH v3 3/4] vxlan: capture inner headers during encapsulation
From: Joseph Gasparakis @ 2012-12-07  1:56 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Joseph Gasparakis, netdev, linux-kernel, dmitry, saeed.bishara,
	Peter P Waskiewicz Jr, Alexander Duyck
In-Reply-To: <1354845419-22483-1-git-send-email-joseph.gasparakis@intel.com>

Allow VXLAN to make use of Tx checksum offloading and Tx scatter-gather.
The advantage to these two changes is that it also allows the VXLAN to
make use of GSO.

Signed-off-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/vxlan.c |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ce77b8b..88b31f2 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -876,6 +876,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 	}
 
+	if (!skb->encapsulation) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
 	/* Need space for new headers (invalidates iph ptr) */
 	if (skb_cow_head(skb, VXLAN_HEADROOM))
 		goto drop;
@@ -947,7 +952,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	vxlan_set_owner(dev, skb);
 
 	/* See iptunnel_xmit() */
-	skb->ip_summed = CHECKSUM_NONE;
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_NONE;
 	ip_select_ident(iph, &rt->dst, NULL);
 
 	err = ip_local_out(skb);
@@ -1168,6 +1174,8 @@ static void vxlan_setup(struct net_device *dev)
 	dev->tx_queue_len = 0;
 	dev->features	|= NETIF_F_LLTX;
 	dev->features	|= NETIF_F_NETNS_LOCAL;
+	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
+	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
 
 	spin_lock_init(&vxlan->hash_lock);
-- 
1.7.6.4

^ permalink raw reply related

* [PATCH v3 2/4] net: Handle encapsulated offloads before fragmentation or handing to lower dev
From: Joseph Gasparakis @ 2012-12-07  1:56 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Alexander Duyck, netdev, linux-kernel, dmitry, saeed.bishara
In-Reply-To: <1354845419-22483-1-git-send-email-joseph.gasparakis@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change allows the VXLAN to enable Tx checksum offloading even on
devices that do not support encapsulated checksum offloads. The
advantage to this is that it allows for the lower device to change due
to routing table changes without impacting features on the VXLAN itself.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 net/core/dev.c       |   15 +++++++++++++--
 net/ipv4/ip_output.c |    4 ++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 2a5f5586..7a2646a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2324,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->vlan_tci = 0;
 		}
 
+		/* If encapsulation offload request, verify we are testing
+		 * hardware encapsulation features instead of standard
+		 * features for the netdev
+		 */
+		if (skb->encapsulation)
+			features &= dev->hw_enc_features;
+
 		if (netif_needs_gso(skb, features)) {
 			if (unlikely(dev_gso_segment(skb, features)))
 				goto out_kfree_skb;
@@ -2339,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			 * checksumming here.
 			 */
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
-				skb_set_transport_header(skb,
-					skb_checksum_start_offset(skb));
+				if (skb->encapsulation)
+					skb_set_inner_transport_header(skb,
+						skb_checksum_start_offset(skb));
+				else
+					skb_set_transport_header(skb,
+						skb_checksum_start_offset(skb));
 				if (!(features & NETIF_F_ALL_CSUM) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6537a40..3e98ed2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -595,6 +595,10 @@ slow_path_clean:
 	}
 
 slow_path:
+	/* for offloaded checksums cleanup checksum before fragmentation */
+	if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
+		goto fail;
+
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
 
-- 
1.7.6.4

^ permalink raw reply related

* [PATCH v3 1/4] net: Add support for hardware-offloaded encapsulation
From: Joseph Gasparakis @ 2012-12-07  1:56 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Joseph Gasparakis, netdev, linux-kernel, dmitry, saeed.bishara,
	Peter P Waskiewicz Jr, Alexander Duyck
In-Reply-To: <1354845419-22483-1-git-send-email-joseph.gasparakis@intel.com>

This patch adds support in the kernel for offloading in the NIC Tx and Rx
checksumming for encapsulated packets (such as VXLAN and IP GRE).

Signed-off-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 include/linux/ip.h        |    5 ++
 include/linux/ipv6.h      |    5 ++
 include/linux/netdevice.h |    2 +
 include/linux/skbuff.h    |   90 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/tcp.h       |   10 +++++
 include/linux/udp.h       |    5 ++
 net/core/skbuff.c         |    9 ++++
 7 files changed, 125 insertions(+), 1 deletions(-)

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 58b82a2..492bc65 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -25,6 +25,11 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 	return (struct iphdr *)skb_network_header(skb);
 }
 
+static inline struct iphdr *inner_ip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb_inner_network_header(skb);
+}
+
 static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
 	return (struct iphdr *)skb_transport_header(skb);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5e11905..eded08c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -67,6 +67,11 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_network_header(skb);
 }
 
+static inline struct ipv6hdr *inner_ipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb_inner_network_header(skb);
+}
+
 static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 {
 	return (struct ipv6hdr *)skb_transport_header(skb);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e9929ab..58530bc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1063,6 +1063,8 @@ struct net_device {
 	netdev_features_t	wanted_features;
 	/* mask of features inheritable by VLAN devices */
 	netdev_features_t	vlan_features;
+	/* mask of features inherited by encapsulating devices */
+	netdev_features_t	hw_enc_features;
 
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2af494..d6933a0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -376,6 +376,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@mark: Generic packet mark
  *	@dropcount: total number of sk_receive_queue overflows
  *	@vlan_tci: vlan tag control information
+ *	@inner_transport_header: Inner transport layer header (encapsulation)
+ *	@inner_network_header: Network layer header (encapsulation)
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
@@ -471,7 +473,8 @@ struct sk_buff {
 	__u8			wifi_acked:1;
 	__u8			no_fcs:1;
 	__u8			head_frag:1;
-	/* 8/10 bit hole (depending on ndisc_nodetype presence) */
+	__u8			encapsulation:1;
+	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #ifdef CONFIG_NET_DMA
@@ -486,6 +489,8 @@ struct sk_buff {
 		__u32		avail_size;
 	};
 
+	sk_buff_data_t		inner_transport_header;
+	sk_buff_data_t		inner_network_header;
 	sk_buff_data_t		transport_header;
 	sk_buff_data_t		network_header;
 	sk_buff_data_t		mac_header;
@@ -1435,12 +1440,53 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_inner_headers(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->network_header;
+	skb->inner_transport_header = skb->transport_header;
+}
+
 static inline void skb_reset_mac_len(struct sk_buff *skb)
 {
 	skb->mac_len = skb->network_header - skb->mac_header;
 }
 
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
+static inline unsigned char *skb_inner_transport_header(const struct sk_buff
+							*skb)
+{
+	return skb->head + skb->inner_transport_header;
+}
+
+static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
+{
+	skb->inner_transport_header = skb->data - skb->head;
+}
+
+static inline void skb_set_inner_transport_header(struct sk_buff *skb,
+						   const int offset)
+{
+	skb_reset_inner_transport_header(skb);
+	skb->inner_transport_header += offset;
+}
+
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->head + skb->inner_network_header;
+}
+
+static inline void skb_reset_inner_network_header(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->data - skb->head;
+}
+
+static inline void skb_set_inner_network_header(struct sk_buff *skb,
+						const int offset)
+{
+	skb_reset_inner_network_header(skb);
+	skb->inner_network_header += offset;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->head + skb->transport_header;
@@ -1496,6 +1542,38 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 }
 
 #else /* NET_SKBUFF_DATA_USES_OFFSET */
+static inline unsigned char *skb_inner_transport_header(const struct sk_buff
+							*skb)
+{
+	return skb->inner_transport_header;
+}
+
+static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
+{
+	skb->inner_transport_header = skb->data;
+}
+
+static inline void skb_set_inner_transport_header(struct sk_buff *skb,
+						   const int offset)
+{
+	skb->inner_transport_header = skb->data + offset;
+}
+
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->inner_network_header;
+}
+
+static inline void skb_reset_inner_network_header(struct sk_buff *skb)
+{
+	skb->inner_network_header = skb->data;
+}
+
+static inline void skb_set_inner_network_header(struct sk_buff *skb,
+						const int offset)
+{
+	skb->inner_network_header = skb->data + offset;
+}
 
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
@@ -1574,11 +1652,21 @@ static inline u32 skb_network_header_len(const struct sk_buff *skb)
 	return skb->transport_header - skb->network_header;
 }
 
+static inline u32 skb_inner_network_header_len(const struct sk_buff *skb)
+{
+	return skb->inner_transport_header - skb->inner_network_header;
+}
+
 static inline int skb_network_offset(const struct sk_buff *skb)
 {
 	return skb_network_header(skb) - skb->data;
 }
 
+static inline int skb_inner_network_offset(const struct sk_buff *skb)
+{
+	return skb_inner_network_header(skb) - skb->data;
+}
+
 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 {
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 60b7aac..4e1d228 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -35,6 +35,16 @@ static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
 	return tcp_hdr(skb)->doff * 4;
 }
 
+static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
+{
+	return (struct tcphdr *)skb_inner_transport_header(skb);
+}
+
+static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb)
+{
+	return inner_tcp_hdr(skb)->doff * 4;
+}
+
 static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 {
 	return (tcp_hdr(skb)->doff - 5) * 4;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0b67d77..9d81de1 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -27,6 +27,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 	return (struct udphdr *)skb_transport_header(skb);
 }
 
+static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_inner_transport_header(skb);
+}
+
 #define UDP_HTABLE_SIZE_MIN		(CONFIG_BASE_SMALL ? 128 : 256)
 
 static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 880722e2..ccbabf5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -682,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
+	new->inner_transport_header = old->inner_transport_header;
+	new->inner_network_header = old->inner_transport_header;
 	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 	new->ooo_okay		= old->ooo_okay;
 	new->l4_rxhash		= old->l4_rxhash;
 	new->no_fcs		= old->no_fcs;
+	new->encapsulation	= old->encapsulation;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -892,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header   += offset;
 	if (skb_mac_header_was_set(new))
 		new->mac_header	      += offset;
+	new->inner_transport_header += offset;
+	new->inner_network_header   += offset;
 #endif
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1089,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		skb->mac_header += off;
+	skb->inner_transport_header += off;
+	skb->inner_network_header += off;
 	/* Only adjust this if it actually is csum_start rather than csum */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->csum_start += nhead;
@@ -1188,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	n->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		n->mac_header += off;
+	n->inner_transport_header += off;
+	n->inner_network_header	   += off;
 #endif
 
 	return n;
-- 
1.7.6.4

^ permalink raw reply related

* [PATCH v3 net-next 0/4] tunneling: Add support for hardware-offloaded encapsulation
From: Joseph Gasparakis @ 2012-12-07  1:56 UTC (permalink / raw)
  To: davem, shemminger, chrisw, gospo
  Cc: Joseph Gasparakis, netdev, linux-kernel, dmitry, saeed.bishara

The series contains updates to add in the NIC Rx and Tx checksumming support
for encapsulated packets.

The sk_buff needs to somehow have information of the inner packet, and adding
three fields for the inner mac, network and transport headers was the prefered
approach. 

Not adding these fields would mean that the drivers would need to parse the
sk_buff data in hot-path, having a negative impact in the performance.

Adding in sk_buff a pointer to the skbuff of the inner packet made sense, but
would be a complicated change as assumptions needed to be made with regards to
helper functions such as skb_clone() skb_copy(). Also code for the existing
encapsulation protocols (such as VXLAN and IP GRE) had to be reworked, so the
decision was to have the simple approach of adding these three fields.

v2 Makes sure that checksumming for IP GRE does not take place if the offload
   flag is set in the skb's netdev features

v3 Fixes issues picked up by the community in v2 and is intended to provide
   ability to demo vxlan Tx offloading with Intel's ixgbe. As part of this, 
   it provides an RFC patch for ixgbe to take advantage of the offloading
   mechanism

   Now it is possible to create a vxlan interface like this:
    #ip link add vxlan0 type vxlan id 40 ttl 10 group 239.1.1.1 dev eth0

   Then turn on/off the encapsulation offload mechanism by doing:
    #ethtool -K eth0 tx-checksum-ip-generic on

   In v3 ipgre work got paused (and therefore patches not included) and I will
   come back to it when vxlan is accepted by the community.

^ permalink raw reply

* Re: [tcpdump-workers] vlan tagged packets and libpcap breakage
From: Eric W. Biederman @ 2012-12-07  1:41 UTC (permalink / raw)
  To: ani; +Cc: Michael Richardson, tcpdump-workers, netdev, Francesco Ruggeri
In-Reply-To: <alpine.OSX.2.00.1212061730200.58531@animac.local>

Ani Sinha <ani@aristanetworks.com> writes:

>>
>> The patch is whitespace damaged.  And one of your test is using ||
>> instead of &&
>
> OK, using alpine now :-)
>>
>> The test should be && not ||.
>
> aargh! I am retarded! Fixed. Hopefully 3rd time is a charm :-)

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

The code looks ok here.  I don't know what format the tcpdump folks
want patches in.  The typically format is an email with [PATCH] in
the subject.

You indentation now comes through clear.

It is a bit odd that you are indenting with spaces instead of tabs
in a file that is indented with tab.  Again libpcap isn't my code so I
don't care but someone else might.

Eric


> ani
>
>
>  pcap-linux.c |   50 +++++++++++++++++++++++++++++++-------------------
>  1 files changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/pcap-linux.c b/pcap-linux.c
> index a42c3ac..b2c1a08 100644
> --- a/pcap-linux.c
> +++ b/pcap-linux.c
> @@ -133,6 +133,7 @@ static const char rcsid[] _U_ =
>  #include <sys/utsname.h>
>  #include <sys/mman.h>
>  #include <linux/if.h>
> +#include <linux/if_packet.h>
>  #include <netinet/in.h>
>  #include <linux/if_ether.h>
>  #include <net/if_arp.h>
> @@ -1543,7 +1544,13 @@ pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
>  				continue;
>
>  			aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
> -			if (aux->tp_vlan_tci == 0)
> +#if defined(TP_STATUS_VLAN_VALID)
> +                        if ((aux->tp_vlan_tci == 0) && !(aux->tp_status & TP_STATUS_VLAN_VALID))
> +#else
> +			if (aux->tp_vlan_tci == 0) /* this is ambigious but without the
> +                                                      TP_STATUS_VLAN_VALID flag, there is
> +                                                      nothing that we can do */
> +#endif
>  				continue;
>
>  			len = packet_len > iov.iov_len ? iov.iov_len : packet_len;
> @@ -3936,7 +3926,12 @@ pcap_read_linux_mmap(pcap_t *handle, int max_packets, pcap_handler callback,
>  		}
>
>  #ifdef HAVE_TPACKET2
> -		if (handle->md.tp_version == TPACKET_V2 && h.h2->tp_vlan_tci &&
> +                if ((handle->md.tp_version == TPACKET_V2) &&
> +#if defined(TP_STATUS_VLAN_VALID)
> +                    (h.h2->tp_vlan_tci || (h.h2->tp_status & TP_STATUS_VLAN_VALID)) &&
> +#else
> +                    h.h2->tp_vlan_tci &&
> +#endif
>  		    handle->md.vlan_offset != -1 &&
>  		    tp_snaplen >= (unsigned int) handle->md.vlan_offset) {
>  			struct vlan_tag *tag;

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox