Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next  11/14] i40e: reorder block declarations in debugfs
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem
  Cc: Shannon Nelson, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

This is a cleanup of the local variables declared at the beginning
of each block.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 387bf94..246b177 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1147,9 +1147,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		i40e_veb_release(pf->veb[i]);
 
 	} else if (strncmp(cmd_buf, "add macaddr", 11) == 0) {
-		u8 ma[6];
-		int vlan = 0;
 		struct i40e_mac_filter *f;
+		int vlan = 0;
+		u8 ma[6];
 		int ret;
 
 		cnt = sscanf(&cmd_buf[11],
@@ -1185,8 +1185,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 				 ma, vlan, vsi_seid, f, ret);
 
 	} else if (strncmp(cmd_buf, "del macaddr", 11) == 0) {
-		u8 ma[6];
 		int vlan = 0;
+		u8 ma[6];
 		int ret;
 
 		cnt = sscanf(&cmd_buf[11],
@@ -1222,9 +1222,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 				 ma, vlan, vsi_seid, ret);
 
 	} else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
-		int v;
-		u16 vid;
 		i40e_status ret;
+		u16 vid;
+		int v;
 
 		cnt = sscanf(&cmd_buf[8], "%i %u", &vsi_seid, &v);
 		if (cnt != 2) {
@@ -1535,10 +1535,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 	} else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
 		   (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
 		struct i40e_fdir_data fd_data;
-		int ret;
 		u16 packet_len, i, j = 0;
 		char *asc_packet;
 		bool add = false;
+		int ret;
 
 		asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
 				     GFP_KERNEL);
@@ -1626,9 +1626,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 			}
 		} else if (strncmp(&cmd_buf[5],
 			   "get local", 9) == 0) {
+			u16 llen, rlen;
 			int ret, i;
 			u8 *buff;
-			u16 llen, rlen;
 			buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
 			if (!buff)
 				goto command_write_done;
@@ -1659,9 +1659,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 			kfree(buff);
 			buff = NULL;
 		} else if (strncmp(&cmd_buf[5], "get remote", 10) == 0) {
+			u16 llen, rlen;
 			int ret, i;
 			u8 *buff;
-			u16 llen, rlen;
 			buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
 			if (!buff)
 				goto command_write_done;
-- 
1.8.3.1

^ permalink raw reply related

* [net-next  10/14] i40e: tweaking icr0 handling for legacy irq
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem
  Cc: Shannon Nelson, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

Fix the overactive irq issue seen in testing and allow use of
the legacy interrupt.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |  1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++-------
 drivers/net/ethernet/intel/i40e/i40e_txrx.c |  3 ++-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index c06a76c..49572dc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -545,6 +545,7 @@ static inline void i40e_dbg_init(void) {}
 static inline void i40e_dbg_exit(void) {}
 #endif /* CONFIG_DEBUG_FS*/
 void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector);
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf);
 int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
 int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 270190a..727d14d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2532,7 +2532,7 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
  * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
  * @pf: board private structure
  **/
-static void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
 {
 	struct i40e_hw *hw = &pf->hw;
 	u32 val;
@@ -2742,14 +2742,14 @@ static irqreturn_t i40e_intr(int irq, void *data)
 
 	icr0 = rd32(hw, I40E_PFINT_ICR0);
 
-	/* if sharing a legacy IRQ, we might get called w/o an intr pending */
-	if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
-		return IRQ_NONE;
-
 	val = rd32(hw, I40E_PFINT_DYN_CTL0);
 	val = val | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK;
 	wr32(hw, I40E_PFINT_DYN_CTL0, val);
 
+	/* if sharing a legacy IRQ, we might get called w/o an intr pending */
+	if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
+		return IRQ_NONE;
+
 	ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA);
 
 	/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
@@ -2763,7 +2763,6 @@ static irqreturn_t i40e_intr(int irq, void *data)
 		qval = rd32(hw, I40E_QINT_TQCTL(0));
 		qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
 		wr32(hw, I40E_QINT_TQCTL(0), qval);
-		i40e_flush(hw);
 
 		if (!test_bit(__I40E_DOWN, &pf->state))
 			napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0]->napi);
@@ -2825,7 +2824,6 @@ static irqreturn_t i40e_intr(int irq, void *data)
 
 	/* re-enable interrupt causes */
 	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
-	i40e_flush(hw);
 	if (!test_bit(__I40E_DOWN, &pf->state)) {
 		i40e_service_event_schedule(pf);
 		i40e_irq_dynamic_enable_icr0(pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 41be7a7..f1f03bc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1142,7 +1142,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 			qval = rd32(hw, I40E_QINT_TQCTL(0));
 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
 			wr32(hw, I40E_QINT_TQCTL(0), qval);
-			i40e_flush(hw);
+
+			i40e_irq_dynamic_enable_icr0(vsi->back);
 		}
 	}
 
-- 
1.8.3.1

^ permalink raw reply related

* [net-next  12/14] i40e: check vsi ptrs before dumping them
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem
  Cc: Shannon Nelson, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

Make sure there really are rings and queues before trying to dump
information in them.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 39 +++++++++++++++-----------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 246b177..ef4cb1c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -234,26 +234,33 @@ static ssize_t i40e_dbg_dump_write(struct file *filp,
 			memcpy(p, vsi, len);
 			p += len;
 
-			len = (sizeof(struct i40e_q_vector)
-				* vsi->num_q_vectors);
-			memcpy(p, vsi->q_vectors, len);
-			p += len;
-
-			len = (sizeof(struct i40e_ring) * vsi->num_queue_pairs);
-			memcpy(p, vsi->tx_rings, len);
-			p += len;
-			memcpy(p, vsi->rx_rings, len);
-			p += len;
+			if (vsi->num_q_vectors) {
+				len = (sizeof(struct i40e_q_vector)
+					* vsi->num_q_vectors);
+				memcpy(p, vsi->q_vectors, len);
+				p += len;
+			}
 
-			for (i = 0; i < vsi->num_queue_pairs; i++) {
-				len = sizeof(struct i40e_tx_buffer);
-				memcpy(p, vsi->tx_rings[i]->tx_bi, len);
+			if (vsi->num_queue_pairs) {
+				len = (sizeof(struct i40e_ring) *
+				      vsi->num_queue_pairs);
+				memcpy(p, vsi->tx_rings, len);
+				p += len;
+				memcpy(p, vsi->rx_rings, len);
 				p += len;
 			}
-			for (i = 0; i < vsi->num_queue_pairs; i++) {
+
+			if (vsi->tx_rings[0]) {
+				len = sizeof(struct i40e_tx_buffer);
+				for (i = 0; i < vsi->num_queue_pairs; i++) {
+					memcpy(p, vsi->tx_rings[i]->tx_bi, len);
+					p += len;
+				}
 				len = sizeof(struct i40e_rx_buffer);
-				memcpy(p, vsi->rx_rings[i]->rx_bi, len);
-				p += len;
+				for (i = 0; i < vsi->num_queue_pairs; i++) {
+					memcpy(p, vsi->rx_rings[i]->rx_bi, len);
+					p += len;
+				}
 			}
 
 			/* macvlan filter list */
-- 
1.8.3.1

^ permalink raw reply related

* [net-next  13/14] i40e: use pf_id for pf function id in qtx_ctl
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem
  Cc: Shannon Nelson, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@intel.com>

Simplify code by using an already existing variable.

Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        | 4 ++--
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 727d14d..d889342 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2174,8 +2174,8 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 
 	/* Now associate this queue with this PCI function */
 	qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
-	qtx_ctl |= ((hw->hmc.hmc_fn_id << I40E_QTX_CTL_PF_INDX_SHIFT)
-						& I40E_QTX_CTL_PF_INDX_MASK);
+	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
+		    I40E_QTX_CTL_PF_INDX_MASK);
 	wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
 	i40e_flush(hw);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 202139f..4ee6d6a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -383,7 +383,7 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 
 	/* associate this queue with the PCI VF function */
 	qtx_ctl = I40E_QTX_CTL_VF_QUEUE;
-	qtx_ctl |= ((hw->hmc.hmc_fn_id << I40E_QTX_CTL_PF_INDX_SHIFT)
+	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT)
 		    & I40E_QTX_CTL_PF_INDX_MASK);
 	qtx_ctl |= (((vf->vf_id + hw->func_caps.vf_base_id)
 		     << I40E_QTX_CTL_VFVM_INDX_SHIFT)
-- 
1.8.3.1

^ permalink raw reply related

* [net-next  09/14] i40e: refactor fdir setup function
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem; +Cc: Jesse Brandeburg, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jesse Brandeburg <jesse.brandeburg@intel.com>

This function made a lot of unnecessary cpu_to_xxx(foo) calls and, to make
matters worse, each of these calls caused a lot of line wrapping.

Fix look and feel via a refactor of this function.  No functional
changes.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 78 ++++++++++++-----------------
 1 file changed, 33 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 8fc313c..41be7a7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -37,6 +37,7 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
 }
 
+#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
  * i40e_program_fdir_filter - Program a Flow Director filter
  * @fdir_input: Packet data that will be filter parameters
@@ -50,6 +51,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	struct i40e_tx_buffer *tx_buf;
 	struct i40e_tx_desc *tx_desc;
 	struct i40e_ring *tx_ring;
+	unsigned int fpt, dcc;
 	struct i40e_vsi *vsi;
 	struct device *dev;
 	dma_addr_t dma;
@@ -68,7 +70,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	dev = tx_ring->dev;
 
 	dma = dma_map_single(dev, fdir_data->raw_packet,
-				I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
+			     I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, dma))
 		goto dma_fail;
 
@@ -77,74 +79,61 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
 	tx_buf = &tx_ring->tx_bi[i];
 
-	i++;
-	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+	tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
 
-	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index
-					     << I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
-					     & I40E_TXD_FLTR_QW0_QINDEX_MASK);
+	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
+	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
 
-	fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->flex_off
-					    << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
-					    & I40E_TXD_FLTR_QW0_FLEXOFF_MASK);
+	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
+	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
 
-	fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->pctype
-					     << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
-					     & I40E_TXD_FLTR_QW0_PCTYPE_MASK);
+	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
+	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
 
 	/* Use LAN VSI Id if not programmed by user */
 	if (fdir_data->dest_vsi == 0)
-		fdir_desc->qindex_flex_ptype_vsi |=
-					  cpu_to_le32((pf->vsi[pf->lan_vsi]->id)
-					   << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
+		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
 	else
-		fdir_desc->qindex_flex_ptype_vsi |=
-			cpu_to_le32((((u32)fdir_data->dest_vsi) <<
-					  I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
-				    I40E_TXD_FLTR_QW0_DEST_VSI_MASK);
+		fpt |= ((u32)fdir_data->dest_vsi <<
+			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
+		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
+
+	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
 
-	fdir_desc->dtype_cmd_cntindex =
-				    cpu_to_le32(I40E_TX_DESC_DTYPE_FILTER_PROG);
+	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
 
 	if (add)
-		fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-				       I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE
-					<< I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
 	else
-		fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-					   I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE
-					   << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
 
-	fdir_desc->dtype_cmd_cntindex |= cpu_to_le32((fdir_data->dest_ctl
-					  << I40E_TXD_FLTR_QW1_DEST_SHIFT)
-					  & I40E_TXD_FLTR_QW1_DEST_MASK);
+	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
+	       I40E_TXD_FLTR_QW1_DEST_MASK;
 
-	fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
-		     (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
-		      & I40E_TXD_FLTR_QW1_FD_STATUS_MASK);
+	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
+	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
 
 	if (fdir_data->cnt_index != 0) {
-		fdir_desc->dtype_cmd_cntindex |=
-				    cpu_to_le32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
-		fdir_desc->dtype_cmd_cntindex |=
-			cpu_to_le32((((u32)fdir_data->cnt_index) <<
-					   I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
-				    I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
+		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+		dcc |= ((u32)fdir_data->cnt_index <<
+			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+		       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 	}
 
+	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
 
 	/* Now program a dummy descriptor */
 	i = tx_ring->next_to_use;
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 
-	i++;
-	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+	tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
 
 	tx_desc->buffer_addr = cpu_to_le64(dma);
-	td_cmd = I40E_TX_DESC_CMD_EOP |
-		 I40E_TX_DESC_CMD_RS  |
-		 I40E_TX_DESC_CMD_DUMMY;
+	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
 
 	tx_desc->cmd_type_offset_bsz =
 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
@@ -1254,7 +1243,6 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
 }
 
-#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
  * @skb:     send buffer
-- 
1.8.3.1

^ permalink raw reply related

* [net-next  14/14] i40e: Bump version
From: Jeff Kirsher @ 2013-10-18 13:23 UTC (permalink / raw)
  To: a, davem
  Cc: Catherine Sullivan, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1382102598-11343-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Catherine Sullivan <catherine.sullivan@intel.com>

Update the driver version.

Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d889342..41a79df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -36,7 +36,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 0
 #define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 10
+#define DRV_VERSION_BUILD 11
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH nf-next] netfilter: xtables: lightweight process control group matching
From: Daniel Borkmann @ 2013-10-18 13:28 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev, Tejun Heo, cgroups
In-Reply-To: <cover.1382101225.git.dborkman@redhat.com>

It would be useful e.g. in a server or desktop environment to have
a facility in the notion of fine-grained "per application" or "per
application group" firewall policies. Probably, users in the mobile/
embedded area (e.g. Android based) with different security policy
requirements for application groups could have great benefit from
that as well. For example, with a little bit of configuration effort,
an admin could whitelist well-known applications, and thus block
otherwise unwanted "hard-to-track" applications like [1] from a
user's machine.

Implementation of PID-based matching would not be appropriate
as they frequently change, and child tracking would make that
even more complex and ugly. Cgroups would be a perfect candidate
for accomplishing that as they associate a set of tasks with a
set of parameters for one or more subsystems, in our case the
netfilter subsystem, which, of course, can be combined with other
cgroup subsystems into something more complex.

As mentioned, to overcome this constraint, such processes could
be placed into one or multiple cgroups where different fine-grained
rules can be defined depending on the application scenario, while
e.g. everything else that is not part of that could be dropped (or
vice versa), thus making life harder for unwanted processes to
communicate to the outside world. So, we make use of cgroups here
to track jobs and limit their resources in terms of iptables
policies; in other words, limiting what they are allowed to
communicate.

Minimal, basic usage example (many other iptables options can be
applied obviously):

 1) Configuring cgroups:

  mkdir /sys/fs/cgroup/net_filter
  mount -t cgroup -o net_filter net_filter /sys/fs/cgroup/net_filter
  mkdir /sys/fs/cgroup/net_filter/0
  echo 1 > /sys/fs/cgroup/net_filter/0/net_filter.fwid

 2) Configuring netfilter:

  iptables -A OUTPUT -m cgroup ! --cgroup 1 -j DROP

 3) Running applications:

  ping 208.67.222.222  <pid:1799>
  echo 1799 > /sys/fs/cgroup/net_filter/0/tasks
  64 bytes from 208.67.222.222: icmp_seq=44 ttl=49 time=11.9 ms
  ...

  ping 208.67.220.220  <pid:1804>
  ping: sendmsg: Operation not permitted
  ...
  echo 1804 > /sys/fs/cgroup/net_filter/0/tasks
  64 bytes from 208.67.220.220: icmp_seq=89 ttl=56 time=19.0 ms
  ...

Of course, real-world deployments would make use of cgroups user
space toolsuite, or own custom policy daemons dynamically moving
applications from/to various net_filter cgroups.

Design considerations appendix:

Based on the discussion from [2], [3], it seems the best tradeoff
imho to make this a subsystem, here's why:

netfilter is a large enough and ubiquitous subsystem, meaning it
is not somewhere in a niche, and enabled/shipped on most machines.
It is true that the decision making on fwid is "outsourced" to
netfilter itself, but that does not necessarily need to be
considered as a bad thing to delegate and reuse as much as possible.
The matching performance in the critical path is just a simple
comparison of fwid tags, nothing more, thus resulting in a good
performance suited for high-speed networking. Moreover, by simply
transferring fwids between user- and kernel space, we can have the
ruleset as packed as possible, giving an optimal footprint for
large rulesets using this feature. The alternative draft that we
have proposed in [3] comes at the cost of exposing some of the
cgroups internals outside of cgroups to make it work, at least a
higher memory footprint for transferal of rules and even worse a
lower performance as more work needs to be done in the matching
critical path, that is traversing all cgroups a task belongs to
to find the one of our interest. Moreover, from the usability
point of view, it seems less intuitive, rather more confusing
than the approach presented here. Therefore, I consider this design
the better and less intrusive tradeoff to go with.

  [1] http://www.blackhat.com/presentations/bh-europe-06/bh-eu-06-biondi/bh-eu-06-biondi-up.pdf
  [2] http://patchwork.ozlabs.org/patch/280687/
  [3] http://patchwork.ozlabs.org/patch/282477/

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: cgroups@vger.kernel.org
---
 v1->v2:
  - Updated commit message, rebased
  - Applied Gao Feng's feedback from [2]

 Note: iptables part is still available in http://patchwork.ozlabs.org/patch/280690/

 Documentation/cgroups/00-INDEX           |   2 +
 Documentation/cgroups/net_filter.txt     |  27 +++++
 include/linux/cgroup_subsys.h            |   5 +
 include/net/netfilter/xt_cgroup.h        |  58 ++++++++++
 include/net/sock.h                       |   3 +
 include/uapi/linux/netfilter/Kbuild      |   1 +
 include/uapi/linux/netfilter/xt_cgroup.h |  11 ++
 net/core/scm.c                           |   2 +
 net/core/sock.c                          |  14 +++
 net/netfilter/Kconfig                    |   8 ++
 net/netfilter/Makefile                   |   1 +
 net/netfilter/xt_cgroup.c                | 177 +++++++++++++++++++++++++++++++
 12 files changed, 309 insertions(+)
 create mode 100644 Documentation/cgroups/net_filter.txt
 create mode 100644 include/net/netfilter/xt_cgroup.h
 create mode 100644 include/uapi/linux/netfilter/xt_cgroup.h
 create mode 100644 net/netfilter/xt_cgroup.c

diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX
index bc461b6..14424d2 100644
--- a/Documentation/cgroups/00-INDEX
+++ b/Documentation/cgroups/00-INDEX
@@ -20,6 +20,8 @@ memory.txt
 	- Memory Resource Controller; design, accounting, interface, testing.
 net_cls.txt
 	- Network classifier cgroups details and usages.
+net_filter.txt
+	- Network firewalling (netfilter) cgroups details and usages.
 net_prio.txt
 	- Network priority cgroups details and usages.
 resource_counter.txt
diff --git a/Documentation/cgroups/net_filter.txt b/Documentation/cgroups/net_filter.txt
new file mode 100644
index 0000000..22759e4
--- /dev/null
+++ b/Documentation/cgroups/net_filter.txt
@@ -0,0 +1,27 @@
+Netfilter cgroup
+----------------
+
+The netfilter cgroup provides an interface to aggregate jobs
+to a particular netfilter tag, that can be used to apply
+various iptables/netfilter policies for those jobs in order
+to limit resources/abilities for network communication.
+
+Creating a net_filter cgroups instance creates a net_filter.fwid
+file. The value of net_filter.fwid is initialized to 0 on
+default (so only global iptables/netfilter policies apply).
+You can write a unique decimal fwid tag into net_filter.fwid
+file, and use that tag along with iptables' --cgroup option.
+
+Minimal/basic usage example:
+
+1) Configuring cgroup:
+
+ mkdir /sys/fs/cgroup/net_filter
+ mount -t cgroup -o net_filter net_filter /sys/fs/cgroup/net_filter
+ mkdir /sys/fs/cgroup/net_filter/0
+ echo 1 > /sys/fs/cgroup/net_filter/0/net_filter.fwid
+ echo [pid] > /sys/fs/cgroup/net_filter/0/tasks
+
+2) Configuring netfilter:
+
+ iptables -A OUTPUT -m cgroup ! --cgroup 1 -p tcp --dport 80 -j DROP
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index b613ffd..ef58217 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -50,6 +50,11 @@ SUBSYS(net_prio)
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
+
+#if IS_SUBSYS_ENABLED(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+SUBSYS(net_filter)
+#endif
+
 /*
  * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
diff --git a/include/net/netfilter/xt_cgroup.h b/include/net/netfilter/xt_cgroup.h
new file mode 100644
index 0000000..b2c702f
--- /dev/null
+++ b/include/net/netfilter/xt_cgroup.h
@@ -0,0 +1,58 @@
+#ifndef _XT_CGROUP_H
+#define _XT_CGROUP_H
+
+#include <linux/types.h>
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+struct cgroup_nf_state {
+	struct cgroup_subsys_state css;
+	u32 fwid;
+};
+
+void sock_update_fwid(struct sock *sk);
+
+#if IS_BUILTIN(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+static inline u32 task_fwid(struct task_struct *p)
+{
+	u32 fwid;
+
+	if (in_interrupt())
+		return 0;
+
+	rcu_read_lock();
+	fwid = container_of(task_css(p, net_filter_subsys_id),
+			    struct cgroup_nf_state, css)->fwid;
+	rcu_read_unlock();
+
+	return fwid;
+}
+#elif IS_MODULE(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+static inline u32 task_fwid(struct task_struct *p)
+{
+	struct cgroup_subsys_state *css;
+	u32 fwid = 0;
+
+	if (in_interrupt())
+		return 0;
+
+	rcu_read_lock();
+	css = task_css(p, net_filter_subsys_id);
+	if (css)
+		fwid = container_of(css, struct cgroup_nf_state, css)->fwid;
+	rcu_read_unlock();
+
+	return fwid;
+}
+#endif
+#else /* !CONFIG_NETFILTER_XT_MATCH_CGROUP */
+static inline u32 task_fwid(struct task_struct *p)
+{
+	return 0;
+}
+
+#define sock_update_fwid(sk)
+#endif /* CONFIG_NETFILTER_XT_MATCH_CGROUP */
+#endif /* _XT_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index e3bf213..f7da4b4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -387,6 +387,9 @@ struct sock {
 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
 	__u32			sk_cgrp_prioidx;
 #endif
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+	__u32			sk_cgrp_fwid;
+#endif
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 1749154..94a4890 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -37,6 +37,7 @@ header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_addrtype.h
 header-y += xt_bpf.h
+header-y += xt_cgroup.h
 header-y += xt_cluster.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
new file mode 100644
index 0000000..43acb7e
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_cgroup.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_XT_CGROUP_H
+#define _UAPI_XT_CGROUP_H
+
+#include <linux/types.h>
+
+struct xt_cgroup_info {
+	__u32 id;
+	__u32 invert;
+};
+
+#endif /* _UAPI_XT_CGROUP_H */
diff --git a/net/core/scm.c b/net/core/scm.c
index b442e7e..f08672a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
 #include <net/sock.h>
 #include <net/compat.h>
 #include <net/scm.h>
+#include <net/netfilter/xt_cgroup.h>
 #include <net/cls_cgroup.h>
 
 
@@ -290,6 +291,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		/* Bump the usage count and install the file. */
 		sock = sock_from_file(fp[i], &err);
 		if (sock) {
+			sock_update_fwid(sock->sk);
 			sock_update_netprioidx(sock->sk);
 			sock_update_classid(sock->sk);
 		}
diff --git a/net/core/sock.c b/net/core/sock.c
index 2bd9b3f..524a376 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -125,6 +125,7 @@
 #include <linux/skbuff.h>
 #include <net/net_namespace.h>
 #include <net/request_sock.h>
+#include <net/netfilter/xt_cgroup.h>
 #include <net/sock.h>
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
@@ -1337,6 +1338,18 @@ void sock_update_netprioidx(struct sock *sk)
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
 
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_CGROUP)
+void sock_update_fwid(struct sock *sk)
+{
+	u32 fwid;
+
+	fwid = task_fwid(current);
+	if (fwid != sk->sk_cgrp_fwid)
+		sk->sk_cgrp_fwid = fwid;
+}
+EXPORT_SYMBOL(sock_update_fwid);
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
@@ -1363,6 +1376,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 
 		sock_update_classid(sk);
 		sock_update_netprioidx(sk);
+		sock_update_fwid(sk);
 	}
 
 	return sk;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6..d276ff4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -806,6 +806,14 @@ config NETFILTER_XT_MATCH_BPF
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_CGROUP
+	tristate '"control group" match support'
+	depends on NETFILTER_ADVANCED
+	depends on CGROUPS
+	---help---
+	Socket/process control group matching allows you to match locally
+	generated packets based on which control group processes belong to.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12..12f014f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -124,6 +124,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
new file mode 100644
index 0000000..249c7ee
--- /dev/null
+++ b/net/netfilter/xt_cgroup.c
@@ -0,0 +1,177 @@
+/*
+ * Xtables module to match the process control group.
+ *
+ * Might be used to implement individual "per-application" firewall
+ * policies in contrast to global policies based on control groups.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ * (C) 2013 Thomas Graf <tgraf@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/cgroup.h>
+#include <linux/fdtable.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_cgroup.h>
+#include <net/netfilter/xt_cgroup.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("Xtables: process control group matching");
+MODULE_ALIAS("ipt_cgroup");
+MODULE_ALIAS("ip6t_cgroup");
+
+static int cgroup_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info *info = par->matchinfo;
+
+	if (info->invert & ~1)
+		return -EINVAL;
+
+	return info->id ? 0 : -EINVAL;
+}
+
+static bool
+cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info *info = par->matchinfo;
+
+	if (skb->sk == NULL)
+		return false;
+
+	return (info->id == skb->sk->sk_cgrp_fwid) ^ info->invert;
+}
+
+static struct xt_match cgroup_mt_reg __read_mostly = {
+	.name       = "cgroup",
+	.revision   = 0,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = cgroup_mt_check,
+	.match      = cgroup_mt,
+	.matchsize  = sizeof(struct xt_cgroup_info),
+	.me         = THIS_MODULE,
+	.hooks      = (1 << NF_INET_LOCAL_OUT) |
+	              (1 << NF_INET_POST_ROUTING),
+};
+
+static inline struct cgroup_nf_state *
+css_nf_state(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct cgroup_nf_state, css) : NULL;
+}
+
+static struct cgroup_subsys_state *
+cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct cgroup_nf_state *cs;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return ERR_PTR(-ENOMEM);
+
+	return &cs->css;
+}
+
+static int cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	struct cgroup_nf_state *cs = css_nf_state(css);
+	struct cgroup_nf_state *parent = css_nf_state(css_parent(css));
+
+	if (parent)
+		cs->fwid = parent->fwid;
+
+	return 0;
+}
+
+static void cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	kfree(css_nf_state(css));
+}
+
+static int cgroup_fwid_update(const void *v, struct file *file, unsigned n)
+{
+	int err;
+	struct socket *sock = sock_from_file(file, &err);
+
+	if (sock)
+		sock->sk->sk_cgrp_fwid = (u32)(unsigned long) v;
+
+	return 0;
+}
+
+static u64 cgroup_fwid_read(struct cgroup_subsys_state *css,
+			    struct cftype *cft)
+{
+	return css_nf_state(css)->fwid;
+}
+
+static int cgroup_fwid_write(struct cgroup_subsys_state *css,
+			     struct cftype *cft, u64 id)
+{
+	css_nf_state(css)->fwid = (u32) id;
+
+	return 0;
+}
+
+static void cgroup_attach(struct cgroup_subsys_state *css,
+			  struct cgroup_taskset *tset)
+{
+	struct cgroup_nf_state *cs = css_nf_state(css);
+	void *v = (void *)(unsigned long) cs->fwid;
+	struct task_struct *p;
+
+	cgroup_taskset_for_each(p, css, tset) {
+		task_lock(p);
+		iterate_fd(p->files, 0, cgroup_fwid_update, v);
+		task_unlock(p);
+	}
+}
+
+static struct cftype net_filter_ss_files[] = {
+	{
+		.name		= "fwid",
+		.read_u64	= cgroup_fwid_read,
+		.write_u64	= cgroup_fwid_write,
+	},
+	{ }
+};
+
+struct cgroup_subsys net_filter_subsys = {
+	.name		= "net_filter",
+	.css_alloc	= cgroup_css_alloc,
+	.css_online	= cgroup_css_online,
+	.css_free	= cgroup_css_free,
+	.attach		= cgroup_attach,
+	.subsys_id	= net_filter_subsys_id,
+	.base_cftypes	= net_filter_ss_files,
+	.module		= THIS_MODULE,
+};
+
+static int __init cgroup_mt_init(void)
+{
+	int ret = cgroup_load_subsys(&net_filter_subsys);
+	if (ret)
+		goto out;
+
+	ret = xt_register_match(&cgroup_mt_reg);
+	if (ret)
+		cgroup_unload_subsys(&net_filter_subsys);
+out:
+	return ret;
+}
+
+static void __exit cgroup_mt_exit(void)
+{
+	xt_unregister_match(&cgroup_mt_reg);
+	cgroup_unload_subsys(&net_filter_subsys);
+}
+
+module_init(cgroup_mt_init);
+module_exit(cgroup_mt_exit);
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH v2 net 2/4] bridge: Apply the PVID to priority-tagged frames
From: Toshiaki Makita @ 2013-10-18 14:01 UTC (permalink / raw)
  To: vyasevic
  Cc: Toshiaki Makita, Stephen Hemminger, David S . Miller, netdev,
	Fernando Luis Vazquez Cao
In-Reply-To: <526020B4.80104@redhat.com>

On Thu, 2013-10-17 at 13:39 -0400, Vlad Yasevich wrote:
> On 10/17/2013 08:14 AM, Toshiaki Makita wrote:
> > On Wed, 2013-10-16 at 12:16 -0400, Vlad Yasevich wrote:
> >> On 10/16/2013 11:55 AM, Stephen Hemminger wrote:
> >>> On Wed, 16 Oct 2013 17:07:14 +0900
> >>> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
> >>>
> >>>> IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames
> >>>> use the PVID for the port as its VID.
> >>>> (See IEEE 802.1Q-2011 6.9.1 and Table 9-2)
> >>>>
> >>>> Apply the PVID to not only untagged frames but also priority-tagged frames.
> >>>>
> >>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> >>>> ---
> >>>>    net/bridge/br_vlan.c | 27 ++++++++++++++++++++-------
> >>>>    1 file changed, 20 insertions(+), 7 deletions(-)
> >>>>
> >>>> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> >>>> index 21b6d21..5a9c44a 100644
> >>>> --- a/net/bridge/br_vlan.c
> >>>> +++ b/net/bridge/br_vlan.c
> >>>> @@ -189,6 +189,8 @@ out:
> >>>>    bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
> >>>>    			struct sk_buff *skb, u16 *vid)
> >>>>    {
> >>>> +	int err;
> >>>> +
> >>>>    	/* If VLAN filtering is disabled on the bridge, all packets are
> >>>>    	 * permitted.
> >>>>    	 */
> >>>> @@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
> >>>>    	if (!v)
> >>>>    		return false;
> >>>>
> >>>> -	if (br_vlan_get_tag(skb, vid)) {
> >>>> +	err = br_vlan_get_tag(skb, vid);
> >>>> +	if (!*vid) {
> >>>>    		u16 pvid = br_get_pvid(v);
> >>>
> >>> Ok, but it looks like br_vlan_get_tag() could be cleaner if it just returned
> >>> the tag, and there was another br_vlan_tag_present() function.
> >
> > Thank you for reviewing.
> > I agree with you.
> > I had been afraid that if it affects other codes because
> > br_vlan_get_tag() is used in many places else, but now I have decided
> > not to hesitate to change its signature and behavior.
> >
> >>
> >> I was just thinking about that as well.  If we make br_vlan_get_tag()
> >> return either the actual tag (if the packet is tagged), or the pvid
> >> if (untagged/prio_tagged), then we can skip most of this.
> >
> > Hmm... maybe I don't fully understand you.
> >
> > Is what you intend something like
> > 	br_allowed_ingress(...) {
> > 		...
> > 		vid = br_vlan_get_tag(skb, v);
> > 		if (!tagged(skb)) put_tag(skb, vid); /* untagged */
> > 		else if (!get_vid(skb)) update_vid(skb, vid); /* prio_tagged */
> > 		...
> > 	}
> >
> > 	br_vlan_get_tag(skb, v) {
> > 		if (tagged(skb)) {
> > 			vid = get_vid(skb);
> > 			if (!vid) return get_pvid(v); /* prio_tagged */
> > 			return vid;
> > 		}
> > 		return get_pvid(v); /* untagged */
> > 	}
> >
> > This needs double check for prio_tagged at br_allowed_ingress() and
> > br_vlan_get_tag().
> >
> > Or if we modify skb->vlan_tci at br_vlan_get_tag(), isn't it a little
> > dangerous to other codes that use this function in order to just get
> > vid?
> >
> > I am thinking it makes things simple that br_vlan_get_tag() returns 0 if
> > (untagged/prio_tagged).
> >
> > 	br_allowed_ingress(...) {
> > 		...
> > 		vid = br_vlan_get_tag(skb);
> > 		if (!vid) {
> > 			vid = get_pvid(v);
> > 			if (!tagged(skb)) put_tag(skb, vid);/* untagged */
> > 			else update_vid(skb, vid); /* prio_tagged */
> > 		}
> > 		...
> > 	}
> >
> > 	br_vlan_get_tag(skb) {
> > 		if (tagged(skb)) return get_vid(skb);
> > 		return 0;
> > 	}
> 
> With this you end up checking if the packet is tagged quite a lot of times.
> 
> What I am thinking is that once we perform a get_tag, we should get
> the vlan tag that the current packet belongs to.  We can then safely
> use that tag everywhere and not have to worry too much about it.
> 
> We can pass that tag to br_allowed_ingress to verify that it is
> permitted to enter.
> 
> You made a valid point about multicast code using br_vlan_get_tag
> incorrectly and I plan on addressing that.
> 
> As it is, the current series addresses bugs in the implementation
> that should be fixed.
> 
> We can make the code better/nicer as a next step.

OK, you seem to have a better idea to avoid checking if the packet is
tagged many times.

If this patch series is acceptable just as a bug fix, I'll wait for your
proposal of improvement and fixing wrong multicast codes next time.

Thanks,

Toshiaki Makita

> 
> -vlad
> 
> >
> > Thanks,
> >
> > Toshiaki Makita
> >
> >>
> >>>
> >>> Also, does this still work if CONFIG_BRIDGE_VLAN_FILTERING is disabled?
> >>
> >> Yes.  br_allowed_ingress becomes an inline if the config option is disabled.
> >>
> >> -vlad
> >
> >
> 

^ permalink raw reply

* for 3.0 : please add "c16a98e ipv6: tcp: fix panic in SYN processing"
From: Willy Tarreau @ 2013-10-18 14:04 UTC (permalink / raw)
  To: Greg Kroah-Hartman, David Miller; +Cc: Eric Dumazet, netdev, stable

Greg, David,

one of our customers faced a panic in latest 2.6.32 when both somaxconn
and the listen backlog are large on an IPv6 socket. It was also reported
by one haproxy user on the latest RHEL6 kernel a few months ago. We found
that the same bug affects 3.0 up to and including 3.0.100.

Eric had already spotted that bug and fixed it in 3.2 with the following
patch :

  commit c16a98ed91597b40b22b540c6517103497ef8e74
  Author: Eric Dumazet <eric.dumazet@gmail.com>
  Date:   Wed Nov 23 15:49:31 2011 -0500

    ipv6: tcp: fix panic in SYN processing

    commit 72a3effaf633bc ([NET]: Size listen hash tables using backlog
    hint) added a bug allowing inet6_synq_hash() to return an out of bound
    array index, because of u16 overflow.

    Bug can happen if system admins set net.core.somaxconn &
    net.ipv4.tcp_max_syn_backlog sysctls to values greater than 65536

    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

In practice, the bug extends to lower values as well (32768 and above),
because reqsk_queue_alloc() can round the number of entries to double of
the backlog by doing roundup_pow_of_two(backlog+1), resulting in
inet6_csk_search_req() calling inet6_synq_hash() with too large an integer.

Could we please apply it to 3.0 before it finishes its life ?

Thanks,
Willy

^ permalink raw reply

* Re: [net-next  03/14] i40e: assign correct vector to VF
From: Sergei Shtylyov @ 2013-10-18 14:08 UTC (permalink / raw)
  To: Jeff Kirsher, a, davem
  Cc: Mitch Williams, netdev, gospo, sassmann, Jesse Brandeburg
In-Reply-To: <1382102598-11343-4-git-send-email-jeffrey.t.kirsher@intel.com>

Hello.

On 18-10-2013 17:23, Jeff Kirsher wrote:

> From: Mitch Williams <mitch.a.williams@intel.com>

> Correct math error when assigning MSI-X vectors to VFs. The vectors-per-vf
> value reported by the hardware already conveniently reports one less than the
> actual value.

> Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
> Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Tested-by: Sibai Li <sibai.li@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
>   drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> index 8967e58..202139f 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> @@ -251,7 +251,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_idx,
>   		reg_idx = I40E_VPINT_LNKLST0(vf->vf_id);
>   	else
>   		reg_idx = I40E_VPINT_LNKLSTN(
> -			    ((pf->hw.func_caps.num_msix_vectors_vf - 1)
> +			    ((pf->hw.func_caps.num_msix_vectors_vf)

    Why keep the parens if you've removed -1?

WBR, Sergei

^ permalink raw reply

* Re: [net-next  11/14] i40e: reorder block declarations in debugfs
From: Sergei Shtylyov @ 2013-10-18 14:13 UTC (permalink / raw)
  To: Jeff Kirsher, a, davem
  Cc: Shannon Nelson, netdev, gospo, sassmann, Jesse Brandeburg
In-Reply-To: <1382102598-11343-12-git-send-email-jeffrey.t.kirsher@intel.com>

Hello.

On 18-10-2013 17:23, Jeff Kirsher wrote:

> From: Shannon Nelson <shannon.nelson@intel.com>

> This is a cleanup of the arguments declared at the beginning
> of each function.

    I hope you meant "local variables" because "arguments" are the values 
passed to a function.

> Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
> Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

WBR, Sergei

^ permalink raw reply

* Re: [PATCH net] bridge: clean the nf_bridge status when forwarding the skb
From: Vlad Yasevich @ 2013-10-18 14:32 UTC (permalink / raw)
  To: Antonio Quartulli, Pablo Neira Ayuso
  Cc: Antonio Quartulli, David S. Miller, netdev@vger.kernel.org,
	Stephen Hemminger
In-Reply-To: <20131018113555.GK2596@neomailbox.net>

On 10/18/2013 07:35 AM, Antonio Quartulli wrote:
> On Fri, Oct 18, 2013 at 01:10:41PM +0200, Pablo Neira Ayuso wrote:
>> On Thu, Oct 17, 2013 at 01:37:35PM +0200, Antonio Quartulli wrote:
>>> On Thu, Oct 17, 2013 at 04:28:57AM -0700, Pablo Neira Ayuso wrote:
>>>> Hi,
>>>>> +
>>>>> +/**
>>>>> + * br_netfilter_skb_free - clean the NF bridge data in an skb
>>>>> + * @skb: the skb which the data to free belongs to
>>>>> + */
>>>>> +void br_netfilter_skb_free(struct sk_buff *skb)
>>>>> +{
>>>>> +	nf_bridge_put(skb->nf_bridge);
>>>>> +	skb->nf_bridge = NULL;
>>>>> +}
>>>>
>>>> This should be nf_reset.
>>>
>>> You think I should directly use nf_reset instead of this function?
>>>
>>> I see that nf_reset() cleans up the conntrack part too: does it also become
>>> useless once the packet exits the bridge interface?
>>
>> The conntrack should not be attached if it's forwarded to another netif,
>> see dev_forward_skb.
>>
>> But I'm not sure what scenario you're trying to handle with this
>> change, if you could please elaborate.
>
>
> This is a sample scenario (nf bridge is on):
>
> [eth0] ---> [br0] ---> [bat0] ---> [br1]
>

Another possible config that is out in the wild is

[eth0] ---> [br0] ---> [vlanX] ----> [br1]


> where the relation '[a] ---> [b]' means 'a is enslaved in b' (bat0 is a
> batman-adv virtual interface..in this situation it should not matter: it
> just removes a header from an incoming skb and delivers it).
>
> The problem I was having was due to an skb entering br0 first and br1 later.
> When reaching br1 skb->nf_bridge was != NULL because of the previous processing
> in br0.
>

Doesn't br_nf_pre_routing already take care of this for you?  It will 
drop the ref on the current nf_bridge and allocate a new one.  Is that
not sufficient?

-vlad

> To clarify, the packet arriving on eth0 is 'delivered' to br0. It is not
> forwarded to another port of the bridge. Therefore I am not sure that we should
> clean the conntrack part too.
>
>>
>> Perhaps your fix is more conservative to avoid breaking strange setups
>> that have been relying on this behaviour. I know of people deploying
>> strange configurations using netfilter bridge.
>>
>
> could be.
>
> Cheers,
>

^ permalink raw reply

* Re: for 3.0 : please add "c16a98e ipv6: tcp: fix panic in SYN processing"
From: Greg Kroah-Hartman @ 2013-10-18 14:34 UTC (permalink / raw)
  To: Willy Tarreau; +Cc: David Miller, Eric Dumazet, netdev, stable
In-Reply-To: <20131018140442.GA16883@1wt.eu>

On Fri, Oct 18, 2013 at 04:04:42PM +0200, Willy Tarreau wrote:
> Greg, David,
> 
> one of our customers faced a panic in latest 2.6.32 when both somaxconn
> and the listen backlog are large on an IPv6 socket. It was also reported
> by one haproxy user on the latest RHEL6 kernel a few months ago. We found
> that the same bug affects 3.0 up to and including 3.0.100.
> 
> Eric had already spotted that bug and fixed it in 3.2 with the following
> patch :
> 
>   commit c16a98ed91597b40b22b540c6517103497ef8e74
>   Author: Eric Dumazet <eric.dumazet@gmail.com>
>   Date:   Wed Nov 23 15:49:31 2011 -0500
> 
>     ipv6: tcp: fix panic in SYN processing
>     
>     commit 72a3effaf633bc ([NET]: Size listen hash tables using backlog
>     hint) added a bug allowing inet6_synq_hash() to return an out of bound
>     array index, because of u16 overflow.
>     
>     Bug can happen if system admins set net.core.somaxconn &
>     net.ipv4.tcp_max_syn_backlog sysctls to values greater than 65536
>     
>     Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>     Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> In practice, the bug extends to lower values as well (32768 and above),
> because reqsk_queue_alloc() can round the number of entries to double of
> the backlog by doing roundup_pow_of_two(backlog+1), resulting in
> inet6_csk_search_req() calling inet6_synq_hash() with too large an integer.
> 
> Could we please apply it to 3.0 before it finishes its life ?

Unless David objects, I can queue this up just in time for the last
3.0.stable.

David?

^ permalink raw reply

* Re: [net-next  11/14] i40e: reorder block declarations in debugfs
From: Jeff Kirsher @ 2013-10-18 14:34 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: a, davem, Shannon Nelson, netdev, gospo, sassmann,
	Jesse Brandeburg
In-Reply-To: <52614206.7050404@cogentembedded.com>

[-- Attachment #1: Type: text/plain, Size: 397 bytes --]

On Fri, 2013-10-18 at 18:13 +0400, Sergei Shtylyov wrote:
> On 18-10-2013 17:23, Jeff Kirsher wrote:
> 
> > From: Shannon Nelson <shannon.nelson@intel.com>
> 
> > This is a cleanup of the arguments declared at the beginning
> > of each function.
> 
>     I hope you meant "local variables" because "arguments" are the
> values 
> passed to a function.

Yes, he meant local variables.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH net] bridge: clean the nf_bridge status when forwarding the skb
From: Antonio Quartulli @ 2013-10-18 14:46 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: Pablo Neira Ayuso, Antonio Quartulli, David S. Miller,
	netdev@vger.kernel.org, Stephen Hemminger
In-Reply-To: <52614669.5040301@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 995 bytes --]

On Fri, Oct 18, 2013 at 10:32:09AM -0400, Vlad Yasevich wrote:
> On 10/18/2013 07:35 AM, Antonio Quartulli wrote:
> > On Fri, Oct 18, 2013 at 01:10:41PM +0200, Pablo Neira Ayuso wrote:
> >> On Thu, Oct 17, 2013 at 01:37:35PM +0200, Antonio Quartulli wrote:
> >>> On Thu, Oct 17, 2013 at 04:28:57AM -0700, Pablo Neira Ayuso wrote:

[...]

> >
> > The problem I was having was due to an skb entering br0 first and br1 later.
> > When reaching br1 skb->nf_bridge was != NULL because of the previous processing
> > in br0.
> >
> 
> Doesn't br_nf_pre_routing already take care of this for you?  It will 
> drop the ref on the current nf_bridge and allocate a new one.  Is that
> not sufficient?

In my case that line is not reached because

 700         if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))

is always true: the packet getting analysed is a batman-adv encapsulated packet,
which does not match any of the three above.

Cheers,

-- 
Antonio Quartulli

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* RE: transmit lockup using smsc95xx ethernet on usb3
From: David Laight @ 2013-10-18 15:22 UTC (permalink / raw)
  To: Sarah Sharp
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-usb-u79uwXL29TY76Z2rM5mHXA,
	Xenia Ragiadakou
In-Reply-To: <20131017174329.GB6256@xanatos>

> It's not too hard.  Here's some directions:
> http://kernelnewbies.org/KernelBuild

I can build it easily enough but ...

Is there a .config file with a sane list of modules anywhere?
(and builtin drivers for disk boot)

The defconfig one doesn't seem to contain enough to be useful,
and the allmodconfig gives me 800MB+ to copy from the build
system to the test one (I can't seem to see an obvious way of
doing this either - apart from installing them to the 'wrong'
place on the build system and then copying everything over).

	David

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net] bridge: clean the nf_bridge status when forwarding the skb
From: Vlad Yasevich @ 2013-10-18 15:33 UTC (permalink / raw)
  To: Antonio Quartulli
  Cc: Pablo Neira Ayuso, Antonio Quartulli, David S. Miller,
	netdev@vger.kernel.org, Stephen Hemminger
In-Reply-To: <20131018144618.GL2596@neomailbox.net>

On 10/18/2013 10:46 AM, Antonio Quartulli wrote:
> On Fri, Oct 18, 2013 at 10:32:09AM -0400, Vlad Yasevich wrote:
>> On 10/18/2013 07:35 AM, Antonio Quartulli wrote:
>>> On Fri, Oct 18, 2013 at 01:10:41PM +0200, Pablo Neira Ayuso wrote:
>>>> On Thu, Oct 17, 2013 at 01:37:35PM +0200, Antonio Quartulli wrote:
>>>>> On Thu, Oct 17, 2013 at 04:28:57AM -0700, Pablo Neira Ayuso wrote:
>
> [...]
>
>>>
>>> The problem I was having was due to an skb entering br0 first and br1 later.
>>> When reaching br1 skb->nf_bridge was != NULL because of the previous processing
>>> in br0.
>>>
>>
>> Doesn't br_nf_pre_routing already take care of this for you?  It will
>> drop the ref on the current nf_bridge and allocate a new one.  Is that
>> not sufficient?
>
> In my case that line is not reached because
>
>   700         if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
>
> is always true: the packet getting analysed is a batman-adv encapsulated packet,
> which does not match any of the three above.
>
> Cheers,
>

Looking at other encapsulators (PPP, iptunnel, VXLAN), they do
nf_reset() on input.  Would that be appropriate for batman as well?

-vlad

^ permalink raw reply

* Re: [PATCH] veth: Showing peer of veth type dev in ip link (kernel side)
From: Nicolas Dichtel @ 2013-10-18 15:34 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Stephen Hemminger, David Miller, yamato, netdev
In-Reply-To: <8738nzelxl.fsf@xmission.com>

Le 17/10/2013 21:28, Eric W. Biederman a écrit :
> Nicolas Dichtel <nicolas.dichtel@6wind.com> writes:
>
>> Le 16/10/2013 21:53, Eric W. Biederman a écrit :
>
>>> The age old question why can't we have global identifiers for
>>> namespaces?
>>>
>>> The answer is that I don't want to implement a namespace for namespaces.
>> Sorry, but I don't understand the problem. This ID is owned by the kernel, like
>> the netns list (for_each_net()) is owned by it.
>
> The scenario where problems are likely to show up is something like this.
>
> For testing it would be reasonable to setup two linux containers that
> look like full linux systems.  In those containers you run one instance
> of your virtual router management daemons, and you arrange to synchronize
> between the two linux containers for testing.
>
> It becomes even more interesting when we want to migrate one of those
> linux containers to another physical machine.
>
> Global identifiers start breaking the first scenario, and really trash
> the second scenario.
>
> At the same time migration of configuration and replication of
> configuration are essentially the same problem, so it would be very
> silly to design such that will cause problems.
Ok, I'm now convinced ;-)

>
>>> While the proc inode does work today across different mounts of proc, I
>>> reserve the right at some future date (if it solves a technical problem)
>>> to give each namespace a different inode number in each different mount
>>> of proc.  So the inode number is not quite the unique identifier you
>>> want.  The inode number is a close as I am willing to get to a namespace
>>> of namespaces.
>>>
>>> I think the simplest solution is to just not worry about which namespace
>>> the other half of a veth pair is in.  But I have not encountered the
>>> problem where I need to know exactly which namespace we are worrying
>>> about.
>> Ok, let's start by explaining our usecase.
>>
>> We are using namespaces only to implement virtual routers (VR), ie only
>> the networking stack is virtualized. We don't care about other namespaces, we
>> just want to run several network stacks and be able to manage them.
>>
>> For example, providers use this feature to isolate clients, one VR is opened
>> for each client. You can have a large number of clients (+10 000) and thus the
>> same number of netns.
>> Considering these numbers, we don't want to run one instance per VR for all of
>> our network daemons, but have only one instance that manage all VR.
>>
>> You also have daemons that monitor the system and synchronize network objects
>> (interfaces, routes, etc.) on another linux. Goal is to implement a high
>> availability system: it's possible to switch to the other linux to avoid service
>> interruption.
>> This kind of daemon wants to have the full information about interfaces to be
>> able to build/configure them on the other linux.
>>
>>>
>>> Global identifiers are easy until you hit the cases where they make
>>> things impossible.
>> I don't want specially to use ID, but I fear that the solution with file
>> descriptors will be a nightmare.
>
> I can certainly see challenges.  In asking for symmetry between set and
> get the solution with file descriptors is the obvious answer and the
> first answer I have been able to come up with so far.
>
> My original answer was that the ifindex happened to be unique across
> namespaces but that actually turned out to be a problem for migration
> so that was abandoned.
>
> Namespace file descriptors are the solution that I know semantically
> will work.  Beyond that I don't have any good ideas right now.
>
> I just know that local names (aka file descriptors) are much easier to
> work with semantically than global names.
Yes sure. I will continue to think about this.


Thank you,
Nicolas

^ permalink raw reply

* Re: getting lldp DCB_CMD_IEEE_GET after DCB_CMD_GCAP fails
From: Olaf Hering @ 2013-10-18 15:40 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20131015161838.GA10478@aepfle.de>

On Tue, Oct 15, Olaf Hering wrote:

> Is the code below supposed to work anyway? Thanks for any help.

For some reason libnl does require an ack handler. Without such handler,
if the data and the actual ack are sent back to the application with two
independent messages, the second recv will just be the ack. My example
did not take this into account. Now I changed it to something like this:
 do {
   nl_recvmsgs(handle, cb);
 } while (ack == 0);

Now it happens to work.

Olaf

^ permalink raw reply

* Re: [PATCH net] bridge: clean the nf_bridge status when forwarding the skb
From: Antonio Quartulli @ 2013-10-18 15:41 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: Pablo Neira Ayuso, Antonio Quartulli, David S. Miller,
	netdev@vger.kernel.org, Stephen Hemminger
In-Reply-To: <526154B2.2060900@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1774 bytes --]

On Fri, Oct 18, 2013 at 11:33:06AM -0400, Vlad Yasevich wrote:
> On 10/18/2013 10:46 AM, Antonio Quartulli wrote:
> > On Fri, Oct 18, 2013 at 10:32:09AM -0400, Vlad Yasevich wrote:
> >> On 10/18/2013 07:35 AM, Antonio Quartulli wrote:
> >>> On Fri, Oct 18, 2013 at 01:10:41PM +0200, Pablo Neira Ayuso wrote:
> >>>> On Thu, Oct 17, 2013 at 01:37:35PM +0200, Antonio Quartulli wrote:
> >>>>> On Thu, Oct 17, 2013 at 04:28:57AM -0700, Pablo Neira Ayuso wrote:
> >
> > [...]
> >
> >>>
> >>> The problem I was having was due to an skb entering br0 first and br1 later.
> >>> When reaching br1 skb->nf_bridge was != NULL because of the previous processing
> >>> in br0.
> >>>
> >>
> >> Doesn't br_nf_pre_routing already take care of this for you?  It will
> >> drop the ref on the current nf_bridge and allocate a new one.  Is that
> >> not sufficient?
> >
> > In my case that line is not reached because
> >
> >   700         if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
> >
> > is always true: the packet getting analysed is a batman-adv encapsulated packet,
> > which does not match any of the three above.
> >
> > Cheers,
> >
> 
> Looking at other encapsulators (PPP, iptunnel, VXLAN), they do
> nf_reset() on input.  Would that be appropriate for batman as well?

I thought that too.

But at this point, wouldn't it be better to do a reset here and remove the other
resets from any other encapsulation module?

Maybe this operation is supposed to not happen if no encapsulation is involved?
I thought that polishing the nf state when exiting the nf related path was a
clean and easy solution.

Moreover we avoid that any newly implemented tunneling module hit this problem again.


Cheers,

-- 
Antonio Quartulli

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [patch net-next 0/7] bonding: introduce bonding options Netlink support
From: Jiri Pirko @ 2013-10-18 15:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, fubar, vfalico, andy, stephen, vyasevic

This patchset basically allows "mode" and "active_slave" bonding options
to be propagated and set up via standard RT Netlink interface.

In future other options can be easily added as well.

Jiri Pirko (7):
  bonding: push Netlink bits into separate file
  bonding: move mode setting into separate function
  bonding: move active_slave setting into separate function
  bonding: remove bond_ioctl_change_active()
  bonding: move active_slave getting into separate function
  bonding: add Netlink support mode option
  bonding: add Netlink support active_slave option

 drivers/net/bonding/Makefile       |   2 +-
 drivers/net/bonding/bond_main.c    |  91 ++----------------------
 drivers/net/bonding/bond_netlink.c | 131 ++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_options.c | 142 +++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_sysfs.c   | 127 ++++++++-------------------------
 drivers/net/bonding/bonding.h      |  19 ++++-
 include/uapi/linux/if_link.h       |  11 +++
 7 files changed, 337 insertions(+), 186 deletions(-)
 create mode 100644 drivers/net/bonding/bond_netlink.c
 create mode 100644 drivers/net/bonding/bond_options.c

-- 
1.8.3.1

^ permalink raw reply

* [patch net-next 1/7] bonding: push Netlink bits into separate file
From: Jiri Pirko @ 2013-10-18 15:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, fubar, vfalico, andy, stephen, vyasevic
In-Reply-To: <1382111019-1102-1-git-send-email-jiri@resnulli.us>

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 drivers/net/bonding/Makefile       |  2 +-
 drivers/net/bonding/bond_main.c    | 32 ++++------------------
 drivers/net/bonding/bond_netlink.c | 54 ++++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bonding.h      |  7 +++++
 4 files changed, 67 insertions(+), 28 deletions(-)
 create mode 100644 drivers/net/bonding/bond_netlink.c

diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 4c21bf6..09e8b2c 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_BONDING) += bonding.o
 
-bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o
+bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o bond_netlink.o
 
 proc-$(CONFIG_PROC_FS) += bond_procfs.o
 bonding-objs += $(proc-y)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index dfb4f6d..a113e42 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3951,7 +3951,7 @@ static void bond_destructor(struct net_device *bond_dev)
 	free_netdev(bond_dev);
 }
 
-static void bond_setup(struct net_device *bond_dev)
+void bond_setup(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
@@ -4451,32 +4451,11 @@ static int bond_init(struct net_device *bond_dev)
 	return 0;
 }
 
-static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
-{
-	if (tb[IFLA_ADDRESS]) {
-		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
-			return -EINVAL;
-		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
-			return -EADDRNOTAVAIL;
-	}
-	return 0;
-}
-
-static unsigned int bond_get_num_tx_queues(void)
+unsigned int bond_get_num_tx_queues(void)
 {
 	return tx_queues;
 }
 
-static struct rtnl_link_ops bond_link_ops __read_mostly = {
-	.kind			= "bond",
-	.priv_size		= sizeof(struct bonding),
-	.setup			= bond_setup,
-	.validate		= bond_validate,
-	.get_num_tx_queues	= bond_get_num_tx_queues,
-	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number
-							     as for TX queues */
-};
-
 /* Create a new bond based on the specified name and bonding parameters.
  * If name is NULL, obtain a suitable "bond%d" name for us.
  * Caller must NOT hold rtnl_lock; we need to release it here before we
@@ -4563,7 +4542,7 @@ static int __init bonding_init(void)
 	if (res)
 		goto out;
 
-	res = rtnl_link_register(&bond_link_ops);
+	res = bond_netlink_init();
 	if (res)
 		goto err_link;
 
@@ -4579,7 +4558,7 @@ static int __init bonding_init(void)
 out:
 	return res;
 err:
-	rtnl_link_unregister(&bond_link_ops);
+	bond_netlink_fini();
 err_link:
 	unregister_pernet_subsys(&bond_net_ops);
 	goto out;
@@ -4592,7 +4571,7 @@ static void __exit bonding_exit(void)
 
 	bond_destroy_debugfs();
 
-	rtnl_link_unregister(&bond_link_ops);
+	bond_netlink_fini();
 	unregister_pernet_subsys(&bond_net_ops);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -4609,4 +4588,3 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
-MODULE_ALIAS_RTNL_LINK("bond");
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
new file mode 100644
index 0000000..3e5c5f8
--- /dev/null
+++ b/drivers/net/bonding/bond_netlink.c
@@ -0,0 +1,54 @@
+/*
+ * drivers/net/bond/bond_netlink.c - Netlink interface for bonding
+ * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include "bonding.h"
+
+static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
+struct rtnl_link_ops bond_link_ops __read_mostly = {
+	.kind			= "bond",
+	.priv_size		= sizeof(struct bonding),
+	.setup			= bond_setup,
+	.validate		= bond_validate,
+	.get_num_tx_queues	= bond_get_num_tx_queues,
+	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number
+							     as for TX queues */
+};
+
+int __init bond_netlink_init(void)
+{
+	return rtnl_link_register(&bond_link_ops);
+}
+
+void __exit bond_netlink_fini(void)
+{
+	rtnl_link_unregister(&bond_link_ops);
+}
+
+MODULE_ALIAS_RTNL_LINK("bond");
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index bb5c731..a2a353b 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -418,6 +418,10 @@ void bond_debug_register(struct bonding *bond);
 void bond_debug_unregister(struct bonding *bond);
 void bond_debug_reregister(struct bonding *bond);
 const char *bond_mode_name(int mode);
+void bond_setup(struct net_device *bond_dev);
+unsigned int bond_get_num_tx_queues(void);
+int bond_netlink_init(void);
+void bond_netlink_fini(void);
 
 struct bond_net {
 	struct net *		net;	/* Associated network namespace */
@@ -505,4 +509,7 @@ extern const struct bond_parm_tbl fail_over_mac_tbl[];
 extern const struct bond_parm_tbl pri_reselect_tbl[];
 extern struct bond_parm_tbl ad_select_tbl[];
 
+/* exported from bond_netlink.c */
+extern struct rtnl_link_ops bond_link_ops;
+
 #endif /* _LINUX_BONDING_H */
-- 
1.8.3.1

^ permalink raw reply related

* [patch net-next 3/7] bonding: move active_slave setting into separate function
From: Jiri Pirko @ 2013-10-18 15:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, fubar, vfalico, andy, stephen, vyasevic
In-Reply-To: <1382111019-1102-1-git-send-email-jiri@resnulli.us>

Do a bit of refactoring on the way.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 drivers/net/bonding/bond_options.c | 69 +++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_sysfs.c   | 74 +++++++-------------------------------
 drivers/net/bonding/bonding.h      |  1 +
 3 files changed, 83 insertions(+), 61 deletions(-)

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 294b766..09af5d1 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -12,6 +12,9 @@
 
 #include <linux/errno.h>
 #include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/rwlock.h>
+#include <linux/rcupdate.h>
 #include "bonding.h"
 
 static bool bond_mode_is_valid(int mode)
@@ -53,3 +56,69 @@ int bond_option_mode_set(struct bonding *bond, int mode)
 	bond->params.mode = mode;
 	return 0;
 }
+
+int bond_option_active_slave_set(struct bonding *bond,
+				 struct net_device *slave_dev)
+{
+	int ret = 0;
+
+	if (slave_dev) {
+		if (!netif_is_bond_slave(slave_dev)) {
+			pr_err("Device %s is not bonding slave.\n",
+			       slave_dev->name);
+			return -EINVAL;
+		}
+
+		if (bond->dev != netdev_master_upper_dev_get(slave_dev)) {
+			pr_err("%s: Device %s is not our slave.\n",
+			       bond->dev->name, slave_dev->name);
+			return -EINVAL;
+		}
+	}
+
+	if (!USES_PRIMARY(bond->params.mode)) {
+		pr_err("%s: Unable to change active slave; %s is in mode %d\n",
+		       bond->dev->name, bond->dev->name, bond->params.mode);
+		return -EINVAL;
+	}
+
+	block_netpoll_tx();
+	read_lock(&bond->lock);
+	write_lock_bh(&bond->curr_slave_lock);
+
+	/* check to see if we are clearing active */
+	if (!slave_dev) {
+		pr_info("%s: Clearing current active slave.\n",
+		bond->dev->name);
+		rcu_assign_pointer(bond->curr_active_slave, NULL);
+		bond_select_active_slave(bond);
+	} else {
+		struct slave *old_active = bond->curr_active_slave;
+		struct slave *new_active = bond_slave_get_rtnl(slave_dev);
+
+		BUG_ON(!new_active);
+
+		if (new_active == old_active) {
+			/* do nothing */
+			pr_info("%s: %s is already the current active slave.\n",
+				bond->dev->name, new_active->dev->name);
+		} else {
+			if (old_active && (new_active->link == BOND_LINK_UP) &&
+			    IS_UP(new_active->dev)) {
+				pr_info("%s: Setting %s as active slave.\n",
+					bond->dev->name, new_active->dev->name);
+				bond_change_active_slave(bond, new_active);
+			} else {
+				pr_err("%s: Could not set %s as active slave; either %s is down or the link is down.\n",
+				       bond->dev->name, new_active->dev->name,
+				       new_active->dev->name);
+				ret = -EINVAL;
+			}
+		}
+	}
+
+	write_unlock_bh(&bond->curr_slave_lock);
+	read_unlock(&bond->lock);
+	unblock_netpoll_tx();
+	return ret;
+}
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c234cec..abd2600 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1235,81 +1235,33 @@ static ssize_t bonding_store_active_slave(struct device *d,
 					  struct device_attribute *attr,
 					  const char *buf, size_t count)
 {
-	struct slave *slave, *old_active, *new_active;
+	int ret;
 	struct bonding *bond = to_bond(d);
-	struct list_head *iter;
 	char ifname[IFNAMSIZ];
+	struct net_device *dev;
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	old_active = new_active = NULL;
-	block_netpoll_tx();
-	read_lock(&bond->lock);
-	write_lock_bh(&bond->curr_slave_lock);
-
-	if (!USES_PRIMARY(bond->params.mode)) {
-		pr_info("%s: Unable to change active slave; %s is in mode %d\n",
-			bond->dev->name, bond->dev->name, bond->params.mode);
-		goto out;
-	}
-
 	sscanf(buf, "%15s", ifname); /* IFNAMSIZ */
-
-	/* check to see if we are clearing active */
 	if (!strlen(ifname) || buf[0] == '\n') {
-		pr_info("%s: Clearing current active slave.\n",
-			bond->dev->name);
-		rcu_assign_pointer(bond->curr_active_slave, NULL);
-		bond_select_active_slave(bond);
-		goto out;
-	}
-
-	bond_for_each_slave(bond, slave, iter) {
-		if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
-			old_active = bond->curr_active_slave;
-			new_active = slave;
-			if (new_active == old_active) {
-				/* do nothing */
-				pr_info("%s: %s is already the current"
-					" active slave.\n",
-					bond->dev->name,
-					slave->dev->name);
-				goto out;
-			} else {
-				if ((new_active) &&
-				    (old_active) &&
-				    (new_active->link == BOND_LINK_UP) &&
-				    IS_UP(new_active->dev)) {
-					pr_info("%s: Setting %s as active"
-						" slave.\n",
-						bond->dev->name,
-						slave->dev->name);
-					bond_change_active_slave(bond,
-								 new_active);
-				} else {
-					pr_info("%s: Could not set %s as"
-						" active slave; either %s is"
-						" down or the link is down.\n",
-						bond->dev->name,
-						slave->dev->name,
-						slave->dev->name);
-				}
-				goto out;
-			}
+		dev = NULL;
+	} else {
+		dev = __dev_get_by_name(dev_net(bond->dev), ifname);
+		if (!dev) {
+			ret = -ENODEV;
+			goto out;
 		}
 	}
 
-	pr_info("%s: Unable to set %.*s as active slave.\n",
-		bond->dev->name, (int)strlen(buf) - 1, buf);
- out:
-	write_unlock_bh(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
-	unblock_netpoll_tx();
+	ret = bond_option_active_slave_set(bond, dev);
+	if (!ret)
+		ret = count;
 
+ out:
 	rtnl_unlock();
 
-	return count;
+	return ret;
 
 }
 static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 7446849..686759d 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -427,6 +427,7 @@ unsigned int bond_get_num_tx_queues(void);
 int bond_netlink_init(void);
 void bond_netlink_fini(void);
 int bond_option_mode_set(struct bonding *bond, int mode);
+int bond_option_active_slave_set(struct bonding *bond, struct net_device *slave_dev);
 
 struct bond_net {
 	struct net *		net;	/* Associated network namespace */
-- 
1.8.3.1

^ permalink raw reply related

* [patch net-next 2/7] bonding: move mode setting into separate function
From: Jiri Pirko @ 2013-10-18 15:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, fubar, vfalico, andy, stephen, vyasevic
In-Reply-To: <1382111019-1102-1-git-send-email-jiri@resnulli.us>

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 drivers/net/bonding/Makefile       |  2 +-
 drivers/net/bonding/bond_options.c | 55 ++++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_sysfs.c   | 45 ++++++++-----------------------
 drivers/net/bonding/bonding.h      |  9 +++++--
 4 files changed, 74 insertions(+), 37 deletions(-)
 create mode 100644 drivers/net/bonding/bond_options.c

diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 09e8b2c..5a5d720 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_BONDING) += bonding.o
 
-bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o bond_netlink.o
+bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o bond_netlink.o bond_options.o
 
 proc-$(CONFIG_PROC_FS) += bond_procfs.o
 bonding-objs += $(proc-y)
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
new file mode 100644
index 0000000..294b766
--- /dev/null
+++ b/drivers/net/bonding/bond_options.c
@@ -0,0 +1,55 @@
+/*
+ * drivers/net/bond/bond_options.c - bonding options
+ * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/if.h>
+#include "bonding.h"
+
+static bool bond_mode_is_valid(int mode)
+{
+	int i;
+
+	for (i = 0; bond_mode_tbl[i].modename; i++);
+
+	return mode >= 0 && mode < i;
+}
+
+int bond_option_mode_set(struct bonding *bond, int mode)
+{
+	if (!bond_mode_is_valid(mode)) {
+		pr_err("invalid mode value %d.\n", mode);
+		return -EINVAL;
+	}
+
+	if (bond->dev->flags & IFF_UP) {
+		pr_err("%s: unable to update mode because interface is up.\n",
+		       bond->dev->name);
+		return -EPERM;
+	}
+
+	if (bond_has_slaves(bond)) {
+		pr_err("%s: unable to update mode because bond has slaves.\n",
+			bond->dev->name);
+		return -EPERM;
+	}
+
+	if (BOND_MODE_IS_LB(mode) && bond->params.arp_interval) {
+		pr_err("%s: %s mode is incompatible with arp monitoring.\n",
+		       bond->dev->name, bond_mode_tbl[mode].modename);
+		return -EINVAL;
+	}
+
+	/* don't cache arp_validate between modes */
+	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
+	bond->params.mode = mode;
+	return 0;
+}
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 03bed0c..c234cec 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -283,49 +283,26 @@ static ssize_t bonding_store_mode(struct device *d,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
-	int new_value, ret = count;
+	int new_value, ret;
 	struct bonding *bond = to_bond(d);
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-
-	if (bond->dev->flags & IFF_UP) {
-		pr_err("unable to update mode of %s because interface is up.\n",
-		       bond->dev->name);
-		ret = -EPERM;
-		goto out;
-	}
-
-	if (bond_has_slaves(bond)) {
-		pr_err("unable to update mode of %s because it has slaves.\n",
-			bond->dev->name);
-		ret = -EPERM;
-		goto out;
-	}
-
 	new_value = bond_parse_parm(buf, bond_mode_tbl);
 	if (new_value < 0)  {
 		pr_err("%s: Ignoring invalid mode value %.*s.\n",
 		       bond->dev->name, (int)strlen(buf) - 1, buf);
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
-	if ((new_value == BOND_MODE_ALB ||
-	     new_value == BOND_MODE_TLB) &&
-	    bond->params.arp_interval) {
-		pr_err("%s: %s mode is incompatible with arp monitoring.\n",
-		       bond->dev->name, bond_mode_tbl[new_value].modename);
-		ret = -EINVAL;
-		goto out;
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	ret = bond_option_mode_set(bond, new_value);
+	if (!ret) {
+		pr_info("%s: setting mode to %s (%d).\n",
+			bond->dev->name, bond_mode_tbl[new_value].modename,
+			new_value);
+		ret = count;
 	}
 
-	/* don't cache arp_validate between modes */
-	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
-	bond->params.mode = new_value;
-	pr_info("%s: setting mode to %s (%d).\n",
-		bond->dev->name, bond_mode_tbl[new_value].modename,
-		new_value);
-out:
 	rtnl_unlock();
 	return ret;
 }
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index a2a353b..7446849 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -58,6 +58,11 @@
 #define TX_QUEUE_OVERRIDE(mode)				\
 			(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\
 			 ((mode) == BOND_MODE_ROUNDROBIN))
+
+#define BOND_MODE_IS_LB(mode)			\
+		(((mode) == BOND_MODE_TLB) ||	\
+		 ((mode) == BOND_MODE_ALB))
+
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -259,8 +264,7 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
 
 static inline bool bond_is_lb(const struct bonding *bond)
 {
-	return (bond->params.mode == BOND_MODE_TLB ||
-		bond->params.mode == BOND_MODE_ALB);
+	return BOND_MODE_IS_LB(bond->params.mode);
 }
 
 static inline void bond_set_active_slave(struct slave *slave)
@@ -422,6 +426,7 @@ void bond_setup(struct net_device *bond_dev);
 unsigned int bond_get_num_tx_queues(void);
 int bond_netlink_init(void);
 void bond_netlink_fini(void);
+int bond_option_mode_set(struct bonding *bond, int mode);
 
 struct bond_net {
 	struct net *		net;	/* Associated network namespace */
-- 
1.8.3.1

^ permalink raw reply related

* [patch net-next 4/7] bonding: remove bond_ioctl_change_active()
From: Jiri Pirko @ 2013-10-18 15:43 UTC (permalink / raw)
  To: netdev; +Cc: davem, fubar, vfalico, andy, stephen, vyasevic
In-Reply-To: <1382111019-1102-1-git-send-email-jiri@resnulli.us>

bond_ioctl_change_active() is no longer needed, since
bond_option_active_slave_set() can be used instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 drivers/net/bonding/bond_main.c | 59 ++---------------------------------------
 1 file changed, 2 insertions(+), 57 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a113e42..d90734f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1910,61 +1910,6 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,
 	return ret;
 }
 
-/*
- * This function changes the active slave to slave <slave_dev>.
- * It returns -EINVAL in the following cases.
- *  - <slave_dev> is not found in the list.
- *  - There is not active slave now.
- *  - <slave_dev> is already active.
- *  - The link state of <slave_dev> is not BOND_LINK_UP.
- *  - <slave_dev> is not running.
- * In these cases, this function does nothing.
- * In the other cases, current_slave pointer is changed and 0 is returned.
- */
-static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev)
-{
-	struct bonding *bond = netdev_priv(bond_dev);
-	struct slave *old_active = NULL;
-	struct slave *new_active = NULL;
-	int res = 0;
-
-	if (!USES_PRIMARY(bond->params.mode))
-		return -EINVAL;
-
-	/* Verify that bond_dev is indeed the master of slave_dev */
-	if (!(slave_dev->flags & IFF_SLAVE) ||
-	    !netdev_has_upper_dev(slave_dev, bond_dev))
-		return -EINVAL;
-
-	read_lock(&bond->lock);
-
-	old_active = bond->curr_active_slave;
-	new_active = bond_get_slave_by_dev(bond, slave_dev);
-	/*
-	 * Changing to the current active: do nothing; return success.
-	 */
-	if (new_active && new_active == old_active) {
-		read_unlock(&bond->lock);
-		return 0;
-	}
-
-	if (new_active &&
-	    old_active &&
-	    new_active->link == BOND_LINK_UP &&
-	    IS_UP(new_active->dev)) {
-		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
-		bond_change_active_slave(bond, new_active);
-		write_unlock_bh(&bond->curr_slave_lock);
-		unblock_netpoll_tx();
-	} else
-		res = -EINVAL;
-
-	read_unlock(&bond->lock);
-
-	return res;
-}
-
 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
@@ -3257,6 +3202,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 
 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
 {
+	struct bonding *bond = netdev_priv(bond_dev);
 	struct net_device *slave_dev = NULL;
 	struct ifbond k_binfo;
 	struct ifbond __user *u_binfo = NULL;
@@ -3287,7 +3233,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 
 
 		if (mii->reg_num == 1) {
-			struct bonding *bond = netdev_priv(bond_dev);
 			mii->val_out = 0;
 			read_lock(&bond->lock);
 			read_lock(&bond->curr_slave_lock);
@@ -3359,7 +3304,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 			break;
 		case BOND_CHANGE_ACTIVE_OLD:
 		case SIOCBONDCHANGEACTIVE:
-			res = bond_ioctl_change_active(bond_dev, slave_dev);
+			res = bond_option_active_slave_set(bond, slave_dev);
 			break;
 		default:
 			res = -EOPNOTSUPP;
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox