Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 08/13] i40e: Split bytes and packets from Rx/Tx stats
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This makes it so that the Tx and Rx byte and packet counts are
separated from the rest of the statistics.  This allows for better
isolation of these stats when we move them into the 64 bit statistics.

Simplify things by re-ordering how the stats display in ethtool.
Instead of displaying all of the Tx queues as a block, followed by all
the Rx queues, the new order is Tx[0], Rx[0], Tx[1], Rx[1], ..., Tx[n],
Rx[n].  This reduces the loops and cleans up the display for testing
purposes since it is very easy to verify if flow director is doing the
right thing as the Tx and Rx queue pair are shown in pairs.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c |  8 ++++----
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 14 +++++---------
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 12 ++++++++----
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    | 12 ++++++------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h    |  8 +++++---
 5 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 0b01c61..2b6655b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -512,8 +512,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 				 vsi->rx_rings[i].ring_active);
 			dev_info(&pf->pdev->dev,
 				 "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
-				 i, vsi->rx_rings[i].rx_stats.packets,
-				 vsi->rx_rings[i].rx_stats.bytes,
+				 i, vsi->rx_rings[i].stats.packets,
+				 vsi->rx_rings[i].stats.bytes,
 				 vsi->rx_rings[i].rx_stats.non_eop_descs);
 			dev_info(&pf->pdev->dev,
 				 "    rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n",
@@ -556,8 +556,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 				 vsi->tx_rings[i].ring_active);
 			dev_info(&pf->pdev->dev,
 				 "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
-				 i, vsi->tx_rings[i].tx_stats.packets,
-				 vsi->tx_rings[i].tx_stats.bytes,
+				 i, vsi->tx_rings[i].stats.packets,
+				 vsi->tx_rings[i].stats.bytes,
 				 vsi->tx_rings[i].tx_stats.restart_queue);
 			dev_info(&pf->pdev->dev,
 				 "    tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 9a76b8c..8754c6fa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -587,13 +587,11 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
-	for (j = 0; j < vsi->num_queue_pairs; j++) {
-		data[i++] = vsi->tx_rings[j].tx_stats.packets;
-		data[i++] = vsi->tx_rings[j].tx_stats.bytes;
-	}
-	for (j = 0; j < vsi->num_queue_pairs; j++) {
-		data[i++] = vsi->rx_rings[j].rx_stats.packets;
-		data[i++] = vsi->rx_rings[j].rx_stats.bytes;
+	for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) {
+		data[i] = vsi->tx_rings[j].stats.packets;
+		data[i + 1] = vsi->tx_rings[j].stats.bytes;
+		data[i + 2] = vsi->rx_rings[j].stats.packets;
+		data[i + 3] = vsi->rx_rings[j].stats.bytes;
 	}
 	if (vsi == pf->vsi[pf->lan_vsi]) {
 		for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
@@ -641,8 +639,6 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
 			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < vsi->num_queue_pairs; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 657babe..d1b5bae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -376,8 +376,12 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
 	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
 	if (vsi->rx_rings)
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			memset(&vsi->rx_rings[i].stats, 0 ,
+			       sizeof(vsi->rx_rings[i].stats));
 			memset(&vsi->rx_rings[i].rx_stats, 0 ,
 			       sizeof(vsi->rx_rings[i].rx_stats));
+			memset(&vsi->tx_rings[i].stats, 0 ,
+			       sizeof(vsi->tx_rings[i].stats));
 			memset(&vsi->tx_rings[i].tx_stats, 0,
 			       sizeof(vsi->tx_rings[i].tx_stats));
 		}
@@ -708,14 +712,14 @@ void i40e_update_stats(struct i40e_vsi *vsi)
 		struct i40e_ring *p;
 
 		p = &vsi->rx_rings[q];
-		rx_b += p->rx_stats.bytes;
-		rx_p += p->rx_stats.packets;
+		rx_b += p->stats.bytes;
+		rx_p += p->stats.packets;
 		rx_buf += p->rx_stats.alloc_rx_buff_failed;
 		rx_page += p->rx_stats.alloc_rx_page_failed;
 
 		p = &vsi->tx_rings[q];
-		tx_b += p->tx_stats.bytes;
-		tx_p += p->tx_stats.packets;
+		tx_b += p->stats.bytes;
+		tx_p += p->stats.packets;
 		tx_restart += p->tx_stats.restart_queue;
 		tx_busy += p->tx_stats.tx_busy;
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ad2818f..3e73bc0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -305,14 +305,14 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	 * run the check_tx_hang logic with a transmit completion
 	 * pending but without time to complete it yet.
 	 */
-	if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) &&
+	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
 	    tx_pending) {
 		/* make sure it is true for two checks in a row */
 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
 				       &tx_ring->state);
 	} else {
 		/* update completed stats and disarm the hang check */
-		tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets;
+		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
 	}
 
@@ -411,8 +411,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	tx_ring->tx_stats.bytes += total_bytes;
-	tx_ring->tx_stats.packets += total_packets;
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
@@ -1075,8 +1075,8 @@ next_desc:
 	}
 
 	rx_ring->next_to_clean = i;
-	rx_ring->rx_stats.packets += total_rx_packets;
-	rx_ring->rx_stats.bytes += total_rx_bytes;
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e514247..7f3f7e3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -126,17 +126,18 @@ struct i40e_rx_buffer {
 	unsigned int page_offset;
 };
 
-struct i40e_tx_queue_stats {
+struct i40e_queue_stats {
 	u64 packets;
 	u64 bytes;
+};
+
+struct i40e_tx_queue_stats {
 	u64 restart_queue;
 	u64 tx_busy;
 	u64 tx_done_old;
 };
 
 struct i40e_rx_queue_stats {
-	u64 packets;
-	u64 bytes;
 	u64 non_eop_descs;
 	u64 alloc_rx_page_failed;
 	u64 alloc_rx_buff_failed;
@@ -215,6 +216,7 @@ struct i40e_ring {
 	bool ring_active;		/* is ring online or not */
 
 	/* stats structs */
+	struct i40e_queue_stats	stats;
 	union {
 		struct i40e_tx_queue_stats tx_stats;
 		struct i40e_rx_queue_stats rx_stats;
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 09/13] i40e: Move q_vectors from pointer to array to array of pointers
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Allocate the q_vectors individually. The advantage to this is that it
allows for easier freeing and allocation.  In addition it makes it so
that we could do node specific allocations at some point in the future.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h         |   5 +-
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c |  11 +-
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |   4 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 152 +++++++++++++++++--------
 4 files changed, 112 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index b5252eb..789304e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -366,7 +366,7 @@ struct i40e_vsi {
 	u8  dtype;
 
 	/* List of q_vectors allocated to this VSI */
-	struct i40e_q_vector *q_vectors;
+	struct i40e_q_vector **q_vectors;
 	int num_q_vectors;
 	int base_vector;
 
@@ -422,8 +422,9 @@ struct i40e_q_vector {
 
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
 
-	char name[IFNAMSIZ + 9];
 	cpumask_t affinity_mask;
+	struct rcu_head rcu;	/* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
 } ____cacheline_internodealigned_in_smp;
 
 /* lan device */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 2b6655b..44e3fa4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -586,15 +586,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 	dev_info(&pf->pdev->dev,
 		 "    max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n",
 		 vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype);
-	if (vsi->q_vectors) {
-		for (i = 0; i < vsi->num_q_vectors; i++) {
-			dev_info(&pf->pdev->dev,
-				 "    q_vectors[%i]: base index = %ld\n",
-				 i, ((long int)*vsi->q_vectors[i].rx.ring-
-					(long int)*vsi->q_vectors[0].rx.ring)/
-					sizeof(struct i40e_ring));
-		}
-	}
 	dev_info(&pf->pdev->dev,
 		 "    num_q_vectors = %i, base_vector = %i\n",
 		 vsi->num_q_vectors, vsi->base_vector);
@@ -1995,7 +1986,7 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp,
 			goto netdev_ops_write_done;
 		}
 		for (i = 0; i < vsi->num_q_vectors; i++)
-			napi_schedule(&vsi->q_vectors[i].napi);
+			napi_schedule(&vsi->q_vectors[i]->napi);
 		dev_info(&pf->pdev->dev, "napi called\n");
 	} else {
 		dev_info(&pf->pdev->dev, "unknown command '%s'\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 8754c6fa..bf607da 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -906,8 +906,8 @@ static int i40e_set_coalesce(struct net_device *netdev,
 	}
 
 	vector = vsi->base_vector;
-	q_vector = vsi->q_vectors;
-	for (i = 0; i < vsi->num_q_vectors; i++, vector++, q_vector++) {
+	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+		q_vector = vsi->q_vectors[i];
 		q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
 		wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr);
 		q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d1b5bae..a090815 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2358,8 +2358,8 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 	 */
 	qp = vsi->base_queue;
 	vector = vsi->base_vector;
-	q_vector = vsi->q_vectors;
-	for (i = 0; i < vsi->num_q_vectors; i++, q_vector++, vector++) {
+	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+		q_vector = vsi->q_vectors[i];
 		q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
 		q_vector->rx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
@@ -2439,7 +2439,7 @@ static void i40e_enable_misc_int_causes(struct i40e_hw *hw)
  **/
 static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 {
-	struct i40e_q_vector *q_vector = vsi->q_vectors;
+	struct i40e_q_vector *q_vector = vsi->q_vectors[0];
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
 	u32 val;
@@ -2558,7 +2558,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 	int vector, err;
 
 	for (vector = 0; vector < q_vectors; vector++) {
-		struct i40e_q_vector *q_vector = &(vsi->q_vectors[vector]);
+		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
 
 		if (q_vector->tx.ring[0] && q_vector->rx.ring[0]) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
@@ -2709,7 +2709,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 		i40e_flush(hw);
 
 		if (!test_bit(__I40E_DOWN, &pf->state))
-			napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0].napi);
+			napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0]->napi);
 	}
 
 	if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
@@ -2785,7 +2785,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
  **/
 static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx)
 {
-	struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]);
+	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
 	struct i40e_ring *rx_ring = &(vsi->rx_rings[r_idx]);
 
 	rx_ring->q_vector = q_vector;
@@ -2803,7 +2803,7 @@ static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx)
  **/
 static void map_vector_to_txq(struct i40e_vsi *vsi, int v_idx, int t_idx)
 {
-	struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]);
+	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
 	struct i40e_ring *tx_ring = &(vsi->tx_rings[t_idx]);
 
 	tx_ring->q_vector = q_vector;
@@ -2891,7 +2891,7 @@ static void i40e_netpoll(struct net_device *netdev)
 	pf->flags |= I40E_FLAG_IN_NETPOLL;
 	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
 		for (i = 0; i < vsi->num_q_vectors; i++)
-			i40e_msix_clean_rings(0, &vsi->q_vectors[i]);
+			i40e_msix_clean_rings(0, vsi->q_vectors[i]);
 	} else {
 		i40e_intr(pf->pdev->irq, netdev);
 	}
@@ -3077,14 +3077,14 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 			u16 vector = i + base;
 
 			/* free only the irqs that were actually requested */
-			if (vsi->q_vectors[i].num_ringpairs == 0)
+			if (vsi->q_vectors[i]->num_ringpairs == 0)
 				continue;
 
 			/* clear the affinity_mask in the IRQ descriptor */
 			irq_set_affinity_hint(pf->msix_entries[vector].vector,
 					      NULL);
 			free_irq(pf->msix_entries[vector].vector,
-				 &vsi->q_vectors[i]);
+				 vsi->q_vectors[i]);
 
 			/* Tear down the interrupt queue link list
 			 *
@@ -3168,6 +3168,38 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 }
 
 /**
+ * i40e_free_q_vector - Free memory allocated for specific interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
+{
+	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
+	int r_idx;
+
+	if (!q_vector)
+		return;
+
+	/* disassociate q_vector from rings */
+	for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++)
+		q_vector->tx.ring[r_idx]->q_vector = NULL;
+	for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++)
+		q_vector->rx.ring[r_idx]->q_vector = NULL;
+
+	/* only VSI w/ an associated netdev is set up w/ NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	vsi->q_vectors[v_idx] = NULL;
+
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
  * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors
  * @vsi: the VSI being un-configured
  *
@@ -3178,24 +3210,8 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
 {
 	int v_idx;
 
-	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
-		struct i40e_q_vector *q_vector = &vsi->q_vectors[v_idx];
-		int r_idx;
-
-		if (!q_vector)
-			continue;
-
-		/* disassociate q_vector from rings */
-		for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++)
-			q_vector->tx.ring[r_idx]->q_vector = NULL;
-		for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++)
-			q_vector->rx.ring[r_idx]->q_vector = NULL;
-
-		/* only VSI w/ an associated netdev is set up w/ NAPI */
-		if (vsi->netdev)
-			netif_napi_del(&q_vector->napi);
-	}
-	kfree(vsi->q_vectors);
+	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
+		i40e_free_q_vector(vsi, v_idx);
 }
 
 /**
@@ -3245,7 +3261,7 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi)
 		return;
 
 	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_enable(&vsi->q_vectors[q_idx].napi);
+		napi_enable(&vsi->q_vectors[q_idx]->napi);
 }
 
 /**
@@ -3260,7 +3276,7 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
 		return;
 
 	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_disable(&vsi->q_vectors[q_idx].napi);
+		napi_disable(&vsi->q_vectors[q_idx]->napi);
 }
 
 /**
@@ -4945,6 +4961,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 {
 	int ret = -ENODEV;
 	struct i40e_vsi *vsi;
+	int sz_vectors;
 	int vsi_idx;
 	int i;
 
@@ -4970,14 +4987,14 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 		vsi_idx = i;             /* Found one! */
 	} else {
 		ret = -ENODEV;
-		goto err_alloc_vsi;  /* out of VSI slots! */
+		goto unlock_pf;  /* out of VSI slots! */
 	}
 	pf->next_vsi = ++i;
 
 	vsi = kzalloc(sizeof(*vsi), GFP_KERNEL);
 	if (!vsi) {
 		ret = -ENOMEM;
-		goto err_alloc_vsi;
+		goto unlock_pf;
 	}
 	vsi->type = type;
 	vsi->back = pf;
@@ -4992,12 +5009,25 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 
 	i40e_set_num_rings_in_vsi(vsi);
 
+	/* allocate memory for q_vector pointers */
+	sz_vectors = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors;
+	vsi->q_vectors = kzalloc(sz_vectors, GFP_KERNEL);
+	if (!vsi->q_vectors) {
+		ret = -ENOMEM;
+		goto err_vectors;
+	}
+
 	/* Setup default MSIX irq handler for VSI */
 	i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
 
 	pf->vsi[vsi_idx] = vsi;
 	ret = vsi_idx;
-err_alloc_vsi:
+	goto unlock_pf;
+
+err_vectors:
+	pf->next_vsi = i - 1;
+	kfree(vsi);
+unlock_pf:
 	mutex_unlock(&pf->switch_mutex);
 	return ret;
 }
@@ -5038,6 +5068,9 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
 	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
 
+	/* free the ring and vector containers */
+	kfree(vsi->q_vectors);
+
 	pf->vsi[vsi->idx] = NULL;
 	if (vsi->idx < pf->next_vsi)
 		pf->next_vsi = vsi->idx;
@@ -5257,6 +5290,35 @@ static int i40e_init_msix(struct i40e_pf *pf)
 }
 
 /**
+ * i40e_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the vsi struct
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int i40e_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
+{
+	struct i40e_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi,
+			       i40e_napi_poll, vsi->work_limit);
+
+	/* tie q_vector and vsi together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
  * i40e_alloc_q_vectors - Allocate memory for interrupt vectors
  * @vsi: the VSI being configured
  *
@@ -5267,6 +5329,7 @@ static int i40e_alloc_q_vectors(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
 	int v_idx, num_q_vectors;
+	int err;
 
 	/* if not MSIX, give the one vector only to the LAN VSI */
 	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -5276,22 +5339,19 @@ static int i40e_alloc_q_vectors(struct i40e_vsi *vsi)
 	else
 		return -EINVAL;
 
-	vsi->q_vectors = kcalloc(num_q_vectors,
-				 sizeof(struct i40e_q_vector),
-				 GFP_KERNEL);
-	if (!vsi->q_vectors)
-		return -ENOMEM;
-
 	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		vsi->q_vectors[v_idx].vsi = vsi;
-		vsi->q_vectors[v_idx].v_idx = v_idx;
-		cpumask_set_cpu(v_idx, &vsi->q_vectors[v_idx].affinity_mask);
-		if (vsi->netdev)
-			netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx].napi,
-				       i40e_napi_poll, vsi->work_limit);
+		err = i40e_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
 	}
 
 	return 0;
+
+err_out:
+	while (v_idx--)
+		i40e_free_q_vector(vsi, v_idx);
+
+	return err;
 }
 
 /**
@@ -5958,7 +6018,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
 	int ret = -ENOENT;
 	struct i40e_pf *pf = vsi->back;
 
-	if (vsi->q_vectors) {
+	if (vsi->q_vectors[0]) {
 		dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
 			 vsi->seid);
 		return -EEXIST;
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 10/13] i40e: Replace ring container array with linked list
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This replaces the ring container array with a linked list.  The idea is
to make the logic much easier to deal with since this will allow us to
call a simple helper function from the q_vectors to go through the
entire list.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 84 ++++++++++++++---------------
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 19 +++----
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |  8 ++-
 3 files changed, 58 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a090815..c74ac58 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2516,7 +2516,7 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
 {
 	struct i40e_q_vector *q_vector = data;
 
-	if (!q_vector->tx.ring[0] && !q_vector->rx.ring[0])
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
 		return IRQ_HANDLED;
 
 	napi_schedule(&q_vector->napi);
@@ -2533,7 +2533,7 @@ static irqreturn_t i40e_fdir_clean_rings(int irq, void *data)
 {
 	struct i40e_q_vector *q_vector = data;
 
-	if (!q_vector->tx.ring[0] && !q_vector->rx.ring[0])
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
 		return IRQ_HANDLED;
 
 	pr_info("fdir ring cleaning needed\n");
@@ -2560,14 +2560,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 	for (vector = 0; vector < q_vectors; vector++) {
 		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
 
-		if (q_vector->tx.ring[0] && q_vector->rx.ring[0]) {
+		if (q_vector->tx.ring && q_vector->rx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
 				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
 			tx_int_idx++;
-		} else if (q_vector->rx.ring[0]) {
+		} else if (q_vector->rx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
 				 "%s-%s-%d", basename, "rx", rx_int_idx++);
-		} else if (q_vector->tx.ring[0]) {
+		} else if (q_vector->tx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
 				 "%s-%s-%d", basename, "tx", tx_int_idx++);
 		} else {
@@ -2778,40 +2778,26 @@ static irqreturn_t i40e_intr(int irq, void *data)
 }
 
 /**
- * i40e_map_vector_to_rxq - Assigns the Rx queue to the vector
+ * i40e_map_vector_to_qp - Assigns the queue pair to the vector
  * @vsi: the VSI being configured
  * @v_idx: vector index
- * @r_idx: rx queue index
+ * @qp_idx: queue pair index
  **/
-static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx)
+static void map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
 {
 	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
-	struct i40e_ring *rx_ring = &(vsi->rx_rings[r_idx]);
-
-	rx_ring->q_vector = q_vector;
-	q_vector->rx.ring[q_vector->rx.count] = rx_ring;
-	q_vector->rx.count++;
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->vsi = vsi;
-}
-
-/**
- * i40e_map_vector_to_txq - Assigns the Tx queue to the vector
- * @vsi: the VSI being configured
- * @v_idx: vector index
- * @t_idx: tx queue index
- **/
-static void map_vector_to_txq(struct i40e_vsi *vsi, int v_idx, int t_idx)
-{
-	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
-	struct i40e_ring *tx_ring = &(vsi->tx_rings[t_idx]);
+	struct i40e_ring *tx_ring = &(vsi->tx_rings[qp_idx]);
+	struct i40e_ring *rx_ring = &(vsi->rx_rings[qp_idx]);
 
 	tx_ring->q_vector = q_vector;
-	q_vector->tx.ring[q_vector->tx.count] = tx_ring;
+	tx_ring->next = q_vector->tx.ring;
+	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-	q_vector->num_ringpairs++;
-	q_vector->vsi = vsi;
+
+	rx_ring->q_vector = q_vector;
+	rx_ring->next = q_vector->rx.ring;
+	q_vector->rx.ring = rx_ring;
+	q_vector->rx.count++;
 }
 
 /**
@@ -2827,7 +2813,7 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
 {
 	int qp_remaining = vsi->num_queue_pairs;
 	int q_vectors = vsi->num_q_vectors;
-	int qp_per_vector;
+	int num_ringpairs;
 	int v_start = 0;
 	int qp_idx = 0;
 
@@ -2835,11 +2821,21 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
 	 * group them so there are multiple queues per vector.
 	 */
 	for (; v_start < q_vectors && qp_remaining; v_start++) {
-		qp_per_vector = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
-		for (; qp_per_vector;
-		     qp_per_vector--, qp_idx++, qp_remaining--)	{
-			map_vector_to_rxq(vsi, v_start, qp_idx);
-			map_vector_to_txq(vsi, v_start, qp_idx);
+		struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];
+
+		num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
+
+		q_vector->num_ringpairs = num_ringpairs;
+
+		q_vector->rx.count = 0;
+		q_vector->tx.count = 0;
+		q_vector->rx.ring = NULL;
+		q_vector->tx.ring = NULL;
+
+		while (num_ringpairs--) {
+			map_vector_to_qp(vsi, v_start, qp_idx);
+			qp_idx++;
+			qp_remaining--;
 		}
 	}
 }
@@ -3179,16 +3175,17 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
 {
 	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
-	int r_idx;
+	struct i40e_ring *ring;
 
 	if (!q_vector)
 		return;
 
 	/* disassociate q_vector from rings */
-	for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++)
-		q_vector->tx.ring[r_idx]->q_vector = NULL;
-	for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++)
-		q_vector->rx.ring[r_idx]->q_vector = NULL;
+	i40e_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+
+	i40e_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
 
 	/* only VSI w/ an associated netdev is set up w/ NAPI */
 	if (vsi->netdev)
@@ -5312,6 +5309,9 @@ static int i40e_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 		netif_napi_add(vsi->netdev, &q_vector->napi,
 			       i40e_napi_poll, vsi->work_limit);
 
+	q_vector->rx.latency_range = I40E_LOW_LATENCY;
+	q_vector->tx.latency_range = I40E_LOW_LATENCY;
+
 	/* tie q_vector and vsi together */
 	vsi->q_vectors[v_idx] = q_vector;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3e73bc0..f153f37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1100,27 +1100,28 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	struct i40e_q_vector *q_vector =
 			       container_of(napi, struct i40e_q_vector, napi);
 	struct i40e_vsi *vsi = q_vector->vsi;
+	struct i40e_ring *ring;
 	bool clean_complete = true;
 	int budget_per_ring;
-	int i;
 
 	if (test_bit(__I40E_DOWN, &vsi->state)) {
 		napi_complete(napi);
 		return 0;
 	}
 
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	i40e_for_each_ring(ring, q_vector->tx)
+		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+
 	/* We attempt to distribute budget to each Rx queue fairly, but don't
 	 * allow the budget to go below 1 because that would exit polling early.
-	 * Since the actual Tx work is minimal, we can give the Tx a larger
-	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
-	for (i = 0; i < q_vector->num_ringpairs; i++) {
-		clean_complete &= i40e_clean_tx_irq(q_vector->tx.ring[i],
-						    vsi->work_limit);
-		clean_complete &= i40e_clean_rx_irq(q_vector->rx.ring[i],
-						    budget_per_ring);
-	}
+
+	i40e_for_each_ring(ring, q_vector->rx)
+		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
 
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 7f3f7e3..c2a6746 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -180,6 +180,7 @@ enum i40e_ring_state_t {
 
 /* struct that defines a descriptor ring, associated with a VSI */
 struct i40e_ring {
+	struct i40e_ring *next;		/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	struct net_device *netdev;	/* netdev ring maps to */
@@ -236,9 +237,8 @@ enum i40e_latency_range {
 };
 
 struct i40e_ring_container {
-#define I40E_MAX_RINGPAIR_PER_VECTOR 8
 	/* array of pointers to rings */
-	struct i40e_ring *ring[I40E_MAX_RINGPAIR_PER_VECTOR];
+	struct i40e_ring *ring;
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
 	u16 count;
@@ -246,6 +246,10 @@ struct i40e_ring_container {
 	u16 itr;
 };
 
+/* iterator for handling rings in ring container */
+#define i40e_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
 void i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 12/13] i40e: Add support for 64 bit netstats
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change brings support for 64 bit netstats to the driver. Previously
the stats were 64 bit but highly racy due to the fact that 64 bit
transactions are not atomic on 32 bit systems.  This change makes is so
that the 64 bit byte and packet stats are reliable on all architectures.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 27 +++++++--
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 78 ++++++++++++++++++++++----
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    |  4 ++
 drivers/net/ethernet/intel/i40e/i40e_txrx.h    |  1 +
 4 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 50153ea..1b86138 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -579,6 +579,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	char *p;
 	int j;
 	struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
+	unsigned int start;
 
 	i40e_update_stats(vsi);
 
@@ -587,12 +588,30 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
+	rcu_read_lock();
 	for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) {
-		data[i] = vsi->tx_rings[j]->stats.packets;
-		data[i + 1] = vsi->tx_rings[j]->stats.bytes;
-		data[i + 2] = vsi->rx_rings[j]->stats.packets;
-		data[i + 3] = vsi->rx_rings[j]->stats.bytes;
+		struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[j]);
+		struct i40e_ring *rx_ring;
+
+		if (!tx_ring)
+			continue;
+
+		/* process Tx ring statistics */
+		do {
+			start = u64_stats_fetch_begin_bh(&tx_ring->syncp);
+			data[i] = tx_ring->stats.packets;
+			data[i + 1] = tx_ring->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start));
+
+		/* Rx ring is the 2nd half of the queue pair */
+		rx_ring = &tx_ring[1];
+		do {
+			start = u64_stats_fetch_begin_bh(&rx_ring->syncp);
+			data[i + 2] = rx_ring->stats.packets;
+			data[i + 3] = rx_ring->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start));
 	}
+	rcu_read_unlock();
 	if (vsi == pf->vsi[pf->lan_vsi]) {
 		for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
 			p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3cf23f5..24ee5d4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -347,14 +347,53 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
  **/
 static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct(
 					     struct net_device *netdev,
-					     struct rtnl_link_stats64 *storage)
+					     struct rtnl_link_stats64 *stats)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
+	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		struct i40e_ring *tx_ring, *rx_ring;
+		u64 bytes, packets;
+		unsigned int start;
+
+		tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+		if (!tx_ring)
+			continue;
 
-	*storage = *i40e_get_vsi_stats_struct(vsi);
+		do {
+			start = u64_stats_fetch_begin_bh(&tx_ring->syncp);
+			packets = tx_ring->stats.packets;
+			bytes   = tx_ring->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start));
+
+		stats->tx_packets += packets;
+		stats->tx_bytes   += bytes;
+		rx_ring = &tx_ring[1];
+
+		do {
+			start = u64_stats_fetch_begin_bh(&rx_ring->syncp);
+			packets = rx_ring->stats.packets;
+			bytes   = rx_ring->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start));
 
-	return storage;
+		stats->rx_packets += packets;
+		stats->rx_bytes   += bytes;
+	}
+	rcu_read_unlock();
+
+	/* following stats updated by ixgbe_watchdog_task() */
+	stats->multicast	= vsi_stats->multicast;
+	stats->tx_errors	= vsi_stats->tx_errors;
+	stats->tx_dropped	= vsi_stats->tx_dropped;
+	stats->rx_errors	= vsi_stats->rx_errors;
+	stats->rx_crc_errors	= vsi_stats->rx_crc_errors;
+	stats->rx_length_errors	= vsi_stats->rx_length_errors;
+
+	return stats;
 }
 
 /**
@@ -708,21 +747,38 @@ void i40e_update_stats(struct i40e_vsi *vsi)
 	tx_restart = tx_busy = 0;
 	rx_page = 0;
 	rx_buf = 0;
+	rcu_read_lock();
 	for (q = 0; q < vsi->num_queue_pairs; q++) {
 		struct i40e_ring *p;
+		u64 bytes, packets;
+		unsigned int start;
 
-		p = vsi->rx_rings[q];
-		rx_b += p->stats.bytes;
-		rx_p += p->stats.packets;
-		rx_buf += p->rx_stats.alloc_rx_buff_failed;
-		rx_page += p->rx_stats.alloc_rx_page_failed;
+		/* locate Tx ring */
+		p = ACCESS_ONCE(vsi->tx_rings[q]);
 
-		p = vsi->tx_rings[q];
-		tx_b += p->stats.bytes;
-		tx_p += p->stats.packets;
+		do {
+			start = u64_stats_fetch_begin_bh(&p->syncp);
+			packets = p->stats.packets;
+			bytes = p->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+		tx_b += bytes;
+		tx_p += packets;
 		tx_restart += p->tx_stats.restart_queue;
 		tx_busy += p->tx_stats.tx_busy;
+
+		/* Rx queue is part of the same block as Tx queue */
+		p = &p[1];
+		do {
+			start = u64_stats_fetch_begin_bh(&p->syncp);
+			packets = p->stats.packets;
+			bytes = p->stats.bytes;
+		} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+		rx_b += bytes;
+		rx_p += packets;
+		rx_buf += p->rx_stats.alloc_rx_buff_failed;
+		rx_page += p->rx_stats.alloc_rx_page_failed;
 	}
+	rcu_read_unlock();
 	vsi->tx_restart = tx_restart;
 	vsi->tx_busy = tx_busy;
 	vsi->rx_page_failed = rx_page;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9eee551..dc89e72 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -411,8 +411,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
 	tx_ring->stats.bytes += total_bytes;
 	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
@@ -1075,8 +1077,10 @@ next_desc:
 	}
 
 	rx_ring->next_to_clean = i;
+	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->stats.packets += total_rx_packets;
 	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 5db36c3..db55d99 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -218,6 +218,7 @@ struct i40e_ring {
 
 	/* stats structs */
 	struct i40e_queue_stats	stats;
+	struct u64_stats_sync syncp;
 	union {
 		struct i40e_tx_queue_stats tx_stats;
 		struct i40e_rx_queue_stats rx_stats;
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 11/13] i40e: Move rings from pointer to array to array of pointers
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Allocate the queue pairs individually instead of as a group.  This
allows for much easier queue management as it is possible to dynamically
resize the queues without having to free and allocate the entire block.

Ease statistic collection by treating Tx/Rx queue pairs as a single
unit.  Each pair is allocated together and starts with a Tx queue and
ends with an Rx queue.  By ordering them this way it is possible to know
the Rx offset based on a pointer to the Tx queue.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h         |   6 +-
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 195 +++++++++++++------------
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  40 ++---
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 142 +++++++++---------
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    |   4 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h    |   2 +
 6 files changed, 204 insertions(+), 185 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 789304e..c06a76c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -347,9 +347,9 @@ struct i40e_vsi {
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
 
-	/* These are arrays of rings, allocated at run-time */
-	struct i40e_ring *rx_rings;
-	struct i40e_ring *tx_rings;
+	/* These are containers of ring pointers, allocated at run-time */
+	struct i40e_ring **rx_rings;
+	struct i40e_ring **tx_rings;
 
 	u16 work_limit;
 	/* high bit set means dynamic, use accessor routines to read/write.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 44e3fa4..19e248f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -258,12 +258,12 @@ static ssize_t i40e_dbg_dump_write(struct file *filp,
 
 			for (i = 0; i < vsi->num_queue_pairs; i++) {
 				len = sizeof(struct i40e_tx_buffer);
-				memcpy(p, vsi->tx_rings[i].tx_bi, len);
+				memcpy(p, vsi->tx_rings[i]->tx_bi, len);
 				p += len;
 			}
 			for (i = 0; i < vsi->num_queue_pairs; i++) {
 				len = sizeof(struct i40e_rx_buffer);
-				memcpy(p, vsi->rx_rings[i].rx_bi, len);
+				memcpy(p, vsi->rx_rings[i]->rx_bi, len);
 				p += len;
 			}
 
@@ -484,99 +484,104 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		 "    tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n",
 		 vsi->tx_restart, vsi->tx_busy,
 		 vsi->rx_buf_failed, vsi->rx_page_failed);
-	if (vsi->rx_rings) {
-		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: desc = %p\n",
-				 i, vsi->rx_rings[i].desc);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
-				 i, vsi->rx_rings[i].dev,
-				 vsi->rx_rings[i].netdev,
-				 vsi->rx_rings[i].rx_bi);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-				 i, vsi->rx_rings[i].state,
-				 vsi->rx_rings[i].queue_index,
-				 vsi->rx_rings[i].reg_idx);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n",
-				 i, vsi->rx_rings[i].rx_hdr_len,
-				 vsi->rx_rings[i].rx_buf_len,
-				 vsi->rx_rings[i].dtype);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
-				 i, vsi->rx_rings[i].hsplit,
-				 vsi->rx_rings[i].next_to_use,
-				 vsi->rx_rings[i].next_to_clean,
-				 vsi->rx_rings[i].ring_active);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
-				 i, vsi->rx_rings[i].stats.packets,
-				 vsi->rx_rings[i].stats.bytes,
-				 vsi->rx_rings[i].rx_stats.non_eop_descs);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n",
-				 i,
-				 vsi->rx_rings[i].rx_stats.alloc_rx_page_failed,
-				vsi->rx_rings[i].rx_stats.alloc_rx_buff_failed);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: size = %i, dma = 0x%08lx\n",
-				 i, vsi->rx_rings[i].size,
-				 (long unsigned int)vsi->rx_rings[i].dma);
-			dev_info(&pf->pdev->dev,
-				 "    rx_rings[%i]: vsi = %p, q_vector = %p\n",
-				 i, vsi->rx_rings[i].vsi,
-				 vsi->rx_rings[i].q_vector);
-		}
+	rcu_read_lock();
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]);
+		if (!rx_ring)
+			continue;
+
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: desc = %p\n",
+			 i, rx_ring->desc);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
+			 i, rx_ring->dev,
+			 rx_ring->netdev,
+			 rx_ring->rx_bi);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
+			 i, rx_ring->state,
+			 rx_ring->queue_index,
+			 rx_ring->reg_idx);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n",
+			 i, rx_ring->rx_hdr_len,
+			 rx_ring->rx_buf_len,
+			 rx_ring->dtype);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+			 i, rx_ring->hsplit,
+			 rx_ring->next_to_use,
+			 rx_ring->next_to_clean,
+			 rx_ring->ring_active);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
+			 i, rx_ring->stats.packets,
+			 rx_ring->stats.bytes,
+			 rx_ring->rx_stats.non_eop_descs);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n",
+			 i,
+			 rx_ring->rx_stats.alloc_rx_page_failed,
+			rx_ring->rx_stats.alloc_rx_buff_failed);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: size = %i, dma = 0x%08lx\n",
+			 i, rx_ring->size,
+			 (long unsigned int)rx_ring->dma);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: vsi = %p, q_vector = %p\n",
+			 i, rx_ring->vsi,
+			 rx_ring->q_vector);
 	}
-	if (vsi->tx_rings) {
-		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: desc = %p\n",
-				 i, vsi->tx_rings[i].desc);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
-				 i, vsi->tx_rings[i].dev,
-				 vsi->tx_rings[i].netdev,
-				 vsi->tx_rings[i].tx_bi);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-				 i, vsi->tx_rings[i].state,
-				 vsi->tx_rings[i].queue_index,
-				 vsi->tx_rings[i].reg_idx);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: dtype = %d\n",
-				 i, vsi->tx_rings[i].dtype);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
-				 i, vsi->tx_rings[i].hsplit,
-				 vsi->tx_rings[i].next_to_use,
-				 vsi->tx_rings[i].next_to_clean,
-				 vsi->tx_rings[i].ring_active);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
-				 i, vsi->tx_rings[i].stats.packets,
-				 vsi->tx_rings[i].stats.bytes,
-				 vsi->tx_rings[i].tx_stats.restart_queue);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
-				 i,
-				 vsi->tx_rings[i].tx_stats.tx_busy,
-				 vsi->tx_rings[i].tx_stats.tx_done_old);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: size = %i, dma = 0x%08lx\n",
-				 i, vsi->tx_rings[i].size,
-				 (long unsigned int)vsi->tx_rings[i].dma);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: vsi = %p, q_vector = %p\n",
-				 i, vsi->tx_rings[i].vsi,
-				 vsi->tx_rings[i].q_vector);
-			dev_info(&pf->pdev->dev,
-				 "    tx_rings[%i]: DCB tc = %d\n",
-				 i, vsi->tx_rings[i].dcb_tc);
-		}
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]);
+		if (!tx_ring)
+			continue;
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: desc = %p\n",
+			 i, tx_ring->desc);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
+			 i, tx_ring->dev,
+			 tx_ring->netdev,
+			 tx_ring->tx_bi);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
+			 i, tx_ring->state,
+			 tx_ring->queue_index,
+			 tx_ring->reg_idx);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: dtype = %d\n",
+			 i, tx_ring->dtype);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+			 i, tx_ring->hsplit,
+			 tx_ring->next_to_use,
+			 tx_ring->next_to_clean,
+			 tx_ring->ring_active);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
+			 i, tx_ring->stats.packets,
+			 tx_ring->stats.bytes,
+			 tx_ring->tx_stats.restart_queue);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
+			 i,
+			 tx_ring->tx_stats.tx_busy,
+			 tx_ring->tx_stats.tx_done_old);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: size = %i, dma = 0x%08lx\n",
+			 i, tx_ring->size,
+			 (long unsigned int)tx_ring->dma);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: vsi = %p, q_vector = %p\n",
+			 i, tx_ring->vsi,
+			 tx_ring->q_vector);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: DCB tc = %d\n",
+			 i, tx_ring->dcb_tc);
 	}
+	rcu_read_unlock();
 	dev_info(&pf->pdev->dev,
 		 "    work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n",
 		 vsi->work_limit, vsi->rx_itr_setting,
@@ -782,9 +787,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
 		return;
 	}
 	if (is_rx_ring)
-		ring = vsi->rx_rings[ring_id];
+		ring = *vsi->rx_rings[ring_id];
 	else
-		ring = vsi->tx_rings[ring_id];
+		ring = *vsi->tx_rings[ring_id];
 	if (cnt == 2) {
 		dev_info(&pf->pdev->dev, "vsi = %02i %s ring = %02i\n",
 			 vsi_seid, is_rx_ring ? "rx" : "tx", ring_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index bf607da..50153ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -399,8 +399,8 @@ static void i40e_get_ringparam(struct net_device *netdev,
 	ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
-	ring->rx_pending = vsi->rx_rings[0].count;
-	ring->tx_pending = vsi->tx_rings[0].count;
+	ring->rx_pending = vsi->rx_rings[0]->count;
+	ring->tx_pending = vsi->tx_rings[0]->count;
 	ring->rx_mini_pending = 0;
 	ring->rx_jumbo_pending = 0;
 }
@@ -429,8 +429,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	new_rx_count = ALIGN(new_rx_count, I40E_REQ_DESCRIPTOR_MULTIPLE);
 
 	/* if nothing to do return success */
-	if ((new_tx_count == vsi->tx_rings[0].count) &&
-	    (new_rx_count == vsi->rx_rings[0].count))
+	if ((new_tx_count == vsi->tx_rings[0]->count) &&
+	    (new_rx_count == vsi->rx_rings[0]->count))
 		return 0;
 
 	while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state))
@@ -439,8 +439,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	if (!netif_running(vsi->netdev)) {
 		/* simple case - set for the next time the netdev is started */
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			vsi->tx_rings[i].count = new_tx_count;
-			vsi->rx_rings[i].count = new_rx_count;
+			vsi->tx_rings[i]->count = new_tx_count;
+			vsi->rx_rings[i]->count = new_rx_count;
 		}
 		goto done;
 	}
@@ -451,10 +451,10 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	 */
 
 	/* alloc updated Tx resources */
-	if (new_tx_count != vsi->tx_rings[0].count) {
+	if (new_tx_count != vsi->tx_rings[0]->count) {
 		netdev_info(netdev,
 			    "Changing Tx descriptor count from %d to %d.\n",
-			    vsi->tx_rings[0].count, new_tx_count);
+			    vsi->tx_rings[0]->count, new_tx_count);
 		tx_rings = kcalloc(vsi->alloc_queue_pairs,
 				   sizeof(struct i40e_ring), GFP_KERNEL);
 		if (!tx_rings) {
@@ -464,7 +464,7 @@ static int i40e_set_ringparam(struct net_device *netdev,
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
 			/* clone ring and setup updated count */
-			tx_rings[i] = vsi->tx_rings[i];
+			tx_rings[i] = *vsi->tx_rings[i];
 			tx_rings[i].count = new_tx_count;
 			err = i40e_setup_tx_descriptors(&tx_rings[i]);
 			if (err) {
@@ -481,10 +481,10 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	}
 
 	/* alloc updated Rx resources */
-	if (new_rx_count != vsi->rx_rings[0].count) {
+	if (new_rx_count != vsi->rx_rings[0]->count) {
 		netdev_info(netdev,
 			    "Changing Rx descriptor count from %d to %d\n",
-			    vsi->rx_rings[0].count, new_rx_count);
+			    vsi->rx_rings[0]->count, new_rx_count);
 		rx_rings = kcalloc(vsi->alloc_queue_pairs,
 				   sizeof(struct i40e_ring), GFP_KERNEL);
 		if (!rx_rings) {
@@ -494,7 +494,7 @@ static int i40e_set_ringparam(struct net_device *netdev,
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
 			/* clone ring and setup updated count */
-			rx_rings[i] = vsi->rx_rings[i];
+			rx_rings[i] = *vsi->rx_rings[i];
 			rx_rings[i].count = new_rx_count;
 			err = i40e_setup_rx_descriptors(&rx_rings[i]);
 			if (err) {
@@ -517,8 +517,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
 
 	if (tx_rings) {
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			i40e_free_tx_resources(&vsi->tx_rings[i]);
-			vsi->tx_rings[i] = tx_rings[i];
+			i40e_free_tx_resources(vsi->tx_rings[i]);
+			*vsi->tx_rings[i] = tx_rings[i];
 		}
 		kfree(tx_rings);
 		tx_rings = NULL;
@@ -526,8 +526,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
 
 	if (rx_rings) {
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			i40e_free_rx_resources(&vsi->rx_rings[i]);
-			vsi->rx_rings[i] = rx_rings[i];
+			i40e_free_rx_resources(vsi->rx_rings[i]);
+			*vsi->rx_rings[i] = rx_rings[i];
 		}
 		kfree(rx_rings);
 		rx_rings = NULL;
@@ -588,10 +588,10 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) {
-		data[i] = vsi->tx_rings[j].stats.packets;
-		data[i + 1] = vsi->tx_rings[j].stats.bytes;
-		data[i + 2] = vsi->rx_rings[j].stats.packets;
-		data[i + 3] = vsi->rx_rings[j].stats.bytes;
+		data[i] = vsi->tx_rings[j]->stats.packets;
+		data[i + 1] = vsi->tx_rings[j]->stats.bytes;
+		data[i + 2] = vsi->rx_rings[j]->stats.packets;
+		data[i + 3] = vsi->rx_rings[j]->stats.bytes;
 	}
 	if (vsi == pf->vsi[pf->lan_vsi]) {
 		for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c74ac58..3cf23f5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -376,14 +376,14 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
 	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
 	if (vsi->rx_rings)
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			memset(&vsi->rx_rings[i].stats, 0 ,
-			       sizeof(vsi->rx_rings[i].stats));
-			memset(&vsi->rx_rings[i].rx_stats, 0 ,
-			       sizeof(vsi->rx_rings[i].rx_stats));
-			memset(&vsi->tx_rings[i].stats, 0 ,
-			       sizeof(vsi->tx_rings[i].stats));
-			memset(&vsi->tx_rings[i].tx_stats, 0,
-			       sizeof(vsi->tx_rings[i].tx_stats));
+			memset(&vsi->rx_rings[i]->stats, 0 ,
+			       sizeof(vsi->rx_rings[i]->stats));
+			memset(&vsi->rx_rings[i]->rx_stats, 0 ,
+			       sizeof(vsi->rx_rings[i]->rx_stats));
+			memset(&vsi->tx_rings[i]->stats, 0 ,
+			       sizeof(vsi->tx_rings[i]->stats));
+			memset(&vsi->tx_rings[i]->tx_stats, 0,
+			       sizeof(vsi->tx_rings[i]->tx_stats));
 		}
 	vsi->stat_offsets_loaded = false;
 }
@@ -602,7 +602,7 @@ static void i40e_update_link_xoff_rx(struct i40e_pf *pf)
 			continue;
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			struct i40e_ring *ring = &vsi->tx_rings[i];
+			struct i40e_ring *ring = vsi->tx_rings[i];
 			clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
 		}
 	}
@@ -656,7 +656,7 @@ static void i40e_update_prio_xoff_rx(struct i40e_pf *pf)
 			continue;
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			struct i40e_ring *ring = &vsi->tx_rings[i];
+			struct i40e_ring *ring = vsi->tx_rings[i];
 
 			tc = ring->dcb_tc;
 			if (xoff[tc])
@@ -711,13 +711,13 @@ void i40e_update_stats(struct i40e_vsi *vsi)
 	for (q = 0; q < vsi->num_queue_pairs; q++) {
 		struct i40e_ring *p;
 
-		p = &vsi->rx_rings[q];
+		p = vsi->rx_rings[q];
 		rx_b += p->stats.bytes;
 		rx_p += p->stats.packets;
 		rx_buf += p->rx_stats.alloc_rx_buff_failed;
 		rx_page += p->rx_stats.alloc_rx_page_failed;
 
-		p = &vsi->tx_rings[q];
+		p = vsi->tx_rings[q];
 		tx_b += p->stats.bytes;
 		tx_p += p->stats.packets;
 		tx_restart += p->tx_stats.restart_queue;
@@ -1992,7 +1992,7 @@ static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
 	int i, err = 0;
 
 	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
-		err = i40e_setup_tx_descriptors(&vsi->tx_rings[i]);
+		err = i40e_setup_tx_descriptors(vsi->tx_rings[i]);
 
 	return err;
 }
@@ -2008,8 +2008,8 @@ static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
 	int i;
 
 	for (i = 0; i < vsi->num_queue_pairs; i++)
-		if (vsi->tx_rings[i].desc)
-			i40e_free_tx_resources(&vsi->tx_rings[i]);
+		if (vsi->tx_rings[i]->desc)
+			i40e_free_tx_resources(vsi->tx_rings[i]);
 }
 
 /**
@@ -2027,7 +2027,7 @@ static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
 	int i, err = 0;
 
 	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
-		err = i40e_setup_rx_descriptors(&vsi->rx_rings[i]);
+		err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
 	return err;
 }
 
@@ -2042,8 +2042,8 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
 	int i;
 
 	for (i = 0; i < vsi->num_queue_pairs; i++)
-		if (vsi->rx_rings[i].desc)
-			i40e_free_rx_resources(&vsi->rx_rings[i]);
+		if (vsi->rx_rings[i]->desc)
+			i40e_free_rx_resources(vsi->rx_rings[i]);
 }
 
 /**
@@ -2227,8 +2227,8 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
 	int err = 0;
 	u16 i;
 
-	for (i = 0; (i < vsi->num_queue_pairs) && (!err); i++)
-		err = i40e_configure_tx_ring(&vsi->tx_rings[i]);
+	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
+		err = i40e_configure_tx_ring(vsi->tx_rings[i]);
 
 	return err;
 }
@@ -2278,7 +2278,7 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
 
 	/* set up individual rings */
 	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
-		err = i40e_configure_rx_ring(&vsi->rx_rings[i]);
+		err = i40e_configure_rx_ring(vsi->rx_rings[i]);
 
 	return err;
 }
@@ -2302,8 +2302,8 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
 		qoffset = vsi->tc_config.tc_info[n].qoffset;
 		qcount = vsi->tc_config.tc_info[n].qcount;
 		for (i = qoffset; i < (qoffset + qcount); i++) {
-			struct i40e_ring *rx_ring = &vsi->rx_rings[i];
-			struct i40e_ring *tx_ring = &vsi->tx_rings[i];
+			struct i40e_ring *rx_ring = vsi->rx_rings[i];
+			struct i40e_ring *tx_ring = vsi->tx_rings[i];
 			rx_ring->dcb_tc = n;
 			tx_ring->dcb_tc = n;
 		}
@@ -2615,8 +2615,8 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
 	int i;
 
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
-		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i].reg_idx), 0);
-		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i].reg_idx), 0);
+		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), 0);
+		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), 0);
 	}
 
 	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
@@ -2786,8 +2786,8 @@ static irqreturn_t i40e_intr(int irq, void *data)
 static void map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
 {
 	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
-	struct i40e_ring *tx_ring = &(vsi->tx_rings[qp_idx]);
-	struct i40e_ring *rx_ring = &(vsi->rx_rings[qp_idx]);
+	struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx];
+	struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx];
 
 	tx_ring->q_vector = q_vector;
 	tx_ring->next = q_vector->tx.ring;
@@ -3792,8 +3792,8 @@ void i40e_down(struct i40e_vsi *vsi)
 	i40e_napi_disable_all(vsi);
 
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
-		i40e_clean_tx_ring(&vsi->tx_rings[i]);
-		i40e_clean_rx_ring(&vsi->rx_rings[i]);
+		i40e_clean_tx_ring(vsi->tx_rings[i]);
+		i40e_clean_rx_ring(vsi->rx_rings[i]);
 	}
 }
 
@@ -4220,9 +4220,9 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf)
 			continue;
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			set_check_for_tx_hang(&vsi->tx_rings[i]);
+			set_check_for_tx_hang(vsi->tx_rings[i]);
 			if (test_bit(__I40E_HANG_CHECK_ARMED,
-				     &vsi->tx_rings[i].state))
+				     &vsi->tx_rings[i]->state))
 				armed++;
 		}
 
@@ -4959,6 +4959,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 	int ret = -ENODEV;
 	struct i40e_vsi *vsi;
 	int sz_vectors;
+	int sz_rings;
 	int vsi_idx;
 	int i;
 
@@ -5004,7 +5005,18 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
 	INIT_LIST_HEAD(&vsi->mac_filter_list);
 
-	i40e_set_num_rings_in_vsi(vsi);
+	ret = i40e_set_num_rings_in_vsi(vsi);
+	if (ret)
+		goto err_rings;
+
+	/* allocate memory for ring pointers */
+	sz_rings = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs * 2;
+	vsi->tx_rings = kzalloc(sz_rings, GFP_KERNEL);
+	if (!vsi->tx_rings) {
+		ret = -ENOMEM;
+		goto err_rings;
+	}
+	vsi->rx_rings = &vsi->tx_rings[vsi->alloc_queue_pairs];
 
 	/* allocate memory for q_vector pointers */
 	sz_vectors = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors;
@@ -5022,6 +5034,8 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 	goto unlock_pf;
 
 err_vectors:
+ 	kfree(vsi->tx_rings);
+err_rings:
 	pf->next_vsi = i - 1;
 	kfree(vsi);
 unlock_pf:
@@ -5067,6 +5081,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 
 	/* free the ring and vector containers */
 	kfree(vsi->q_vectors);
+	kfree(vsi->tx_rings);
 
 	pf->vsi[vsi->idx] = NULL;
 	if (vsi->idx < pf->next_vsi)
@@ -5081,34 +5096,39 @@ free_vsi:
 }
 
 /**
+ * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
+ * @vsi: the VSI being cleaned
+ **/
+static s32 i40e_vsi_clear_rings(struct i40e_vsi *vsi)
+{
+	int i;
+
+	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
+		kfree_rcu(vsi->tx_rings[i], rcu);
+		vsi->tx_rings[i] = NULL;
+		vsi->rx_rings[i] = NULL;
+	}
+
+	return 0;
+}
+
+/**
  * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI
  * @vsi: the VSI being configured
  **/
 static int i40e_alloc_rings(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
-	int ret = 0;
 	int i;
 
-	vsi->rx_rings = kcalloc(vsi->alloc_queue_pairs,
-				sizeof(struct i40e_ring), GFP_KERNEL);
-	if (!vsi->rx_rings) {
-		ret = -ENOMEM;
-		goto err_alloc_rings;
-	}
-
-	vsi->tx_rings = kcalloc(vsi->alloc_queue_pairs,
-				sizeof(struct i40e_ring), GFP_KERNEL);
-	if (!vsi->tx_rings) {
-		ret = -ENOMEM;
-		kfree(vsi->rx_rings);
-		goto err_alloc_rings;
-	}
-
 	/* Set basic values in the rings to be used later during open() */
 	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
-		struct i40e_ring *rx_ring = &vsi->rx_rings[i];
-		struct i40e_ring *tx_ring = &vsi->tx_rings[i];
+		struct i40e_ring *tx_ring;
+		struct i40e_ring *rx_ring;
+
+		tx_ring = kzalloc(sizeof(struct i40e_ring) * 2, GFP_KERNEL);
+		if (!tx_ring)
+			goto err_out;
 
 		tx_ring->queue_index = i;
 		tx_ring->reg_idx = vsi->base_queue + i;
@@ -5119,7 +5139,9 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
 		tx_ring->count = vsi->num_desc;
 		tx_ring->size = 0;
 		tx_ring->dcb_tc = 0;
+		vsi->tx_rings[i] = tx_ring;
 
+		rx_ring = &tx_ring[1];
 		rx_ring->queue_index = i;
 		rx_ring->reg_idx = vsi->base_queue + i;
 		rx_ring->ring_active = false;
@@ -5133,24 +5155,14 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
 			set_ring_16byte_desc_enabled(rx_ring);
 		else
 			clear_ring_16byte_desc_enabled(rx_ring);
-	}
-
-err_alloc_rings:
-	return ret;
-}
-
-/**
- * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
- * @vsi: the VSI being cleaned
- **/
-static int i40e_vsi_clear_rings(struct i40e_vsi *vsi)
-{
-	if (vsi) {
-		kfree(vsi->rx_rings);
-		kfree(vsi->tx_rings);
+		vsi->rx_rings[i] = rx_ring;
 	}
 
 	return 0;
+
+err_out:
+	i40e_vsi_clear_rings(vsi);
+	return -ENOMEM;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f153f37..9eee551 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -64,7 +64,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	if (!vsi)
 		return -ENOENT;
 
-	tx_ring = &vsi->tx_rings[0];
+	tx_ring = vsi->tx_rings[0];
 	dev = tx_ring->dev;
 
 	dma = dma_map_single(dev, fdir_data->raw_packet,
@@ -1823,7 +1823,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_ring *tx_ring = &vsi->tx_rings[skb->queue_mapping];
+	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
 
 	/* hardware can't handle really short frames, hardware padding works
 	 * beyond this point
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index c2a6746..5db36c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -228,6 +228,8 @@ struct i40e_ring {
 
 	struct i40e_vsi *vsi;		/* Backreference to associated VSI */
 	struct i40e_q_vector *q_vector;	/* Backreference to associated vector */
+
+	struct rcu_head rcu;		/* to avoid race on free */
 } ____cacheline_internodealigned_in_smp;
 
 enum i40e_latency_range {
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 13/13] i40e: Bump version
From: Jeff Kirsher @ 2013-10-10  6:41 UTC (permalink / raw)
  To: davem
  Cc: Catherine Sullivan, netdev, gospo, sassmann, Jesse Brandeburg,
	Jeff Kirsher
In-Reply-To: <1381387271-29605-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Catherine Sullivan <catherine.sullivan@intel.com>

Update the version number of the driver.

Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 24ee5d4..fbe7fe2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -36,7 +36,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 0
 #define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 9
+#define DRV_VERSION_BUILD 10
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH RFC 0/2] xfrm: Remove ancient sleeping code
From: Fan Du @ 2013-10-10  7:02 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: netdev
In-Reply-To: <20131010063301.GO7660@secunet.com>



On 2013年10月10日 14:33, Steffen Klassert wrote:
> Does anyone still rely on the ancient sleeping when the SA is in
> acquire state? It is disabled by default since more that five years,
> but can cause indefinite task hangs if enabled and the needed state
> does not get resolved.

I saw that "can_sleep" is set true in ip_route_connect which upper layer
protocol relies on it, which ensure not dropping *any* skb.
And acquire timer will make sure the task will not hangs indefinitely.

In xfrm policy queue, XFRM_MAX_QUEUE_LEN is 100, which means 101th skb
will be dropped, how about make it configurable? If CAN_SLEEP flags is
removed, user could adjust this knob if needed in any circumstance.


> We now queue packets to the policy if the states are not yet resolved
> if we are in a code path that can not sleep. We could do this even in
> the case we can sleep. As a bonus, we can remove the FLOWI_FLAG_CAN_SLEEP
> flag because the only thing this flag does, is to notify xfrm that we are
> in a codepath that can sleep.
>
> The two RFC patches to remove the sleeping code are in reply to this
> mail. I'd add this to the ipsec-next tree if there are no objections.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

-- 
浮沉随浪只记今朝笑

--fan

^ permalink raw reply

* [PATCH net-next] inet: rename ir_loc_port to ir_num
From: Eric Dumazet @ 2013-10-10  7:04 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

In commit 634fb979e8f ("inet: includes a sock_common in request_sock")
I forgot that the two ports in sock_common do not have same byte order :

skc_dport is __be16 (network order), but skc_num is __u16 (host order)

So sparse complains because ir_loc_port (mapped into skc_num) is
considered as __u16 while it should be __be16

Let rename ir_loc_port to ireq->ir_num (analogy with inet->inet_num),
and perform appropriate htons/ntohs conversions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/net/inet_sock.h          |    2 +-
 include/net/tcp.h                |    2 +-
 net/dccp/ipv6.c                  |    4 ++--
 net/dccp/minisocks.c             |    8 ++++----
 net/dccp/output.c                |    2 +-
 net/ipv4/inet_connection_sock.c  |    4 ++--
 net/ipv4/syncookies.c            |    8 ++++----
 net/ipv4/tcp_output.c            |    2 +-
 net/ipv6/inet6_connection_sock.c |    2 +-
 net/ipv6/syncookies.c            |    2 +-
 net/ipv6/tcp_ipv6.c              |    2 +-
 11 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f912044..06da91e 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -72,7 +72,7 @@ struct inet_request_sock {
 	struct request_sock	req;
 #define ir_loc_addr		req.__req_common.skc_rcv_saddr
 #define ir_rmt_addr		req.__req_common.skc_daddr
-#define ir_loc_port		req.__req_common.skc_num
+#define ir_num			req.__req_common.skc_num
 #define ir_rmt_port		req.__req_common.skc_dport
 #define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 24a0616..1db3a01 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1110,7 +1110,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
 	ireq->ir_rmt_port = tcp_hdr(skb)->source;
-	ireq->ir_loc_port = tcp_hdr(skb)->dest;
+	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 }
 
 void tcp_enter_memory_pressure(struct sock *sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5cc5b24..4ac71ff 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -231,7 +231,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	fl6.flowlabel = 0;
 	fl6.flowi6_oif = ireq->ir_iif;
 	fl6.fl6_dport = ireq->ir_rmt_port;
-	fl6.fl6_sport = ireq->ir_loc_port;
+	fl6.fl6_sport = htons(ireq->ir_num);
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
@@ -509,7 +509,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.fl6_dport = ireq->ir_rmt_port;
-		fl6.fl6_sport = ireq->ir_loc_port;
+		fl6.fl6_sport = htons(ireq->ir_num);
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 66afbce..9e2f78b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -266,10 +266,10 @@ int dccp_reqsk_init(struct request_sock *req,
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
-	inet_rsk(req)->ir_rmt_port	  = dccp_hdr(skb)->dccph_sport;
-	inet_rsk(req)->ir_loc_port	  = dccp_hdr(skb)->dccph_dport;
-	inet_rsk(req)->acked	  = 0;
-	dreq->dreq_timestamp_echo = 0;
+	inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->ir_num	   = ntohs(dccp_hdr(skb)->dccph_dport);
+	inet_rsk(req)->acked	   = 0;
+	dreq->dreq_timestamp_echo  = 0;
 
 	/* inherit feature negotiation options from listening socket */
 	return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 9bf195d..8876078 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -424,7 +424,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_rsk(req)->ir_loc_port;
+	dh->dccph_sport	= htons(inet_rsk(req)->ir_num);
 	dh->dccph_dport	= inet_rsk(req)->ir_rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2ffd931..fc0e649 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -676,8 +676,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
-		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->ir_loc_port);
-		inet_sk(newsk)->inet_sport = inet_rsk(req)->ir_loc_port;
+		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
+		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 984e21c..3b64c59 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -304,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->ir_loc_port		= th->dest;
-	ireq->ir_rmt_port		= th->source;
-	ireq->ir_loc_addr		= ip_hdr(skb)->daddr;
-	ireq->ir_rmt_addr		= ip_hdr(skb)->saddr;
+	ireq->ir_num		= ntohs(th->dest);
+	ireq->ir_rmt_port	= th->source;
+	ireq->ir_loc_addr	= ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr	= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index faec813..2822ad0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2734,7 +2734,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = ireq->ir_loc_port;
+	th->source = htons(ireq->ir_num);
 	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 1317c56..77bb8af 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -83,7 +83,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	fl6->flowi6_oif = ireq->ir_iif;
 	fl6->flowi6_mark = sk->sk_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
-	fl6->fl6_sport = ireq->ir_loc_port;
+	fl6->fl6_sport = htons(ireq->ir_num);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bc5698f9..d04d3f1 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->mss = mss;
 	ireq->ir_rmt_port = th->source;
-	ireq->ir_loc_port = th->dest;
+	ireq->ir_num = ntohs(th->dest);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	if (ipv6_opt_accepted(sk, skb) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index db234d6..b996ee2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1735,7 +1735,7 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_rsk(req)->ir_loc_port),
+		   inet_rsk(req)->ir_num,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
 		   ntohs(inet_rsk(req)->ir_rmt_port),

^ permalink raw reply related

* Re: BUG in net/l2tp/l2tp_core.c
From: François Cachereul @ 2013-10-10  7:37 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: James Chapman, David S. Miller, netdev
In-Reply-To: <1381316730.4971.16.camel@edumazet-glaptop.roam.corp.google.com>

On 10/09/2013 01:05 PM, Eric Dumazet wrote:
> On Wed, 2013-10-09 at 12:11 +0200, François Cachereul wrote:
>> Hi,
>>
>> I got the following BUG when using l2tp modules with smp kernel.
>> I noticed that l2tp_xmit_skb uses bh_lock_sock/bh_unlok_sock which
>> doesn't seem to be correct because it does a lot of stuff and probably
>> sleep before releasing the lock. I try replacing
>> bh_lock_sock/bh_unlock_sock with lock_sock/release_sock and the BUG
>> doesn't happened anymore. Is it correct ?
>>
>> Regards
>> François
>>
> 
> At first glance, you might read commit
> 6af88da14ee284aaad6e4326da09a89191ab6165
> ("l2tp: Fix locking in l2tp_core.c")
> 
> l2tp_eth_dev_xmit() is called from BH context, so you cannot use
> lock_sock()
> 
> Try the following patch instead :
> 
> diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
> index f0a7ada..ffda81e 100644
> --- a/net/l2tp/l2tp_ppp.c
> +++ b/net/l2tp/l2tp_ppp.c
> @@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
>  		goto error_put_sess_tun;
>  	}
>  
> +	local_bh_disable();
>  	l2tp_xmit_skb(session, skb, session->hdr_len);
> +	local_bh_enable();
>  
>  	sock_put(ps->tunnel_sock);
>  	sock_put(sk);
> @@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
>  	skb->data[0] = ppph[0];
>  	skb->data[1] = ppph[1];
>  
> +	local_bh_disable();
>  	l2tp_xmit_skb(session, skb, session->hdr_len);
> +	local_bh_enable();
>  
>  	sock_put(sk_tun);
>  	sock_put(sk);
> 
> 

That works. Thanks

François

^ permalink raw reply

* [PATCH v3] mac80211: port CCMP to cryptoapi's CCM driver
From: Ard Biesheuvel @ 2013-10-10  7:55 UTC (permalink / raw)
  To: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: patches-QSEj5FYQhm4dnm+yROfE0A, johannes-cdvu00un1VgdHxzADdlk8Q,
	Ard Biesheuvel

Use the generic CCM aead chaining mode driver rather than a local
implementation that sits right on top of the core AES cipher.

This allows the use of accelerated implementations of either
CCM as a whole or the CTR mode which it encapsulates.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---

Changes since v2:
- removed the redundant length setting in vector b_0 (and updated the comment
  accordingly);

Changes since v1:
- use a better way to allocate the variable size aead_request struct on the
  stack;
- pass only a single data pointer argument as we always encrypt/decrypt in
  place;
- add a comment about how vector b_0 is generated.


 net/mac80211/Kconfig   |   1 +
 net/mac80211/aes_ccm.c | 169 ++++++++++++++++---------------------------------
 net/mac80211/aes_ccm.h |  14 ++--
 net/mac80211/key.h     |   2 +-
 net/mac80211/wpa.c     |  44 ++++++-------
 5 files changed, 84 insertions(+), 146 deletions(-)

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 62535fe..dc31ec3 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -4,6 +4,7 @@ config MAC80211
 	select CRYPTO
 	select CRYPTO_ARC4
 	select CRYPTO_AES
+	select CRYPTO_CCM
 	select CRC32
 	select AVERAGE
 	---help---
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index be7614b9..7c7df47 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -2,6 +2,8 @@
  * Copyright 2003-2004, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  *
+ * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -17,134 +19,75 @@
 #include "key.h"
 #include "aes_ccm.h"
 
-static void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *scratch, u8 *a)
+void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+			       u8 *data, size_t data_len, u8 *mic)
 {
-	int i;
-	u8 *b_0, *aad, *b, *s_0;
-
-	b_0 = scratch + 3 * AES_BLOCK_SIZE;
-	aad = scratch + 4 * AES_BLOCK_SIZE;
-	b = scratch;
-	s_0 = scratch + AES_BLOCK_SIZE;
-
-	crypto_cipher_encrypt_one(tfm, b, b_0);
+	struct scatterlist assoc, pt, ct[2];
+	struct {
+		struct aead_request	req;
+		u8			priv[crypto_aead_reqsize(tfm)];
+	} aead_req;
 
-	/* Extra Authenticate-only data (always two AES blocks) */
-	for (i = 0; i < AES_BLOCK_SIZE; i++)
-		aad[i] ^= b[i];
-	crypto_cipher_encrypt_one(tfm, b, aad);
+	memset(&aead_req, 0, sizeof(aead_req));
 
-	aad += AES_BLOCK_SIZE;
+	sg_init_one(&pt, data, data_len);
+	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
+	sg_init_table(ct, 2);
+	sg_set_buf(&ct[0], data, data_len);
+	sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
 
-	for (i = 0; i < AES_BLOCK_SIZE; i++)
-		aad[i] ^= b[i];
-	crypto_cipher_encrypt_one(tfm, a, aad);
+	aead_request_set_tfm(&aead_req.req, tfm);
+	aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
+	aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0);
 
-	/* Mask out bits from auth-only-b_0 */
-	b_0[0] &= 0x07;
-
-	/* S_0 is used to encrypt T (= MIC) */
-	b_0[14] = 0;
-	b_0[15] = 0;
-	crypto_cipher_encrypt_one(tfm, s_0, b_0);
+	crypto_aead_encrypt(&aead_req.req);
 }
 
-
-void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
-			       u8 *data, size_t data_len,
-			       u8 *cdata, u8 *mic)
+int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+			      u8 *data, size_t data_len, u8 *mic)
 {
-	int i, j, last_len, num_blocks;
-	u8 *pos, *cpos, *b, *s_0, *e, *b_0;
-
-	b = scratch;
-	s_0 = scratch + AES_BLOCK_SIZE;
-	e = scratch + 2 * AES_BLOCK_SIZE;
-	b_0 = scratch + 3 * AES_BLOCK_SIZE;
-
-	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_SIZE);
-	last_len = data_len % AES_BLOCK_SIZE;
-	aes_ccm_prepare(tfm, scratch, b);
-
-	/* Process payload blocks */
-	pos = data;
-	cpos = cdata;
-	for (j = 1; j <= num_blocks; j++) {
-		int blen = (j == num_blocks && last_len) ?
-			last_len : AES_BLOCK_SIZE;
-
-		/* Authentication followed by encryption */
-		for (i = 0; i < blen; i++)
-			b[i] ^= pos[i];
-		crypto_cipher_encrypt_one(tfm, b, b);
-
-		b_0[14] = (j >> 8) & 0xff;
-		b_0[15] = j & 0xff;
-		crypto_cipher_encrypt_one(tfm, e, b_0);
-		for (i = 0; i < blen; i++)
-			*cpos++ = *pos++ ^ e[i];
-	}
-
-	for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++)
-		mic[i] = b[i] ^ s_0[i];
+	struct scatterlist assoc, pt, ct[2];
+	struct {
+		struct aead_request	req;
+		u8			priv[crypto_aead_reqsize(tfm)];
+	} aead_req;
+
+	memset(&aead_req, 0, sizeof(aead_req));
+
+	sg_init_one(&pt, data, data_len);
+	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
+	sg_init_table(ct, 2);
+	sg_set_buf(&ct[0], data, data_len);
+	sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
+
+	aead_request_set_tfm(&aead_req.req, tfm);
+	aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
+	aead_request_set_crypt(&aead_req.req, ct, &pt,
+			       data_len + IEEE80211_CCMP_MIC_LEN, b_0);
+
+	return crypto_aead_decrypt(&aead_req.req);
 }
 
-
-int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
-			      u8 *cdata, size_t data_len, u8 *mic, u8 *data)
+struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[])
 {
-	int i, j, last_len, num_blocks;
-	u8 *pos, *cpos, *b, *s_0, *a, *b_0;
-
-	b = scratch;
-	s_0 = scratch + AES_BLOCK_SIZE;
-	a = scratch + 2 * AES_BLOCK_SIZE;
-	b_0 = scratch + 3 * AES_BLOCK_SIZE;
-
-	num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_SIZE);
-	last_len = data_len % AES_BLOCK_SIZE;
-	aes_ccm_prepare(tfm, scratch, a);
-
-	/* Process payload blocks */
-	cpos = cdata;
-	pos = data;
-	for (j = 1; j <= num_blocks; j++) {
-		int blen = (j == num_blocks && last_len) ?
-			last_len : AES_BLOCK_SIZE;
-
-		/* Decryption followed by authentication */
-		b_0[14] = (j >> 8) & 0xff;
-		b_0[15] = j & 0xff;
-		crypto_cipher_encrypt_one(tfm, b, b_0);
-		for (i = 0; i < blen; i++) {
-			*pos = *cpos++ ^ b[i];
-			a[i] ^= *pos++;
-		}
-		crypto_cipher_encrypt_one(tfm, a, a);
-	}
-
-	for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) {
-		if ((mic[i] ^ s_0[i]) != a[i])
-			return -1;
-	}
-
-	return 0;
-}
+	struct crypto_aead *tfm;
+	int err;
 
+	tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return tfm;
 
-struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
-{
-	struct crypto_cipher *tfm;
+	err = crypto_aead_setkey(tfm, key, WLAN_KEY_LEN_CCMP);
+	if (!err)
+		err = crypto_aead_setauthsize(tfm, IEEE80211_CCMP_MIC_LEN);
+	if (!err)
+		return tfm;
 
-	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
-	if (!IS_ERR(tfm))
-		crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP);
-
-	return tfm;
+	crypto_free_aead(tfm);
+	return ERR_PTR(err);
 }
 
-
-void ieee80211_aes_key_free(struct crypto_cipher *tfm)
+void ieee80211_aes_key_free(struct crypto_aead *tfm)
 {
-	crypto_free_cipher(tfm);
+	crypto_free_aead(tfm);
 }
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index 5b7d744..2c7ab19 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -12,13 +12,11 @@
 
 #include <linux/crypto.h>
 
-struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]);
-void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
-			       u8 *data, size_t data_len,
-			       u8 *cdata, u8 *mic);
-int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
-			      u8 *cdata, size_t data_len,
-			      u8 *mic, u8 *data);
-void ieee80211_aes_key_free(struct crypto_cipher *tfm);
+struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]);
+void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+			       u8 *data, size_t data_len, u8 *mic);
+int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+			      u8 *data, size_t data_len, u8 *mic);
+void ieee80211_aes_key_free(struct crypto_aead *tfm);
 
 #endif /* AES_CCM_H */
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 036d57e..aaae0ed 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -83,7 +83,7 @@ struct ieee80211_key {
 			 * Management frames.
 			 */
 			u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN];
-			struct crypto_cipher *tfm;
+			struct crypto_aead *tfm;
 			u32 replays; /* dot11RSNAStatsCCMPReplays */
 		} ccmp;
 		struct {
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index c9edfcb..d657282 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -301,22 +301,16 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 }
 
 
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
 				int encrypted)
 {
 	__le16 mask_fc;
 	int a4_included, mgmt;
 	u8 qos_tid;
-	u8 *b_0, *aad;
-	u16 data_len, len_a;
+	u16 len_a;
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
-	memset(scratch, 0, 6 * AES_BLOCK_SIZE);
-
-	b_0 = scratch + 3 * AES_BLOCK_SIZE;
-	aad = scratch + 4 * AES_BLOCK_SIZE;
-
 	/*
 	 * Mask FC: zero subtype b4 b5 b6 (if not mgmt)
 	 * Retry, PwrMgt, MoreData; set Protected
@@ -338,20 +332,21 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
 	else
 		qos_tid = 0;
 
-	data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN;
-	if (encrypted)
-		data_len -= IEEE80211_CCMP_MIC_LEN;
+	/* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
+	 * mode authentication are not allowed to collide, yet both are derived
+	 * from this vector b_0. We only set L := 1 here to indicate that the
+	 * data size can be represented in (L+1) bytes. The CCM layer will take
+	 * care of storing the data length in the top (L+1) bytes and setting
+	 * and clearing the other bits as is required to derive the two IVs.
+	 */
+	b_0[0] = 0x1;
 
-	/* First block, b_0 */
-	b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
 	/* Nonce: Nonce Flags | A2 | PN
 	 * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
 	 */
 	b_0[1] = qos_tid | (mgmt << 4);
 	memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
 	memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
-	/* l(m) */
-	put_unaligned_be16(data_len, &b_0[14]);
 
 	/* AAD (extra authenticate-only data) / masked 802.11 header
 	 * FC | A1 | A2 | A3 | SC | [A4] | [QC] */
@@ -407,7 +402,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	u8 *pos;
 	u8 pn[6];
 	u64 pn64;
-	u8 scratch[6 * AES_BLOCK_SIZE];
+	u8 aad[2 * AES_BLOCK_SIZE];
+	u8 b_0[AES_BLOCK_SIZE];
 
 	if (info->control.hw_key &&
 	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
@@ -460,9 +456,9 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		return 0;
 
 	pos += IEEE80211_CCMP_HDR_LEN;
-	ccmp_special_blocks(skb, pn, scratch, 0);
-	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len,
-				  pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN));
+	ccmp_special_blocks(skb, pn, b_0, aad, 0);
+	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
+				  skb_put(skb, IEEE80211_CCMP_MIC_LEN));
 
 	return 0;
 }
@@ -525,16 +521,16 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
 	}
 
 	if (!(status->flag & RX_FLAG_DECRYPTED)) {
-		u8 scratch[6 * AES_BLOCK_SIZE];
+		u8 aad[2 * AES_BLOCK_SIZE];
+		u8 b_0[AES_BLOCK_SIZE];
 		/* hardware didn't decrypt/verify MIC */
-		ccmp_special_blocks(skb, pn, scratch, 1);
+		ccmp_special_blocks(skb, pn, b_0, aad, 1);
 
 		if (ieee80211_aes_ccm_decrypt(
-			    key->u.ccmp.tfm, scratch,
+			    key->u.ccmp.tfm, b_0, aad,
 			    skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
 			    data_len,
-			    skb->data + skb->len - IEEE80211_CCMP_MIC_LEN,
-			    skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN))
+			    skb->data + skb->len - IEEE80211_CCMP_MIC_LEN))
 			return RX_DROP_UNUSABLE;
 	}
 
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH 13/16] batman-adv: add build check macros for packet member offset
From: David Laight @ 2013-10-10  8:37 UTC (permalink / raw)
  To: Antonio Quartulli, davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r, Marek Lindner,
	Simon Wunderlich
In-Reply-To: <1381347174-3629-14-git-send-email-antonio-x4xJYDvStAgysxA8WJXlww@public.gmane.org>

> Since we removed the __packed from most of the packets, we should
> make sure that the offset generated by the compiler are correct for
> sent/received data.
...
> +	/* compile time checks for struct member offsets */
> +	BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10);
> +	BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4);
> +	BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4);
> +	BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4);
> +	BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4);
> +	BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4);

It is usually enough to check the size of the structures.
Which is also best done in the .h file so it is validated
in all the compilation environments that might be used.

	David

^ permalink raw reply

* Re: [PATCH 13/16] batman-adv: add build check macros for packet member offset
From: Antonio Quartulli @ 2013-10-10  8:55 UTC (permalink / raw)
  To: The list for a Better Approach To Mobile Ad-hoc Networking
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Marek Lindner,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, Simon Wunderlich
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B7387-CgBM+Bx2aUAnGFn1LkZF6NBPR1lH4CV8@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1735 bytes --]

On Thu, Oct 10, 2013 at 09:37:32AM +0100, David Laight wrote:
> > Since we removed the __packed from most of the packets, we should
> > make sure that the offset generated by the compiler are correct for
> > sent/received data.
> ...
> > +	/* compile time checks for struct member offsets */
> > +	BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10);
> > +	BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4);
> > +	BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4);
> > +	BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4);
> > +	BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4);
> > +	BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4);
> 
> It is usually enough to check the size of the structures.

What if two fields are inverted by mistake in a way that the
size of the struct remains the same? The size check would not complain but the
code would not work anymore.

We use a "generic" struct to access the initial part of any packet.
Therefore these checks are to ensure that the information we are going to access
is really placed at that offset, whatever packet we have.
It was not possible to use a common inner struct and so we relied on this test
to be safe.

> Which is also best done in the .h file so it is validated
> in all the compilation environments that might be used.
> 

This does not really hurt at the moment because we placed them in main.c which
is a file that is always compiled. But thanks for the suggestion: putting them
in the .h file helps to remind the developer to add a new BUILD_ON_BUG when
creating a new packet type.

Thanks a lot.

Regards,

-- 
Antonio Quartulli

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH RFC 0/2] xfrm: Remove ancient sleeping code
From: Steffen Klassert @ 2013-10-10  8:57 UTC (permalink / raw)
  To: Fan Du; +Cc: netdev
In-Reply-To: <525650F6.305@windriver.com>

On Thu, Oct 10, 2013 at 03:02:14PM +0800, Fan Du wrote:
> 
> 
> On 2013年10月10日 14:33, Steffen Klassert wrote:
> >Does anyone still rely on the ancient sleeping when the SA is in
> >acquire state? It is disabled by default since more that five years,
> >but can cause indefinite task hangs if enabled and the needed state
> >does not get resolved.
> 
> I saw that "can_sleep" is set true in ip_route_connect which upper layer
> protocol relies on it, which ensure not dropping *any* skb.

'Any' means one per task in this context. Also, we can't ensure that
this packet reaches it's destination. So where is the difference
between dropping the packet locally or on the network?

> And acquire timer will make sure the task will not hangs indefinitely.
> 

Did you try that? It makes sure that the task wakes up from time to time,
but it goes immediately back to sleep if the needed state is not resolved.
The only terminating contition is when the task gets a signal to exit.

> In xfrm policy queue, XFRM_MAX_QUEUE_LEN is 100, which means 101th skb
> will be dropped, how about make it configurable?

IMO we would have yet another useless knob then. Currently we send all
packets by default to a blackhole as long as the state is not resolved
and most people are fine with it. The queueing is mostly to speed up
tcp handshakes, so 100 packets should be enough. If it really turnes
out that we need more that 100 packets in some cases, we can add a
sysctl then.

^ permalink raw reply

* RE: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
From: Paul Durrant @ 2013-10-10  9:37 UTC (permalink / raw)
  To: Wei Liu
  Cc: Wei Liu, xen-devel@lists.xen.org, netdev@vger.kernel.org,
	David Vrabel, Ian Campbell
In-Reply-To: <20131008161925.GL28411@zion.uk.xensource.com>

> -----Original Message-----
> From: Wei Liu [mailto:wei.liu2@citrix.com]
> Sent: 08 October 2013 17:19
> To: Paul Durrant
> Cc: Wei Liu; xen-devel@lists.xen.org; netdev@vger.kernel.org; David Vrabel;
> Ian Campbell
> Subject: Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6
> checksum offload from guest
> 
> On Tue, Oct 08, 2013 at 02:50:27PM +0100, Paul Durrant wrote:
> [...]
> > > > -#define PKT_PROT_LEN    (ETH_HLEN + \
> > > > -			 VLAN_HLEN + \
> > > > -			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
> > > > -			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
> > > > +#define PKT_PROT_LEN 128
> > > >
> > >
> > > Where does 128 come from?
> > >
> >
> > It's just an arbitrary power of 2 that was chosen because it seems to
> > cover most likely v6 headers and all v4 headers.
> >
> 
> Hmm... How about using the value of MAX_TCP_HEADER? I guess that can
> cover all V4 / V6 headers.
> 
> MAX_TCP_HEADER varies, depending on configuration. To make sure we can
> accommodate all guests packet we might need to use its maximum value
> which can be as big as 128 + 128 + 48.
> 

Because we always double-copy (as the grant copy doesn't copy direct to the linear area) I was concerned about making the pullup too big. I'd rather optimize around a smaller header so how about we stick with 128 but if maybe_pull_tail() finds it needs to pullup then it just pulls up to MAX_TCP_HEADER, so we limit to pullups to maximum of 2?

> > > >  		if (recalculate_partial_csum) {
> > > >  			struct tcphdr *tcph = tcp_hdr(skb);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct tcphdr) +
> > > > +				MAX_TCP_OPTION_SPACE;
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > >
> > > I presume this function is checksum_setup stripped down to handle IPv4
> > > packet. What's the purpose of changing its behaviour? Why the pull_tail
> > > here?
> > >
> >
> > We have to make sure that the TCP header is in the linear area as we
> > are about to write to the checksum field. In practice, the 128 byte
> > pull should guarantee this but in case that is varied later I wanted
> > to make sure things did not start to fail in an add way.
> >
> 
> If you already set the pull size to maximum possible value then this
> will not be necessary anymore, right?
> 
> > > > +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len))
> > > &&
> > > > +	       !done) {
> > > > +		/* We only access at most the first 2 bytes of any option
> > > header
> > > > +		 * to determine the next one.
> > > > +		 */
> > > > +		header_size = skb->network_header + off + 2;
> > > > +		maybe_pull_tail(skb, header_size);
> > > > +
> > >
> > > Will this cause performance problem? Is it possible that you pull too
> > > many times?
> > >
> >
> > I guess it means we may get two pulls for the TCP/UDP headers rather
> > than one so could push the pulls into the individual cases if you
> > think it will affect performance that badly.
> 
> Hmm... Not sure I get what you mean here. The main problem I'm seeing is
> that maybe_pull_tail is called in every loop.
> 
> I would like to see as few pulls as possible because __pskb_pull_tail
> can be expensive and only expected to use in "exceptional cases" (quoted
> from the comment above that function).
> 
> Is it possible to pull TCP_MAX_HEADER bytes once to eliminate all other
> pulls in checksum_setup{,_ipv4,_ipv6}?
> 

Note that the function is called *maybe*_pull_tail(). It only pulls if it needs to :-)

  Paul

^ permalink raw reply

* Re: [net-next v4 RFC] ixgbe: Get and display the notifications from changes of the Rx vxlan UDP port
From: Or Gerlitz @ 2013-10-10 10:07 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: David Miller, Joseph Gasparakis, netdev@vger.kernel.org,
	gospo@redhat.com, sassmann, Don Skidmore
In-Reply-To: <1378286019-8719-2-git-send-email-jeffrey.t.kirsher@intel.com>

On Wed, Sep 4, 2013 at 12:13 PM, Jeff Kirsher
<jeffrey.t.kirsher@intel.com> wrote:
> From: Joseph Gasparakis <joseph.gasparakis@intel.com>

> +#ifdef CONFIG_VXLAN_MODULE
> +static void ixgbe_add_vxlan_port(struct net_device *netdev,
> +                                sa_family_t sa_family, __u16 port)
> +{
> +       netdev_info(netdev, ">>>>> Adding VXLAN port %d / protocol IPv%d\n",
> +                            port, sa_family == AF_INET ? 4 : 6);
> +}
> +


any deep reason under which the low level driver needs to define these
ndo's under special config directive?

Or.

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-10 10:17 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: H. Peter Anvin, linux-kernel, Bjorn Helgaas, Ralf Baechle,
	Michael Ellerman, Martin Schwidefsky, Ingo Molnar, Tejun Heo,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver
In-Reply-To: <1381292648.645.259.camel@pasglop>

On Wed, Oct 09, 2013 at 03:24:08PM +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2013-10-08 at 20:55 -0700, H. Peter Anvin wrote:
> > Why not add a minimum number to pci_enable_msix(), i.e.:
> > 
> > pci_enable_msix(pdev, msix_entries, nvec, minvec)
> > 
> > ... which means "nvec" is the number of interrupts *requested*, and
> > "minvec" is the minimum acceptable number (otherwise fail).
> 
> Which is exactly what Ben (the other Ben :-) suggested and that I
> supports...

Ok, this suggestion sounded in one or another form by several people.
What about name it pcim_enable_msix_range() and wrap in couple more
helpers to complete an API:

int pcim_enable_msix_range(pdev, msix_entries, nvec, minvec);
	<0 - error code
	>0 - number of MSIs allocated, where minvec >= result <= nvec

int pcim_enable_msix(pdev, msix_entries, nvec);
	<0 - error code
	>0 - number of MSIs allocated, where 1 >= result <= nvec 

int pcim_enable_msix_exact(pdev, msix_entries, nvec);
	<0 - error code
	>0 - number of MSIs allocated, where result == nvec

The latter's return value seems odd, but I can not help to make
it consistent with the first two.


(Sorry if you see this message twice - my MUA seems struggle with one of CC).

> Cheers,
> Ben.
> 
> 

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* RE: [PATCH next 2/6] be2net: pass if_id for v1 and V2 versions of TX_CREATE cmd
From: Sathya Perla @ 2013-10-10 10:53 UTC (permalink / raw)
  To: netdev@vger.kernel.org
In-Reply-To: <1380623401-15630-3-git-send-email-sathya.perla@emulex.com>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf
> 
> From: Vasundhara Volam <vasundhara.volam@emulex.com>
> 
> It is a required field for all TX_CREATE cmd versions > 0.
> Signed-off-by: Vasundhara Volam <vasundhara.volam@emulex.com>
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>

David, Could you pls queue this patch for the stable tree.

This was committed into the net-next tree:
commit 81b0265531b2ff091fb91c0af9bc9675f84e6f56
be2net: pass if_id for v1 and V2 versions of TX_CREATE cmd
Date:   Tue Oct 1 15:59:57 2013 +0530

This patch actually belongs in the net-tree. As it fixes a driver initialization failure on SH-R adapters, it needs to be
queued for stable tree too.
It was a mistake on my part to submit it to the net-next tree.

thanks,
-Sathya

^ permalink raw reply

* [PATCH net-next v2 1/3] bonding: use RCU protection for 3ad xmit path
From: Ding Tianhong @ 2013-10-10 11:50 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The commit 278b20837511776dc9d5f6ee1c7fabd5479838bb
(bonding: initial RCU conversion) has convert the roundrobin,
active-backup, broadcast and xor xmit path to rcu protection,
the performance will be better for these mode, so this time,
convert xmit path for 3ad mode.

Suggested-by: Nikolay Aleksandrov <nikolay@redhat.com>
Suggested-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Cc: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_3ad.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ea3e64e..187b1b7 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2344,7 +2344,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 	struct slave *slave;
 	struct port *port;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		port = &(SLAVE_AD_INFO(slave).port);
 		if (port->aggregator && port->aggregator->is_active) {
 			aggregator = port->aggregator;
@@ -2369,9 +2369,9 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 {
 	int ret;
 
-	read_lock(&bond->lock);
+	rcu_read_lock();
 	ret = __bond_3ad_get_active_agg_info(bond, ad_info);
-	read_unlock(&bond->lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -2388,7 +2388,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	int res = 1;
 	int agg_id;
 
-	read_lock(&bond->lock);
 	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
 		pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
 			 dev->name);
@@ -2406,7 +2405,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
 	first_ok_slave = NULL;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		agg = SLAVE_AD_INFO(slave).port.aggregator;
 		if (!agg || agg->aggregator_identifier != agg_id)
 			continue;
@@ -2436,7 +2435,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 		res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
 
 out:
-	read_unlock(&bond->lock);
 	if (res) {
 		/* no suitable interface, frame not sent */
 		kfree_skb(skb);
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH net-next v2 0/3] bonding: patchset for rcu use in bonding
From: Ding Tianhong @ 2013-10-10 11:50 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

Hi:

The Patch Set convert the xmit of 3ad and alb mode to use rcu lock.
dd rtnl lock and remove read lock for bond sysfs.

v2 because the bond_for_each_slave_rcu without rcu_read_lock() will occurs one warming, so
add new function for alb xmit path to avoid warming.

Ding Tianhong (3):
Wang Yufen (1):
Yang Yingliang (1):
  bonding: use RCU protection for 3ad xmit path
  bonding: use RCU protection for alb xmit path
  bonding: add rtnl lock and remove read lock for bond sysfs

 drivers/net/bonding/bond_3ad.c   | 10 +++----
 drivers/net/bonding/bond_alb.c   | 58 +++++++++++++++++++++++++++++-----------
 drivers/net/bonding/bond_sysfs.c | 30 ++++++++++++---------
 drivers/net/bonding/bonding.h    | 14 ++++++++++
 4 files changed, 78 insertions(+), 34 deletions(-)

-- 
1.8.2.1

^ permalink raw reply

* [PATCH net-next v2 3/3] bonding: add rtnl lock and remove read lock for bond sysfs
From: Ding Tianhong @ 2013-10-10 11:51 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond_for_each_slave() will not be protected by read_lock(),
only protected by rtnl_lock(), so need to replace read_lock()
with rtnl_lock().

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_sysfs.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e06c644..2ba1114 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -179,7 +179,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 	struct slave *slave;
 	int res = 0;
 
-	read_lock(&bond->lock);
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
 			/* not enough space for another interface name */
@@ -190,7 +192,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 		}
 		res += sprintf(buf + res, "%s ", slave->dev->name);
 	}
-	read_unlock(&bond->lock);
+
+	rtnl_unlock();
+
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
@@ -628,6 +632,9 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 	unsigned long *targets_rx;
 	int ind, i, j, ret = -EINVAL;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	targets = bond->params.arp_targets;
 	newtarget = in_aton(buf + 1);
 	/* look for adds */
@@ -701,6 +708,7 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 
 	ret = count;
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
@@ -1469,7 +1477,6 @@ static ssize_t bonding_show_queue_id(struct device *d,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
 			/* not enough space for another interface_name:queue_id pair */
@@ -1481,9 +1488,9 @@ static ssize_t bonding_show_queue_id(struct device *d,
 		res += sprintf(buf + res, "%s:%d ",
 			       slave->dev->name, slave->queue_id);
 	}
-	read_unlock(&bond->lock);
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
+
 	rtnl_unlock();
 
 	return res;
@@ -1532,8 +1539,6 @@ static ssize_t bonding_store_queue_id(struct device *d,
 	if (!sdev)
 		goto err_no_cmd;
 
-	read_lock(&bond->lock);
-
 	/* Search for thes slave and check for duplicate qids */
 	update_slave = NULL;
 	bond_for_each_slave(bond, slave, iter) {
@@ -1544,23 +1549,20 @@ static ssize_t bonding_store_queue_id(struct device *d,
 			 */
 			update_slave = slave;
 		else if (qid && qid == slave->queue_id) {
-			goto err_no_cmd_unlock;
+			goto err_no_cmd;
 		}
 	}
 
 	if (!update_slave)
-		goto err_no_cmd_unlock;
+		goto err_no_cmd;
 
 	/* Actually set the qids for the slave */
 	update_slave->queue_id = qid;
 
-	read_unlock(&bond->lock);
 out:
 	rtnl_unlock();
 	return ret;
 
-err_no_cmd_unlock:
-	read_unlock(&bond->lock);
 err_no_cmd:
 	pr_info("invalid input for queue_id set for %s.\n",
 		bond->dev->name);
@@ -1593,6 +1595,9 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 	struct list_head *iter;
 	struct slave *slave;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	if (sscanf(buf, "%d", &new_value) != 1) {
 		pr_err("%s: no all_slaves_active value specified.\n",
 		       bond->dev->name);
@@ -1612,7 +1617,6 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 		goto out;
 	}
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (!bond_is_active_slave(slave)) {
 			if (new_value)
@@ -1621,8 +1625,8 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 				slave->inactive = 1;
 		}
 	}
-	read_unlock(&bond->lock);
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH net-next v2 2/3] bonding: use RCU protection for alb xmit path
From: Ding Tianhong @ 2013-10-10 11:51 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The commit 278b20837511776dc9d5f6ee1c7fabd5479838bb
(bonding: initial RCU conversion) has convert the roundrobin,
active-backup, broadcast and xor xmit path to rcu protection,
the performance will be better for these mode, so this time,
convert xmit path for alb mode.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Cc: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_alb.c | 58 +++++++++++++++++++++++++++++++-----------
 drivers/net/bonding/bonding.h  | 14 ++++++++++
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 576ccea..0287240 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -230,7 +230,7 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
 	max_gap = LLONG_MIN;
 
 	/* Find the slave with the largest gap */
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (SLAVE_IS_OK(slave)) {
 			long long gap = compute_gap(slave);
 
@@ -412,6 +412,39 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
 	return rx_slave;
 }
 
+/* Caller must hold rcu_read_lock() for read */
+static struct slave *__rlb_next_rx_slave(struct bonding *bond)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct slave *before = NULL, *rx_slave = NULL, *slave;
+	struct list_head *iter;
+	bool found = false;
+
+	bond_for_each_slave_rcu(bond, slave, iter) {
+		if (!SLAVE_IS_OK(slave))
+			continue;
+		if (!found) {
+			if (!before || before->speed < slave->speed)
+				before = slave;
+		} else {
+			if (!rx_slave || rx_slave->speed < slave->speed)
+				rx_slave = slave;
+		}
+		if (slave == bond_info->rx_slave)
+			found = true;
+	}
+	/* we didn't find anything after the current or we have something
+	 * better before and up to the current slave
+	 */
+	if (!rx_slave || (before && rx_slave->speed < before->speed))
+		rx_slave = before;
+
+	if (rx_slave)
+		bond_info->rx_slave = rx_slave;
+
+	return rx_slave;
+}
+
 /* teach the switch the mac of a disabled slave
  * on the primary for fault tolerance
  *
@@ -628,12 +661,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct arp_pkt *arp = arp_pkt(skb);
-	struct slave *assigned_slave;
+	struct slave *assigned_slave, *curr_active_slave;
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
 
 	_lock_rx_hashtbl(bond);
 
+	curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
 	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
 	client_info = &(bond_info->rx_hashtbl[hash_index]);
 
@@ -658,14 +693,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 			 * that the new client can be assigned to this entry.
 			 */
 			if (bond->curr_active_slave &&
-			    client_info->slave != bond->curr_active_slave) {
-				client_info->slave = bond->curr_active_slave;
+			    client_info->slave != curr_active_slave) {
+				client_info->slave = curr_active_slave;
 				rlb_update_client(client_info);
 			}
 		}
 	}
 	/* assign a new slave */
-	assigned_slave = rlb_next_rx_slave(bond);
+	assigned_slave = __rlb_next_rx_slave(bond);
 
 	if (assigned_slave) {
 		if (!(client_info->assigned &&
@@ -728,7 +763,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 	/* Don't modify or load balance ARPs that do not originate locally
 	 * (e.g.,arrive via a bridge).
 	 */
-	if (!bond_slave_has_mac(bond, arp->mac_src))
+	if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
 		return NULL;
 
 	if (arp->op_code == htons(ARPOP_REPLY)) {
@@ -1343,11 +1378,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
-	/* make sure that the curr_active_slave do not change during tx
-	 */
-	read_lock(&bond->lock);
-	read_lock(&bond->curr_slave_lock);
-
 	switch (ntohs(skb->protocol)) {
 	case ETH_P_IP: {
 		const struct iphdr *iph = ip_hdr(skb);
@@ -1429,12 +1459,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 
 	if (!tx_slave) {
 		/* unbalanced or unassigned, send through primary */
-		tx_slave = bond->curr_active_slave;
+		tx_slave = rcu_dereference(bond->curr_active_slave);
 		bond_info->unbalanced_load += skb->len;
 	}
 
 	if (tx_slave && SLAVE_IS_OK(tx_slave)) {
-		if (tx_slave != bond->curr_active_slave) {
+		if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
 			memcpy(eth_data->h_source,
 			       tx_slave->dev->dev_addr,
 			       ETH_ALEN);
@@ -1449,8 +1479,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 		}
 	}
 
-	read_unlock(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
 	if (res) {
 		/* no suitable interface, frame not sent */
 		kfree_skb(skb);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 0bd04fb..3c3076e 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -464,6 +464,20 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 	return NULL;
 }
 
+/* Caller must hold rcu_read_lock() for read */
+static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
+					       const u8 *mac)
+{
+	struct list_head *iter;
+	struct slave *tmp;
+
+	bond_for_each_slave_rcu(bond, tmp, iter)
+		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+			return tmp;
+
+	return NULL;
+}
+
 /* Check if the ip is present in arp ip list, or first free slot if ip == 0
  * Returns -1 if not found, index if found
  */
-- 
1.8.2.1

^ permalink raw reply related

* Re: [PATCH v2] Stmmac: fix a bug when the clk_csr is euqal to 0x0
From: Giuseppe CAVALLARO @ 2013-10-10 12:05 UTC (permalink / raw)
  To: Wan ZongShun; +Cc: David Miller, netdev
In-Reply-To: <CAKT61h8=HGL8TvX2LZamXntR4QhM_s8UVdHRhs--kVT2RQ-wDA@mail.gmail.com>

On 10/10/2013 6:40 AM, Wan ZongShun wrote:
> Hi Giuseppe,
>
> According to your advice, I add this new field comments in stmmac.txt.
> And I only make one patch for fix this issue and field comments.

thanks Wan but pls re-send the patch where the comment only reports
what the patch actually does and the fix you are providing.

Put me on CC and remove "Hi Giuseppe"

many thanks
Peppe

>
> Signed-off-by: Wan Zongshun <mcuos.com@gmail.com>
>
> ---
>   Documentation/networking/stmmac.txt               | 3 +++
>   drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
>   include/linux/stmmac.h                            | 1 +
>   3 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/networking/stmmac.txt
> b/Documentation/networking/stmmac.txt
> index 457b8bb..3ed0ea9 100644
> --- a/Documentation/networking/stmmac.txt
> +++ b/Documentation/networking/stmmac.txt
> @@ -116,6 +116,7 @@ struct plat_stmmacenet_data {
>       struct stmmac_mdio_bus_data *mdio_bus_data;
>       struct stmmac_dma_cfg *dma_cfg;
>       int clk_csr;
> +    unsigned int dynamic_mdc_clk_en;
>       int has_gmac;
>       int enh_desc;
>       int tx_coe;
> @@ -148,6 +149,8 @@ Where:
>          GMAC also enables the 4xPBL by default.
>      o fixed_burst/mixed_burst/burst_len
>    o clk_csr: fixed CSR Clock range selection.
> + o dynamic_mdc_clk_en: If it is set to >=1 MDC clk will be selected
> dynamically,
> +               or else you must set a fixed CSR Clock range to clk_src.
>    o has_gmac: uses the GMAC core.
>    o enh_desc: if sets the MAC will use the enhanced descriptor structure.
>    o tx_coe: core is able to perform the tx csum in HW.
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 8d4ccd3..93fc6bc 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -2741,7 +2741,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct
> device *device,
>        * set the MDC clock dynamically according to the csr actual
>        * clock input.
>        */
> -    if (!priv->plat->clk_csr)
> +    if (!!priv->plat->dynamic_mdc_clk_en)
>           stmmac_clk_csr_set(priv);
>       else
>           priv->clk_csr = priv->plat->clk_csr;
> diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
> index bb5deb0..1b9f0b5 100644
> --- a/include/linux/stmmac.h
> +++ b/include/linux/stmmac.h
> @@ -101,6 +101,7 @@ struct plat_stmmacenet_data {
>       struct stmmac_mdio_bus_data *mdio_bus_data;
>       struct stmmac_dma_cfg *dma_cfg;
>       int clk_csr;
> +    unsigned int dynamic_mdc_clk_en;
>       int has_gmac;
>       int enh_desc;
>       int tx_coe;
>

^ permalink raw reply

* Re: [PATCH] usbnet: smsc95xx: Add device tree input for MAC address
From: Dan Murphy @ 2013-10-10 12:08 UTC (permalink / raw)
  To: Ming Lei
  Cc: Steve Glendinning, Network Development, Linux Kernel Mailing List,
	linux-usb, mugunthanvnm
In-Reply-To: <CACVXFVNAB3VnmdoP_LGJsBjQ17j1aoqKMsBE7LRt65OqROeRUQ@mail.gmail.com>

Ming

On 10/07/2013 06:42 AM, Ming Lei wrote:
> On Mon, Oct 7, 2013 at 1:31 AM, Dan Murphy <dmurphy@ti.com> wrote:
>> On 10/06/2013 10:05 AM, Ming Lei wrote:
>>> On Sat, Oct 5, 2013 at 2:25 AM, Dan Murphy <dmurphy@ti.com> wrote:
>>>> If the smsc95xx does not have a valid MAC address stored within
>>>> the eeprom then a random number is generated.  The MAC can also
>>>> be set by uBoot but the smsc95xx does not have a way to read this.
>>>>
>>>> Create the binding for the smsc95xx so that uBoot can set the MAC
>>>> and the code can retrieve the MAC from the modified DTB file.
>>> Suppose there are two smsc95xx usbnet devices connected to usb bus, and
>>> one is built-in, another is hotplug device, can your patch handle the situation
>>> correctly?
>> Look at this line in the patch below
>>
>> sprintf(of_name, "%s%i", SMSC95XX_OF_NAME, dev->udev->dev.id);
>>
>> I am appending the dev ID of the device to the of_name here.  As long as init_mac_address is called, the dev.id and the uBoot
>> entry match then yes.
> Currently, non-root-hub usb device is created and added into usb bus without
> any platform(device tree) knowledge, so you can't expect the match here.
>
> Also not mention the two smsc95xx devices may attach to two different
> usb host controllers(buses).
>
> Thanks,

You are correct I don't expect a match for PnP devices only devices that are hard wired.

After thinking of it I should move the OF code below the EEPROM code as the EEPROM should take preference over the DT code.

I will need to post V2 for that.

Dan

-- 
------------------
Dan Murphy

^ permalink raw reply

* Re: [PATCH] usbnet: smsc95xx: Add device tree input for MAC address
From: Ming Lei @ 2013-10-10 12:39 UTC (permalink / raw)
  To: Dan Murphy
  Cc: Steve Glendinning, Network Development, Linux Kernel Mailing List,
	linux-usb, mugunthanvnm
In-Reply-To: <525698CC.3060406@ti.com>

On Thu, Oct 10, 2013 at 8:08 PM, Dan Murphy <dmurphy@ti.com> wrote:
>
> You are correct I don't expect a match for PnP devices only devices that are hard wired.
>
> After thinking of it I should move the OF code below the EEPROM code as the EEPROM should take preference over the DT code.
>
> I will need to post V2 for that.

Your patch still doesn't deal with two smsc95xx devices built in
one board.

The problem is a generic one, looks it is better to do it when OF supports
discoverable bus.


Thanks,
-- 
Ming Lei

^ permalink raw reply

* RE: [PATCH net-next 01/10] qlcnic: Print informational messages only once during driver load.
From: Sucheta Chakraborty @ 2013-10-10 12:41 UTC (permalink / raw)
  To: Stephen Hemminger, Himanshu Madhani
  Cc: David Miller, netdev, Dept-NX Linux NIC Driver
In-Reply-To: <20131004142938.4ab6c6aa@nehalam.linuxnetplumber.net>

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Saturday, October 05, 2013 3:00 AM
> To: Himanshu Madhani
> Cc: David Miller; netdev; Dept-NX Linux NIC Driver; Sucheta Chakraborty
> Subject: Re: [PATCH net-next 01/10] qlcnic: Print informational
> messages only once during driver load.
> 
> On Fri, 4 Oct 2013 14:30:48 -0400
> Himanshu Madhani <himanshu.madhani@qlogic.com> wrote:
> 
> > From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> >
> > Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
> > Signed-off-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
> > ---
> >  drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  1 +
> >  .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c    | 12 -----------
> >  .../net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c  | 25
> ++++++++++++++++++----
> >  drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  1 +
> >  4 files changed, 23 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
> > b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
> > index 81bf836..a3c4379 100644
> > --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
> > +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
> > @@ -1199,6 +1199,7 @@ struct qlcnic_npar_info {
> >  	u8	promisc_mode;
> >  	u8	offload_flags;
> >  	u8      pci_func;
> > +	u8      mac[ETH_ALEN];
> >  };
> >
> 
> >
> 
> There is a field in netdevice which should probably be used for this
> perm_addr.
> 

This information is logged from the management function in Virtual NIC mode.
Management function do not a have access to the netdev info structure of other functions.
So we cannot make the changes as suggested.

> And then this could be corrected:
> 
> static void qlcnic_dcb_get_perm_hw_addr(struct net_device *netdev, u8
> *addr) {
> 	memcpy(addr, netdev->dev_addr, netdev->addr_len); }

Will incorporate this change in "dcb cleanup patch".

Thanks,
Sucheta.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox