netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes
@ 2015-03-04 12:46 Hariprasad Shenai
  2015-03-04 12:46 ` [PATCH net-next 1/2] cxgb4: Move offload Rx queue allocation to separate function Hariprasad Shenai
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Hariprasad Shenai @ 2015-03-04 12:46 UTC (permalink / raw)
  To: netdev; +Cc: davem, leedom, anish, nirranjan, kumaras, swise,
	Hariprasad Shenai

Hi

This patch series adds a common function to allocate RX queues and queue
allocation changes to RDMA CIQ

The patches series is created against 'net-next' tree.
And includes patches on cxgb4 driver.

We have included all the maintainers of respective drivers. Kindly review the
change and let us know in case of any review comments.

Thanks

Hariprasad Shenai (2):
  cxgb4: Move offload Rx queue allocation to separate function
  cxgb4: Try and provide an RDMA CIQ per cpu

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |    6 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |    4 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  124 ++++++++++++--------
 3 files changed, 80 insertions(+), 54 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH net-next 1/2] cxgb4: Move offload Rx queue allocation to separate function
  2015-03-04 12:46 [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes Hariprasad Shenai
@ 2015-03-04 12:46 ` Hariprasad Shenai
  2015-03-04 12:46 ` [PATCH net-next 2/2] cxgb4: Try and provide an RDMA CIQ per cpu Hariprasad Shenai
  2015-03-05 20:12 ` [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Hariprasad Shenai @ 2015-03-04 12:46 UTC (permalink / raw)
  To: netdev; +Cc: davem, leedom, anish, nirranjan, kumaras, swise,
	Hariprasad Shenai

Adds a common function for all Rx queue allocation.

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   73 +++++++++++------------
 1 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a22cf93..836e411 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -957,6 +957,28 @@ static void enable_rx(struct adapter *adap)
 	}
 }
 
+static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
+			   unsigned int nq, unsigned int per_chan, int msi_idx,
+			   u16 *ids)
+{
+	int i, err;
+
+	for (i = 0; i < nq; i++, q++) {
+		if (msi_idx > 0)
+			msi_idx++;
+		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
+				       adap->port[i / per_chan],
+				       msi_idx, q->fl.size ? &q->fl : NULL,
+				       uldrx_handler);
+		if (err)
+			return err;
+		memset(&q->stats, 0, sizeof(q->stats));
+		if (ids)
+			ids[i] = q->rspq.abs_id;
+	}
+	return 0;
+}
+
 /**
  *	setup_sge_queues - configure SGE Tx/Rx/response queues
  *	@adap: the adapter
@@ -1018,51 +1040,26 @@ freeout:	t4_free_sge_resources(adap);
 
 	j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
 	for_each_ofldrxq(s, i) {
-		struct sge_ofld_rxq *q = &s->ofldrxq[i];
-		struct net_device *dev = adap->port[i / j];
-
-		if (msi_idx > 0)
-			msi_idx++;
-		err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
-				       q->fl.size ? &q->fl : NULL,
-				       uldrx_handler);
-		if (err)
-			goto freeout;
-		memset(&q->stats, 0, sizeof(q->stats));
-		s->ofld_rxq[i] = q->rspq.abs_id;
-		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
+		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
+					    adap->port[i / j],
 					    s->fw_evtq.cntxt_id);
 		if (err)
 			goto freeout;
 	}
 
-	for_each_rdmarxq(s, i) {
-		struct sge_ofld_rxq *q = &s->rdmarxq[i];
+#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids) do { \
+	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids); \
+	if (err) \
+		goto freeout; \
+	if (msi_idx > 0) \
+		msi_idx += nq; \
+} while (0)
 
-		if (msi_idx > 0)
-			msi_idx++;
-		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
-				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler);
-		if (err)
-			goto freeout;
-		memset(&q->stats, 0, sizeof(q->stats));
-		s->rdma_rxq[i] = q->rspq.abs_id;
-	}
+	ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq);
+	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
+	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, 1, s->rdma_ciq);
 
-	for_each_rdmaciq(s, i) {
-		struct sge_ofld_rxq *q = &s->rdmaciq[i];
-
-		if (msi_idx > 0)
-			msi_idx++;
-		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
-				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler);
-		if (err)
-			goto freeout;
-		memset(&q->stats, 0, sizeof(q->stats));
-		s->rdma_ciq[i] = q->rspq.abs_id;
-	}
+#undef ALLOC_OFLD_RXQS
 
 	for_each_port(adap, i) {
 		/*
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net-next 2/2] cxgb4: Try and provide an RDMA CIQ per cpu
  2015-03-04 12:46 [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes Hariprasad Shenai
  2015-03-04 12:46 ` [PATCH net-next 1/2] cxgb4: Move offload Rx queue allocation to separate function Hariprasad Shenai
@ 2015-03-04 12:46 ` Hariprasad Shenai
  2015-03-05 20:12 ` [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Hariprasad Shenai @ 2015-03-04 12:46 UTC (permalink / raw)
  To: netdev; +Cc: davem, leedom, anish, nirranjan, kumaras, swise,
	Hariprasad Shenai

To allow for better scalability on systems with large core counts, we
will try and allocate enough RDMA Concentrator IQs and MSI/X vectors as
we have cores. If we cannot get enough MSI/X vectors, fall back to the
minimum required: 1 per adapter rx channel.

Also clean up cxgb_enable_msix() to make it readable and correct a bug
where the vectors are not correctly assigned if the driver doesn't get
the full amount requested.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |    6 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |    4 ++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   53 ++++++++++++++-----
 3 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index d6cda17..6e7eac1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -369,7 +369,7 @@ enum {
 	MAX_OFLD_QSETS = 16,          /* # of offload Tx/Rx queue sets */
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
 	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
-	MAX_RDMA_CIQS = NCHAN,        /* # of  RDMA concentrator IQs */
+	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
 	MAX_ISCSI_QUEUES = NCHAN,     /* # of streaming iSCSI Rx queues */
 };
 
@@ -599,8 +599,8 @@ struct sge {
 	u16 rdmaqs;                 /* # of available RDMA Rx queues */
 	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
 	u16 ofld_rxq[MAX_OFLD_QSETS];
-	u16 rdma_rxq[NCHAN];
-	u16 rdma_ciq[NCHAN];
+	u16 rdma_rxq[MAX_RDMA_QUEUES];
+	u16 rdma_ciq[MAX_RDMA_CIQS];
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u32 fl_pg_order;            /* large page allocation size */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 78854ce..0918c16 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -1769,6 +1769,8 @@ do { \
 		int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx);
 
 		S("QType:", "RDMA-CPL");
+		S("Interface:",
+		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
 		R("RspQ ID:", rspq.abs_id);
 		R("RspQ size:", rspq.size);
 		R("RspQE size:", rspq.iqe_len);
@@ -1788,6 +1790,8 @@ do { \
 		int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
 
 		S("QType:", "RDMA-CIQ");
+		S("Interface:",
+		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
 		R("RspQ ID:", rspq.abs_id);
 		R("RspQ size:", rspq.size);
 		R("RspQE size:", rspq.iqe_len);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 836e411..e344bdc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1057,7 +1057,8 @@ freeout:	t4_free_sge_resources(adap);
 
 	ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq);
 	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
-	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, 1, s->rdma_ciq);
+	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
+	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq);
 
 #undef ALLOC_OFLD_RXQS
 
@@ -5702,7 +5703,16 @@ static void cfg_queues(struct adapter *adap)
 			s->ofldqsets = adap->params.nports;
 		/* For RDMA one Rx queue per channel suffices */
 		s->rdmaqs = adap->params.nports;
-		s->rdmaciqs = adap->params.nports;
+		/* Try and allow at least 1 CIQ per cpu rounding down
+		 * to the number of ports, with a minimum of 1 per port.
+		 * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
+		 * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
+		 * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
+		 */
+		s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
+		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
+				adap->params.nports;
+		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -5788,12 +5798,17 @@ static void reduce_ethqs(struct adapter *adap, int n)
 static int enable_msix(struct adapter *adap)
 {
 	int ofld_need = 0;
-	int i, want, need;
+	int i, want, need, allocated;
 	struct sge *s = &adap->sge;
 	unsigned int nchan = adap->params.nports;
-	struct msix_entry entries[MAX_INGQ + 1];
+	struct msix_entry *entries;
+
+	entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
+			  GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(entries); ++i)
+	for (i = 0; i < MAX_INGQ + 1; ++i)
 		entries[i].entry = i;
 
 	want = s->max_ethqsets + EXTRA_VECS;
@@ -5810,29 +5825,39 @@ static int enable_msix(struct adapter *adap)
 #else
 	need = adap->params.nports + EXTRA_VECS + ofld_need;
 #endif
-	want = pci_enable_msix_range(adap->pdev, entries, need, want);
-	if (want < 0)
-		return want;
+	allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
+	if (allocated < 0) {
+		dev_info(adap->pdev_dev, "not enough MSI-X vectors left,"
+			 " not using MSI-X\n");
+		kfree(entries);
+		return allocated;
+	}
 
-	/*
-	 * Distribute available vectors to the various queue groups.
+	/* Distribute available vectors to the various queue groups.
 	 * Every group gets its minimum requirement and NIC gets top
 	 * priority for leftovers.
 	 */
-	i = want - EXTRA_VECS - ofld_need;
+	i = allocated - EXTRA_VECS - ofld_need;
 	if (i < s->max_ethqsets) {
 		s->max_ethqsets = i;
 		if (i < s->ethqsets)
 			reduce_ethqs(adap, i);
 	}
 	if (is_offload(adap)) {
-		i = want - EXTRA_VECS - s->max_ethqsets;
-		i -= ofld_need - nchan;
+		if (allocated < want) {
+			s->rdmaqs = nchan;
+			s->rdmaciqs = nchan;
+		}
+
+		/* leftovers go to OFLD */
+		i = allocated - EXTRA_VECS - s->max_ethqsets -
+		    s->rdmaqs - s->rdmaciqs;
 		s->ofldqsets = (i / nchan) * nchan;  /* round down */
 	}
-	for (i = 0; i < want; ++i)
+	for (i = 0; i < allocated; ++i)
 		adap->msix_info[i].vec = entries[i].vector;
 
+	kfree(entries);
 	return 0;
 }
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes
  2015-03-04 12:46 [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes Hariprasad Shenai
  2015-03-04 12:46 ` [PATCH net-next 1/2] cxgb4: Move offload Rx queue allocation to separate function Hariprasad Shenai
  2015-03-04 12:46 ` [PATCH net-next 2/2] cxgb4: Try and provide an RDMA CIQ per cpu Hariprasad Shenai
@ 2015-03-05 20:12 ` David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2015-03-05 20:12 UTC (permalink / raw)
  To: hariprasad; +Cc: netdev, leedom, anish, nirranjan, kumaras, swise

From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Wed,  4 Mar 2015 18:16:26 +0530

> This patch series adds a common function to allocate RX queues and queue
> allocation changes to RDMA CIQ
> 
> The patches series is created against 'net-next' tree.
> And includes patches on cxgb4 driver.
> 
> We have included all the maintainers of respective drivers. Kindly review the
> change and let us know in case of any review comments.

Series applied, thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-03-05 20:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-04 12:46 [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes Hariprasad Shenai
2015-03-04 12:46 ` [PATCH net-next 1/2] cxgb4: Move offload Rx queue allocation to separate function Hariprasad Shenai
2015-03-04 12:46 ` [PATCH net-next 2/2] cxgb4: Try and provide an RDMA CIQ per cpu Hariprasad Shenai
2015-03-05 20:12 ` [PATCH net-next 0/2] cxgb4: RX Queue related cleanup and fixes David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).