* [PATCH net-next 1/2] sfc: Support setting rss_cpus to 'cores', 'packages' or 'hyperthreads'
2016-05-04 16:56 [PATCH net-next 0/2] sfc: RSS enhancements Edward Cree
@ 2016-05-04 17:01 ` Edward Cree
2016-05-06 19:38 ` David Miller
2016-05-04 17:02 ` [PATCH net-next 2/2] sfc: allocate rx pages on the same node as the interrupt Edward Cree
1 sibling, 1 reply; 6+ messages in thread
From: Edward Cree @ 2016-05-04 17:01 UTC (permalink / raw)
To: linux-net-drivers, netdev, David Miller
These settings autoconfigure the number of RSS channels to match the number of
CPUs present.
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
drivers/net/ethernet/sfc/efx.c | 143 ++++++++++++++++++++++++++++++++---------
1 file changed, 113 insertions(+), 30 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 0705ec86..e6fdf35 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -22,6 +22,7 @@
#include <linux/gfp.h>
#include <linux/aer.h>
#include <linux/interrupt.h>
+#include <xen/xen.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
@@ -161,16 +162,19 @@ static unsigned int tx_irq_mod_usec = 150;
*/
static unsigned int interrupt_mode;
-/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
- * i.e. the number of CPUs among which we may distribute simultaneous
- * interrupt handling.
+/* This is the requested number of CPUs to use for Receive-Side Scaling
+ * (RSS), i.e. the number of CPUs among which we may distribute
+ * simultaneous interrupt handling. Or alternatively it may be set to
+ * "packages", "cores" or "hyperthreads" to get one receive channel per
+ * package, core or hyperthread. The default is "cores".
*
- * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
- * The default (0) means to assign an interrupt to each core.
+ * Systems without MSI-X will only target one CPU via legacy or MSI
+ * interrupt.
*/
-static unsigned int rss_cpus;
-module_param(rss_cpus, uint, 0444);
-MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+static char *rss_cpus;
+module_param(rss_cpus, charp, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling, "
+ "or 'packages', 'cores' or 'hyperthreads'");
static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
@@ -1324,51 +1328,130 @@ void efx_set_default_rx_indir_table(struct efx_nic *efx)
ethtool_rxfh_indir_default(i, efx->rss_spread);
}
-static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
+/* Count the number of unique packages in the given cpumask */
+static unsigned int efx_num_packages(const cpumask_t *in)
{
- cpumask_var_t thread_mask;
+ cpumask_var_t core_mask;
+ unsigned int count;
+ int cpu, cpu2;
+
+ if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) {
+ pr_warn("sfc: RSS disabled due to allocation failure\n");
+ return 1;
+ }
+
+ count = 0;
+ for_each_cpu(cpu, in) {
+ if (!cpumask_test_cpu(cpu, core_mask)) {
+ ++count;
+
+ /* Treat each NUMA node as a separate package */
+ for_each_cpu(cpu2, topology_core_cpumask(cpu)) {
+ if (cpu_to_node(cpu) == cpu_to_node(cpu2))
+ cpumask_set_cpu(cpu2, core_mask);
+ }
+ }
+ }
+
+ free_cpumask_var(core_mask);
+
+ return count;
+}
+
+/* Count the number of unique cores in the given cpumask */
+static unsigned int efx_num_cores(const cpumask_t *in)
+{
+ cpumask_var_t core_mask;
unsigned int count;
int cpu;
- if (rss_cpus) {
- count = rss_cpus;
- } else {
- if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
- netif_warn(efx, probe, efx->net_dev,
- "RSS disabled due to allocation failure\n");
- return 1;
+ if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) {
+ pr_warn("sfc: RSS disabled due to allocation failure\n");
+ return 1;
+ }
+
+ count = 0;
+ for_each_cpu(cpu, in) {
+ if (!cpumask_test_cpu(cpu, core_mask)) {
+ ++count;
+ cpumask_or(core_mask, core_mask,
+ topology_sibling_cpumask(cpu));
}
+ }
- count = 0;
- for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, thread_mask)) {
- ++count;
- cpumask_or(thread_mask, thread_mask,
- topology_sibling_cpumask(cpu));
- }
+ free_cpumask_var(core_mask);
+ return count;
+}
+
+static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
+{
+ struct net_device *net_dev = efx->net_dev;
+ unsigned int n_rxq;
+
+ if (rss_cpus == NULL) {
+ /* default to 'cores' */
+ goto cores;
+ } else if (strcmp(rss_cpus, "packages") == 0) {
+ if (xen_domain()) {
+ netif_warn(efx, drv, net_dev,
+ "Unable to determine CPU topology on Xen reliably. Using 4 rss channels.\n");
+ n_rxq = 4;
+ } else {
+ netif_dbg(efx, drv, net_dev,
+ "using efx_num_packages()\n");
+ n_rxq = efx_num_packages(cpu_online_mask);
+ /* Create two RSS queues even with a single package */
+ if (n_rxq == 1)
+ n_rxq = 2;
}
+ } else if (strcmp(rss_cpus, "cores") == 0) {
+cores:
+ if (xen_domain()) {
+ netif_warn(efx, drv, net_dev,
+ "Unable to determine CPU topology on Xen reliably. Assuming hyperthreading enabled.\n");
+ n_rxq = max_t(int, 1, num_online_cpus() / 2);
+ } else {
+ netif_dbg(efx, drv, net_dev,
+ "using efx_num_cores()\n");
+ n_rxq = efx_num_cores(cpu_online_mask);
+ }
+ } else if (strcmp(rss_cpus, "hyperthreads") == 0) {
+ n_rxq = num_online_cpus();
+ } else if (sscanf(rss_cpus, "%u", &n_rxq) == 1 && n_rxq > 0) {
+ /* nothing to do */
+ } else {
+ netif_err(efx, drv, net_dev,
+ "Bad value for module parameter rss_cpus='%s'\n",
+ rss_cpus);
+ /* default to 'cores' */
+ goto cores;
+ }
- free_cpumask_var(thread_mask);
+ if (n_rxq > EFX_MAX_RX_QUEUES) {
+ netif_warn(efx, drv, net_dev,
+ "Reducing number of rss channels from %u to %u.\n",
+ n_rxq, EFX_MAX_RX_QUEUES);
+ n_rxq = EFX_MAX_RX_QUEUES;
}
+#ifdef CONFIG_SFC_SRIOV
/* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs
*/
-#ifdef CONFIG_SFC_SRIOV
if (efx->type->sriov_wanted) {
if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
- count > efx_vf_size(efx)) {
+ n_rxq > efx_vf_size(efx)) {
netif_warn(efx, probe, efx->net_dev,
"Reducing number of RSS channels from %u to %u for "
"VF support. Increase vf-msix-limit to use more "
"channels on the PF.\n",
- count, efx_vf_size(efx));
- count = efx_vf_size(efx);
+ n_rxq, efx_vf_size(efx));
+ n_rxq = efx_vf_size(efx);
}
}
#endif
- return count;
+ return n_rxq;
}
/* Probe the number and type of interrupts we are able to obtain, and
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH net-next 2/2] sfc: allocate rx pages on the same node as the interrupt
2016-05-04 16:56 [PATCH net-next 0/2] sfc: RSS enhancements Edward Cree
2016-05-04 17:01 ` [PATCH net-next 1/2] sfc: Support setting rss_cpus to 'cores', 'packages' or 'hyperthreads' Edward Cree
@ 2016-05-04 17:02 ` Edward Cree
1 sibling, 0 replies; 6+ messages in thread
From: Edward Cree @ 2016-05-04 17:02 UTC (permalink / raw)
To: linux-net-drivers, netdev, David Miller; +Cc: Daniel Pieczko
From: Daniel Pieczko <dpieczko@solarflare.com>
When the interrupt servicing a channel is on a NUMA node that is
not local to the device, performance is improved by allocating
rx pages on the node local to the interrupt (remote to the device).
The performance-optimal case, where interrupts and applications
are pinned to CPUs on the same node as the device, is not altered
by this change.
This change gave a 1% improvement in transaction rate using Nginx
with all interrupts and Nginx threads on the node remote to the
device. It also gave a small reduction in round-trip latency,
again with the interrupt and application on a different node to
the device.
Allocating rx pages based on the channel->irq_mem_node value is only
valid for the initial driver-load interrupt affinities; if an
interrupt is moved later, the wrong node may be used for the
allocation.
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
drivers/net/ethernet/sfc/efx.c | 38 +++++++++++++++++++++++++++++++++++
drivers/net/ethernet/sfc/net_driver.h | 3 +++
drivers/net/ethernet/sfc/rx.c | 18 ++++++++++++++---
3 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index e6fdf35..f7e6ce5 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -449,6 +449,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
channel->efx = efx;
channel->channel = i;
channel->type = &efx_default_channel_type;
+ channel->irq_mem_node = NUMA_NO_NODE;
for (j = 0; j < EFX_TXQ_TYPES; j++) {
tx_queue = &channel->tx_queue[j];
@@ -1569,6 +1570,38 @@ static int efx_probe_interrupts(struct efx_nic *efx)
return 0;
}
+#ifndef CONFIG_SMP
+static void efx_set_interrupt_affinity(struct efx_nic *efx __always_unused)
+{
+}
+
+static void efx_clear_interrupt_affinity(struct efx_nic *efx __always_unused)
+{
+}
+#else
+static void efx_set_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ unsigned int cpu;
+
+ efx_for_each_channel(channel, efx) {
+ cpu = cpumask_local_spread(channel->channel,
+ pcibus_to_node(efx->pci_dev->bus));
+
+ irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
+ channel->irq_mem_node = cpu_to_mem(cpu);
+ }
+}
+
+static void efx_clear_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ irq_set_affinity_hint(channel->irq, NULL);
+}
+#endif /* CONFIG_SMP */
+
static int efx_soft_enable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel, *end_channel;
@@ -3017,6 +3050,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
cancel_work_sync(&efx->reset_work);
efx_disable_interrupts(efx);
+ efx_clear_interrupt_affinity(efx);
efx_nic_fini_interrupt(efx);
efx_fini_port(efx);
efx->type->fini(efx);
@@ -3166,6 +3200,9 @@ static int efx_pci_probe_main(struct efx_nic *efx)
rc = efx_nic_init_interrupt(efx);
if (rc)
goto fail5;
+
+ efx_set_interrupt_affinity(efx);
+
rc = efx_enable_interrupts(efx);
if (rc)
goto fail6;
@@ -3173,6 +3210,7 @@ static int efx_pci_probe_main(struct efx_nic *efx)
return 0;
fail6:
+ efx_clear_interrupt_affinity(efx);
efx_nic_fini_interrupt(efx);
fail5:
efx_fini_port(efx);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 38c4223..28c3673 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -423,6 +423,7 @@ enum efx_sync_events_state {
* @sync_events_state: Current state of sync events on this channel
* @sync_timestamp_major: Major part of the last ptp sync event
* @sync_timestamp_minor: Minor part of the last ptp sync event
+ * @irq_mem_node: Memory NUMA node of interrupt
*/
struct efx_channel {
struct efx_nic *efx;
@@ -468,6 +469,8 @@ struct efx_channel {
enum efx_sync_events_state sync_events_state;
u32 sync_timestamp_major;
u32 sync_timestamp_minor;
+
+ int irq_mem_node;
};
#ifdef CONFIG_NET_RX_BUSY_POLL
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 8956995..eed0c3b 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -163,9 +163,21 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
do {
page = efx_reuse_page(rx_queue);
if (page == NULL) {
- page = alloc_pages(__GFP_COLD | __GFP_COMP |
- (atomic ? GFP_ATOMIC : GFP_KERNEL),
- efx->rx_buffer_order);
+ /* GFP_ATOMIC may fail because of various reasons,
+ * and we re-schedule rx_fill from non-atomic
+ * context in such a case. So, use __GFP_NOWARN
+ * in the atomic case.
+ */
+ struct efx_channel *channel;
+
+ channel = efx_rx_queue_channel(rx_queue);
+ page = alloc_pages_node(channel->irq_mem_node,
+ __GFP_COMP |
+ (atomic ?
+ (GFP_ATOMIC | __GFP_NOWARN)
+ : GFP_KERNEL),
+ efx->rx_buffer_order);
+
if (unlikely(page == NULL))
return -ENOMEM;
dma_addr =
^ permalink raw reply related [flat|nested] 6+ messages in thread