* RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
@ 2008-03-05 20:51 Tom Herbert
2008-03-05 21:21 ` David Miller
0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2008-03-05 20:51 UTC (permalink / raw)
To: davem, netdev
This patch implements kernel changes to allow scheduling of softirqs between processors.
Signed-off-by: Tom Herbert <therbert@google.com>
---
diff -uprN -X /tmp/donts/si_1 net-2.6/include/linux/interrupt.h net-2.6.patch/include/linux/interrupt.h
--- net-2.6/include/linux/interrupt.h 2008-03-05 09:03:21.033991000 -0800
+++ net-2.6.patch/include/linux/interrupt.h 2008-03-05 09:34:48.010014000 -0800
@@ -247,6 +247,9 @@ static inline void __deprecated save_and
enum
{
HI_SOFTIRQ=0,
+#ifdef CONFIG_SMP
+ SEND_CPU_SOFTIRQ,
+#endif
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
@@ -276,6 +279,13 @@ extern void softirq_init(void);
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU(atomic_t, alt_softirqs);
+
+extern void raise_softirq_oncpu(int cpu, unsigned int nr);
+extern void or_alt_softirqs_pending_irqoff(void);
+extern void or_alt_softirqs_pending(void);
+#endif
/* Tasklets --- multithreaded analogue of BHs.
diff -uprN -X /tmp/donts/si_1 net-2.6/include/linux/smp.h net-2.6.patch/include/linux/smp.h
--- net-2.6/include/linux/smp.h 2008-03-05 09:03:23.150865000 -0800
+++ net-2.6.patch/include/linux/smp.h 2008-03-05 09:25:33.530753000 -0800
@@ -33,6 +33,21 @@ extern void smp_send_stop(void);
*/
extern void smp_send_reschedule(int cpu);
+/*
+ * sends a 'reschedule' event to multiple CPUs in mask:
+ */
+#ifdef ARCH_HAS_SEND_RESCHEDULE_MASK
+extern void smp_send_reschedule_mask(cpumask_t mask);
+#else
+static inline void smp_send_reschedule_mask(cpumask_t mask) /* generic fallback, used when ARCH_HAS_SEND_RESCHEDULE_MASK is not defined */
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, mask) { /* one smp_send_reschedule() call per CPU set in mask */
+ smp_send_reschedule(cpu);
+ }
+}
+#endif
/*
* Prepare machine for booting other CPUs.
diff -uprN -X /tmp/donts/si_1 net-2.6/kernel/softirq.c net-2.6.patch/kernel/softirq.c
--- net-2.6/kernel/softirq.c 2008-03-05 09:03:26.407673000 -0800
+++ net-2.6.patch/kernel/softirq.c 2008-03-05 09:36:33.849334000 -0800
@@ -16,6 +16,7 @@
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
@@ -352,6 +353,62 @@ void open_softirq(int nr, void (*action)
softirq_vec[nr].action = action;
}
+#ifdef CONFIG_SMP
+/*
+ * Functions and definitions to support scheduling of softirqs between CPU's.
+ */
+
+DEFINE_PER_CPU(atomic_t, alt_softirqs);
+static DEFINE_PER_CPU(cpumask_t, softirq_cpus);
+
+static void send_cpu_softirq_action(struct softirq_action *a) /* handler registered for SEND_CPU_SOFTIRQ; 'a' is not used */
+{
+ cpumask_t mask;
+ unsigned long flags;
+
+ local_irq_save(flags); /* copy and clear this CPU's softirq_cpus mask with local IRQs off */
+ mask = __get_cpu_var(softirq_cpus);
+ cpus_clear(__get_cpu_var(softirq_cpus));
+ local_irq_restore(flags);
+
+ /*
+ * Wake up CPUs by sending them a reschedule event. This is
+ * usually implemented by an IPI.
+ */
+ if (!cpus_empty(mask)) /* skip the send when no remote CPU was recorded */
+ smp_send_reschedule_mask(mask);
+}
+
+/*
+ * raise_softirq_oncpu - raise softirq @nr on CPU @cpu.
+ *
+ * If @cpu is the CPU we are running on, the softirq is raised locally.
+ * Otherwise bit @nr is set in the target CPU's alt_softirqs word; if the
+ * bit was not already set, the target is added to this CPU's softirq_cpus
+ * mask and SEND_CPU_SOFTIRQ is raised locally so that
+ * send_cpu_softirq_action() sends the target a reschedule event.
+ *
+ * NOTE(review): test_and_set_bit() is a bitop on unsigned long, while
+ * alt_softirqs is an atomic_t; confirm this is correct on 64-bit
+ * big-endian machines or change the type of alt_softirqs.
+ */
+void raise_softirq_oncpu(int cpu, unsigned int nr)
+{
+	/* get_cpu() disables preemption; it must be paired with put_cpu(). */
+	if (cpu == get_cpu())
+		raise_softirq(nr);
+	else if (!test_and_set_bit(nr, &per_cpu(alt_softirqs, cpu))) {
+		cpu_set(cpu, __get_cpu_var(softirq_cpus));
+		raise_softirq(SEND_CPU_SOFTIRQ);
+	}
+	/* Re-enable preemption; the original never balanced get_cpu(). */
+	put_cpu();
+}
+
+
+inline void or_alt_softirqs_pending_irqoff(void) /* presumably expects local IRQs already disabled, as or_alt_softirqs_pending() arranges — confirm for other callers */
+{
+ __u32 pending;
+
+ pending = atomic_xchg(&__get_cpu_var(alt_softirqs), 0); /* atomically fetch and zero this CPU's alt_softirqs bits */
+ or_softirq_pending(pending); /* pass the fetched bits to or_softirq_pending() */
+}
+
+
+void or_alt_softirqs_pending(void) /* wrapper that runs or_alt_softirqs_pending_irqoff() with local IRQs disabled */
+{
+ unsigned long flags;
+
+ local_irq_save(flags); /* save IRQ state and disable local IRQs */
+ or_alt_softirqs_pending_irqoff();
+ local_irq_restore(flags); /* restore the saved IRQ state */
+}
+#endif /* CONFIG_SMP */
+
/* Tasklets */
struct tasklet_head
{
@@ -488,6 +545,9 @@ void __init softirq_init(void)
{
open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+#ifdef CONFIG_SMP
+ open_softirq(SEND_CPU_SOFTIRQ, send_cpu_softirq_action, NULL);
+#endif
}
static int ksoftirqd(void * __bind_cpu)
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-05 20:51 RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs Tom Herbert
@ 2008-03-05 21:21 ` David Miller
2008-03-07 19:02 ` Max Krasnyanskiy
0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2008-03-05 21:21 UTC (permalink / raw)
To: therbert; +Cc: netdev
From: therbert@google.com (Tom Herbert)
Date: Wed, 5 Mar 2008 12:51:16 -0800 (PST)
> This patch implements kernel changes to allow scheduling of softirq's between processors.
>
> Signed-off-by: Tom Herbert <therbert@google.com>
I've stated this in the past and I still feel that it is foolish to
put all of this code into the kernel when every single piece of
networking hardware will be doing this for us transparently.
Maybe if someone had proposed this 4 or 5 years ago, but right now
this code will be irrelevant by the time it ships to any real users.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-05 21:21 ` David Miller
@ 2008-03-07 19:02 ` Max Krasnyanskiy
2008-03-10 23:26 ` Tom Herbert
2008-03-11 1:06 ` Brandeburg, Jesse
0 siblings, 2 replies; 12+ messages in thread
From: Max Krasnyanskiy @ 2008-03-07 19:02 UTC (permalink / raw)
To: David Miller; +Cc: therbert, netdev, Steven Rostedt, Ingo Molnar
David Miller wrote:
> From: therbert@google.com (Tom Herbert)
> Date: Wed, 5 Mar 2008 12:51:16 -0800 (PST)
>
>> This patch implements kernel changes to allow scheduling of softirq's between processors.
>>
>> Signed-off-by: Tom Herbert <therbert@google.com>
>
> I've stated this in the past and I still feel that it is foolish to
> put all of this code into the kernel when every single piece of
> networking hardware will be doing this for us transparently.
>
> Maybe if someone had proposed this 4 or 5 years ago, but right now
> this code will be irrelevant by the time it ships to any real users.
Plus it seems that for this kind of stuff it would be better to replace network
softirq with kthreads (like in -rt kernel) and let the scheduler take care of
the load balancing. More flexible and scalable.
I'd suggest you play with the -rt kernel and see if it already does what
you need.
Max
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-07 19:02 ` Max Krasnyanskiy
@ 2008-03-10 23:26 ` Tom Herbert
2008-03-11 1:06 ` Brandeburg, Jesse
1 sibling, 0 replies; 12+ messages in thread
From: Tom Herbert @ 2008-03-10 23:26 UTC (permalink / raw)
To: Max Krasnyanskiy; +Cc: David Miller, netdev, Steven Rostedt, Ingo Molnar
> Plus it seems that for this kind of stuff it be better to replace network
> softirq with kthreads (like in -rt kernel) and let the scheduler take care of
> the load balancing. More flexible and scalable.
> I'd suggest for your to play with -rt kernel and see if it already does what
> you need.
We looked a bit at using the ksoftirqd thread for that, but that
seemed to have more complexity and overhead as a solution.
I didn't look at the -rt kernel though, thanks for the pointer to that.
Tom
^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-07 19:02 ` Max Krasnyanskiy
2008-03-10 23:26 ` Tom Herbert
@ 2008-03-11 1:06 ` Brandeburg, Jesse
2008-03-11 16:20 ` Tom Herbert
1 sibling, 1 reply; 12+ messages in thread
From: Brandeburg, Jesse @ 2008-03-11 1:06 UTC (permalink / raw)
To: Max Krasnyanskiy, David Miller
Cc: therbert, netdev, Steven Rostedt, Ingo Molnar
Max Krasnyanskiy wrote:
> David Miller wrote:
>> From: therbert@google.com (Tom Herbert)
>> Date: Wed, 5 Mar 2008 12:51:16 -0800 (PST)
>>
>>> This patch implements kernel changes to allow scheduling of
>>> softirq's between processors.
>>>
>>> Signed-off-by: Tom Herbert <therbert@google.com>
>>
>> I've stated this in the past and I still feel that it is foolish to
>> put all of this code into the kernel when every single piece of
>> networking hardware will be doing this for us transparently.
>>
>> Maybe if someone had proposed this 4 or 5 years ago, but right now
>> this code will be irrelevant by the time it ships to any real users.
>
> Plus it seems that for this kind of stuff it be better to replace
> network softirq with kthreads (like in -rt kernel) and let the
> scheduler take care of the load balancing. More flexible and scalable.
> I'd suggest for your to play with -rt kernel and see if it already
> does what you need.
Could we use something like this to reschedule NAPI onto other
processors? If we get unlucky enough to have multiple napi routines
polling on a single CPU, and one or more completely idle CPUs (idle at
least for softirq) then we could really use one or the other of these
solutions.
Does anyone know how -rt kernels work for high I/O load environments like 10
Gigabit Ethernet?
Jesse
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-11 1:06 ` Brandeburg, Jesse
@ 2008-03-11 16:20 ` Tom Herbert
2008-03-11 16:52 ` Ben Hutchings
0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2008-03-11 16:20 UTC (permalink / raw)
To: Brandeburg, Jesse
Cc: Max Krasnyanskiy, David Miller, netdev, Steven Rostedt,
Ingo Molnar
> Could we use something like this to reschedule NAPI onto other
> processors? If we get unlucky enough to have multiple napi routines
> polling on a single CPU, and one or more completely idle CPUs (idle at
> least for softirq) then we could really use one or the other of these
> solutions.
Jesse,
This patch does provide a general mechanism to schedule (or
reschedule) NAPI to other CPUs. We implemented two scheduling
algorithms: 1) round robin scheduling of NAPI poll function across a
set of CPUs on a per device basis 2) scheduling stack processing by
directing packets to a CPU (using backlog queue); a hash is done on
the 4-tuple to direct the packets for a connection to same CPU thus
emulating HW RSS (we will add support for those devices that can
provide the computed hash per packet).
An algorithm that reschedules NAPI based on load is possible. In the
case you described, maybe the scheduling could take into account the
number of devices in the poll list for the CPU.
Tom
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-11 16:20 ` Tom Herbert
@ 2008-03-11 16:52 ` Ben Hutchings
2008-03-11 23:48 ` Tom Herbert
0 siblings, 1 reply; 12+ messages in thread
From: Ben Hutchings @ 2008-03-11 16:52 UTC (permalink / raw)
To: Tom Herbert
Cc: Brandeburg, Jesse, Max Krasnyanskiy, David Miller, netdev,
Steven Rostedt, Ingo Molnar
Tom Herbert wrote:
> > Could we use something like this to reschedule NAPI onto other
> > processors? If we get unlucky enough to have multiple napi routines
> > polling on a single CPU, and one or more completely idle CPUs (idle at
> > least for softirq) then we could really use one or the other of these
> > solutions.
>
> Jesse,
>
> This patch does provide a general mechanism to schedule (or
> reschedule) NAPI to other CPUs. We implemented two scheduling
> algorithms: 1) round robin scheduling of NAPI poll function across a
> set of CPUs on a per device basis 2) scheduling stack processing by
> directing packets to a CPU (using backlog queue); a hash is done on
> the 4-tuple to direct the packets for a connection to same CPU thus
> emulating HW RSS (we will add support for those devices that can
> provide the computed hash per packet).
Recent versions of Windows that are aware of RSS will instruct network
drivers to update their RSS indirection tables so that received
packets are handled on the same CPU as the thread likely to use the
received data. This requires that the network stack knows the hash
function, so Microsoft specified a "Toeplitz hash" for IPv4 and IPv6.
You can expect that most hardware RSS implementations will use this
function and not a simple XOR.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-11 16:52 ` Ben Hutchings
@ 2008-03-11 23:48 ` Tom Herbert
2008-03-12 15:10 ` Ben Hutchings
0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2008-03-11 23:48 UTC (permalink / raw)
To: Ben Hutchings
Cc: Brandeburg, Jesse, Max Krasnyanskiy, David Miller, netdev,
Steven Rostedt, Ingo Molnar
> Recent versions of Windows that are aware of RSS will instruct network
> drivers to update their RSS indirection tables so that received
> packets are handled on the same CPU as the thread likely to use the
> received data. This requires that the network stack knows the hash
> function, so Microsoft specified a "Toeplitz hash" for IPv4 and IPv6.
> You can expect that most hardware RSS implementations will use this
> function and not a simple XOR.
>
Thanks. I am planning to do the Toeplitz hash in the softRSS. Also,
I was planning to add support for devices that provide the Toeplitz
hash but don't do full RSS.
One nice feature about Microsoft RSS seems to be the ability for the
stack to dynamically re-balance networking load of a device using
an indirection table that is set up in the device. I was wondering if
this is supported in Linux stack or if anyone is working on that?
Tom
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-11 23:48 ` Tom Herbert
@ 2008-03-12 15:10 ` Ben Hutchings
2008-03-13 3:41 ` David Miller
0 siblings, 1 reply; 12+ messages in thread
From: Ben Hutchings @ 2008-03-12 15:10 UTC (permalink / raw)
To: Tom Herbert
Cc: Brandeburg, Jesse, Max Krasnyanskiy, David Miller, netdev,
Steven Rostedt, Ingo Molnar
Tom Herbert wrote:
> > Recent versions of Windows that are aware of RSS will instruct network
> > drivers to update their RSS indirection tables so that received
> > packets are handled on the same CPU as the thread likely to use the
> > received data. This requires that the network stack knows the hash
> > function, so Microsoft specified a "Toeplitz hash" for IPv4 and IPv6.
> > You can expect that most hardware RSS implementations will use this
> > function and not a simple XOR.
> >
>
> Thanks. I am planning to do the Toeplitz hash in the softRSS. Also,
> I was planning to add support for devices that provide the Toeplitz
> hash but don't do full RSS.
>
> One nice feature about Microsoft RSS seems to be the ability for the
> stack to dynamically re-balance networking load of a device using
> an indirection table that is set up in the device. I was wondering if
> this is supported in Linux stack or if anyone is working on that?
It's not supported, but it's on David Miller's to-do list[1].
By the way, Microsoft suddenly decided that RSS is too problematic to
enable by default[2] because software and hardware can disagree about
hash values in the presence of some local NAT implementations.
Hopefully we can avoid that particular pitfall in Linux.
Ben.
1. http://vger.kernel.org/~davem/net_todo.html
2. http://support.microsoft.com/kb/948496
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-12 15:10 ` Ben Hutchings
@ 2008-03-13 3:41 ` David Miller
2008-03-13 11:47 ` Ben Hutchings
0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2008-03-13 3:41 UTC (permalink / raw)
To: bhutchings; +Cc: therbert, jesse.brandeburg, maxk, netdev, rostedt, mingo
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 12 Mar 2008 15:10:52 +0000
> It's not supported, but it's on David Miller's to-do list[1].
...
> 1. http://vger.kernel.org/~davem/net_todo.html
That copy of the TODO list is out of date, and we fully
support multiqueue RX devices, it's a device driver level
issue at this point.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-13 3:41 ` David Miller
@ 2008-03-13 11:47 ` Ben Hutchings
2008-03-13 12:53 ` David Miller
0 siblings, 1 reply; 12+ messages in thread
From: Ben Hutchings @ 2008-03-13 11:47 UTC (permalink / raw)
To: David Miller; +Cc: therbert, jesse.brandeburg, maxk, netdev, rostedt, mingo
David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Wed, 12 Mar 2008 15:10:52 +0000
>
> > It's not supported, but it's on David Miller's to-do list[1].
> ...
> > 1. http://vger.kernel.org/~davem/net_todo.html
>
> That copy of the TODO list is out of date, and we fully
> support multiqueue RX devices, it's a device driver level
> issue at this point.
The specific part I was referring to was:
"This scheme can be further improved upon, if the host tells the driver what
CPU it wished to run a particular session on. With this information, the
driver can steer a session to the same CPU that the scheduler runs the
socket reads on, and achieve the best cache locality for both kernel and
user level rx processing."
So far as I'm aware, this hasn't been done. If it has, I would love to know
how to work with it.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs
2008-03-13 11:47 ` Ben Hutchings
@ 2008-03-13 12:53 ` David Miller
0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2008-03-13 12:53 UTC (permalink / raw)
To: bhutchings; +Cc: therbert, jesse.brandeburg, maxk, netdev, rostedt, mingo
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 13 Mar 2008 11:47:19 +0000
> The specific part I was referring to was:
>
> "This scheme can be further improved upon, if the host tells the driver what
> CPU it wished to run a particular session on. With this information, the
> driver can steer a session to the same CPU that the scheduler runs the
> socket reads on, and achieve the best cache locality for both kernel and
> user level rx processing."
>
> So far as I'm aware, this hasn't been done. If it has, I would love to know
> how to work with it.
That bit is not implemented, no.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2008-03-13 12:53 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-05 20:51 RFC [PATCH net-2.6 1/6] net: Scheduling softirqs between CPUSs Tom Herbert
2008-03-05 21:21 ` David Miller
2008-03-07 19:02 ` Max Krasnyanskiy
2008-03-10 23:26 ` Tom Herbert
2008-03-11 1:06 ` Brandeburg, Jesse
2008-03-11 16:20 ` Tom Herbert
2008-03-11 16:52 ` Ben Hutchings
2008-03-11 23:48 ` Tom Herbert
2008-03-12 15:10 ` Ben Hutchings
2008-03-13 3:41 ` David Miller
2008-03-13 11:47 ` Ben Hutchings
2008-03-13 12:53 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).