From: Jens Axboe <jens.axboe@oracle.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org, Alan.Brunelle@hp.com,
arjan@linux.intel.com, dgc@sgi.com, npiggin@suse.de
Subject: Re: [PATCH 4/8] x86: add support for remotely triggering the block softirq
Date: Thu, 7 Feb 2008 15:18:47 +0100 [thread overview]
Message-ID: <20080207141847.GK15220@kernel.dk> (raw)
In-Reply-To: <20080207103804.GE16735@elte.hu>
On Thu, Feb 07 2008, Ingo Molnar wrote:
>
> * Jens Axboe <jens.axboe@oracle.com> wrote:
>
> > > pick up the threaded softirq patches from -rt, those move all
> > > softirqs processing into kernel threads. I'd suggest to extend those
> > > via wakeup-from-remote functionality - it fits the construct quite
> > > naturally. You should also be able to directly observe any
> > > performance impact of threaded softirq handlers. (and if you find
> > > any, let me know so that we can make it faster :-)
> >
> > I was just considering that, since I knew -rt moved the softirqs into
> > threads. I'll look into it, but may not post anything until after my
> > vacation.
>
> we should more seriously investigate kernel thread scalability for
> another reason as well: besides -rt, any generic async IO facility we
> pick up will likely heavily rely on them. Kernel thread scheduling is
> quite a bit lighter than user task scheduling [no TLB flushes, etc.] -
> and if it is still not good enough we could probably accelerate them
> some more. (and everyone will benefit) irq-context softirqs on the other
> hand are quite rigid and bring in many atomicity assumptions so they are
> not as natural to program for.
Here's a patch on top of the other patchset that switches blk-softirq.c
to using kernel threads instead. It's pretty simple. Boot tested,
works for me :-)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 5ae8345..73c71c7 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -7,6 +7,8 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
#include <linux/cpu.h>
#include "blk.h"
@@ -14,41 +16,136 @@
struct blk_comp {
spinlock_t lock;
struct list_head list;
- int dead;
+ struct task_struct *task; /* completion thread for this CPU; NULL while no thread exists */
};
-static DEFINE_PER_CPU(struct blk_comp, blk_cpu_done);
+static DEFINE_PER_CPU(struct blk_comp, blk_irq_data); /* one blk_comp per CPU; takes over from blk_cpu_done */
+
+/*
+ * Wake the block completion thread that belongs to @cpu.
+ */
+static void raise_blk_irq(int cpu)
+{
+	wake_up_process(per_cpu(blk_irq_data, cpu).task);
+}
+
+/*
+ * Drain @list: unlink each request (linked through ->donelist) and hand
+ * it to its queue's softirq_done_fn, front to back, until @list is empty.
+ */
+static void blk_done_softirq(struct list_head *list)
+{
+	struct request *req;
+
+	for (;;) {
+		if (list_empty(list))
+			break;
+
+		req = list_entry(list->next, struct request, donelist);
+		list_del_init(&req->donelist);
+		req->q->softirq_done_fn(req);
+	}
+}
+
+/*
+ * Per-CPU block completion thread.
+ *
+ * @data is the struct blk_comp this thread serves. The thread switches
+ * itself to SCHED_FIFO and then loops until kthread_should_stop(): while
+ * bc->list is empty it sleeps, otherwise it moves the pending requests
+ * onto a private list and completes them through blk_done_softirq().
+ *
+ * Always returns 0.
+ */
+static int blk_irq_thread(void *data)
+{
+	struct blk_comp *bc = data;
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+	/* go to sleeping state before the first emptiness check below */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+		LIST_HEAD(local_list);
+
+		preempt_disable();
+		smp_mb();
+		if (list_empty(&bc->list)) {
+			/* nothing queued: sleep until woken, e.g. by raise_blk_irq() */
+			preempt_enable_no_resched();
+			schedule();
+			preempt_disable();
+		}
+
+		__set_current_state(TASK_RUNNING);
+		smp_mb();
+		while (!list_empty(&bc->list)) {
+			/* detach everything pending while holding the lock ... */
+			spin_lock_irq(&bc->lock);
+			list_replace_init(&bc->list, &local_list);
+			spin_unlock_irq(&bc->lock);
+
+			/* ... and run the completions with the lock dropped */
+			blk_done_softirq(&local_list);
+		}
+		preempt_enable();
+		/* back to sleeping state before re-checking the loop condition */
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+/*
+ * Reset the completion state of @cpu and create its "blk-irq-<cpu>"
+ * thread, bound to that CPU. The thread is only created here, not woken.
+ *
+ * Returns 0 on success; on failure logs a message, leaves ->task NULL
+ * and returns 1.
+ */
+static int create_blk_irq_thread(int cpu)
+{
+	struct blk_comp *comp = &per_cpu(blk_irq_data, cpu);
+	struct task_struct *thread;
+
+	spin_lock_init(&comp->lock);
+	INIT_LIST_HEAD(&comp->list);
+	comp->task = NULL;
+
+	thread = kthread_create(blk_irq_thread, comp, "blk-irq-%d", cpu);
+	if (!IS_ERR(thread)) {
+		kthread_bind(thread, cpu);
+		comp->task = thread;
+		return 0;
+	}
+
+	printk("block: irq thread creation failed\n");
+	return 1;
+}
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (unsigned long) hcpu;
+ struct task_struct *task;
struct blk_comp *bc;
switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ bc = &per_cpu(blk_irq_data, cpu);
+ if (bc->task) {
+ printk("blk: task for %d already there\n", cpu);
+ break;
+ }
+ if (create_blk_irq_thread(cpu))
+ return NOTIFY_BAD;
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ raise_blk_irq(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ bc = &per_cpu(blk_irq_data, cpu);
+ if (!bc->task);
+ break;
+ kthread_bind(bc->task, any_online_cpu(cpu_online_map));
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
+ struct sched_param param;
struct blk_comp *dead;
local_irq_disable();
- bc = &__get_cpu_var(blk_cpu_done);
- dead = &per_cpu(blk_cpu_done, cpu);
+ bc = &__get_cpu_var(blk_irq_data);
+ dead = &per_cpu(blk_irq_data, cpu);
double_spin_lock(&bc->lock, &dead->lock, bc < dead);
list_splice_init(&dead->list, &bc->list);
- dead->dead = 1;
double_spin_unlock(&bc->lock, &dead->lock, bc < dead);
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
- local_irq_enable();
- break;
- }
- case CPU_ONLINE: {
- local_irq_disable();
- bc = &per_cpu(blk_cpu_done, cpu);
- bc->dead = 0;
+
+ param.sched_priority = MAX_RT_PRIO - 1;
+ task = dead->task;
+ sched_setscheduler(task, SCHED_FIFO, ¶m);
+ dead->task = NULL;
+ raise_blk_irq(smp_processor_id());
local_irq_enable();
+ kthread_stop(task);
break;
}
}
@@ -56,35 +153,10 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+static struct notifier_block __cpuinitdata blk_cpu_notifier = { /* notifier block whose callback is blk_cpu_notify() */
.notifier_call = blk_cpu_notify,
};
-/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
- struct blk_comp *bc;
- struct list_head local_list;
-
- local_irq_disable();
- bc = &__get_cpu_var(blk_cpu_done);
- spin_lock(&bc->lock);
- list_replace_init(&bc->list, &local_list);
- spin_unlock(&bc->lock);
- local_irq_enable();
-
- while (!list_empty(&local_list)) {
- struct request *rq;
-
- rq = list_entry(local_list.next, struct request, donelist);
- list_del_init(&rq->donelist);
- rq->q->softirq_done_fn(rq);
- }
-}
-
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
@@ -117,11 +189,11 @@ void blk_complete_request(struct request *req)
if (ccpu == cpu) {
is_dead:
- bc = &__get_cpu_var(blk_cpu_done);
+ bc = &__get_cpu_var(blk_irq_data);
q->local_complete++;
} else {
- bc = &per_cpu(blk_cpu_done, ccpu);
- if (unlikely(bc->dead)) {
+ bc = &per_cpu(blk_irq_data, ccpu);
+ if (unlikely(!bc->task)) {
ccpu = cpu;
goto is_dead;
}
@@ -134,30 +206,27 @@ is_dead:
list_add_tail(&req->donelist, &bc->list);
spin_unlock(&bc->lock);
- if (was_empty) {
- if (cpu == ccpu)
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
- else
- raise_softirq_on_cpu(ccpu, BLOCK_SOFTIRQ);
- }
+ if (was_empty)
+ raise_blk_irq(ccpu);
local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);
-static int __init blk_softirq_init(void)
+__init int blk_softirq_init(void) /* for every possible CPU: create and wake its completion thread, then register the hotplug notifier; returns 0 */
{
int i;
for_each_possible_cpu(i) {
- struct blk_comp *bc = &per_cpu(blk_cpu_done, i);
+ void *cpu;
+ int err;
- spin_lock_init(&bc->lock);
- INIT_LIST_HEAD(&bc->list);
- bc->dead = 0;
+ cpu = (void *) (long) i; /* blk_cpu_notify() receives the CPU number through its void *hcpu argument */
+ err = blk_cpu_notify(&blk_cpu_notifier, CPU_UP_PREPARE, cpu); /* creates the thread via create_blk_irq_thread() */
+ BUG_ON(err == NOTIFY_BAD); /* NOTIFY_BAD means thread creation failed */
+ blk_cpu_notify(&blk_cpu_notifier, CPU_ONLINE, cpu); /* wakes the thread via raise_blk_irq(); NOTE(review): runs for every possible CPU, not only online ones - confirm that is intended */
}
- open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
register_hotcpu_notifier(&blk_cpu_notifier);
return 0;
}
--
Jens Axboe
next prev parent reply other threads:[~2008-02-07 14:19 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-07 9:18 [PATCH 0/8] IO queuing and complete affinity Jens Axboe
2008-02-07 9:18 ` [PATCH 1/8] block: split softirq handling into blk-softirq.c Jens Axboe
2008-02-07 9:18 ` [PATCH 2/8] Add interface for queuing work on a specific CPU Jens Axboe
2008-02-07 9:45 ` Andrew Morton
2008-02-07 9:49 ` Jens Axboe
2008-02-07 17:44 ` Harvey Harrison
2008-02-11 10:51 ` Oleg Nesterov
2008-02-07 9:19 ` [PATCH 3/8] block: make kblockd_schedule_work() take the queue as parameter Jens Axboe
2008-02-07 9:19 ` [PATCH 4/8] x86: add support for remotely triggering the block softirq Jens Axboe
2008-02-07 10:07 ` Ingo Molnar
2008-02-07 10:17 ` Jens Axboe
2008-02-07 10:25 ` Ingo Molnar
2008-02-07 10:31 ` Jens Axboe
2008-02-07 10:38 ` Ingo Molnar
2008-02-07 14:18 ` Jens Axboe [this message]
2008-02-07 10:49 ` [patch] block layer: kmemcheck fixes Ingo Molnar
2008-02-07 17:42 ` Linus Torvalds
2008-02-07 17:55 ` Jens Axboe
2008-02-07 19:31 ` Ingo Molnar
2008-02-07 20:06 ` Jens Axboe
2008-02-08 1:22 ` David Miller
2008-02-08 1:28 ` Linus Torvalds
2008-02-08 15:09 ` Arjan van de Ven
2008-02-08 22:44 ` Nick Piggin
2008-02-08 22:56 ` Arjan van de Ven
2008-02-08 23:58 ` Nick Piggin
2008-02-08 11:38 ` Jens Axboe
2008-02-07 9:19 ` [PATCH 5/8] x86-64: add support for remotely triggering the block softirq Jens Axboe
2008-02-07 9:19 ` [PATCH 6/8] ia64: " Jens Axboe
2008-02-07 9:19 ` [PATCH 7/8] kernel: add generic softirq interface for triggering a remote softirq Jens Axboe
2008-02-07 9:19 ` [PATCH 8/8] block: add test code for testing CPU affinity Jens Axboe
2008-02-07 15:16 ` [PATCH 0/8] IO queuing and complete affinity Alan D. Brunelle
2008-02-07 18:25 ` IO queuing and complete affinity with threads (was Re: [PATCH 0/8] IO queuing and complete affinity) Jens Axboe
2008-02-07 20:40 ` Alan D. Brunelle
2008-02-08 7:38 ` Nick Piggin
2008-02-08 7:47 ` Jens Axboe
2008-02-08 7:53 ` Nick Piggin
2008-02-08 7:59 ` Jens Axboe
2008-02-08 8:12 ` Nick Piggin
2008-02-08 8:24 ` Jens Axboe
2008-02-08 8:33 ` Nick Piggin
2008-02-11 5:22 ` David Chinner
2008-02-12 8:28 ` Jeremy Higdon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080207141847.GK15220@kernel.dk \
--to=jens.axboe@oracle.com \
--cc=Alan.Brunelle@hp.com \
--cc=arjan@linux.intel.com \
--cc=dgc@sgi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=npiggin@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.