From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: xen-devel@lists.xenproject.org, keir@xen.org
Subject: Re: [RFC PATCH v1] Replace tasklets with per-cpu implementation.
Date: Tue, 2 Sep 2014 16:28:34 -0400 [thread overview]
Message-ID: <20140902202834.GC16610@laptop.dumpdata.com> (raw)
In-Reply-To: <5400A5EB020000780002F118@mail.emea.novell.com>
On Fri, Aug 29, 2014 at 03:10:19PM +0100, Jan Beulich wrote:
> >>> On 29.08.14 at 15:46, <konrad.wilk@oracle.com> wrote:
> > On Thu, Aug 28, 2014 at 01:39:54PM +0100, Jan Beulich wrote:
> >> >>> On 27.08.14 at 19:58, <konrad.wilk@oracle.com> wrote:
> >> > The 'hvm_do_IRQ_dpci' is the one that is most often scheduled
> >> > and run. The performance bottleneck comes from the fact that
> >> > we take the same spinlock three times: tasklet_schedule,
> >> > when we are about to execute the tasklet, and when we are
> >> > done executing the tasklet.
> >>
> >> Before starting all the work here, did you investigate alternatives
> >> to this specific use of a tasklet? E.g., it being a softirq one, making
> >> it have its own softirq?
> >
> > If I understand you right, you mean implement a tasklet API that
> > would only be used by the hvm_do_IRQ_dpci? Its own spinlock,
> > list, and a separate tasklet_schedule?
>
> No, just a new softirq type, e.g. HVM_DPCI_SOFTIRQ (added to
> the enum in xen/include/xen/softirq.h and all the necessary
> handling put in place).
I typed this prototype up and asked folks with the right hardware to
test it. It _ought_ to work, though I think that the tasklet code
could still use an overhaul.
>From deecf148e0061027c61af30882eee76a66299686 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 29 Aug 2014 13:40:09 -0400
Subject: [PATCH] dpci: Replace tasklet with an softirq
PROTOTYPE.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
xen/drivers/passthrough/io.c | 149 ++++++++++++++++++++++++++++++++++++++++--
xen/drivers/passthrough/pci.c | 5 +-
xen/include/xen/hvm/irq.h | 3 +
xen/include/xen/sched.h | 3 +
xen/include/xen/softirq.h | 1 +
5 files changed, 156 insertions(+), 5 deletions(-)
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index c83af68..4c8ff3b 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,10 +26,105 @@
#include <xen/hvm/irq.h>
#include <xen/tasklet.h>
+bool_t use_softirq;
+boolean_param("use_softirq", use_softirq);
+
struct rangeset *__read_mostly mmio_ro_ranges;
static void hvm_dirq_assist(unsigned long _d);
+static DEFINE_PER_CPU(struct list_head, dpci_list);
+
+enum {
+ STATE_SCHED,
+ STATE_RUN
+};
+static void schedule_dpci_for(struct domain *d)
+{
+ if ( !test_and_set_bit(STATE_SCHED, &d->state) )
+ {
+ unsigned long flags;
+ struct list_head *list;
+
+ local_irq_save(flags);
+ INIT_LIST_HEAD(&d->list);
+ list = &__get_cpu_var(dpci_list);
+ list_add_tail(&d->list, list);
+
+ local_irq_restore(flags);
+ raise_softirq(HVM_DPCI_SOFTIRQ);
+ }
+}
+
+void dpci_kill(struct domain *d)
+{
+ s_time_t s, now;
+ int i = 0;
+
+ s = NOW();
+ while ( test_and_set_bit(STATE_SCHED, &d->state) )
+ {
+ do {
+ now = NOW();
+ process_pending_softirqs();
+ if ( ((now - s) >> 30) > 5 )
+ {
+ s = now;
+ printk("%s stuck .. \n", __func__);
+ i++;
+ }
+ if ( i > 12 )
+ BUG();
+ } while ( test_bit(STATE_SCHED, &d->state) );
+ }
+ while (test_bit(STATE_RUN, &(d)->state))
+ {
+ cpu_relax();
+ }
+ clear_bit(STATE_SCHED, &d->state);
+}
+
+static void dpci_softirq(void)
+{
+
+ struct domain *d;
+ struct list_head *list;
+ struct list_head our_list;
+
+ local_irq_disable();
+ list = &__get_cpu_var(dpci_list);
+
+ INIT_LIST_HEAD(&our_list);
+ list_splice(list, &our_list);
+
+ INIT_LIST_HEAD(&__get_cpu_var(dpci_list));
+
+ local_irq_enable();
+
+ while (!list_empty(&our_list))
+ {
+ d = list_entry(our_list.next, struct domain, list);
+ list_del(&d->list);
+
+ if ( !test_and_set_bit(STATE_RUN, &(d)->state) )
+ {
+ if ( !test_and_clear_bit(STATE_SCHED, &d->state) )
+ BUG();
+ hvm_dirq_assist((unsigned long)d);
+ clear_bit(STATE_RUN, &(d)->state);
+ continue;
+ }
+
+ local_irq_disable();
+
+ INIT_LIST_HEAD(&d->list);
+ list_add_tail(&d->list, &__get_cpu_var(dpci_list));
+ local_irq_enable();
+
+ raise_softirq(HVM_DPCI_SOFTIRQ);
+ }
+}
+
bool_t pt_irq_need_timer(uint32_t flags)
{
return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
@@ -119,9 +214,13 @@ int pt_irq_create_bind_vtd(
return -ENOMEM;
}
memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
- percpu_tasklet_init(
- &hvm_irq_dpci->dirq_tasklet,
- hvm_dirq_assist, (unsigned long)d);
+ if ( !use_softirq )
+ percpu_tasklet_init(
+ &hvm_irq_dpci->dirq_tasklet,
+ hvm_dirq_assist, (unsigned long)d);
+ else
+ printk("%s: Using HVM_DPCI_SOFTIRQ for d%d.\n", __func__, d->domain_id);
+
hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
d->nr_pirqs);
hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
@@ -398,7 +497,10 @@ int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
return 0;
set_bit(mirq, dpci->dirq_mask);
- tasklet_schedule(&dpci->dirq_tasklet);
+ if ( !use_softirq )
+ tasklet_schedule(&dpci->dirq_tasklet);
+ else
+ schedule_dpci_for(d);
return 1;
}
@@ -574,11 +676,50 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
unlock:
spin_unlock(&d->event_lock);
}
+#include <xen/cpu.h>
+static int cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ printk("%s for CPU%d\n", __func__, cpu);
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ BUG(); /* To implement. */
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+ .notifier_call = cpu_callback,
+ .priority = 99
+};
static int __init setup_mmio_ro_ranges(void)
{
mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
RANGESETF_prettyprint_hex);
+ printk("HVM_DPCI_SOFTIRQ is %s\n", use_softirq ? "active" : "offline");
+ if ( use_softirq )
+ {
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+
+ open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
+ register_cpu_notifier(&cpu_nfb);
+ }
return 0;
}
__initcall(setup_mmio_ro_ranges);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 111ac96..24900da 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -296,7 +296,10 @@ static void pci_clean_dpci_irqs(struct domain *d)
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+ if ( !use_softirq )
+ tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+ else
+ dpci_kill(d);
for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
i < d->nr_pirqs;
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index f21b02c..340293c 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -102,6 +102,9 @@ struct hvm_irq_dpci {
struct tasklet dirq_tasklet;
};
+extern bool_t use_softirq;
+void dpci_kill(struct domain *d);
+
/* Modify state of a PCI INTx wire. */
void hvm_pci_intx_assert(
struct domain *d, unsigned int device, unsigned int intx);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c04b25d..ba9982e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -332,6 +332,9 @@ struct domain
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
+ /* For HVM_DPCI_SOFTIRQ. Locking is bit wonky. */
+ struct list_head list;
+ unsigned long state;
};
struct domain_setup_info
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index c5b429c..7134727 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -9,6 +9,7 @@ enum {
RCU_SOFTIRQ,
TASKLET_SOFTIRQ_PERCPU,
TASKLET_SOFTIRQ,
+ HVM_DPCI_SOFTIRQ,
NR_COMMON_SOFTIRQS
};
--
1.9.3
>
> Jan
>
>
next prev parent reply other threads:[~2014-09-02 20:28 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-27 17:58 [RFC PATCH v1] Replace tasklets with per-cpu implementation Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 1/5] tasklet: Introduce per-cpu tasklet for softirq Konrad Rzeszutek Wilk
2014-08-27 18:53 ` Andrew Cooper
2014-08-27 19:06 ` Konrad Rzeszutek Wilk
2014-08-28 8:17 ` Jan Beulich
2014-08-27 17:58 ` [RFC PATCH v1 2/5] tasklet: Add cross CPU feeding of per-cpu tasklets Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 3/5] tasklet: Remove the old-softirq implementation Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 4/5] tasklet: Introduce per-cpu tasklet for schedule tasklet Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 5/5] tasklet: Remove the scaffolding Konrad Rzeszutek Wilk
2014-08-28 12:39 ` [RFC PATCH v1] Replace tasklets with per-cpu implementation Jan Beulich
2014-08-29 13:46 ` Konrad Rzeszutek Wilk
2014-08-29 14:10 ` Jan Beulich
2014-09-02 20:28 ` Konrad Rzeszutek Wilk [this message]
2014-09-03 8:03 ` Jan Beulich
2014-09-08 19:01 ` Konrad Rzeszutek Wilk
2014-09-09 9:01 ` Jan Beulich
2014-09-09 14:37 ` Konrad Rzeszutek Wilk
2014-09-09 16:37 ` Jan Beulich
2014-09-10 16:03 ` Konrad Rzeszutek Wilk
2014-09-10 16:25 ` Jan Beulich
2014-09-10 16:35 ` Konrad Rzeszutek Wilk
-- strict thread matches above, loose matches on Subject: below --
2014-08-29 20:58 Arianna Avanzini
2014-09-02 20:10 ` Konrad Rzeszutek Wilk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140902202834.GC16610@laptop.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=JBeulich@suse.com \
--cc=keir@xen.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).