xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: xen-devel@lists.xenproject.org, keir@xen.org
Subject: Re: [RFC PATCH v1] Replace tasklets with per-cpu implementation.
Date: Tue, 2 Sep 2014 16:28:34 -0400	[thread overview]
Message-ID: <20140902202834.GC16610@laptop.dumpdata.com> (raw)
In-Reply-To: <5400A5EB020000780002F118@mail.emea.novell.com>

On Fri, Aug 29, 2014 at 03:10:19PM +0100, Jan Beulich wrote:
> >>> On 29.08.14 at 15:46, <konrad.wilk@oracle.com> wrote:
> > On Thu, Aug 28, 2014 at 01:39:54PM +0100, Jan Beulich wrote:
> >> >>> On 27.08.14 at 19:58, <konrad.wilk@oracle.com> wrote:
> >> > The 'hvm_do_IRQ_dpci' is the one that is most often scheduled
> >> > and run. The performance bottleneck comes from the fact that
> >> > we take the same spinlock three times: tasklet_schedule,
> >> > when we are about to execute the tasklet, and when we are
> >> > done executing the tasklet.
> >> 
> >> Before starting all the work here, did you investigate alternatives
> >> to this specific use of a tasklet? E.g., it being a softirq one, making
> >> it have its own softirq?
> > 
> > If I understand you right, you mean implement a tasklet API that
> > would only be used by the hvm_do_IRQ_dpci? Its own spinlock,
> > list, and a separate tasklet_schedule?
> 
> No, just a new softirq type, e.g. HVM_DPCI_SOFTIRQ (added to
> the enum in xen/include/xen/softirq.h and all the necessary
> handling put in place).

I typed this prototype up and asked folks with the right hardware to
test it. It _ought_ to work, though I think that the tasklet code
still could use an overhaul.


>From deecf148e0061027c61af30882eee76a66299686 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 29 Aug 2014 13:40:09 -0400
Subject: [PATCH] dpci: Replace tasklet with a softirq

PROTOTYPE.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 xen/drivers/passthrough/io.c  | 149 ++++++++++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/pci.c |   5 +-
 xen/include/xen/hvm/irq.h     |   3 +
 xen/include/xen/sched.h       |   3 +
 xen/include/xen/softirq.h     |   1 +
 5 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index c83af68..4c8ff3b 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -26,10 +26,105 @@
 #include <xen/hvm/irq.h>
 #include <xen/tasklet.h>
 
+bool_t use_softirq;
+boolean_param("use_softirq", use_softirq);
+
 struct rangeset *__read_mostly mmio_ro_ranges;
 
 static void hvm_dirq_assist(unsigned long _d);
 
+static DEFINE_PER_CPU(struct list_head, dpci_list);
+
+enum {
+    STATE_SCHED,
+    STATE_RUN
+};
+static void schedule_dpci_for(struct domain *d)
+{
+    if ( !test_and_set_bit(STATE_SCHED, &d->state) )
+    {
+        unsigned long flags;
+        struct list_head *list;
+
+        local_irq_save(flags);
+        INIT_LIST_HEAD(&d->list);
+        list = &__get_cpu_var(dpci_list);
+        list_add_tail(&d->list, list);
+
+        local_irq_restore(flags);
+        raise_softirq(HVM_DPCI_SOFTIRQ);
+    }
+}
+
+void dpci_kill(struct domain *d)
+{
+    s_time_t s, now;
+    int i = 0;
+
+    s = NOW();
+    while ( test_and_set_bit(STATE_SCHED, &d->state) )
+    {
+        do {
+            now = NOW();
+            process_pending_softirqs();
+            if ( ((now - s) >> 30) > 5 )
+            {
+                s = now;
+                printk("%s stuck .. \n", __func__);
+                i++;
+            }
+            if ( i > 12 )
+                BUG();
+        } while ( test_bit(STATE_SCHED, &d->state) );
+    }
+    while (test_bit(STATE_RUN, &(d)->state))
+    {
+        cpu_relax();
+    }
+    clear_bit(STATE_SCHED, &d->state);
+}
+
+static void dpci_softirq(void)
+{
+
+    struct domain *d;
+    struct list_head *list;
+    struct list_head our_list;
+
+    local_irq_disable();
+    list = &__get_cpu_var(dpci_list);
+
+    INIT_LIST_HEAD(&our_list);
+    list_splice(list, &our_list);
+
+    INIT_LIST_HEAD(&__get_cpu_var(dpci_list));
+
+    local_irq_enable();
+
+    while (!list_empty(&our_list))
+    {
+        d = list_entry(our_list.next, struct domain, list);
+        list_del(&d->list);
+
+        if ( !test_and_set_bit(STATE_RUN, &(d)->state) )
+        {
+            if ( !test_and_clear_bit(STATE_SCHED, &d->state) )
+                BUG();
+            hvm_dirq_assist((unsigned long)d);
+            clear_bit(STATE_RUN, &(d)->state);
+            continue;
+        }
+
+        local_irq_disable();
+
+        INIT_LIST_HEAD(&d->list);
+        list_add_tail(&d->list, &__get_cpu_var(dpci_list));
+        local_irq_enable();
+
+        raise_softirq(HVM_DPCI_SOFTIRQ);
+    }
+}
+
 bool_t pt_irq_need_timer(uint32_t flags)
 {
     return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
@@ -119,9 +214,13 @@ int pt_irq_create_bind_vtd(
             return -ENOMEM;
         }
         memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
-        percpu_tasklet_init(
-            &hvm_irq_dpci->dirq_tasklet,
-            hvm_dirq_assist, (unsigned long)d);
+        if ( !use_softirq )
+            percpu_tasklet_init(
+                &hvm_irq_dpci->dirq_tasklet,
+                hvm_dirq_assist, (unsigned long)d);
+        else
+            printk("%s: Using HVM_DPCI_SOFTIRQ for d%d.\n", __func__, d->domain_id);
+
         hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
                                            d->nr_pirqs);
         hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
@@ -398,7 +497,10 @@ int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
         return 0;
 
     set_bit(mirq, dpci->dirq_mask);
-    tasklet_schedule(&dpci->dirq_tasklet);
+    if ( !use_softirq )
+        tasklet_schedule(&dpci->dirq_tasklet);
+    else
+        schedule_dpci_for(d);
     return 1;
 }
 
@@ -574,11 +676,50 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
 unlock:
     spin_unlock(&d->event_lock);
 }
+#include <xen/cpu.h>
+static int cpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+
+    printk("%s for CPU%d\n", __func__, cpu);
+
+    switch ( action )
+    {
+    case CPU_UP_PREPARE:
+        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+        break;
+    case CPU_UP_CANCELED:
+    case CPU_DEAD:
+        BUG(); /* To implement. */
+        break;
+    default:
+        break;
+    }
+
+    return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+    .notifier_call = cpu_callback,
+    .priority = 99
+};
 
 static int __init setup_mmio_ro_ranges(void)
 {
     mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
                                   RANGESETF_prettyprint_hex);
+    printk("HVM_DPCI_SOFTIRQ is %s\n", use_softirq ? "active" : "offline");
+    if ( use_softirq )
+    {
+        int cpu;
+
+        for_each_online_cpu(cpu)
+            INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+
+        open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
+        register_cpu_notifier(&cpu_nfb);
+    }
     return 0;
 }
 __initcall(setup_mmio_ro_ranges);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 111ac96..24900da 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -296,7 +296,10 @@ static void pci_clean_dpci_irqs(struct domain *d)
     hvm_irq_dpci = domain_get_irq_dpci(d);
     if ( hvm_irq_dpci != NULL )
     {
-        tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+        if ( !use_softirq )
+            tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
+        else
+            dpci_kill(d);
 
         for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
               i < d->nr_pirqs;
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index f21b02c..340293c 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -102,6 +102,9 @@ struct hvm_irq_dpci {
     struct tasklet dirq_tasklet;
 };
 
+extern bool_t use_softirq;
+void dpci_kill(struct domain *d);
+
 /* Modify state of a PCI INTx wire. */
 void hvm_pci_intx_assert(
     struct domain *d, unsigned int device, unsigned int intx);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c04b25d..ba9982e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -332,6 +332,9 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    /* For HVM_DPCI_SOFTIRQ. Locking is bit wonky. */
+    struct list_head list;
+    unsigned long state;
 };
 
 struct domain_setup_info
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index c5b429c..7134727 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -9,6 +9,7 @@ enum {
     RCU_SOFTIRQ,
     TASKLET_SOFTIRQ_PERCPU,
     TASKLET_SOFTIRQ,
+    HVM_DPCI_SOFTIRQ,
     NR_COMMON_SOFTIRQS
 };
 
-- 
1.9.3

> 
> Jan
> 
> 

  reply	other threads:[~2014-09-02 20:28 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-27 17:58 [RFC PATCH v1] Replace tasklets with per-cpu implementation Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 1/5] tasklet: Introduce per-cpu tasklet for softirq Konrad Rzeszutek Wilk
2014-08-27 18:53   ` Andrew Cooper
2014-08-27 19:06     ` Konrad Rzeszutek Wilk
2014-08-28  8:17     ` Jan Beulich
2014-08-27 17:58 ` [RFC PATCH v1 2/5] tasklet: Add cross CPU feeding of per-cpu tasklets Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 3/5] tasklet: Remove the old-softirq implementation Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 4/5] tasklet: Introduce per-cpu tasklet for schedule tasklet Konrad Rzeszutek Wilk
2014-08-27 17:58 ` [RFC PATCH v1 5/5] tasklet: Remove the scaffolding Konrad Rzeszutek Wilk
2014-08-28 12:39 ` [RFC PATCH v1] Replace tasklets with per-cpu implementation Jan Beulich
2014-08-29 13:46   ` Konrad Rzeszutek Wilk
2014-08-29 14:10     ` Jan Beulich
2014-09-02 20:28       ` Konrad Rzeszutek Wilk [this message]
2014-09-03  8:03         ` Jan Beulich
2014-09-08 19:01           ` Konrad Rzeszutek Wilk
2014-09-09  9:01             ` Jan Beulich
2014-09-09 14:37               ` Konrad Rzeszutek Wilk
2014-09-09 16:37                 ` Jan Beulich
2014-09-10 16:03                   ` Konrad Rzeszutek Wilk
2014-09-10 16:25                     ` Jan Beulich
2014-09-10 16:35                       ` Konrad Rzeszutek Wilk
  -- strict thread matches above, loose matches on Subject: below --
2014-08-29 20:58 Arianna Avanzini
2014-09-02 20:10 ` Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140902202834.GC16610@laptop.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=JBeulich@suse.com \
    --cc=keir@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).