From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Sander Eikelenboom <linux@eikelenboom.it>
Cc: xen-devel <xen-devel@lists.xenproject.org>,
Jan Beulich <JBeulich@suse.com>
Subject: Re: Xen-unstable: xen panic RIP: dpci_softirq
Date: Fri, 14 Nov 2014 15:25:13 -0500 [thread overview]
Message-ID: <20141114202513.GA3281@laptop.dumpdata.com> (raw)
In-Reply-To: <1393541150.20141114175923@eikelenboom.it>
[-- Attachment #1: Type: text/plain, Size: 795 bytes --]
On Fri, Nov 14, 2014 at 05:59:23PM +0100, Sander Eikelenboom wrote:
>
> Friday, November 14, 2014, 4:43:58 PM, you wrote:
>
> >>>> On 14.11.14 at 16:20, <linux@eikelenboom.it> wrote:
> >> If it still helps i could try Andrews suggestion and try out with only
> >> commit aeeea485 ..
>
> > Yes, even if it's pretty certain it's the second of the commits, verifying
> > this would be helpful (or if the assumption is wrong, the pattern it's
> > dying with would change and hence perhaps provide further clues).
>
> > Jan
>
>
> Ok with a revert of f6dd295 .. it survived cooking and eating a nice bowl of
> pasta without a panic. So it would probably be indeed that specific commit.
Could you try running with these two patches while you enjoy a beer in the evening?
>
> --
> Sander
>
[-- Attachment #2: 0001-dpci-Add-ZOMBIE-state.patch --]
[-- Type: text/plain, Size: 3334 bytes --]
>From b8c267ea34663a9585e1aee9c09dede240b546f9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 14 Nov 2014 12:15:26 -0500
Subject: [PATCH 1/2] dpci: Add ZOMBIE state.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
xen/drivers/passthrough/io.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index efc66dc..8e9141e 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -50,20 +50,25 @@ static DEFINE_PER_CPU(struct list_head, dpci_list);
enum {
STATE_SCHED,
- STATE_RUN
+ STATE_RUN,
+ STATE_ZOMBIE
};
/*
* This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
- * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
- * the softirq had a chance to run).
+ * That is until the STATE_SCHED and STATE_ZOMBIE state has been cleared. The
+ * STATE_SCHED and STATE_ZOMBIE state can be cleared by the 'dpci_softirq'
+ * (when it has executed 'hvm_dirq_assist'). The STATE_SCHED can be cleared
+ * by 'pt_pirq_softirq_reset' (which will try to clear the state before the
+ * softirq had a chance to run).
*/
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
{
unsigned long flags;
+ if ( test_bit(STATE_ZOMBIE, &pirq_dpci->state) )
+ return;
+
if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
return;
@@ -85,7 +90,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
*/
bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
{
- if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
+ if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED) | (1 << STATE_ZOMBIE) ) )
return 1;
/*
@@ -109,7 +114,7 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
ASSERT(spin_is_locked(&d->event_lock));
- switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
+ switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 1 << STATE_ZOMBIE ) )
{
case (1 << STATE_SCHED):
/*
@@ -120,6 +125,7 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
/* fallthrough. */
case (1 << STATE_RUN):
case (1 << STATE_RUN) | (1 << STATE_SCHED):
+ case (1 << STATE_RUN) | (1 << STATE_SCHED) | (1 << STATE_ZOMBIE):
/*
* The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
* to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
@@ -779,6 +785,7 @@ unlock:
static void dpci_softirq(void)
{
unsigned int cpu = smp_processor_id();
+ unsigned int reset = 0;
LIST_HEAD(our_list);
local_irq_disable();
@@ -805,7 +812,15 @@ static void dpci_softirq(void)
hvm_dirq_assist(d, pirq_dpci);
put_domain(d);
}
+ else
+ reset = 1;
+
clear_bit(STATE_RUN, &pirq_dpci->state);
+ if ( reset )
+ {
+ clear_bit(STATE_ZOMBIE, &pirq_dpci->state);
+ reset = 0;
+ }
}
}
--
1.9.3
[-- Attachment #3: 0002-debug.patch --]
[-- Type: text/plain, Size: 9583 bytes --]
>From a4171fa12583eabd126bc5b4c305f49b2fb2b515 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 14 Nov 2014 15:00:39 -0500
Subject: [PATCH 2/2] debug:
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
xen/drivers/passthrough/io.c | 180 ++++++++++++++++++++++++++++++++++++++++++-
xen/include/xen/hvm/irq.h | 2 +
2 files changed, 178 insertions(+), 4 deletions(-)
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 8e9141e..23e5ed1 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -54,6 +54,117 @@ enum {
STATE_ZOMBIE
};
+struct _debug_f { /* One debug slot: snapshot of a hvm_pirq_dpci taken by _record(). */
+ unsigned int domid; /* domain_id of ->dom, or 0xdead when ->dom was NULL; 0 means "slot empty" to dump_record() */
+ unsigned long count; /* number of _record() calls since the slot was last zeroed */
+ s_time_t last; /* NOW() at the most recent _record() */
+ struct list_head list; /* copy of softirq_list.next/.prev at record time (not a linked node) */
+ unsigned int state; /* copy of pirq_dpci->state at record time */
+ struct hvm_pirq_dpci *dpci; /* the recorded object itself; _record() takes no reference on it */
+};
+
+struct __debug { /* Per-CPU set of debug slots, one per instrumented event. */
+ struct _debug_f ok; /* dpci_softirq: entry passed the poison check, about to be list_del()'d */
+ struct _debug_f poison; /* dpci_softirq: entry's list links matched a LIST_POISON value */
+ struct _debug_f raise; /* raise_softirq_for: STATE_SCHED was newly set */
+ struct _debug_f reset; /* pt_pirq_softirq_reset: on entry, before the cmpxchg */
+ struct _debug_f zombie_softirq; /* dpci_softirq: the 'else' path that sets reset = 1 */
+ struct _debug_f zombie_raise; /* raise_softirq_for: bailed out because STATE_ZOMBIE was set */
+};
+
+static DEFINE_PER_CPU(struct __debug, _d);
+
+/*
+ * Snapshot 'pirq_dpci' into the debug slot 'd' and bump the slot's counter.
+ *
+ * Stored: the owning domain's id (0xdead when ->dom is NULL), the current
+ * time, the softirq_list next/prev links, the state word and the pointer
+ * itself. No reference is taken on 'pirq_dpci', so the saved pointer is only
+ * as valid as the object it points at.
+ */
+static void _record(struct _debug_f *d, struct hvm_pirq_dpci *pirq_dpci)
+{
+    /*
+     * Read ->dom once: other paths in this file set it to NULL, and testing
+     * and then re-reading the field could dereference a pointer that was
+     * cleared in between.
+     */
+    struct domain *dom = pirq_dpci->dom;
+
+    d->domid = dom ? dom->domain_id : 0xdead;
+    d->count++;
+    d->last = NOW();
+    d->list.next = pirq_dpci->softirq_list.next;
+    d->list.prev = pirq_dpci->softirq_list.prev;
+    d->state = pirq_dpci->state;
+    d->dpci = pirq_dpci;
+}
+
+enum { /* Record kinds; dump_record() uses these to index its names[] label table. */
+ Z_SOFTIRQ,
+ Z_RAISE,
+ ERR_POISON,
+ OK_SOFTIRQ,
+ OK_RAISE,
+ OK_RESET,
+};
+
+/*
+ * Print one debug record, labelled according to 'type', then zero it so the
+ * next dump only shows events recorded afterwards.
+ *
+ * A record whose domid is 0 is treated as empty and skipped without being
+ * cleared. 'type' must be one of the record-kind enumerators; anything
+ * outside the names[] table hits BUG().
+ */
+static void dump_record(struct _debug_f *d, unsigned int type)
+{
+    static const char *const names[] = {
+        [Z_SOFTIRQ] = "Z-softirq ",
+        [Z_RAISE] = "Z-raise ",
+        [ERR_POISON] = "ERR-poison",
+        [OK_SOFTIRQ] = "OK-softirq",
+        [OK_RAISE] = "OK-raise ",
+        [OK_RESET] = "OK-reset ",
+    };
+#define LONG(x) [_HVM_IRQ_DPCI_##x] = #x
+    static const char *const names_flag[] = {
+        LONG(MACH_PCI_SHIFT),
+        LONG(MAPPED_SHIFT),
+        LONG(EOI_LATCH_SHIFT),
+        LONG(GUEST_PCI_SHIFT),
+        LONG(GUEST_MSI_SHIFT),
+    };
+#undef LONG
+    unsigned int i;
+    s_time_t now;
+
+    if ( d->domid == 0 )
+        return;
+
+    if ( type >= ARRAY_SIZE(names) )
+        BUG();
+
+    now = NOW();
+    printk("d%d %s %lumsec ago, state:%x, %lu count, [prev:%p, next:%p] ",
+           d->domid, names[type],
+           (unsigned long)((now - d->last) / MILLISECS(1)),
+           d->state, d->count, d->list.prev, d->list.next);
+
+    /*
+     * NOTE(review): d->dpci is the raw pointer saved by _record(); nothing
+     * visible here keeps the object alive until the dump - confirm.
+     */
+    if ( d->dpci )
+    {
+        struct hvm_pirq_dpci *pirq_dpci = d->dpci;
+
+        /*
+         * Name each flag bit that is set. The test must use bit 'i'; a
+         * loop-invariant mask would print every name or none. The table is
+         * built with designated initializers, so an index with no name is
+         * NULL and is skipped.
+         */
+        for ( i = 0; i < ARRAY_SIZE(names_flag); i++ )
+            if ( (pirq_dpci->flags & (1u << i)) && names_flag[i] )
+                printk("%s ", names_flag[i]);
+
+        printk(" PIRQ:%d", pirq_dpci->pirq);
+        if ( pirq_dpci->line )
+            printk(" LINE: %d", pirq_dpci->line);
+    }
+    printk("\n");
+    memset(d, 0, sizeof(*d));
+}
+
+/*
+ * Keyhandler callback: walk every online CPU, print a "CPUnn:" header and
+ * hand each of that CPU's six debug slots to dump_record(), which prints and
+ * then clears them. 'key' is not used.
+ */
+static void dump_debug(unsigned char key)
+{
+    unsigned int cpu;
+
+    for_each_online_cpu ( cpu )
+    {
+        struct __debug *slots = &per_cpu(_d, cpu);
+
+        printk("CPU%02d: \n" ,cpu);
+
+        /* Normal-path events first ... */
+        dump_record(&slots->ok, OK_SOFTIRQ);
+        dump_record(&slots->raise, OK_RAISE);
+        dump_record(&slots->reset, OK_RESET);
+        /* ... then the error and zombie events. */
+        dump_record(&slots->poison, ERR_POISON);
+        dump_record(&slots->zombie_softirq, Z_SOFTIRQ);
+        dump_record(&slots->zombie_raise, Z_RAISE);
+    }
+}
/*
* This can be called multiple times, but the softirq is only raised once.
* That is until the STATE_SCHED and STATE_ZOMBIE state has been cleared. The
@@ -65,13 +176,18 @@ enum {
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
{
unsigned long flags;
+ struct __debug *d = &__get_cpu_var(_d);
if ( test_bit(STATE_ZOMBIE, &pirq_dpci->state) )
+ {
+ _record(&d->zombie_raise, pirq_dpci);
return;
-
+ }
if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
return;
+ _record(&d->raise, pirq_dpci);
+
get_knownalive_domain(pirq_dpci->dom);
local_irq_save(flags);
@@ -111,9 +227,12 @@ bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
{
struct domain *d = pirq_dpci->dom;
+ struct __debug *debug = &__get_cpu_var(_d);
ASSERT(spin_is_locked(&d->event_lock));
+ _record(&debug->reset, pirq_dpci);
+
switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 1 << STATE_ZOMBIE ) )
{
case (1 << STATE_SCHED):
@@ -277,6 +396,7 @@ int pt_irq_create_bind(
* As such on every 'pt_irq_create_bind' call we MUST set it.
*/
pirq_dpci->dom = d;
+ pirq_dpci->pirq = pirq;
/* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
rc = pirq_guest_bind(d->vcpu[0], info, 0);
if ( rc == 0 && pt_irq_bind->u.msi.gtable )
@@ -291,6 +411,7 @@ int pt_irq_create_bind(
* to be scheduled but we must deal with the one that may be
* in the queue.
*/
+ pirq_dpci->line = __LINE__;
pt_pirq_softirq_reset(pirq_dpci);
}
}
@@ -300,6 +421,7 @@ int pt_irq_create_bind(
pirq_dpci->gmsi.gvec = 0;
pirq_dpci->dom = NULL;
pirq_dpci->flags = 0;
+ pirq_dpci->pirq = -pirq;
pirq_cleanup_check(info, d);
spin_unlock(&d->event_lock);
return rc;
@@ -544,6 +666,7 @@ int pt_irq_destroy_bind(
* See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
* call to pt_pirq_softirq_reset.
*/
+ pirq_dpci->line = __LINE__;
pt_pirq_softirq_reset(pirq_dpci);
pirq_cleanup_check(pirq, d);
@@ -778,6 +901,8 @@ unlock:
spin_unlock(&d->event_lock);
}
+#include <xen/console.h>
+
/*
* Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
* doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting.
@@ -787,6 +912,9 @@ static void dpci_softirq(void)
unsigned int cpu = smp_processor_id();
unsigned int reset = 0;
LIST_HEAD(our_list);
+ struct __debug *debug;
+
+ debug = &per_cpu(_d, cpu);
local_irq_disable();
list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
@@ -796,9 +924,22 @@ static void dpci_softirq(void)
{
struct hvm_pirq_dpci *pirq_dpci;
struct domain *d;
+ struct list_head *entry;
pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
- list_del(&pirq_dpci->softirq_list);
+        entry = &pirq_dpci->softirq_list;
+        /*
+         * Debug guard: if either link of this entry holds a list-poison
+         * value, record it, dump every CPU's debug slots with the console in
+         * sync mode, crash the owning domain and stop walking the list rather
+         * than calling list_del() on it. Both ->next and ->prev are checked
+         * against both poison values.
+         *
+         * NOTE(review): _record() tolerates a NULL ->dom, so ->dom may be
+         * NULL when we get here - confirm domain_crash() copes with that.
+         */
+        if ( entry->next == LIST_POISON1 || entry->next == LIST_POISON2 ||
+             entry->prev == LIST_POISON1 || entry->prev == LIST_POISON2 )
+        {
+            _record(&debug->poison, pirq_dpci);
+            console_start_sync();
+            dump_debug((char)0);
+            console_end_sync();
+            domain_crash(pirq_dpci->dom);
+            break;
+        }
+        _record(&debug->ok, pirq_dpci);
+        list_del(entry);
d = pirq_dpci->dom;
smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
@@ -813,8 +954,10 @@ static void dpci_softirq(void)
put_domain(d);
}
else
+ {
+ _record(&debug->zombie_softirq, pirq_dpci);
reset = 1;
-
+ }
clear_bit(STATE_RUN, &pirq_dpci->state);
if ( reset )
{
@@ -833,6 +976,7 @@ static int cpu_callback(
{
case CPU_UP_PREPARE:
INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+ memset(&per_cpu(_d, cpu), 0, sizeof(struct __debug));
break;
case CPU_UP_CANCELED:
case CPU_DEAD:
@@ -854,15 +998,43 @@ static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback,
};
+#include <xen/keyhandler.h>
+
+static struct keyhandler dump_debug_keyhandler = { /* registered for key 'k' in setup_dpci_softirq() */
+ .diagnostic = 1,
+ .u.fn = dump_debug, /* callback: prints and clears the per-CPU debug records */
+ .desc = "dpci debug stats"
+};
+static struct timer debug_timer; /* runs debug_timer_fn(), which re-arms it one second ahead each time */
+static s_time_t last_time = 0; /* NOW() at the last (re-)arm; only ever written in the visible code */
+static unsigned int debug_cnt = 0; /* timer ticks so far; every 10th tick prints "--MARK--" */
+
+/*
+ * Timer callback: count the tick, print a "--MARK--" line on every tenth
+ * one (starting with the first), then re-arm the timer one second from now.
+ * The 'd' argument is not used.
+ */
+static void debug_timer_fn(void *d)
+{
+    unsigned int tick = debug_cnt++;
+
+    if ( !(tick % 10) )
+        printk("--MARK--\n");
+
+    last_time = NOW();
+    set_timer(&debug_timer, last_time + SECONDS(1));
+}
+
static int __init setup_dpci_softirq(void)
{
unsigned int cpu;
for_each_online_cpu(cpu)
+ {
INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
-
+ memset(&per_cpu(_d, cpu), 0, sizeof(struct __debug));
+ }
open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
register_cpu_notifier(&cpu_nfb);
+
+ init_timer(&debug_timer, debug_timer_fn, NULL, smp_processor_id());
+ last_time = NOW();
+ set_timer(&debug_timer, NOW() + SECONDS(1));
+ register_keyhandler('k', &dump_debug_keyhandler);
+
return 0;
}
__initcall(setup_dpci_softirq);
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 9709397..1fb1292 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -100,6 +100,8 @@ struct hvm_pirq_dpci {
struct hvm_gmsi_info gmsi;
struct timer timer;
struct list_head softirq_list;
+ unsigned int pirq;
+ unsigned int line;
};
void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
--
1.9.3
[-- Attachment #4: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2014-11-14 20:25 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-14 13:11 Xen-unstable: xen panic RIP: dpci_softirq Sander Eikelenboom
2014-11-14 13:32 ` Andrew Cooper
2014-11-14 13:37 ` Andrew Cooper
2014-11-14 13:57 ` Jan Beulich
2014-11-14 14:34 ` Sander Eikelenboom
2014-11-14 15:09 ` Jan Beulich
2014-11-14 15:20 ` Sander Eikelenboom
2014-11-14 15:43 ` Jan Beulich
2014-11-14 16:59 ` Sander Eikelenboom
2014-11-14 17:05 ` Konrad Rzeszutek Wilk
2014-11-14 19:56 ` Sander Eikelenboom
2014-11-14 20:25 ` Konrad Rzeszutek Wilk [this message]
2014-11-14 22:09 ` Sander Eikelenboom
2014-11-17 16:34 ` Konrad Rzeszutek Wilk
2014-11-17 17:04 ` Sander Eikelenboom
2014-11-17 20:43 ` Konrad Rzeszutek Wilk
2014-11-17 22:40 ` Sander Eikelenboom
2014-11-18 8:28 ` Jan Beulich
[not found] ` <20141118024927.GA32256@andromeda.dapyr.net>
2014-11-18 11:07 ` Sander Eikelenboom
2014-11-18 15:09 ` Sander Eikelenboom
2014-11-18 16:06 ` Sander Eikelenboom
2014-11-18 16:16 ` Konrad Rzeszutek Wilk
2014-11-18 17:03 ` Sander Eikelenboom
2014-11-18 17:20 ` Jan Beulich
2014-11-18 20:34 ` Konrad Rzeszutek Wilk
2014-11-18 20:56 ` Konrad Rzeszutek Wilk
2014-11-18 22:12 ` Sander Eikelenboom
2014-11-19 1:55 ` Konrad Rzeszutek Wilk
2014-11-19 11:16 ` Sander Eikelenboom
2014-11-19 15:04 ` Konrad Rzeszutek Wilk
2014-11-19 17:27 ` Sander Eikelenboom
2014-11-17 18:01 ` Sander Eikelenboom
2014-11-18 9:30 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141114202513.GA3281@laptop.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=JBeulich@suse.com \
--cc=linux@eikelenboom.it \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.