* [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-09 10:46 ` Bharat Bhushan
0 siblings, 0 replies; 38+ messages in thread
From: Bharat Bhushan @ 2012-07-09 10:34 UTC (permalink / raw)
To: kvm-ppc, kvm, agraf, bharatb.yadav; +Cc: Bharat Bhushan
This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.
Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
-v2:
- Addressed the review comments
arch/powerpc/include/asm/kvm_book3s.h | 5 ++
arch/powerpc/include/asm/kvm_booke.h | 5 ++
arch/powerpc/include/asm/kvm_host.h | 3 +
arch/powerpc/include/asm/kvm_ppc.h | 3 +
arch/powerpc/include/asm/reg_booke.h | 7 ++
arch/powerpc/kvm/44x.c | 1 +
arch/powerpc/kvm/booke.c | 128 +++++++++++++++++++++++++++++++++
arch/powerpc/kvm/booke_emulate.c | 8 ++
arch/powerpc/kvm/powerpc.c | 31 +++++++-
include/linux/kvm.h | 2 +
10 files changed, 189 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a..7bbc6cd 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -446,6 +446,11 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
}
#endif
+static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index b7cd335..e5b86c1 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -100,4 +100,9 @@ static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.shared->msr;
}
+
+static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.tsr & TCR_WRC_MASK;
+}
#endif /* __ASM_KVM_BOOKE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12f..01047f4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ spinlock_t wdt_lock;
+ struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
@@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
+ u8 watchdog_enable;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..e5cf4b9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_watchdog_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
@@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
+extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
#define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
#define TCR_ARE 0x00400000 /* Auto Reload Enable */
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
+ (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
+#endif
+
/* Bit definitions for the TSR. */
#define TSR_ENW 0x80000000 /* Enable Next Watchdog */
#define TSR_WIS 0x40000000 /* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc..a213aba 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,6 +28,7 @@
#include <asm/kvm_44x.h>
#include <asm/kvm_ppc.h>
+#include "booke.h"
#include "44x_tlb.h"
#include "booke.h"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..97f54ea 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
+ case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,101 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}
+/*
+ * Return the number of jiffies until the next timeout. If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, that return NEXT_TIMER_MAX_DELTA
+ * instead.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+ u64 tb, wdt_tb, wdt_ticks = 0;
+ u64 nr_jiffies = 0;
+ u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+ wdt_tb = 1ULL << (63 - period);
+ tb = get_tb();
+ /*
+ * The watchdog timeout will hapeen when TB bit corresponding
+ * to watchdog will toggle from 0 to 1.
+ */
+ if (tb & wdt_tb)
+ wdt_ticks = wdt_tb;
+
+ wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+ /* Convert timebase ticks to jiffies */
+ nr_jiffies = wdt_ticks;
+
+ if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+ nr_jiffies++;
+
+ return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr_jiffies;
+
+ nr_jiffies = watchdog_next_timeout(vcpu);
+ spin_lock(&vcpu->arch.wdt_lock);
+ if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+ else
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock(&vcpu->arch.wdt_lock);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ u32 tsr, new_tsr;
+ int final;
+
+ do {
+ new_tsr = tsr = vcpu->arch.tsr;
+ final = 0;
+
+ /* Time out event */
+ if (tsr & TSR_ENW) {
+ if (tsr & TSR_WIS) {
+ new_tsr = (tsr & ~TCR_WRC_MASK) |
+ (vcpu->arch.tcr & TCR_WRC_MASK);
+ vcpu->arch.tcr &= ~TCR_WRC_MASK;
+ final = 1;
+ } else {
+ new_tsr = tsr | TSR_WIS;
+ }
+ } else {
+ new_tsr = tsr | TSR_ENW;
+ }
+ } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+ if (new_tsr & (TSR_WIS | TCR_WRC_MASK)) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * Stop running the watchdog timer after final expiration to
+ * prevent the host from being flooded with timers if the
+ * guest sets a short period.
+ * Timers will resume when TSR/TCR is updated next time.
+ */
+ if (!final)
+ arm_next_watchdog(vcpu);
+}
static void update_timer_ints(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
kvmppc_core_queue_dec(vcpu);
else
kvmppc_core_dequeue_dec(vcpu);
+
+ if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+ kvmppc_core_queue_watchdog(vcpu);
+ else
+ kvmppc_core_dequeue_watchdog(vcpu);
}
static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -516,6 +616,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}
+ if (vcpu->arch.tsr & TCR_WRC_MASK) {
+ kvm_run->exit_reason = KVM_EXIT_WDT;
+ ret = 0;
+ goto out;
+ }
+
kvm_guest_enter();
#ifdef CONFIG_PPC_FPU
@@ -977,6 +1083,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
}
}
+ if (vcpu->arch.tsr & TCR_WRC_MASK) {
+ run->exit_reason = KVM_EXIT_WDT;
+ r = RESUME_HOST | (r & RESUME_FLAG_NV);
+ }
return r;
}
@@ -1106,7 +1216,14 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ u32 old_tsr = vcpu->arch.tsr;
+
vcpu->arch.tsr = sregs->u.e.tsr;
+
+ if (vcpu->arch.watchdog_enable && ((old_tsr ^ vcpu->arch.tsr) &
+ (TSR_ENW | TSR_WIS | TCR_WRC_MASK)))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1267,6 +1384,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
+ if (vcpu->arch.watchdog_enable)
+ arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
@@ -1281,6 +1400,15 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+ /*
+ * We may have stopped the watchdog due to
+ * being stuck on final expiration.
+ */
+ if (vcpu->arch.watchdog_enable && (tsr_bits & (TSR_ENW |
+ TSR_WIS | TCR_WRC_MASK)))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb..5a66ade 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
case SPRN_TCR:
+ /*
+ * WRC is a 2-bit field that is supposed to preserve its
+ * value once written to non-zero.
+ */
+ if (vcpu->arch.tcr & TCR_WRC_MASK) {
+ spr_val &= ~TCR_WRC_MASK;
+ spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+ }
kvmppc_set_tcr(vcpu, spr_val);
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc8..646c4da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,9 +38,13 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions) ||
- v->requests;
+ bool ret = !(v->arch.shared->msr & MSR_WE) ||
+ !!(v->arch.pending_exceptions) ||
+ v->requests;
+
+ ret = ret || kvmppc_get_tsr_wrc(v);
+
+ return ret;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -237,6 +241,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
+ case KVM_CAP_PPC_WDT:
#endif
r = 1;
break;
@@ -386,13 +391,21 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
-
+#ifdef CONFIG_BOOKE
+ spin_lock_init(&vcpu->arch.wdt_lock);
+#endif
return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_destroy(vcpu);
+#ifdef CONFIG_BOOKE
+ spin_lock(&vcpu->arch.wdt_lock);
+ if (vcpu->arch.watchdog_enable)
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock(&vcpu->arch.wdt_lock);
+#endif
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -637,6 +650,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_WDT:
+ r = 0;
+ /* setup watchdog timer once */
+ if (!vcpu->arch.watchdog_enable)
+ setup_timer(&vcpu->arch.wdt_timer,
+ kvmppc_watchdog_func, (unsigned long)vcpu);
+ vcpu->arch.watchdog_enable = true;
+ break;
+#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2ce09aa..b9fdb52 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@ struct kvm_pit_config {
#define KVM_EXIT_OSI 18
#define KVM_EXIT_PAPR_HCALL 19
#define KVM_EXIT_S390_UCONTROL 20
+#define KVM_EXIT_WDT 21
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -618,6 +619,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_PPC_WDT 81
#ifdef KVM_CAP_IRQ_ROUTING
--
1.7.0.4
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-09 10:46 ` Bharat Bhushan
0 siblings, 0 replies; 38+ messages in thread
From: Bharat Bhushan @ 2012-07-09 10:46 UTC (permalink / raw)
To: kvm-ppc, kvm, agraf, bharatb.yadav; +Cc: Bharat Bhushan
This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.
Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
-v2:
- Addressed the review comments
arch/powerpc/include/asm/kvm_book3s.h | 5 ++
arch/powerpc/include/asm/kvm_booke.h | 5 ++
arch/powerpc/include/asm/kvm_host.h | 3 +
arch/powerpc/include/asm/kvm_ppc.h | 3 +
arch/powerpc/include/asm/reg_booke.h | 7 ++
arch/powerpc/kvm/44x.c | 1 +
arch/powerpc/kvm/booke.c | 128 +++++++++++++++++++++++++++++++++
arch/powerpc/kvm/booke_emulate.c | 8 ++
arch/powerpc/kvm/powerpc.c | 31 +++++++-
include/linux/kvm.h | 2 +
10 files changed, 189 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a..7bbc6cd 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -446,6 +446,11 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
}
#endif
+static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index b7cd335..e5b86c1 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -100,4 +100,9 @@ static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.shared->msr;
}
+
+static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.tsr & TCR_WRC_MASK;
+}
#endif /* __ASM_KVM_BOOKE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12f..01047f4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ spinlock_t wdt_lock;
+ struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
@@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
+ u8 watchdog_enable;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..e5cf4b9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_watchdog_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
@@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
+extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
#define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
#define TCR_ARE 0x00400000 /* Auto Reload Enable */
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
+ (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
+#endif
+
/* Bit definitions for the TSR. */
#define TSR_ENW 0x80000000 /* Enable Next Watchdog */
#define TSR_WIS 0x40000000 /* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc..a213aba 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,6 +28,7 @@
#include <asm/kvm_44x.h>
#include <asm/kvm_ppc.h>
+#include "booke.h"
#include "44x_tlb.h"
#include "booke.h"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..97f54ea 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
+ case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,101 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}
+/*
+ * Return the number of jiffies until the next timeout. If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, that return NEXT_TIMER_MAX_DELTA
+ * instead.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+ u64 tb, wdt_tb, wdt_ticks = 0;
+ u64 nr_jiffies = 0;
+ u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+ wdt_tb = 1ULL << (63 - period);
+ tb = get_tb();
+ /*
+ * The watchdog timeout will hapeen when TB bit corresponding
+ * to watchdog will toggle from 0 to 1.
+ */
+ if (tb & wdt_tb)
+ wdt_ticks = wdt_tb;
+
+ wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+ /* Convert timebase ticks to jiffies */
+ nr_jiffies = wdt_ticks;
+
+ if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+ nr_jiffies++;
+
+ return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr_jiffies;
+
+ nr_jiffies = watchdog_next_timeout(vcpu);
+ spin_lock(&vcpu->arch.wdt_lock);
+ if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+ else
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock(&vcpu->arch.wdt_lock);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ u32 tsr, new_tsr;
+ int final;
+
+ do {
+ new_tsr = tsr = vcpu->arch.tsr;
+ final = 0;
+
+ /* Time out event */
+ if (tsr & TSR_ENW) {
+ if (tsr & TSR_WIS) {
+ new_tsr = (tsr & ~TCR_WRC_MASK) |
+ (vcpu->arch.tcr & TCR_WRC_MASK);
+ vcpu->arch.tcr &= ~TCR_WRC_MASK;
+ final = 1;
+ } else {
+ new_tsr = tsr | TSR_WIS;
+ }
+ } else {
+ new_tsr = tsr | TSR_ENW;
+ }
+ } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+ if (new_tsr & (TSR_WIS | TCR_WRC_MASK)) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * Stop running the watchdog timer after final expiration to
+ * prevent the host from being flooded with timers if the
+ * guest sets a short period.
+ * Timers will resume when TSR/TCR is updated next time.
+ */
+ if (!final)
+ arm_next_watchdog(vcpu);
+}
static void update_timer_ints(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
kvmppc_core_queue_dec(vcpu);
else
kvmppc_core_dequeue_dec(vcpu);
+
+ if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+ kvmppc_core_queue_watchdog(vcpu);
+ else
+ kvmppc_core_dequeue_watchdog(vcpu);
}
static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -516,6 +616,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}
+ if (vcpu->arch.tsr & TCR_WRC_MASK) {
+ kvm_run->exit_reason = KVM_EXIT_WDT;
+ ret = 0;
+ goto out;
+ }
+
kvm_guest_enter();
#ifdef CONFIG_PPC_FPU
@@ -977,6 +1083,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
}
}
+ if (vcpu->arch.tsr & TCR_WRC_MASK) {
+ run->exit_reason = KVM_EXIT_WDT;
+ r = RESUME_HOST | (r & RESUME_FLAG_NV);
+ }
return r;
}
@@ -1106,7 +1216,14 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ u32 old_tsr = vcpu->arch.tsr;
+
vcpu->arch.tsr = sregs->u.e.tsr;
+
+ if (vcpu->arch.watchdog_enable && ((old_tsr ^ vcpu->arch.tsr) &
+ (TSR_ENW | TSR_WIS | TCR_WRC_MASK)))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1267,6 +1384,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
+ if (vcpu->arch.watchdog_enable)
+ arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
@@ -1281,6 +1400,15 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+ /*
+ * We may have stopped the watchdog due to
+ * being stuck on final expiration.
+ */
+ if (vcpu->arch.watchdog_enable && (tsr_bits & (TSR_ENW |
+ TSR_WIS | TCR_WRC_MASK)))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb..5a66ade 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
case SPRN_TCR:
+ /*
+ * WRC is a 2-bit field that is supposed to preserve its
+ * value once written to non-zero.
+ */
+ if (vcpu->arch.tcr & TCR_WRC_MASK) {
+ spr_val &= ~TCR_WRC_MASK;
+ spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+ }
kvmppc_set_tcr(vcpu, spr_val);
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc8..646c4da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,9 +38,13 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions) ||
- v->requests;
+ bool ret = !(v->arch.shared->msr & MSR_WE) ||
+ !!(v->arch.pending_exceptions) ||
+ v->requests;
+
+ ret = ret || kvmppc_get_tsr_wrc(v);
+
+ return ret;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -237,6 +241,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
+ case KVM_CAP_PPC_WDT:
#endif
r = 1;
break;
@@ -386,13 +391,21 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
-
+#ifdef CONFIG_BOOKE
+ spin_lock_init(&vcpu->arch.wdt_lock);
+#endif
return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_destroy(vcpu);
+#ifdef CONFIG_BOOKE
+ spin_lock(&vcpu->arch.wdt_lock);
+ if (vcpu->arch.watchdog_enable)
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock(&vcpu->arch.wdt_lock);
+#endif
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -637,6 +650,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_WDT:
+ r = 0;
+ /* setup watchdog timer once */
+ if (!vcpu->arch.watchdog_enable)
+ setup_timer(&vcpu->arch.wdt_timer,
+ kvmppc_watchdog_func, (unsigned long)vcpu);
+ vcpu->arch.watchdog_enable = true;
+ break;
+#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2ce09aa..b9fdb52 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@ struct kvm_pit_config {
#define KVM_EXIT_OSI 18
#define KVM_EXIT_PAPR_HCALL 19
#define KVM_EXIT_S390_UCONTROL 20
+#define KVM_EXIT_WDT 21
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -618,6 +619,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_PPC_WDT 81
#ifdef KVM_CAP_IRQ_ROUTING
--
1.7.0.4
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-09 10:46 ` Bharat Bhushan
@ 2012-07-16 17:18 ` Alexander Graf
-1 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-16 17:18 UTC (permalink / raw)
To: Bharat Bhushan
Cc: kvm-ppc, kvm, bharatb.yadav, Bharat Bhushan,
Benjamin Herrenschmidt, Kumar Gala
On 07/09/2012 12:34 PM, Bharat Bhushan wrote:
> This patch adds the watchdog emulation in KVM. The watchdog
> emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
> The kernel timer are used for watchdog emulation and emulates
> h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
> if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
> it is configured.
>
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> -v2:
> - Addressed the review comments
>
> arch/powerpc/include/asm/kvm_book3s.h | 5 ++
> arch/powerpc/include/asm/kvm_booke.h | 5 ++
> arch/powerpc/include/asm/kvm_host.h | 3 +
> arch/powerpc/include/asm/kvm_ppc.h | 3 +
> arch/powerpc/include/asm/reg_booke.h | 7 ++
> arch/powerpc/kvm/44x.c | 1 +
> arch/powerpc/kvm/booke.c | 128
+++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/booke_emulate.c | 8 ++
> arch/powerpc/kvm/powerpc.c | 31 +++++++-
> include/linux/kvm.h | 2 +
> 10 files changed, 189 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h
b/arch/powerpc/include/asm/kvm_book3s.h
> index f0e0c6a..7bbc6cd 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -446,6 +446,11 @@ static inline bool
kvmppc_critical_section(struct kvm_vcpu *vcpu)
> }
> #endif
>
> +static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
> +{
> + return 0;
> +}
> +
> /* Magic register values loaded into r3 and r4 before the 'sc' assembly
> * instruction for the OSI hypercalls */
> #define OSI_SC_MAGIC_R3 0x113724FA
> diff --git a/arch/powerpc/include/asm/kvm_booke.h
b/arch/powerpc/include/asm/kvm_booke.h
> index b7cd335..e5b86c1 100644
> --- a/arch/powerpc/include/asm/kvm_booke.h
> +++ b/arch/powerpc/include/asm/kvm_booke.h
> @@ -100,4 +100,9 @@ static inline ulong kvmppc_get_msr(struct
kvm_vcpu *vcpu)
> {
> return vcpu->arch.shared->msr;
> }
> +
> +static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
> +{
> + return vcpu->arch.tsr & TCR_WRC_MASK;
> +}
> #endif /* __ASM_KVM_BOOKE_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_host.h
b/arch/powerpc/include/asm/kvm_host.h
> index 50ea12f..01047f4 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
> ulong fault_esr;
> ulong queued_dear;
> ulong queued_esr;
> + spinlock_t wdt_lock;
> + struct timer_list wdt_timer;
These should be booke specific. If you can't fit them anywhere else, at
least do an #ifdef for booke kvm around them, so we know where they belong.
> u32 tlbcfg[4];
> u32 mmucfg;
> u32 epr;
> @@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
> u8 osi_needed;
> u8 osi_enabled;
> u8 papr_enabled;
> + u8 watchdog_enable;
> u8 sane;
> u8 cpu_type;
> u8 hcall_needed;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..e5cf4b9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run,
struct kvm_vcpu *vcpu);
> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> extern void kvmppc_decrementer_func(unsigned long data);
> +extern void kvmppc_watchdog_func(unsigned long data);
> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
>
> /* Core-specific hooks */
> @@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct
kvm_vcpu *vcpu,
> struct kvm_interrupt *irq);
> extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> struct kvm_interrupt *irq);
> +extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
>
> extern int kvmppc_core_emulate_op(struct kvm_run *run, struct
kvm_vcpu *vcpu,
> unsigned int op, int *advance);
> diff --git a/arch/powerpc/include/asm/reg_booke.h
b/arch/powerpc/include/asm/reg_booke.h
> index 2d916c4..e07e6af 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -539,6 +539,13 @@
> #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
> #define TCR_ARE 0x00400000 /* Auto Reload Enable */
>
> +#ifdef CONFIG_E500
> +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
> + (((tcr) & 0x1E0000) >> 15))
> +#else
> +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
> +#endif
Ben and/or Kumar, mind to ack this bit?
> +
> /* Bit definitions for the TSR. */
> #define TSR_ENW 0x80000000 /* Enable Next Watchdog */
> #define TSR_WIS 0x40000000 /* WDT Interrupt Status */
> diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
> index 50e7dbc..a213aba 100644
> --- a/arch/powerpc/kvm/44x.c
> +++ b/arch/powerpc/kvm/44x.c
> @@ -28,6 +28,7 @@
> #include <asm/kvm_44x.h>
> #include <asm/kvm_ppc.h>
>
> +#include "booke.h"
Look 2 lines below :)
> #include "44x_tlb.h"
> #include "booke.h"
>
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index d25a097..97f54ea 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct
kvm_vcpu *vcpu,
> clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
> }
>
> +void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
> +}
> +
> +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
> +}
> +
> static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0,
u32 srr1)
> {
> #ifdef CONFIG_KVM_BOOKE_HV
> @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct
kvm_vcpu *vcpu,
> msr_mask = MSR_CE | MSR_ME | MSR_DE;
> int_class = INT_CLASS_NONCRIT;
> break;
> + case BOOKE_IRQPRIO_WATCHDOG:
> case BOOKE_IRQPRIO_CRITICAL:
> case BOOKE_IRQPRIO_DBELL_CRIT:
> allowed = vcpu->arch.shared->msr & MSR_CE;
> @@ -404,12 +415,101 @@ static int kvmppc_booke_irqprio_deliver(struct
kvm_vcpu *vcpu,
> return allowed;
> }
>
> +/*
> + * Return the number of jiffies until the next timeout. If the
timeout is
> + * longer than the NEXT_TIMER_MAX_DELTA, that
then?
> return NEXT_TIMER_MAX_DELTA
> + * instead.
I can read code. The important piece of information in the comment is
missing: The reason.
> + */
> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
> +{
> + u64 tb, wdt_tb, wdt_ticks = 0;
> + u64 nr_jiffies = 0;
> + u32 period = TCR_GET_WP(vcpu->arch.tcr);
> +
> + wdt_tb = 1ULL << (63 - period);
> + tb = get_tb();
> + /*
> + * The watchdog timeout will hapeen when TB bit corresponding
> + * to watchdog will toggle from 0 to 1.
> + */
> + if (tb & wdt_tb)
> + wdt_ticks = wdt_tb;
> +
> + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> +
> + /* Convert timebase ticks to jiffies */
> + nr_jiffies = wdt_ticks;
> +
> + if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> + nr_jiffies++;
> +
> + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
> +}
> +
> +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
> +{
> + unsigned long nr_jiffies;
> +
> + nr_jiffies = watchdog_next_timeout(vcpu);
> + spin_lock(&vcpu->arch.wdt_lock);
> + if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> + else
> + del_timer(&vcpu->arch.wdt_timer);
> + spin_unlock(&vcpu->arch.wdt_lock);
> +}
> +
> +void kvmppc_watchdog_func(unsigned long data)
> +{
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> + u32 tsr, new_tsr;
> + int final;
> +
> + do {
> + new_tsr = tsr = vcpu->arch.tsr;
> + final = 0;
> +
> + /* Time out event */
> + if (tsr & TSR_ENW) {
> + if (tsr & TSR_WIS) {
> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> + (vcpu->arch.tcr & TCR_WRC_MASK);
> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
Can't we just poke the vcpu to exit the VM and do the above on its own?
This is the watdog expired case, right? I'd also prefer to have an
explicit event for the expiry than a special TSR check in the main loop.
Also call me sceptic on the reset of tcr. If our user space watchdog
event is "write a message", then we essentially want to hide the fact
that the watchdog expired from the guest, right? In that case, the
second time-out wouldn't do anything guest visible.
> + final = 1;
> + } else {
> + new_tsr = tsr | TSR_WIS;
> + }
> + } else {
> + new_tsr = tsr | TSR_ENW;
> + }
> + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> +
> + if (new_tsr & (TSR_WIS | TCR_WRC_MASK)) {
> + smp_wmb();
> + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
> +
> + /*
> + * Stop running the watchdog timer after final expiration to
> + * prevent the host from being flooded with timers if the
> + * guest sets a short period.
> + * Timers will resume when TSR/TCR is updated next time.
> + */
> + if (!final)
> + arm_next_watchdog(vcpu);
> +}
> static void update_timer_ints(struct kvm_vcpu *vcpu)
> {
> if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
> kvmppc_core_queue_dec(vcpu);
> else
> kvmppc_core_dequeue_dec(vcpu);
> +
> + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> + kvmppc_core_queue_watchdog(vcpu);
> + else
> + kvmppc_core_dequeue_watchdog(vcpu);
> }
>
> static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
> @@ -516,6 +616,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run,
struct kvm_vcpu *vcpu)
> goto out;
> }
>
> + if (vcpu->arch.tsr & TCR_WRC_MASK) {
> + kvm_run->exit_reason = KVM_EXIT_WDT;
> + ret = 0;
> + goto out;
> + }
> +
> kvm_guest_enter();
>
> #ifdef CONFIG_PPC_FPU
> @@ -977,6 +1083,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
struct kvm_vcpu *vcpu,
> kvmppc_account_exit(vcpu, SIGNAL_EXITS);
> }
> }
> + if (vcpu->arch.tsr & TCR_WRC_MASK) {
> + run->exit_reason = KVM_EXIT_WDT;
> + r = RESUME_HOST | (r & RESUME_FLAG_NV);
> + }
>
> return r;
> }
> @@ -1106,7 +1216,14 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> }
>
> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> + u32 old_tsr = vcpu->arch.tsr;
> +
> vcpu->arch.tsr = sregs->u.e.tsr;
> +
> + if (vcpu->arch.watchdog_enable && ((old_tsr ^ vcpu->arch.tsr) &
> + (TSR_ENW | TSR_WIS | TCR_WRC_MASK)))
> + arm_next_watchdog(vcpu);
Can you please move all required checks into arm_next_watchdog and call
it unconditionally? Especially the watchdog_enable. Unless you want to
follow Scott's scheme and merely make the user space delivery depend on
watchdog_enable, which also works.
> +
> update_timer_ints(vcpu);
> }
>
> @@ -1267,6 +1384,8 @@ void kvmppc_core_commit_memory_region(struct
kvm *kvm,
> void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> {
> vcpu->arch.tcr = new_tcr;
> + if (vcpu->arch.watchdog_enable)
> + arm_next_watchdog(vcpu);
> update_timer_ints(vcpu);
> }
>
> @@ -1281,6 +1400,15 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu
*vcpu, u32 tsr_bits)
> void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> {
> clear_bits(tsr_bits, &vcpu->arch.tsr);
> +
> + /*
> + * We may have stopped the watchdog due to
> + * being stuck on final expiration.
> + */
> + if (vcpu->arch.watchdog_enable && (tsr_bits & (TSR_ENW |
> + TSR_WIS | TCR_WRC_MASK)))
> + arm_next_watchdog(vcpu);
> +
> update_timer_ints(vcpu);
> }
>
> diff --git a/arch/powerpc/kvm/booke_emulate.c
b/arch/powerpc/kvm/booke_emulate.c
> index 12834bb..5a66ade 100644
> --- a/arch/powerpc/kvm/booke_emulate.c
> +++ b/arch/powerpc/kvm/booke_emulate.c
> @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu
*vcpu, int sprn, ulong spr_val)
> kvmppc_clr_tsr_bits(vcpu, spr_val);
> break;
> case SPRN_TCR:
> + /*
> + * WRC is a 2-bit field that is supposed to preserve its
> + * value once written to non-zero.
> + */
> + if (vcpu->arch.tcr & TCR_WRC_MASK) {
> + spr_val &= ~TCR_WRC_MASK;
> + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
> + }
> kvmppc_set_tcr(vcpu, spr_val);
> break;
>
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 87f4dc8..646c4da 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -38,9 +38,13 @@
>
> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
> {
> - return !(v->arch.shared->msr & MSR_WE) ||
> - !!(v->arch.pending_exceptions) ||
> - v->requests;
> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> + !!(v->arch.pending_exceptions) ||
> + v->requests;
> +
> + ret = ret || kvmppc_get_tsr_wrc(v);
Why do you need to declare the cpu as non-runnable when a watchdog event
occured?
> +
> + return ret;
> }
>
> int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
> @@ -237,6 +241,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> case KVM_CAP_PPC_GET_PVINFO:
> #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
> case KVM_CAP_SW_TLB:
> + case KVM_CAP_PPC_WDT:
> #endif
> r = 1;
> break;
> @@ -386,13 +391,21 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> #ifdef CONFIG_KVM_EXIT_TIMING
> mutex_init(&vcpu->arch.exit_timing_lock);
> #endif
> -
> +#ifdef CONFIG_BOOKE
> + spin_lock_init(&vcpu->arch.wdt_lock);
> +#endif
> return 0;
> }
>
> void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> {
> kvmppc_mmu_destroy(vcpu);
> +#ifdef CONFIG_BOOKE
> + spin_lock(&vcpu->arch.wdt_lock);
> + if (vcpu->arch.watchdog_enable)
> + del_timer(&vcpu->arch.wdt_timer);
> + spin_unlock(&vcpu->arch.wdt_lock);
> +#endif
> }
>
> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> @@ -637,6 +650,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct
kvm_vcpu *vcpu,
> r = 0;
> vcpu->arch.papr_enabled = true;
> break;
> +#ifdef CONFIG_BOOKE
> + case KVM_CAP_PPC_WDT:
> + r = 0;
> + /* setup watchdog timer once */
> + if (!vcpu->arch.watchdog_enable)
> + setup_timer(&vcpu->arch.wdt_timer,
> + kvmppc_watchdog_func, (unsigned long)vcpu);
> + vcpu->arch.watchdog_enable = true;
> + break;
> +#endif
> #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
> case KVM_CAP_SW_TLB: {
> struct kvm_config_tlb cfg;
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2ce09aa..b9fdb52 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -163,6 +163,7 @@ struct kvm_pit_config {
> #define KVM_EXIT_OSI 18
> #define KVM_EXIT_PAPR_HCALL 19
> #define KVM_EXIT_S390_UCONTROL 20
> +#define KVM_EXIT_WDT 21
Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
may have the chance to reuse it.
Alex
>
> /* For KVM_EXIT_INTERNAL_ERROR */
> #define KVM_INTERNAL_ERROR_EMULATION 1
> @@ -618,6 +619,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_GET_SMMU_INFO 78
> #define KVM_CAP_S390_COW 79
> #define KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_PPC_WDT 81
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-16 17:18 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-16 17:18 UTC (permalink / raw)
To: Bharat Bhushan
Cc: kvm-ppc, kvm, bharatb.yadav, Bharat Bhushan,
Benjamin Herrenschmidt, Kumar Gala
On 07/09/2012 12:34 PM, Bharat Bhushan wrote:
> This patch adds the watchdog emulation in KVM. The watchdog
> emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
> The kernel timer are used for watchdog emulation and emulates
> h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
> if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
> it is configured.
>
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> -v2:
> - Addressed the review comments
>
> arch/powerpc/include/asm/kvm_book3s.h | 5 ++
> arch/powerpc/include/asm/kvm_booke.h | 5 ++
> arch/powerpc/include/asm/kvm_host.h | 3 +
> arch/powerpc/include/asm/kvm_ppc.h | 3 +
> arch/powerpc/include/asm/reg_booke.h | 7 ++
> arch/powerpc/kvm/44x.c | 1 +
> arch/powerpc/kvm/booke.c | 128
+++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/booke_emulate.c | 8 ++
> arch/powerpc/kvm/powerpc.c | 31 +++++++-
> include/linux/kvm.h | 2 +
> 10 files changed, 189 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h
b/arch/powerpc/include/asm/kvm_book3s.h
> index f0e0c6a..7bbc6cd 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -446,6 +446,11 @@ static inline bool
kvmppc_critical_section(struct kvm_vcpu *vcpu)
> }
> #endif
>
> +static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
> +{
> + return 0;
> +}
> +
> /* Magic register values loaded into r3 and r4 before the 'sc' assembly
> * instruction for the OSI hypercalls */
> #define OSI_SC_MAGIC_R3 0x113724FA
> diff --git a/arch/powerpc/include/asm/kvm_booke.h
b/arch/powerpc/include/asm/kvm_booke.h
> index b7cd335..e5b86c1 100644
> --- a/arch/powerpc/include/asm/kvm_booke.h
> +++ b/arch/powerpc/include/asm/kvm_booke.h
> @@ -100,4 +100,9 @@ static inline ulong kvmppc_get_msr(struct
kvm_vcpu *vcpu)
> {
> return vcpu->arch.shared->msr;
> }
> +
> +static inline u32 kvmppc_get_tsr_wrc(struct kvm_vcpu *vcpu)
> +{
> + return vcpu->arch.tsr & TCR_WRC_MASK;
> +}
> #endif /* __ASM_KVM_BOOKE_H__ */
> diff --git a/arch/powerpc/include/asm/kvm_host.h
b/arch/powerpc/include/asm/kvm_host.h
> index 50ea12f..01047f4 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
> ulong fault_esr;
> ulong queued_dear;
> ulong queued_esr;
> + spinlock_t wdt_lock;
> + struct timer_list wdt_timer;
These should be booke specific. If you can't fit them anywhere else, at
least do an #ifdef for booke kvm around them, so we know where they belong.
> u32 tlbcfg[4];
> u32 mmucfg;
> u32 epr;
> @@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
> u8 osi_needed;
> u8 osi_enabled;
> u8 papr_enabled;
> + u8 watchdog_enable;
> u8 sane;
> u8 cpu_type;
> u8 hcall_needed;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..e5cf4b9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run,
struct kvm_vcpu *vcpu);
> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> extern void kvmppc_decrementer_func(unsigned long data);
> +extern void kvmppc_watchdog_func(unsigned long data);
> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
>
> /* Core-specific hooks */
> @@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct
kvm_vcpu *vcpu,
> struct kvm_interrupt *irq);
> extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> struct kvm_interrupt *irq);
> +extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
>
> extern int kvmppc_core_emulate_op(struct kvm_run *run, struct
kvm_vcpu *vcpu,
> unsigned int op, int *advance);
> diff --git a/arch/powerpc/include/asm/reg_booke.h
b/arch/powerpc/include/asm/reg_booke.h
> index 2d916c4..e07e6af 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -539,6 +539,13 @@
> #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
> #define TCR_ARE 0x00400000 /* Auto Reload Enable */
>
> +#ifdef CONFIG_E500
> +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
> + (((tcr) & 0x1E0000) >> 15))
> +#else
> +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
> +#endif
Ben and/or Kumar, mind to ack this bit?
> +
> /* Bit definitions for the TSR. */
> #define TSR_ENW 0x80000000 /* Enable Next Watchdog */
> #define TSR_WIS 0x40000000 /* WDT Interrupt Status */
> diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
> index 50e7dbc..a213aba 100644
> --- a/arch/powerpc/kvm/44x.c
> +++ b/arch/powerpc/kvm/44x.c
> @@ -28,6 +28,7 @@
> #include <asm/kvm_44x.h>
> #include <asm/kvm_ppc.h>
>
> +#include "booke.h"
Look 2 lines below :)
> #include "44x_tlb.h"
> #include "booke.h"
>
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index d25a097..97f54ea 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct
kvm_vcpu *vcpu,
> clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
> }
>
> +void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
> +}
> +
> +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
> +}
> +
> static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0,
u32 srr1)
> {
> #ifdef CONFIG_KVM_BOOKE_HV
> @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct
kvm_vcpu *vcpu,
> msr_mask = MSR_CE | MSR_ME | MSR_DE;
> int_class = INT_CLASS_NONCRIT;
> break;
> + case BOOKE_IRQPRIO_WATCHDOG:
> case BOOKE_IRQPRIO_CRITICAL:
> case BOOKE_IRQPRIO_DBELL_CRIT:
> allowed = vcpu->arch.shared->msr & MSR_CE;
> @@ -404,12 +415,101 @@ static int kvmppc_booke_irqprio_deliver(struct
kvm_vcpu *vcpu,
> return allowed;
> }
>
> +/*
> + * Return the number of jiffies until the next timeout. If the
timeout is
> + * longer than the NEXT_TIMER_MAX_DELTA, that
then?
> return NEXT_TIMER_MAX_DELTA
> + * instead.
I can read code. The important piece of information in the comment is
missing: The reason.
> + */
> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
> +{
> + u64 tb, wdt_tb, wdt_ticks = 0;
> + u64 nr_jiffies = 0;
> + u32 period = TCR_GET_WP(vcpu->arch.tcr);
> +
> + wdt_tb = 1ULL << (63 - period);
> + tb = get_tb();
> + /*
> + * The watchdog timeout will hapeen when TB bit corresponding
> + * to watchdog will toggle from 0 to 1.
> + */
> + if (tb & wdt_tb)
> + wdt_ticks = wdt_tb;
> +
> + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> +
> + /* Convert timebase ticks to jiffies */
> + nr_jiffies = wdt_ticks;
> +
> + if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> + nr_jiffies++;
> +
> + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
> +}
> +
> +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
> +{
> + unsigned long nr_jiffies;
> +
> + nr_jiffies = watchdog_next_timeout(vcpu);
> + spin_lock(&vcpu->arch.wdt_lock);
> + if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> + else
> + del_timer(&vcpu->arch.wdt_timer);
> + spin_unlock(&vcpu->arch.wdt_lock);
> +}
> +
> +void kvmppc_watchdog_func(unsigned long data)
> +{
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> + u32 tsr, new_tsr;
> + int final;
> +
> + do {
> + new_tsr = tsr = vcpu->arch.tsr;
> + final = 0;
> +
> + /* Time out event */
> + if (tsr & TSR_ENW) {
> + if (tsr & TSR_WIS) {
> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> + (vcpu->arch.tcr & TCR_WRC_MASK);
> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
Can't we just poke the vcpu to exit the VM and do the above on its own?
This is the watdog expired case, right? I'd also prefer to have an
explicit event for the expiry than a special TSR check in the main loop.
Also call me sceptic on the reset of tcr. If our user space watchdog
event is "write a message", then we essentially want to hide the fact
that the watchdog expired from the guest, right? In that case, the
second time-out wouldn't do anything guest visible.
> + final = 1;
> + } else {
> + new_tsr = tsr | TSR_WIS;
> + }
> + } else {
> + new_tsr = tsr | TSR_ENW;
> + }
> + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> +
> + if (new_tsr & (TSR_WIS | TCR_WRC_MASK)) {
> + smp_wmb();
> + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
> +
> + /*
> + * Stop running the watchdog timer after final expiration to
> + * prevent the host from being flooded with timers if the
> + * guest sets a short period.
> + * Timers will resume when TSR/TCR is updated next time.
> + */
> + if (!final)
> + arm_next_watchdog(vcpu);
> +}
> static void update_timer_ints(struct kvm_vcpu *vcpu)
> {
> if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
> kvmppc_core_queue_dec(vcpu);
> else
> kvmppc_core_dequeue_dec(vcpu);
> +
> + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> + kvmppc_core_queue_watchdog(vcpu);
> + else
> + kvmppc_core_dequeue_watchdog(vcpu);
> }
>
> static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
> @@ -516,6 +616,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run,
struct kvm_vcpu *vcpu)
> goto out;
> }
>
> + if (vcpu->arch.tsr & TCR_WRC_MASK) {
> + kvm_run->exit_reason = KVM_EXIT_WDT;
> + ret = 0;
> + goto out;
> + }
> +
> kvm_guest_enter();
>
> #ifdef CONFIG_PPC_FPU
> @@ -977,6 +1083,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
struct kvm_vcpu *vcpu,
> kvmppc_account_exit(vcpu, SIGNAL_EXITS);
> }
> }
> + if (vcpu->arch.tsr & TCR_WRC_MASK) {
> + run->exit_reason = KVM_EXIT_WDT;
> + r = RESUME_HOST | (r & RESUME_FLAG_NV);
> + }
>
> return r;
> }
> @@ -1106,7 +1216,14 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> }
>
> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> + u32 old_tsr = vcpu->arch.tsr;
> +
> vcpu->arch.tsr = sregs->u.e.tsr;
> +
> + if (vcpu->arch.watchdog_enable && ((old_tsr ^ vcpu->arch.tsr) &
> + (TSR_ENW | TSR_WIS | TCR_WRC_MASK)))
> + arm_next_watchdog(vcpu);
Can you please move all required checks into arm_next_watchdog and call
it unconditionally? Especially the watchdog_enable. Unless you want to
follow Scott's scheme and merely make the user space delivery depend on
watchdog_enable, which also works.
> +
> update_timer_ints(vcpu);
> }
>
> @@ -1267,6 +1384,8 @@ void kvmppc_core_commit_memory_region(struct
kvm *kvm,
> void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> {
> vcpu->arch.tcr = new_tcr;
> + if (vcpu->arch.watchdog_enable)
> + arm_next_watchdog(vcpu);
> update_timer_ints(vcpu);
> }
>
> @@ -1281,6 +1400,15 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu
*vcpu, u32 tsr_bits)
> void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> {
> clear_bits(tsr_bits, &vcpu->arch.tsr);
> +
> + /*
> + * We may have stopped the watchdog due to
> + * being stuck on final expiration.
> + */
> + if (vcpu->arch.watchdog_enable && (tsr_bits & (TSR_ENW |
> + TSR_WIS | TCR_WRC_MASK)))
> + arm_next_watchdog(vcpu);
> +
> update_timer_ints(vcpu);
> }
>
> diff --git a/arch/powerpc/kvm/booke_emulate.c
b/arch/powerpc/kvm/booke_emulate.c
> index 12834bb..5a66ade 100644
> --- a/arch/powerpc/kvm/booke_emulate.c
> +++ b/arch/powerpc/kvm/booke_emulate.c
> @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu
*vcpu, int sprn, ulong spr_val)
> kvmppc_clr_tsr_bits(vcpu, spr_val);
> break;
> case SPRN_TCR:
> + /*
> + * WRC is a 2-bit field that is supposed to preserve its
> + * value once written to non-zero.
> + */
> + if (vcpu->arch.tcr & TCR_WRC_MASK) {
> + spr_val &= ~TCR_WRC_MASK;
> + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
> + }
> kvmppc_set_tcr(vcpu, spr_val);
> break;
>
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 87f4dc8..646c4da 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -38,9 +38,13 @@
>
> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
> {
> - return !(v->arch.shared->msr & MSR_WE) ||
> - !!(v->arch.pending_exceptions) ||
> - v->requests;
> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> + !!(v->arch.pending_exceptions) ||
> + v->requests;
> +
> + ret = ret || kvmppc_get_tsr_wrc(v);
Why do you need to declare the cpu as non-runnable when a watchdog event
occured?
> +
> + return ret;
> }
>
> int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
> @@ -237,6 +241,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> case KVM_CAP_PPC_GET_PVINFO:
> #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
> case KVM_CAP_SW_TLB:
> + case KVM_CAP_PPC_WDT:
> #endif
> r = 1;
> break;
> @@ -386,13 +391,21 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> #ifdef CONFIG_KVM_EXIT_TIMING
> mutex_init(&vcpu->arch.exit_timing_lock);
> #endif
> -
> +#ifdef CONFIG_BOOKE
> + spin_lock_init(&vcpu->arch.wdt_lock);
> +#endif
> return 0;
> }
>
> void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> {
> kvmppc_mmu_destroy(vcpu);
> +#ifdef CONFIG_BOOKE
> + spin_lock(&vcpu->arch.wdt_lock);
> + if (vcpu->arch.watchdog_enable)
> + del_timer(&vcpu->arch.wdt_timer);
> + spin_unlock(&vcpu->arch.wdt_lock);
> +#endif
> }
>
> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> @@ -637,6 +650,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct
kvm_vcpu *vcpu,
> r = 0;
> vcpu->arch.papr_enabled = true;
> break;
> +#ifdef CONFIG_BOOKE
> + case KVM_CAP_PPC_WDT:
> + r = 0;
> + /* setup watchdog timer once */
> + if (!vcpu->arch.watchdog_enable)
> + setup_timer(&vcpu->arch.wdt_timer,
> + kvmppc_watchdog_func, (unsigned long)vcpu);
> + vcpu->arch.watchdog_enable = true;
> + break;
> +#endif
> #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
> case KVM_CAP_SW_TLB: {
> struct kvm_config_tlb cfg;
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2ce09aa..b9fdb52 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -163,6 +163,7 @@ struct kvm_pit_config {
> #define KVM_EXIT_OSI 18
> #define KVM_EXIT_PAPR_HCALL 19
> #define KVM_EXIT_S390_UCONTROL 20
> +#define KVM_EXIT_WDT 21
Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
may have the chance to reuse it.
Alex
>
> /* For KVM_EXIT_INTERNAL_ERROR */
> #define KVM_INTERNAL_ERROR_EMULATION 1
> @@ -618,6 +619,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_GET_SMMU_INFO 78
> #define KVM_CAP_S390_COW 79
> #define KVM_CAP_PPC_ALLOC_HTAB 80
> +#define KVM_CAP_PPC_WDT 81
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-16 17:18 ` Alexander Graf
@ 2012-07-17 1:02 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 1:02 UTC (permalink / raw)
To: Alexander Graf
Cc: Bharat Bhushan, kvm-ppc, kvm, bharatb.yadav, Bharat Bhushan,
Benjamin Herrenschmidt, Kumar Gala
On 07/16/2012 12:18 PM, Alexander Graf wrote:
>> +/*
>> + * Return the number of jiffies until the next timeout. If the
> timeout is
>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>
> then?
>
>> return NEXT_TIMER_MAX_DELTA
>> + * instead.
>
> I can read code.
Come on, it's not exactly x++; /* add one to x */
It's faster to read code (as well as know the constraints within which
you can modify it without having to spend a lot of time digesting all
the callers' use cases) when you have a high level description of its
interface contract, and can be selective about when to zoom in to the
details. Linux kernel code tends to be bad about this.
> The important piece of information in the comment is
> missing: The reason.
The reason for what? Why you want to know the next timeout? That's the
caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>> +void kvmppc_watchdog_func(unsigned long data)
>> +{
>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>> + u32 tsr, new_tsr;
>> + int final;
>> +
>> + do {
>> + new_tsr = tsr = vcpu->arch.tsr;
>> + final = 0;
>> +
>> + /* Time out event */
>> + if (tsr & TSR_ENW) {
>> + if (tsr & TSR_WIS) {
>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>
> Can't we just poke the vcpu to exit the VM and do the above on its own?
We've discussed this before. TSR updates are done via atomics, and we
send a request for the vcpu to act on the result. This is how the
decrementer works.
http://www.spinics.net/lists/kvm-ppc/msg03169.html
> This is the watdog expired case, right?
Final expiration, yes.
> I'd also prefer to have an
> explicit event for the expiry than a special TSR check in the main loop.
So check TSR[WRS] in update_timer_ints(), and have it queue a
pseudoexception? That would eliminate the need to change the runnable
function.
> Also call me sceptic on the reset of tcr. If our user space watchdog
> event is "write a message", then we essentially want to hide the fact
> that the watchdog expired from the guest, right? In that case, the
> second time-out wouldn't do anything guest visible.
This was probably copied straight out of the hardware documentation,
which explicitly says TCR[WRC] gets set to zero on final expiration (as
part of reset). We should leave that part up to userspace. It
definitely shouldn't be done inside the cmpxchg loop (or from interrupt
context -- only TSR gets the atomic treatment). I don't think the read
of TCR outside vcpu context is a problem, though.
>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
>> {
>> - return !(v->arch.shared->msr & MSR_WE) ||
>> - !!(v->arch.pending_exceptions) ||
>> - v->requests;
>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>> + !!(v->arch.pending_exceptions) ||
>> + v->requests;
>> +
>> + ret = ret || kvmppc_get_tsr_wrc(v);
>
> Why do you need to declare the cpu as non-runnable when a watchdog event
> occured?
It's the other way around -- it's always runnable when a watchdog exit
is pending. It's like a pending exception.
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index 2ce09aa..b9fdb52 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>> #define KVM_EXIT_OSI 18
>> #define KVM_EXIT_PAPR_HCALL 19
>> #define KVM_EXIT_S390_UCONTROL 20
>> +#define KVM_EXIT_WDT 21
>
> Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
> may have the chance to reuse it.
WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in
their names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 1:02 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 1:02 UTC (permalink / raw)
To: Alexander Graf
Cc: Bharat Bhushan, kvm-ppc, kvm, bharatb.yadav, Bharat Bhushan,
Benjamin Herrenschmidt, Kumar Gala
On 07/16/2012 12:18 PM, Alexander Graf wrote:
>> +/*
>> + * Return the number of jiffies until the next timeout. If the
> timeout is
>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>
> then?
>
>> return NEXT_TIMER_MAX_DELTA
>> + * instead.
>
> I can read code.
Come on, it's not exactly x++; /* add one to x */
It's faster to read code (as well as know the constraints within which
you can modify it without having to spend a lot of time digesting all
the callers' use cases) when you have a high level description of its
interface contract, and can be selective about when to zoom in to the
details. Linux kernel code tends to be bad about this.
> The important piece of information in the comment is
> missing: The reason.
The reason for what? Why you want to know the next timeout? That's the
caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>> +void kvmppc_watchdog_func(unsigned long data)
>> +{
>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>> + u32 tsr, new_tsr;
>> + int final;
>> +
>> + do {
>> + new_tsr = tsr = vcpu->arch.tsr;
>> + final = 0;
>> +
>> + /* Time out event */
>> + if (tsr & TSR_ENW) {
>> + if (tsr & TSR_WIS) {
>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>
> Can't we just poke the vcpu to exit the VM and do the above on its own?
We've discussed this before. TSR updates are done via atomics, and we
send a request for the vcpu to act on the result. This is how the
decrementer works.
http://www.spinics.net/lists/kvm-ppc/msg03169.html
> This is the watdog expired case, right?
Final expiration, yes.
> I'd also prefer to have an
> explicit event for the expiry than a special TSR check in the main loop.
So check TSR[WRS] in update_timer_ints(), and have it queue a
pseudoexception? That would eliminate the need to change the runnable
function.
> Also call me sceptic on the reset of tcr. If our user space watchdog
> event is "write a message", then we essentially want to hide the fact
> that the watchdog expired from the guest, right? In that case, the
> second time-out wouldn't do anything guest visible.
This was probably copied straight out of the hardware documentation,
which explicitly says TCR[WRC] gets set to zero on final expiration (as
part of reset). We should leave that part up to userspace. It
definitely shouldn't be done inside the cmpxchg loop (or from interrupt
context -- only TSR gets the atomic treatment). I don't think the read
of TCR outside vcpu context is a problem, though.
>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
>> {
>> - return !(v->arch.shared->msr & MSR_WE) ||
>> - !!(v->arch.pending_exceptions) ||
>> - v->requests;
>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>> + !!(v->arch.pending_exceptions) ||
>> + v->requests;
>> +
>> + ret = ret || kvmppc_get_tsr_wrc(v);
>
> Why do you need to declare the cpu as non-runnable when a watchdog event
> occured?
It's the other way around -- it's always runnable when a watchdog exit
is pending. It's like a pending exception.
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index 2ce09aa..b9fdb52 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>> #define KVM_EXIT_OSI 18
>> #define KVM_EXIT_PAPR_HCALL 19
>> #define KVM_EXIT_S390_UCONTROL 20
>> +#define KVM_EXIT_WDT 21
>
> Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
> may have the chance to reuse it.
WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in
their names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 1:02 ` Scott Wood
@ 2012-07-17 7:20 ` Alexander Graf
-1 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 7:20 UTC (permalink / raw)
To: Scott Wood
Cc: Bharat Bhushan, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Bharat Bhushan, Benjamin Herrenschmidt, Kumar Gala
On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>> +/*
>>> + * Return the number of jiffies until the next timeout. If the
>> timeout is
>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>
>> then?
>>
>>> return NEXT_TIMER_MAX_DELTA
>>> + * instead.
>>
>> I can read code.
>
> Come on, it's not exactly x++; /* add one to x */
>
> It's faster to read code (as well as know the constraints within which
> you can modify it without having to spend a lot of time digesting all
> the callers' use cases) when you have a high level description of its
> interface contract, and can be selective about when to zoom in to the
> details. Linux kernel code tends to be bad about this.
Yeah, not opposed to leave that part in :).
>
>> The important piece of information in the comment is
>> missing: The reason.
>
> The reason for what? Why you want to know the next timeout? That's the
> caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
Why we use the limit. IIRC it was explained in the last thread, just didn't make its way into the comment.
>
>>> +void kvmppc_watchdog_func(unsigned long data)
>>> +{
>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>> + u32 tsr, new_tsr;
>>> + int final;
>>> +
>>> + do {
>>> + new_tsr = tsr = vcpu->arch.tsr;
>>> + final = 0;
>>> +
>>> + /* Time out event */
>>> + if (tsr & TSR_ENW) {
>>> + if (tsr & TSR_WIS) {
>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>
>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>
> We've discussed this before. TSR updates are done via atomics, and we
> send a request for the vcpu to act on the result. This is how the
> decrementer works.
>
> http://www.spinics.net/lists/kvm-ppc/msg03169.html
Yeah, the major difference to the dec is the atomicity of the whole thing. Dec changes one bit to enable the interrupt line. The final expiration is more complex.
>
>> This is the watdog expired case, right?
>
> Final expiration, yes.
>
>> I'd also prefer to have an
>> explicit event for the expiry than a special TSR check in the main loop.
>
> So check TSR[WRS] in update_timer_ints(), and have it queue a
> pseudoexception?
Or here.
> That would eliminate the need to change the runnable
> function.
>
>> Also call me sceptic on the reset of tcr. If our user space watchdog
>> event is "write a message", then we essentially want to hide the fact
>> that the watchdog expired from the guest, right? In that case, the
>> second time-out wouldn't do anything guest visible.
>
> This was probably copied straight out of the hardware documentation,
> which explicitly says TCR[WRC] gets set to zero on final expiration (as
> part of reset). We should leave that part up to userspace. It
> definitely shouldn't be done inside the cmpxchg loop (or from interrupt
> context -- only TSR gets the atomic treatment). I don't think the read
> of TCR outside vcpu context is a problem, though.
Yeah, but it'd just make me less wary if only the vcpu thread itself accesses vcpu internal registers that aren't irq state and thus designed for it (TSR).
But yes, the most flexible way would probably be to do it from user space. Since it'd happen from within the vcpu context of user space, we can also guarantee that the TCR access is atomic.
>
>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
>>> {
>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>> - !!(v->arch.pending_exceptions) ||
>>> - v->requests;
>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>> + !!(v->arch.pending_exceptions) ||
>>> + v->requests;
>>> +
>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>
>> Why do you need to declare the cpu as non-runnable when a watchdog event
>> occured?
>
> It's the other way around -- it's always runnable when a watchdog exit
> is pending. It's like a pending exception.
Ah, so yes, we should just shove it into pending_exceptions then.
>
>>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>>> index 2ce09aa..b9fdb52 100644
>>> --- a/include/linux/kvm.h
>>> +++ b/include/linux/kvm.h
>>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>>> #define KVM_EXIT_OSI 18
>>> #define KVM_EXIT_PAPR_HCALL 19
>>> #define KVM_EXIT_S390_UCONTROL 20
>>> +#define KVM_EXIT_WDT 21
>>
>> Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
>> may have the chance to reuse it.
>
> WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in
> their names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
Ah, didn't know that it's a commonly used abbreviation. It's not like we're constrained in our line length for exit code handling usually though, so readable still wins for me :)
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 7:20 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 7:20 UTC (permalink / raw)
To: Scott Wood
Cc: Bharat Bhushan, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Bharat Bhushan, Benjamin Herrenschmidt, Kumar Gala
On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>> +/*
>>> + * Return the number of jiffies until the next timeout. If the
>> timeout is
>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>
>> then?
>>
>>> return NEXT_TIMER_MAX_DELTA
>>> + * instead.
>>
>> I can read code.
>
> Come on, it's not exactly x++; /* add one to x */
>
> It's faster to read code (as well as know the constraints within which
> you can modify it without having to spend a lot of time digesting all
> the callers' use cases) when you have a high level description of its
> interface contract, and can be selective about when to zoom in to the
> details. Linux kernel code tends to be bad about this.
Yeah, not opposed to leave that part in :).
>
>> The important piece of information in the comment is
>> missing: The reason.
>
> The reason for what? Why you want to know the next timeout? That's the
> caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
Why we use the limit. IIRC it was explained in the last thread, just didn't make its way into the comment.
>
>>> +void kvmppc_watchdog_func(unsigned long data)
>>> +{
>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>> + u32 tsr, new_tsr;
>>> + int final;
>>> +
>>> + do {
>>> + new_tsr = tsr = vcpu->arch.tsr;
>>> + final = 0;
>>> +
>>> + /* Time out event */
>>> + if (tsr & TSR_ENW) {
>>> + if (tsr & TSR_WIS) {
>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>
>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>
> We've discussed this before. TSR updates are done via atomics, and we
> send a request for the vcpu to act on the result. This is how the
> decrementer works.
>
> http://www.spinics.net/lists/kvm-ppc/msg03169.html
Yeah, the major difference to the dec is the atomicity of the whole thing. Dec changes one bit to enable the interrupt line. The final expiration is more complex.
>
>> This is the watdog expired case, right?
>
> Final expiration, yes.
>
>> I'd also prefer to have an
>> explicit event for the expiry than a special TSR check in the main loop.
>
> So check TSR[WRS] in update_timer_ints(), and have it queue a
> pseudoexception?
Or here.
> That would eliminate the need to change the runnable
> function.
>
>> Also call me sceptic on the reset of tcr. If our user space watchdog
>> event is "write a message", then we essentially want to hide the fact
>> that the watchdog expired from the guest, right? In that case, the
>> second time-out wouldn't do anything guest visible.
>
> This was probably copied straight out of the hardware documentation,
> which explicitly says TCR[WRC] gets set to zero on final expiration (as
> part of reset). We should leave that part up to userspace. It
> definitely shouldn't be done inside the cmpxchg loop (or from interrupt
> context -- only TSR gets the atomic treatment). I don't think the read
> of TCR outside vcpu context is a problem, though.
Yeah, but it'd just make me less wary if only the vcpu thread itself accesses vcpu internal registers that aren't irq state and thus designed for it (TSR).
But yes, the most flexible way would probably be to do it from user space. Since it'd happen from within the vcpu context of user space, we can also guarantee that the TCR access is atomic.
>
>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
>>> {
>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>> - !!(v->arch.pending_exceptions) ||
>>> - v->requests;
>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>> + !!(v->arch.pending_exceptions) ||
>>> + v->requests;
>>> +
>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>
>> Why do you need to declare the cpu as non-runnable when a watchdog event
>> occured?
>
> It's the other way around -- it's always runnable when a watchdog exit
> is pending. It's like a pending exception.
Ah, so yes, we should just shove it into pending_exceptions then.
>
>>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>>> index 2ce09aa..b9fdb52 100644
>>> --- a/include/linux/kvm.h
>>> +++ b/include/linux/kvm.h
>>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
>>> #define KVM_EXIT_OSI 18
>>> #define KVM_EXIT_PAPR_HCALL 19
>>> #define KVM_EXIT_S390_UCONTROL 20
>>> +#define KVM_EXIT_WDT 21
>>
>> Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
>> may have the chance to reuse it.
>
> WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in
> their names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
Ah, didn't know that it's a commonly used abbreviation. It's not like we're constrained in our line length for exit code handling usually though, so readable still wins for me :)
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 7:20 ` Alexander Graf
(?)
@ 2012-07-17 9:57 ` Bhushan Bharat-R65777
2012-07-17 12:51 ` Alexander Graf
-1 siblings, 1 reply; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 9:57 UTC (permalink / raw)
To: Alexander Graf, Wood Scott-B07421
Cc: <kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Tuesday, July 17, 2012 12:50 PM
> To: Wood Scott-B07421
> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
> <bharatb.yadav@gmail.com>; Bhushan Bharat-R65777; Benjamin Herrenschmidt; Kumar
> Gala
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
>
>
> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>
> > On 07/16/2012 12:18 PM, Alexander Graf wrote:
> >>> +/*
> >>> + * Return the number of jiffies until the next timeout. If the
> >> timeout is
> >>> + * longer than the NEXT_TIMER_MAX_DELTA, that
> >>
> >> then?
> >>
> >>> return NEXT_TIMER_MAX_DELTA
> >>> + * instead.
> >>
> >> I can read code.
> >
> > Come on, it's not exactly x++; /* add one to x */
> >
> > It's faster to read code (as well as know the constraints within which
> > you can modify it without having to spend a lot of time digesting all
> > the callers' use cases) when you have a high level description of its
> > interface contract, and can be selective about when to zoom in to the
> > details. Linux kernel code tends to be bad about this.
>
> Yeah, not opposed to leave that part in :).
>
> >
> >> The important piece of information in the comment is
> >> missing: The reason.
> >
> > The reason for what? Why you want to know the next timeout? That's
> > the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>
> Why we use the limit. IIRC it was explained in the last thread, just didn't make
> its way into the comment.
Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a purpose, so the comment described the puspose).
Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h), so removed the comment.
>
> >
> >>> +void kvmppc_watchdog_func(unsigned long data) {
> >>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>> + u32 tsr, new_tsr;
> >>> + int final;
> >>> +
> >>> + do {
> >>> + new_tsr = tsr = vcpu->arch.tsr;
> >>> + final = 0;
> >>> +
> >>> + /* Time out event */
> >>> + if (tsr & TSR_ENW) {
> >>> + if (tsr & TSR_WIS) {
> >>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> >>> + (vcpu->arch.tcr & TCR_WRC_MASK);
> >>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
> >>
> >> Can't we just poke the vcpu to exit the VM and do the above on its own?
> >
> > We've discussed this before. TSR updates are done via atomics, and we
> > send a request for the vcpu to act on the result. This is how the
> > decrementer works.
> >
> > http://www.spinics.net/lists/kvm-ppc/msg03169.html
>
> Yeah, the major difference to the dec is the atomicity of the whole thing. Dec
> changes one bit to enable the interrupt line. The final expiration is more
> complex.
Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>
> >
> >> This is the watdog expired case, right?
> >
> > Final expiration, yes.
> >
> >> I'd also prefer to have an
> >> explicit event for the expiry than a special TSR check in the main loop.
> >
> > So check TSR[WRS] in update_timer_ints(), and have it queue a
> > pseudoexception?
>
> Or here.
Do we mean define a sudo IROPRIO for final expiry.
>
> > That would eliminate the need to change the runnable function.
> >
> >> Also call me sceptic on the reset of tcr. If our user space watchdog
> >> event is "write a message", then we essentially want to hide the fact
> >> that the watchdog expired from the guest, right? In that case, the
> >> second time-out wouldn't do anything guest visible.
> >
> > This was probably copied straight out of the hardware documentation,
> > which explicitly says TCR[WRC] gets set to zero on final expiration
> > (as part of reset). We should leave that part up to userspace. It
> > definitely shouldn't be done inside the cmpxchg loop (or from
> > interrupt context -- only TSR gets the atomic treatment). I don't
> > think the read of TCR outside vcpu context is a problem, though.
>
> Yeah, but it'd just make me less wary if only the vcpu thread itself accesses
> vcpu internal registers that aren't irq state and thus designed for it (TSR).
>
> But yes, the most flexible way would probably be to do it from user space. Since
> it'd happen from within the vcpu context of user space, we can also guarantee
> that the TCR access is atomic.
Yes, will move the tcr.wrc clearing to userspace.
>
> >
> >>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
> >>> - return !(v->arch.shared->msr & MSR_WE) ||
> >>> - !!(v->arch.pending_exceptions) ||
> >>> - v->requests;
> >>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> >>> + !!(v->arch.pending_exceptions) ||
> >>> + v->requests;
> >>> +
> >>> + ret = ret || kvmppc_get_tsr_wrc(v);
> >>
> >> Why do you need to declare the cpu as non-runnable when a watchdog
> >> event occured?
> >
> > It's the other way around -- it's always runnable when a watchdog exit
> > is pending. It's like a pending exception.
>
> Ah, so yes, we should just shove it into pending_exceptions then.
Pending_exception? You mean sudo again here as said earlier.
Thanks
-Bharat
>
> >
> >>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h index
> >>> 2ce09aa..b9fdb52 100644
> >>> --- a/include/linux/kvm.h
> >>> +++ b/include/linux/kvm.h
> >>> @@ -163,6 +163,7 @@ struct kvm_pit_config {
> >>> #define KVM_EXIT_OSI 18
> >>> #define KVM_EXIT_PAPR_HCALL 19
> >>> #define KVM_EXIT_S390_UCONTROL 20
> >>> +#define KVM_EXIT_WDT 21
> >>
> >> Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
> >> may have the chance to reuse it.
> >
> > WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in
> > their names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
>
> Ah, didn't know that it's a commonly used abbreviation. It's not like we're
> constrained in our line length for exit code handling usually though, so
> readable still wins for me :)
>
> Alex
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 1:02 ` Scott Wood
@ 2012-07-17 11:31 ` Bhushan Bharat-R65777
-1 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 11:31 UTC (permalink / raw)
To: Wood Scott-B07421, Alexander Graf
Cc: kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogVHVlc2RheSwgSnVseSAxNywgMjAxMiA2OjMyIEFNDQo+IFRvOiBBbGV4YW5k
ZXIgR3JhZg0KPiBDYzogQmh1c2hhbiBCaGFyYXQtUjY1Nzc3OyBrdm0tcHBjQHZnZXIua2VybmVs
Lm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsNCj4gYmhhcmF0Yi55YWRhdkBnbWFpbC5jb207IEJo
dXNoYW4gQmhhcmF0LVI2NTc3NzsgQmVuamFtaW4gSGVycmVuc2NobWlkdDsgS3VtYXINCj4gR2Fs
YQ0KPiBTdWJqZWN0OiBSZTogW1BBVENIIDIvMiB2Ml0gS1ZNOiBQUEM6IGJvb2tlOiBBZGQgd2F0
Y2hkb2cgZW11bGF0aW9uDQo+IA0KPiBPbiAwNy8xNi8yMDEyIDEyOjE4IFBNLCBBbGV4YW5kZXIg
R3JhZiB3cm90ZToNCj4gPj4gKy8qDQo+ID4+ICsgKiBSZXR1cm4gdGhlIG51bWJlciBvZiBqaWZm
aWVzIHVudGlsIHRoZSBuZXh0IHRpbWVvdXQuICBJZiB0aGUNCj4gPiB0aW1lb3V0IGlzDQo+ID4+
ICsgKiBsb25nZXIgdGhhbiB0aGUgTkVYVF9USU1FUl9NQVhfREVMVEEsIHRoYXQNCj4gPg0KPiA+
IHRoZW4/DQo+ID4NCj4gPj4gIHJldHVybiBORVhUX1RJTUVSX01BWF9ERUxUQQ0KPiA+PiArICog
aW5zdGVhZC4NCj4gPg0KPiA+IEkgY2FuIHJlYWQgY29kZS4NCj4gDQo+IENvbWUgb24sIGl0J3Mg
bm90IGV4YWN0bHkgeCsrOyAvKiBhZGQgb25lIHRvIHggKi8NCj4gDQo+IEl0J3MgZmFzdGVyIHRv
IHJlYWQgY29kZSAoYXMgd2VsbCBhcyBrbm93IHRoZSBjb25zdHJhaW50cyB3aXRoaW4gd2hpY2gg
eW91IGNhbg0KPiBtb2RpZnkgaXQgd2l0aG91dCBoYXZpbmcgdG8gc3BlbmQgYSBsb3Qgb2YgdGlt
ZSBkaWdlc3RpbmcgYWxsIHRoZSBjYWxsZXJzJyB1c2UNCj4gY2FzZXMpIHdoZW4geW91IGhhdmUg
YSBoaWdoIGxldmVsIGRlc2NyaXB0aW9uIG9mIGl0cyBpbnRlcmZhY2UgY29udHJhY3QsIGFuZCBj
YW4NCj4gYmUgc2VsZWN0aXZlIGFib3V0IHdoZW4gdG8gem9vbSBpbiB0byB0aGUgZGV0YWlscy4g
IExpbnV4IGtlcm5lbCBjb2RlIHRlbmRzIHRvDQo+IGJlIGJhZCBhYm91dCB0aGlzLg0KPiANCj4g
PiBUaGUgaW1wb3J0YW50IHBpZWNlIG9mIGluZm9ybWF0aW9uIGluIHRoZSBjb21tZW50IGlzDQo+
ID4gbWlzc2luZzogVGhlIHJlYXNvbi4NCj4gDQo+IFRoZSByZWFzb24gZm9yIHdoYXQ/ICBXaHkg
eW91IHdhbnQgdG8ga25vdyB0aGUgbmV4dCB0aW1lb3V0PyAgVGhhdCdzIHRoZQ0KPiBjYWxsZXIn
cyBidXNpbmVzcy4gIE9yIHdoeSB3ZSB1c2UgTkVYVF9USU1FUl9NQVhfREVMVEEgYXMgdGhlIGxp
bWl0Pw0KPiANCj4gPj4gK3ZvaWQga3ZtcHBjX3dhdGNoZG9nX2Z1bmModW5zaWduZWQgbG9uZyBk
YXRhKSB7DQo+ID4+ICsgICAgc3RydWN0IGt2bV92Y3B1ICp2Y3B1ID0gKHN0cnVjdCBrdm1fdmNw
dSAqKWRhdGE7DQo+ID4+ICsgICAgdTMyIHRzciwgbmV3X3RzcjsNCj4gPj4gKyAgICBpbnQgZmlu
YWw7DQo+ID4+ICsNCj4gPj4gKyAgICBkbyB7DQo+ID4+ICsgICAgICAgIG5ld190c3IgPSB0c3Ig
PSB2Y3B1LT5hcmNoLnRzcjsNCj4gPj4gKyAgICAgICAgZmluYWwgPSAwOw0KPiA+PiArDQo+ID4+
ICsgICAgICAgIC8qIFRpbWUgb3V0IGV2ZW50ICovDQo+ID4+ICsgICAgICAgIGlmICh0c3IgJiBU
U1JfRU5XKSB7DQo+ID4+ICsgICAgICAgICAgICBpZiAodHNyICYgVFNSX1dJUykgew0KPiA+PiAr
ICAgICAgICAgICAgICAgIG5ld190c3IgPSAodHNyICYgflRDUl9XUkNfTUFTSykgfA0KPiA+PiAr
ICAgICAgICAgICAgICAgICAgICAgICh2Y3B1LT5hcmNoLnRjciAmIFRDUl9XUkNfTUFTSyk7DQo+
ID4+ICsgICAgICAgICAgICAgICAgdmNwdS0+YXJjaC50Y3IgJj0gflRDUl9XUkNfTUFTSzsNCj4g
Pg0KPiA+IENhbid0IHdlIGp1c3QgcG9rZSB0aGUgdmNwdSB0byBleGl0IHRoZSBWTSBhbmQgZG8g
dGhlIGFib3ZlIG9uIGl0cyBvd24/DQo+IA0KPiBXZSd2ZSBkaXNjdXNzZWQgdGhpcyBiZWZvcmUu
ICBUU1IgdXBkYXRlcyBhcmUgZG9uZSB2aWEgYXRvbWljcywgYW5kIHdlIHNlbmQgYQ0KPiByZXF1
ZXN0IGZvciB0aGUgdmNwdSB0byBhY3Qgb24gdGhlIHJlc3VsdC4gIFRoaXMgaXMgaG93IHRoZSBk
ZWNyZW1lbnRlciB3b3Jrcy4NCj4gDQo+IGh0dHA6Ly93d3cuc3Bpbmljcy5uZXQvbGlzdHMva3Zt
LXBwYy9tc2cwMzE2OS5odG1sDQo+IA0KPiA+IFRoaXMgaXMgdGhlIHdhdGRvZyBleHBpcmVkIGNh
c2UsIHJpZ2h0Pw0KPiANCj4gRmluYWwgZXhwaXJhdGlvbiwgeWVzLg0KPiANCj4gPiBJJ2QgYWxz
byBwcmVmZXIgdG8gaGF2ZSBhbg0KPiA+IGV4cGxpY2l0IGV2ZW50IGZvciB0aGUgZXhwaXJ5IHRo
YW4gYSBzcGVjaWFsIFRTUiBjaGVjayBpbiB0aGUgbWFpbiBsb29wLg0KPiANCj4gU28gY2hlY2sg
VFNSW1dSU10gaW4gdXBkYXRlX3RpbWVyX2ludHMoKSwgYW5kIGhhdmUgaXQgcXVldWUgYSBwc2V1
ZG9leGNlcHRpb24/DQo+IFRoYXQgd291bGQgZWxpbWluYXRlIHRoZSBuZWVkIHRvIGNoYW5nZSB0
aGUgcnVubmFibGUgZnVuY3Rpb24uDQo+IA0KPiA+IEFsc28gY2FsbCBtZSBzY2VwdGljIG9uIHRo
ZSByZXNldCBvZiB0Y3IuIElmIG91ciB1c2VyIHNwYWNlIHdhdGNoZG9nDQo+ID4gZXZlbnQgaXMg
IndyaXRlIGEgbWVzc2FnZSIsIHRoZW4gd2UgZXNzZW50aWFsbHkgd2FudCB0byBoaWRlIHRoZSBm
YWN0DQo+ID4gdGhhdCB0aGUgd2F0Y2hkb2cgZXhwaXJlZCBmcm9tIHRoZSBndWVzdCwgcmlnaHQ/
IEluIHRoYXQgY2FzZSwgdGhlDQo+ID4gc2Vjb25kIHRpbWUtb3V0IHdvdWxkbid0IGRvIGFueXRo
aW5nIGd1ZXN0IHZpc2libGUuDQo+IA0KPiBUaGlzIHdhcyBwcm9iYWJseSBjb3BpZWQgc3RyYWln
aHQgb3V0IG9mIHRoZSBoYXJkd2FyZSBkb2N1bWVudGF0aW9uLCB3aGljaA0KPiBleHBsaWNpdGx5
IHNheXMgVENSW1dSQ10gZ2V0cyBzZXQgdG8gemVybyBvbiBmaW5hbCBleHBpcmF0aW9uIChhcyBw
YXJ0IG9mDQo+IHJlc2V0KS4gIFdlIHNob3VsZCBsZWF2ZSB0aGF0IHBhcnQgdXAgdG8gdXNlcnNw
YWNlLiAgSXQgZGVmaW5pdGVseSBzaG91bGRuJ3QgYmUNCj4gZG9uZSBpbnNpZGUgdGhlIGNtcHhj
aGcgbG9vcCAob3IgZnJvbSBpbnRlcnJ1cHQgY29udGV4dCAtLSBvbmx5IFRTUiBnZXRzIHRoZQ0K
PiBhdG9taWMgdHJlYXRtZW50KS4gIEkgZG9uJ3QgdGhpbmsgdGhlIHJlYWQgb2YgVENSIG91dHNp
ZGUgdmNwdSBjb250ZXh0IGlzIGENCj4gcHJvYmxlbSwgdGhvdWdoLg0KPiANCj4gPj4gIGludCBr
dm1fYXJjaF92Y3B1X3J1bm5hYmxlKHN0cnVjdCBrdm1fdmNwdSAqdikgIHsNCj4gPj4gLSAgICBy
ZXR1cm4gISh2LT5hcmNoLnNoYXJlZC0+bXNyICYgTVNSX1dFKSB8fA0KPiA+PiAtICAgICAgICAg
ICAhISh2LT5hcmNoLnBlbmRpbmdfZXhjZXB0aW9ucykgfHwNCj4gPj4gLSAgICAgICAgICAgdi0+
cmVxdWVzdHM7DQo+ID4+ICsgICAgYm9vbCByZXQgPSAhKHYtPmFyY2guc2hhcmVkLT5tc3IgJiBN
U1JfV0UpIHx8DQo+ID4+ICsgICAgICAgICAgICEhKHYtPmFyY2gucGVuZGluZ19leGNlcHRpb25z
KSB8fA0KPiA+PiArICAgICAgICAgICB2LT5yZXF1ZXN0czsNCj4gPj4gKw0KPiA+PiArICAgIHJl
dCA9IHJldCB8fCBrdm1wcGNfZ2V0X3Rzcl93cmModik7DQo+ID4NCj4gPiBXaHkgZG8geW91IG5l
ZWQgdG8gZGVjbGFyZSB0aGUgY3B1IGFzIG5vbi1ydW5uYWJsZSB3aGVuIGEgd2F0Y2hkb2cNCj4g
PiBldmVudCBvY2N1cmVkPw0KPiANCj4gSXQncyB0aGUgb3RoZXIgd2F5IGFyb3VuZCAtLSBpdCdz
IGFsd2F5cyBydW5uYWJsZSB3aGVuIGEgd2F0Y2hkb2cgZXhpdCBpcw0KPiBwZW5kaW5nLiAgSXQn
cyBsaWtlIGEgcGVuZGluZyBleGNlcHRpb24uDQoNCldpdGggdGhlIGFib3ZlIGNoZWNrLCBBcmUg
d2UgdHJ5aW5nIHRvIGhhbmRsZSB0aGUgY2FzZSB3aGVyZSB3YXRjaGRvZyBpbnRlcnJ1cHQgYml0
IGluIHBlbmRpbmdfZXhjZXB0aW9uIGlzIGNsZWFyZWQgYnkgZ3Vlc3QgYWZ0ZXIgZmluYWwgZXhw
aXJ5IGJ1dCBiZWZvcmUgdGhlIHFlbXUgZXhpdD8gQW5kIHdlIHdhbnQgdGhhdCBpZiBUU1IuV1JT
IHVwZGF0ZSB3aW5zIHRoZSByYWNlIHdpdGggY2xlYXJpbmcgb2Ygd2F0Y2hkb2cgaW50ZXJydXB0
IGNvbmRpdGlvbiBmcm9tIGd1ZXN0IHRoZW4gYW55d2F5cyBsZXQgUUVNVSBleGl0IHdpdGggcmVh
c29uIEtWTV9FWElUX1dEVD8gDQoNCldoYXQgaWYgd2UgZG8gbm90IGFsbG93IGd1ZXN0IGNsZWFy
IHdhdGNoZG9nIGludGVycnVwdCBjb25kaXRpb24gaWYgZmluYWwgZXhwaXJ5IGFscmVhZHkgaGFw
cGVuZWQ/IEFuZCBsZXQgUUVNVSBjbGVhciBUU1IuRU5XIHwgVFNSLldJUyB8IFRTUi5XUlMgaW4g
YWxsIHNlbGVjdGVkIHBvbGljeSAoZGVidWcgfCByZXNldCBldGMpLg0KDQpUaGFua3MNCi1CaGFy
YXQNCg0KPiANCj4gPj4gZGlmZiAtLWdpdCBhL2luY2x1ZGUvbGludXgva3ZtLmggYi9pbmNsdWRl
L2xpbnV4L2t2bS5oIGluZGV4DQo+ID4+IDJjZTA5YWEuLmI5ZmRiNTIgMTAwNjQ0DQo+ID4+IC0t
LSBhL2luY2x1ZGUvbGludXgva3ZtLmgNCj4gPj4gKysrIGIvaW5jbHVkZS9saW51eC9rdm0uaA0K
PiA+PiBAQCAtMTYzLDYgKzE2Myw3IEBAIHN0cnVjdCBrdm1fcGl0X2NvbmZpZyB7DQo+ID4+ICAj
ZGVmaW5lIEtWTV9FWElUX09TSSAgICAgICAgICAgICAgMTgNCj4gPj4gICNkZWZpbmUgS1ZNX0VY
SVRfUEFQUl9IQ0FMTCAgICAgIDE5DQo+ID4+ICAjZGVmaW5lIEtWTV9FWElUX1MzOTBfVUNPTlRS
T0wgICAgICAyMA0KPiA+PiArI2RlZmluZSBLVk1fRVhJVF9XRFQgICAgICAgICAgICAgIDIxDQo+
ID4NCj4gPiBQbGVhc2UgbWFrZSB0aGlzIGEgbW9yZSBnZW5lcmljIEtWTV9FWElUX1dBVENIRE9H
IHNvIHRoYXQgb3RoZXIgYXJjaHMNCj4gPiBtYXkgaGF2ZSB0aGUgY2hhbmNlIHRvIHJldXNlIGl0
Lg0KPiANCj4gV0RUIGlzIGdlbmVyaWMgKDg1IG9mIDExNSBmaWxlcyBpbiBkcml2ZXJzL3dhdGNo
ZG9nLyBjb250YWluICJ3ZHQiIGluIHRoZWlyDQo+IG5hbWVzKSwganVzdCBvdmVybHkgYWJicmV2
aWF0ZWQuICBLVk1fRVhJVF9XQVRDSERPRyBpcyBtb3JlIHJlYWRhYmxlLg0KPiANCj4gLVNjb3R0
DQo
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 11:31 ` Bhushan Bharat-R65777
0 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 11:31 UTC (permalink / raw)
To: Wood Scott-B07421, Alexander Graf
Cc: kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, July 17, 2012 6:32 AM
> To: Alexander Graf
> Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> bharatb.yadav@gmail.com; Bhushan Bharat-R65777; Benjamin Herrenschmidt; Kumar
> Gala
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/16/2012 12:18 PM, Alexander Graf wrote:
> >> +/*
> >> + * Return the number of jiffies until the next timeout. If the
> > timeout is
> >> + * longer than the NEXT_TIMER_MAX_DELTA, that
> >
> > then?
> >
> >> return NEXT_TIMER_MAX_DELTA
> >> + * instead.
> >
> > I can read code.
>
> Come on, it's not exactly x++; /* add one to x */
>
> It's faster to read code (as well as know the constraints within which you can
> modify it without having to spend a lot of time digesting all the callers' use
> cases) when you have a high level description of its interface contract, and can
> be selective about when to zoom in to the details. Linux kernel code tends to
> be bad about this.
>
> > The important piece of information in the comment is
> > missing: The reason.
>
> The reason for what? Why you want to know the next timeout? That's the
> caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>
> >> +void kvmppc_watchdog_func(unsigned long data) {
> >> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >> + u32 tsr, new_tsr;
> >> + int final;
> >> +
> >> + do {
> >> + new_tsr = tsr = vcpu->arch.tsr;
> >> + final = 0;
> >> +
> >> + /* Time out event */
> >> + if (tsr & TSR_ENW) {
> >> + if (tsr & TSR_WIS) {
> >> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> >> + (vcpu->arch.tcr & TCR_WRC_MASK);
> >> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
> >
> > Can't we just poke the vcpu to exit the VM and do the above on its own?
>
> We've discussed this before. TSR updates are done via atomics, and we send a
> request for the vcpu to act on the result. This is how the decrementer works.
>
> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>
> > This is the watdog expired case, right?
>
> Final expiration, yes.
>
> > I'd also prefer to have an
> > explicit event for the expiry than a special TSR check in the main loop.
>
> So check TSR[WRS] in update_timer_ints(), and have it queue a pseudoexception?
> That would eliminate the need to change the runnable function.
>
> > Also call me sceptic on the reset of tcr. If our user space watchdog
> > event is "write a message", then we essentially want to hide the fact
> > that the watchdog expired from the guest, right? In that case, the
> > second time-out wouldn't do anything guest visible.
>
> This was probably copied straight out of the hardware documentation, which
> explicitly says TCR[WRC] gets set to zero on final expiration (as part of
> reset). We should leave that part up to userspace. It definitely shouldn't be
> done inside the cmpxchg loop (or from interrupt context -- only TSR gets the
> atomic treatment). I don't think the read of TCR outside vcpu context is a
> problem, though.
>
> >> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
> >> - return !(v->arch.shared->msr & MSR_WE) ||
> >> - !!(v->arch.pending_exceptions) ||
> >> - v->requests;
> >> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> >> + !!(v->arch.pending_exceptions) ||
> >> + v->requests;
> >> +
> >> + ret = ret || kvmppc_get_tsr_wrc(v);
> >
> > Why do you need to declare the cpu as non-runnable when a watchdog
> > event occured?
>
> It's the other way around -- it's always runnable when a watchdog exit is
> pending. It's like a pending exception.
With the above check, Are we trying to handle the case where watchdog interrupt bit in pending_exception is cleared by guest after final expiry but before the qemu exit? And we want that if TSR.WRS update wins the race with clearing of watchdog interrupt condition from guest then anyways let QEMU exit with reason KVM_EXIT_WDT?
What if we do not allow guest clear watchdog interrupt condition if final expiry already happened? And let QEMU clear TSR.ENW | TSR.WIS | TSR.WRS in all selected policy (debug | reset etc).
Thanks
-Bharat
>
> >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h index
> >> 2ce09aa..b9fdb52 100644
> >> --- a/include/linux/kvm.h
> >> +++ b/include/linux/kvm.h
> >> @@ -163,6 +163,7 @@ struct kvm_pit_config {
> >> #define KVM_EXIT_OSI 18
> >> #define KVM_EXIT_PAPR_HCALL 19
> >> #define KVM_EXIT_S390_UCONTROL 20
> >> +#define KVM_EXIT_WDT 21
> >
> > Please make this a more generic KVM_EXIT_WATCHDOG so that other archs
> > may have the chance to reuse it.
>
> WDT is generic (85 of 115 files in drivers/watchdog/ contain "wdt" in their
> names), just overly abbreviated. KVM_EXIT_WATCHDOG is more readable.
>
> -Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 9:57 ` Bhushan Bharat-R65777
@ 2012-07-17 12:51 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 12:51 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 12:50 PM
>> To: Wood Scott-B07421
>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Bhushan Bharat-R65777; Benjamin Herrenschmidt; Kumar
>> Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>>
>>
>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>
>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>> +/*
>>>>> + * Return the number of jiffies until the next timeout. If the
>>>> timeout is
>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>> then?
>>>>
>>>>> return NEXT_TIMER_MAX_DELTA
>>>>> + * instead.
>>>> I can read code.
>>> Come on, it's not exactly x++; /* add one to x */
>>>
>>> It's faster to read code (as well as know the constraints within which
>>> you can modify it without having to spend a lot of time digesting all
>>> the callers' use cases) when you have a high level description of its
>>> interface contract, and can be selective about when to zoom in to the
>>> details. Linux kernel code tends to be bad about this.
>> Yeah, not opposed to leave that part in :).
>>
>>>> The important piece of information in the comment is
>>>> missing: The reason.
>>> The reason for what? Why you want to know the next timeout? That's
>>> the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>> Why we use the limit. IIRC it was explained in the last thread, just didn't make
>> its way into the comment.
> Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a purpose, so the comment described the puspose).
> Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h), so removed the comment.
Ah, ok. Just saying, if you comment on some mechanism, like you did
here, please also include the reasoning behind it. For example
Do foo if x is true.
isn't particularly helpful. However
Do foo if x is true because the bar API will break with high values
is very helpful. It includes the action and reason of the code :).
Alternatively, to me the same as above would be
/* bar API will break with high values */
if (x)
do(foo)
because in this case the code is the action description. Either variant
works fine for me.
>
>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>> + u32 tsr, new_tsr;
>>>>> + int final;
>>>>> +
>>>>> + do {
>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>> + final = 0;
>>>>> +
>>>>> + /* Time out event */
>>>>> + if (tsr & TSR_ENW) {
>>>>> + if (tsr & TSR_WIS) {
>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>> We've discussed this before. TSR updates are done via atomics, and we
>>> send a request for the vcpu to act on the result. This is how the
>>> decrementer works.
>>>
>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>> Yeah, the major difference to the dec is the atomicity of the whole thing. Dec
>> changes one bit to enable the interrupt line. The final expiration is more
>> complex.
> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
Final expiration sets TCR. TSR should be ok.
>
>>>> This is the watdog expired case, right?
>>> Final expiration, yes.
>>>
>>>> I'd also prefer to have an
>>>> explicit event for the expiry than a special TSR check in the main loop.
>>> So check TSR[WRS] in update_timer_ints(), and have it queue a
>>> pseudoexception?
>> Or here.
> Do we mean define a sudo IROPRIO for final expiry.
We can also define an event that is sent through kvm_make_request. But
yeah, IRQPRIO is probably easier. Not 100% sure which way is better
though. Avi, any preferences?
>
>>> That would eliminate the need to change the runnable function.
>>>
>>>> Also call me sceptic on the reset of tcr. If our user space watchdog
>>>> event is "write a message", then we essentially want to hide the fact
>>>> that the watchdog expired from the guest, right? In that case, the
>>>> second time-out wouldn't do anything guest visible.
>>> This was probably copied straight out of the hardware documentation,
>>> which explicitly says TCR[WRC] gets set to zero on final expiration
>>> (as part of reset). We should leave that part up to userspace. It
>>> definitely shouldn't be done inside the cmpxchg loop (or from
>>> interrupt context -- only TSR gets the atomic treatment). I don't
>>> think the read of TCR outside vcpu context is a problem, though.
>> Yeah, but it'd just make me less wary if only the vcpu thread itself accesses
>> vcpu internal registers that aren't irq state and thus designed for it (TSR).
>>
>> But yes, the most flexible way would probably be to do it from user space. Since
>> it'd happen from within the vcpu context of user space, we can also guarantee
>> that the TCR access is atomic.
> Yes, will move the tcr.wrc clearing to userspace.
>
>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>> - !!(v->arch.pending_exceptions) ||
>>>>> - v->requests;
>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>> + !!(v->arch.pending_exceptions) ||
>>>>> + v->requests;
>>>>> +
>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>> event occured?
>>> It's the other way around -- it's always runnable when a watchdog exit
>>> is pending. It's like a pending exception.
>> Ah, so yes, we should just shove it into pending_exceptions then.
> Pending_exception? You mean sudo again here as said earlier.
pseudo :). Yeah, I'm referring to above. No need to check 500 different
conditions when we already have a bitmap that says "event is pending".
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 12:51 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 12:51 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 12:50 PM
>> To: Wood Scott-B07421
>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Bhushan Bharat-R65777; Benjamin Herrenschmidt; Kumar
>> Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>>
>>
>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>
>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>> +/*
>>>>> + * Return the number of jiffies until the next timeout. If the
>>>> timeout is
>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>> then?
>>>>
>>>>> return NEXT_TIMER_MAX_DELTA
>>>>> + * instead.
>>>> I can read code.
>>> Come on, it's not exactly x++; /* add one to x */
>>>
>>> It's faster to read code (as well as know the constraints within which
>>> you can modify it without having to spend a lot of time digesting all
>>> the callers' use cases) when you have a high level description of its
>>> interface contract, and can be selective about when to zoom in to the
>>> details. Linux kernel code tends to be bad about this.
>> Yeah, not opposed to leave that part in :).
>>
>>>> The important piece of information in the comment is
>>>> missing: The reason.
>>> The reason for what? Why you want to know the next timeout? That's
>>> the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>> Why we use the limit. IIRC it was explained in the last thread, just didn't make
>> its way into the comment.
> Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a purpose, so the comment described the puspose).
> Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h), so removed the comment.
Ah, ok. Just saying, if you comment on some mechanism, like you did
here, please also include the reasoning behind it. For example
Do foo if x is true.
isn't particularly helpful. However
Do foo if x is true because the bar API will break with high values
is very helpful. It includes the action and reason of the code :).
Alternatively, to me the same as above would be
/* bar API will break with high values */
if (x)
do(foo)
because in this case the code is the action description. Either variant
works fine for me.
>
>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>> + u32 tsr, new_tsr;
>>>>> + int final;
>>>>> +
>>>>> + do {
>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>> + final = 0;
>>>>> +
>>>>> + /* Time out event */
>>>>> + if (tsr & TSR_ENW) {
>>>>> + if (tsr & TSR_WIS) {
>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>> We've discussed this before. TSR updates are done via atomics, and we
>>> send a request for the vcpu to act on the result. This is how the
>>> decrementer works.
>>>
>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>> Yeah, the major difference to the dec is the atomicity of the whole thing. Dec
>> changes one bit to enable the interrupt line. The final expiration is more
>> complex.
> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
Final expiration sets TCR. TSR should be ok.
>
>>>> This is the watdog expired case, right?
>>> Final expiration, yes.
>>>
>>>> I'd also prefer to have an
>>>> explicit event for the expiry than a special TSR check in the main loop.
>>> So check TSR[WRS] in update_timer_ints(), and have it queue a
>>> pseudoexception?
>> Or here.
> Do we mean define a sudo IROPRIO for final expiry.
We can also define an event that is sent through kvm_make_request. But
yeah, IRQPRIO is probably easier. Not 100% sure which way is better
though. Avi, any preferences?
>
>>> That would eliminate the need to change the runnable function.
>>>
>>>> Also call me sceptic on the reset of tcr. If our user space watchdog
>>>> event is "write a message", then we essentially want to hide the fact
>>>> that the watchdog expired from the guest, right? In that case, the
>>>> second time-out wouldn't do anything guest visible.
>>> This was probably copied straight out of the hardware documentation,
>>> which explicitly says TCR[WRC] gets set to zero on final expiration
>>> (as part of reset). We should leave that part up to userspace. It
>>> definitely shouldn't be done inside the cmpxchg loop (or from
>>> interrupt context -- only TSR gets the atomic treatment). I don't
>>> think the read of TCR outside vcpu context is a problem, though.
>> Yeah, but it'd just make me less wary if only the vcpu thread itself accesses
>> vcpu internal registers that aren't irq state and thus designed for it (TSR).
>>
>> But yes, the most flexible way would probably be to do it from user space. Since
>> it'd happen from within the vcpu context of user space, we can also guarantee
>> that the TCR access is atomic.
> Yes, will move the tcr.wrc clearing to userspace.
>
>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>> - !!(v->arch.pending_exceptions) ||
>>>>> - v->requests;
>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>> + !!(v->arch.pending_exceptions) ||
>>>>> + v->requests;
>>>>> +
>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>> event occured?
>>> It's the other way around -- it's always runnable when a watchdog exit
>>> is pending. It's like a pending exception.
>> Ah, so yes, we should just shove it into pending_exceptions then.
> Pending_exception? You mean sudo again here as said earlier.
pseudo :). Yeah, I'm referring to above. No need to check 500 different
conditions when we already have a bitmap that says "event is pending".
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 12:51 ` Alexander Graf
(?)
@ 2012-07-17 13:15 ` Bhushan Bharat-R65777
2012-07-17 14:01 ` Alexander Graf
-1 siblings, 1 reply; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 13:15 UTC (permalink / raw)
To: Alexander Graf
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Tuesday, July 17, 2012 6:22 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Tuesday, July 17, 2012 12:50 PM
> >> To: Wood Scott-B07421
> >> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
> >> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
> >> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
> >> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>
> >>
> >>
> >> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
> >>
> >>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
> >>>>> +/*
> >>>>> + * Return the number of jiffies until the next timeout. If the
> >>>> timeout is
> >>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
> >>>> then?
> >>>>
> >>>>> return NEXT_TIMER_MAX_DELTA
> >>>>> + * instead.
> >>>> I can read code.
> >>> Come on, it's not exactly x++; /* add one to x */
> >>>
> >>> It's faster to read code (as well as know the constraints within
> >>> which you can modify it without having to spend a lot of time
> >>> digesting all the callers' use cases) when you have a high level
> >>> description of its interface contract, and can be selective about
> >>> when to zoom in to the details. Linux kernel code tends to be bad about
> this.
> >> Yeah, not opposed to leave that part in :).
> >>
> >>>> The important piece of information in the comment is
> >>>> missing: The reason.
> >>> The reason for what? Why you want to know the next timeout? That's
> >>> the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
> >> Why we use the limit. IIRC it was explained in the last thread, just
> >> didn't make its way into the comment.
> > Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a
> purpose, so the comment described the puspose).
> > Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h),
> so removed the comment.
>
> Ah, ok. Just saying, if you comment on some mechanism, like you did here, please
> also include the reasoning behind it. For example
>
> Do foo if x is true.
>
> isn't particularly helpful. However
>
> Do foo if x is true because the bar API will break with high values
>
> is very helpful. It includes the action and reason of the code :).
> Alternatively, to me the same as above would be
>
> /* bar API will break with high values */
> if (x)
> do(foo)
>
> because in this case the code is the action description. Either variant works
> fine for me.
Ok :)
>
> >
> >>>>> +void kvmppc_watchdog_func(unsigned long data) {
> >>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>>> + u32 tsr, new_tsr;
> >>>>> + int final;
> >>>>> +
> >>>>> + do {
> >>>>> + new_tsr = tsr = vcpu->arch.tsr;
> >>>>> + final = 0;
> >>>>> +
> >>>>> + /* Time out event */
> >>>>> + if (tsr & TSR_ENW) {
> >>>>> + if (tsr & TSR_WIS) {
> >>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> >>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
> >>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
> >>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
> >>> We've discussed this before. TSR updates are done via atomics, and
> >>> we send a request for the vcpu to act on the result. This is how
> >>> the decrementer works.
> >>>
> >>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
> >> Yeah, the major difference to the dec is the atomicity of the whole
> >> thing. Dec changes one bit to enable the interrupt line. The final
> >> expiration is more complex.
> > Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>
> Final expiration sets TCR. TSR should be ok.
It is clearing some TCR bits :)
Let us move the TCR clearing to userspace (please see below response ^^). Then it is just setting TSR. Right?
>
> >
> >>>> This is the watdog expired case, right?
> >>> Final expiration, yes.
> >>>
> >>>> I'd also prefer to have an
> >>>> explicit event for the expiry than a special TSR check in the main loop.
> >>> So check TSR[WRS] in update_timer_ints(), and have it queue a
> >>> pseudoexception?
> >> Or here.
> > Do we mean define a sudo IROPRIO for final expiry.
>
> We can also define an event that is sent through kvm_make_request. But yeah,
> IRQPRIO is probably easier. Not 100% sure which way is better though. Avi, any
> preferences?
>
> >
> >>> That would eliminate the need to change the runnable function.
> >>>
> >>>> Also call me sceptic on the reset of tcr. If our user space watchdog
> >>>> event is "write a message", then we essentially want to hide the fact
> >>>> that the watchdog expired from the guest, right? In that case, the
> >>>> second time-out wouldn't do anything guest visible.
> >>> This was probably copied straight out of the hardware documentation,
> >>> which explicitly says TCR[WRC] gets set to zero on final expiration
> >>> (as part of reset). We should leave that part up to userspace. It
> >>> definitely shouldn't be done inside the cmpxchg loop (or from
> >>> interrupt context -- only TSR gets the atomic treatment). I don't
> >>> think the read of TCR outside vcpu context is a problem, though.
> >> Yeah, but it'd just make me less wary if only the vcpu thread itself accesses
> >> vcpu internal registers that aren't irq state and thus designed for it (TSR).
> >>
> >> But yes, the most flexible way would probably be to do it from user space.
> Since
> >> it'd happen from within the vcpu context of user space, we can also guarantee
> >> that the TCR access is atomic.
> > Yes, will move the tcr.wrc clearing to userspace.
^^ Here .. It is good to move clearing the TCR to guest.
Thanks
-Bharat
> >
> >>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
> >>>>> - return !(v->arch.shared->msr & MSR_WE) ||
> >>>>> - !!(v->arch.pending_exceptions) ||
> >>>>> - v->requests;
> >>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> >>>>> + !!(v->arch.pending_exceptions) ||
> >>>>> + v->requests;
> >>>>> +
> >>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
> >>>> Why do you need to declare the cpu as non-runnable when a watchdog
> >>>> event occured?
> >>> It's the other way around -- it's always runnable when a watchdog exit
> >>> is pending. It's like a pending exception.
> >> Ah, so yes, we should just shove it into pending_exceptions then.
> > Pending_exception? You mean sudo again here as said earlier.
>
> pseudo :). Yeah, I'm referring to above. No need to check 500 different
> conditions when we already have a bitmap that says "event is pending".
>
>
> Alex
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 13:15 ` Bhushan Bharat-R65777
@ 2012-07-17 14:01 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 14:01 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 6:22 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 12:50 PM
>>>> To: Wood Scott-B07421
>>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
>>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>>
>>>>
>>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>>>
>>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>>>> +/*
>>>>>>> + * Return the number of jiffies until the next timeout. If the
>>>>>> timeout is
>>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>>>> then?
>>>>>>
>>>>>>> return NEXT_TIMER_MAX_DELTA
>>>>>>> + * instead.
>>>>>> I can read code.
>>>>> Come on, it's not exactly x++; /* add one to x */
>>>>>
>>>>> It's faster to read code (as well as know the constraints within
>>>>> which you can modify it without having to spend a lot of time
>>>>> digesting all the callers' use cases) when you have a high level
>>>>> description of its interface contract, and can be selective about
>>>>> when to zoom in to the details. Linux kernel code tends to be bad about
>> this.
>>>> Yeah, not opposed to leave that part in :).
>>>>
>>>>>> The important piece of information in the comment is
>>>>>> missing: The reason.
>>>>> The reason for what? Why you want to know the next timeout? That's
>>>>> the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>>>> Why we use the limit. IIRC it was explained in the last thread, just
>>>> didn't make its way into the comment.
>>> Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a
>> purpose, so the comment described the puspose).
>>> Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h),
>> so removed the comment.
>>
>> Ah, ok. Just saying, if you comment on some mechanism, like you did here, please
>> also include the reasoning behind it. For example
>>
>> Do foo if x is true.
>>
>> isn't particularly helpful. However
>>
>> Do foo if x is true because the bar API will break with high values
>>
>> is very helpful. It includes the action and reason of the code :).
>> Alternatively, to me the same as above would be
>>
>> /* bar API will break with high values */
>> if (x)
>> do(foo)
>>
>> because in this case the code is the action description. Either variant works
>> fine for me.
> Ok :)
>
>>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>>>> + u32 tsr, new_tsr;
>>>>>>> + int final;
>>>>>>> +
>>>>>>> + do {
>>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>>>> + final = 0;
>>>>>>> +
>>>>>>> + /* Time out event */
>>>>>>> + if (tsr & TSR_ENW) {
>>>>>>> + if (tsr & TSR_WIS) {
>>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>>>> We've discussed this before. TSR updates are done via atomics, and
>>>>> we send a request for the vcpu to act on the result. This is how
>>>>> the decrementer works.
>>>>>
>>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>>>> Yeah, the major difference to the dec is the atomicity of the whole
>>>> thing. Dec changes one bit to enable the interrupt line. The final
>>>> expiration is more complex.
>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>> Final expiration sets TCR. TSR should be ok.
> It is clearing some TCR bits :)
>
> Let us move the TCR clearing to userspace (please see below response ^^). Then it is just setting TSR. Right?
Then TSR is still set, right? So we don't set it, it just keeps on being
1. We need to trigger another expired event in that if branch now.
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 14:01 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 14:01 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 6:22 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 12:50 PM
>>>> To: Wood Scott-B07421
>>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
>>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>>
>>>>
>>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>>>
>>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>>>> +/*
>>>>>>> + * Return the number of jiffies until the next timeout. If the
>>>>>> timeout is
>>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>>>> then?
>>>>>>
>>>>>>> return NEXT_TIMER_MAX_DELTA
>>>>>>> + * instead.
>>>>>> I can read code.
>>>>> Come on, it's not exactly x++; /* add one to x */
>>>>>
>>>>> It's faster to read code (as well as know the constraints within
>>>>> which you can modify it without having to spend a lot of time
>>>>> digesting all the callers' use cases) when you have a high level
>>>>> description of its interface contract, and can be selective about
>>>>> when to zoom in to the details. Linux kernel code tends to be bad about
>> this.
>>>> Yeah, not opposed to leave that part in :).
>>>>
>>>>>> The important piece of information in the comment is
>>>>>> missing: The reason.
>>>>> The reason for what? Why you want to know the next timeout? That's
>>>>> the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the limit?
>>>> Why we use the limit. IIRC it was explained in the last thread, just
>>>> didn't make its way into the comment.
>>> Earlier we have a comment on the #define MAX_TIMEOUT (new define added for a
>> purpose, so the comment described the puspose).
>>> Now we uses the generic #define NEX_TIMER_MAX_DELTA (include/linux/timer.h),
>> so removed the comment.
>>
>> Ah, ok. Just saying, if you comment on some mechanism, like you did here, please
>> also include the reasoning behind it. For example
>>
>> Do foo if x is true.
>>
>> isn't particularly helpful. However
>>
>> Do foo if x is true because the bar API will break with high values
>>
>> is very helpful. It includes the action and reason of the code :).
>> Alternatively, to me the same as above would be
>>
>> /* bar API will break with high values */
>> if (x)
>> do(foo)
>>
>> because in this case the code is the action description. Either variant works
>> fine for me.
> Ok :)
>
>>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>>>> + u32 tsr, new_tsr;
>>>>>>> + int final;
>>>>>>> +
>>>>>>> + do {
>>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>>>> + final = 0;
>>>>>>> +
>>>>>>> + /* Time out event */
>>>>>>> + if (tsr & TSR_ENW) {
>>>>>>> + if (tsr & TSR_WIS) {
>>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>>>> We've discussed this before. TSR updates are done via atomics, and
>>>>> we send a request for the vcpu to act on the result. This is how
>>>>> the decrementer works.
>>>>>
>>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>>>> Yeah, the major difference to the dec is the atomicity of the whole
>>>> thing. Dec changes one bit to enable the interrupt line. The final
>>>> expiration is more complex.
>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>> Final expiration sets TCR. TSR should be ok.
> It is clearing some TCR bits :)
>
> Let us move the TCR clearing to userspace (please see below response ^^). Then it is just setting TSR. Right?
Then TSR is still set, right? So we don't set it, it just keeps on being
1. We need to trigger another expired event in that if branch now.
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 14:01 ` Alexander Graf
(?)
@ 2012-07-17 14:13 ` Bhushan Bharat-R65777
2012-07-17 14:35 ` Alexander Graf
-1 siblings, 1 reply; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 14:13 UTC (permalink / raw)
To: Alexander Graf
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Tuesday, July 17, 2012 7:31 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Tuesday, July 17, 2012 6:22 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
> >> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
> >> Herrenschmidt; Kumar Gala; Avi Kivity
> >> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>
> >> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
> >>>> -----Original Message-----
> >>>> From: kvm-ppc-owner@vger.kernel.org
> >>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >>>> Sent: Tuesday, July 17, 2012 12:50 PM
> >>>> To: Wood Scott-B07421
> >>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
> >>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
> >>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
> >>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>>>
> >>>>
> >>>>
> >>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
> >>>>
> >>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
> >>>>>>> +/*
> >>>>>>> + * Return the number of jiffies until the next timeout. If the
> >>>>>> timeout is
> >>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
> >>>>>> then?
> >>>>>>
> >>>>>>> return NEXT_TIMER_MAX_DELTA
> >>>>>>> + * instead.
> >>>>>> I can read code.
> >>>>> Come on, it's not exactly x++; /* add one to x */
> >>>>>
> >>>>> It's faster to read code (as well as know the constraints within
> >>>>> which you can modify it without having to spend a lot of time
> >>>>> digesting all the callers' use cases) when you have a high level
> >>>>> description of its interface contract, and can be selective about
> >>>>> when to zoom in to the details. Linux kernel code tends to be bad
> >>>>> about
> >> this.
> >>>> Yeah, not opposed to leave that part in :).
> >>>>
> >>>>>> The important piece of information in the comment is
> >>>>>> missing: The reason.
> >>>>> The reason for what? Why you want to know the next timeout?
> >>>>> That's the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the
> limit?
> >>>> Why we use the limit. IIRC it was explained in the last thread,
> >>>> just didn't make its way into the comment.
> >>> Earlier we have a comment on the #define MAX_TIMEOUT (new define
> >>> added for a
> >> purpose, so the comment described the puspose).
> >>> Now we uses the generic #define NEX_TIMER_MAX_DELTA
> >>> (include/linux/timer.h),
> >> so removed the comment.
> >>
> >> Ah, ok. Just saying, if you comment on some mechanism, like you did
> >> here, please also include the reasoning behind it. For example
> >>
> >> Do foo if x is true.
> >>
> >> isn't particularly helpful. However
> >>
> >> Do foo if x is true because the bar API will break with high
> >> values
> >>
> >> is very helpful. It includes the action and reason of the code :).
> >> Alternatively, to me the same as above would be
> >>
> >> /* bar API will break with high values */
> >> if (x)
> >> do(foo)
> >>
> >> because in this case the code is the action description. Either
> >> variant works fine for me.
> > Ok :)
> >
> >>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
> >>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>>>>> + u32 tsr, new_tsr;
> >>>>>>> + int final;
> >>>>>>> +
> >>>>>>> + do {
> >>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
> >>>>>>> + final = 0;
> >>>>>>> +
> >>>>>>> + /* Time out event */
> >>>>>>> + if (tsr & TSR_ENW) {
> >>>>>>> + if (tsr & TSR_WIS) {
> >>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> >>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
> >>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
> >>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
> >>>>> We've discussed this before. TSR updates are done via atomics,
> >>>>> and we send a request for the vcpu to act on the result. This is
> >>>>> how the decrementer works.
> >>>>>
> >>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
> >>>> Yeah, the major difference to the dec is the atomicity of the whole
> >>>> thing. Dec changes one bit to enable the interrupt line. The final
> >>>> expiration is more complex.
> >>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
> >> Final expiration sets TCR. TSR should be ok.
> > It is clearing some TCR bits :)
> >
> > Let us move the TCR clearing to userspace (please see below response ^^). Then
> it is just setting TSR. Right?
>
> Then TSR is still set, right? So we don't set it, it just keeps on being 1. We
> need to trigger another expired event in that if branch now.
I am sorry Alex but I did not get you.
What I meant was that you were having concern that dec is atomic while watchdog final expiration (complex) is not atomic, but if we move the TCR.WRC clearing to userspace on final expiration then are you ok with current code?
Thanks
-Bharat
>
>
> Alex
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 14:13 ` Bhushan Bharat-R65777
@ 2012-07-17 14:35 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 14:35 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 7:31 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>>> Sent: Tuesday, July 17, 2012 12:50 PM
>>>>>> To: Wood Scott-B07421
>>>>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
>>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
>>>>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
>>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>
>>>>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>>>>>> +/*
>>>>>>>>> + * Return the number of jiffies until the next timeout. If the
>>>>>>>> timeout is
>>>>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>>>>>> then?
>>>>>>>>
>>>>>>>>> return NEXT_TIMER_MAX_DELTA
>>>>>>>>> + * instead.
>>>>>>>> I can read code.
>>>>>>> Come on, it's not exactly x++; /* add one to x */
>>>>>>>
>>>>>>> It's faster to read code (as well as know the constraints within
>>>>>>> which you can modify it without having to spend a lot of time
>>>>>>> digesting all the callers' use cases) when you have a high level
>>>>>>> description of its interface contract, and can be selective about
>>>>>>> when to zoom in to the details. Linux kernel code tends to be bad
>>>>>>> about
>>>> this.
>>>>>> Yeah, not opposed to leave that part in :).
>>>>>>
>>>>>>>> The important piece of information in the comment is
>>>>>>>> missing: The reason.
>>>>>>> The reason for what? Why you want to know the next timeout?
>>>>>>> That's the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the
>> limit?
>>>>>> Why we use the limit. IIRC it was explained in the last thread,
>>>>>> just didn't make its way into the comment.
>>>>> Earlier we have a comment on the #define MAX_TIMEOUT (new define
>>>>> added for a
>>>> purpose, so the comment described the puspose).
>>>>> Now we uses the generic #define NEX_TIMER_MAX_DELTA
>>>>> (include/linux/timer.h),
>>>> so removed the comment.
>>>>
>>>> Ah, ok. Just saying, if you comment on some mechanism, like you did
>>>> here, please also include the reasoning behind it. For example
>>>>
>>>> Do foo if x is true.
>>>>
>>>> isn't particularly helpful. However
>>>>
>>>> Do foo if x is true because the bar API will break with high
>>>> values
>>>>
>>>> is very helpful. It includes the action and reason of the code :).
>>>> Alternatively, to me the same as above would be
>>>>
>>>> /* bar API will break with high values */
>>>> if (x)
>>>> do(foo)
>>>>
>>>> because in this case the code is the action description. Either
>>>> variant works fine for me.
>>> Ok :)
>>>
>>>>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>>>>>> + u32 tsr, new_tsr;
>>>>>>>>> + int final;
>>>>>>>>> +
>>>>>>>>> + do {
>>>>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>>>>>> + final = 0;
>>>>>>>>> +
>>>>>>>>> + /* Time out event */
>>>>>>>>> + if (tsr & TSR_ENW) {
>>>>>>>>> + if (tsr & TSR_WIS) {
>>>>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>>>>>> We've discussed this before. TSR updates are done via atomics,
>>>>>>> and we send a request for the vcpu to act on the result. This is
>>>>>>> how the decrementer works.
>>>>>>>
>>>>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>>>>>> Yeah, the major difference to the dec is the atomicity of the whole
>>>>>> thing. Dec changes one bit to enable the interrupt line. The final
>>>>>> expiration is more complex.
>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>> Final expiration sets TCR. TSR should be ok.
>>> It is clearing some TCR bits :)
>>>
>>> Let us move the TCR clearing to userspace (please see below response ^^). Then
>> it is just setting TSR. Right?
>>
>> Then TSR is still set, right? So we don't set it, it just keeps on being 1. We
>> need to trigger another expired event in that if branch now.
> I am sorry Alex but I did not get you.
>
> What I meant was that you were having concern that dec is atomic while watchdog final expiration (complex) is not atomic, but if we move the TCR.WRC clearing to userspace on final expiration then are you ok with current code?
If we move the final expiration TCR.WRC clearing out of that branch, you
don't have any condition left to check if we are in the final expiration
path. So you need to set some other bit somewhere (eventually
pending_exceptions probably) that indicates that we need to return to
user space.
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 14:35 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 14:35 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Tuesday, July 17, 2012 7:31 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>>> Sent: Tuesday, July 17, 2012 12:50 PM
>>>>>> To: Wood Scott-B07421
>>>>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
>>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
>>>>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
>>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>
>>>>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
>>>>>>>>> +/*
>>>>>>>>> + * Return the number of jiffies until the next timeout. If the
>>>>>>>> timeout is
>>>>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
>>>>>>>> then?
>>>>>>>>
>>>>>>>>> return NEXT_TIMER_MAX_DELTA
>>>>>>>>> + * instead.
>>>>>>>> I can read code.
>>>>>>> Come on, it's not exactly x++; /* add one to x */
>>>>>>>
>>>>>>> It's faster to read code (as well as know the constraints within
>>>>>>> which you can modify it without having to spend a lot of time
>>>>>>> digesting all the callers' use cases) when you have a high level
>>>>>>> description of its interface contract, and can be selective about
>>>>>>> when to zoom in to the details. Linux kernel code tends to be bad
>>>>>>> about
>>>> this.
>>>>>> Yeah, not opposed to leave that part in :).
>>>>>>
>>>>>>>> The important piece of information in the comment is
>>>>>>>> missing: The reason.
>>>>>>> The reason for what? Why you want to know the next timeout?
>>>>>>> That's the caller's business. Or why we use NEXT_TIMER_MAX_DELTA as the
>> limit?
>>>>>> Why we use the limit. IIRC it was explained in the last thread,
>>>>>> just didn't make its way into the comment.
>>>>> Earlier we have a comment on the #define MAX_TIMEOUT (new define
>>>>> added for a
>>>> purpose, so the comment described the puspose).
>>>>> Now we uses the generic #define NEX_TIMER_MAX_DELTA
>>>>> (include/linux/timer.h),
>>>> so removed the comment.
>>>>
>>>> Ah, ok. Just saying, if you comment on some mechanism, like you did
>>>> here, please also include the reasoning behind it. For example
>>>>
>>>> Do foo if x is true.
>>>>
>>>> isn't particularly helpful. However
>>>>
>>>> Do foo if x is true because the bar API will break with high
>>>> values
>>>>
>>>> is very helpful. It includes the action and reason of the code :).
>>>> Alternatively, to me the same as above would be
>>>>
>>>> /* bar API will break with high values */
>>>> if (x)
>>>> do(foo)
>>>>
>>>> because in this case the code is the action description. Either
>>>> variant works fine for me.
>>> Ok :)
>>>
>>>>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
>>>>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>>>>>> + u32 tsr, new_tsr;
>>>>>>>>> + int final;
>>>>>>>>> +
>>>>>>>>> + do {
>>>>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
>>>>>>>>> + final = 0;
>>>>>>>>> +
>>>>>>>>> + /* Time out event */
>>>>>>>>> + if (tsr & TSR_ENW) {
>>>>>>>>> + if (tsr & TSR_WIS) {
>>>>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
>>>>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
>>>>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
>>>>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
>>>>>>> We've discussed this before. TSR updates are done via atomics,
>>>>>>> and we send a request for the vcpu to act on the result. This is
>>>>>>> how the decrementer works.
>>>>>>>
>>>>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
>>>>>> Yeah, the major difference to the dec is the atomicity of the whole
>>>>>> thing. Dec changes one bit to enable the interrupt line. The final
>>>>>> expiration is more complex.
>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>> Final expiration sets TCR. TSR should be ok.
>>> It is clearing some TCR bits :)
>>>
>>> Let us move the TCR clearing to userspace (please see below response ^^). Then
>> it is just setting TSR. Right?
>>
>> Then TSR is still set, right? So we don't set it, it just keeps on being 1. We
>> need to trigger another expired event in that if branch now.
> I am sorry Alex but I did not get you.
>
> What I meant was that you were having concern that dec is atomic while watchdog final expiration (complex) is not atomic, but if we move the TCR.WRC clearing to userspace on final expiration then are you ok with current code?
If we move the final expiration TCR.WRC clearing out of that branch, you
don't have any condition left to check if we are in the final expiration
path. So you need to set some other bit somewhere (eventually
pending_exceptions probably) that indicates that we need to return to
user space.
Alex
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 14:35 ` Alexander Graf
(?)
@ 2012-07-17 16:10 ` Bhushan Bharat-R65777
-1 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 16:10 UTC (permalink / raw)
To: Alexander Graf
Cc: Wood Scott-B07421, <kvm-ppc@vger.kernel.org>,
<kvm@vger.kernel.org>, <bharatb.yadav@gmail.com>,
Benjamin Herrenschmidt, Kumar Gala, Avi Kivity
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Tuesday, July 17, 2012 8:05 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi Kivity
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Tuesday, July 17, 2012 7:31 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
> >> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
> >> Herrenschmidt; Kumar Gala; Avi Kivity
> >> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>
> >> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
> >>>> -----Original Message-----
> >>>> From: kvm-ppc-owner@vger.kernel.org
> >>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >>>> Sent: Tuesday, July 17, 2012 6:22 PM
> >>>> To: Bhushan Bharat-R65777
> >>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
> >>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
> >>>> Herrenschmidt; Kumar Gala; Avi Kivity
> >>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>>>
> >>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
> >>>>>> -----Original Message-----
> >>>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander
> >>>>>> Graf
> >>>>>> Sent: Tuesday, July 17, 2012 12:50 PM
> >>>>>> To: Wood Scott-B07421
> >>>>>> Cc: Bhushan Bharat-R65777; <kvm-ppc@vger.kernel.org>;
> >>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Bhushan
> >>>>>> Bharat-R65777; Benjamin Herrenschmidt; Kumar Gala
> >>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog
> >>>>>> emulation
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 17.07.2012, at 03:02, Scott Wood <scottwood@freescale.com> wrote:
> >>>>>>
> >>>>>>> On 07/16/2012 12:18 PM, Alexander Graf wrote:
> >>>>>>>>> +/*
> >>>>>>>>> + * Return the number of jiffies until the next timeout. If
> >>>>>>>>> +the
> >>>>>>>> timeout is
> >>>>>>>>> + * longer than the NEXT_TIMER_MAX_DELTA, that
> >>>>>>>> then?
> >>>>>>>>
> >>>>>>>>> return NEXT_TIMER_MAX_DELTA
> >>>>>>>>> + * instead.
> >>>>>>>> I can read code.
> >>>>>>> Come on, it's not exactly x++; /* add one to x */
> >>>>>>>
> >>>>>>> It's faster to read code (as well as know the constraints within
> >>>>>>> which you can modify it without having to spend a lot of time
> >>>>>>> digesting all the callers' use cases) when you have a high level
> >>>>>>> description of its interface contract, and can be selective
> >>>>>>> about when to zoom in to the details. Linux kernel code tends
> >>>>>>> to be bad about
> >>>> this.
> >>>>>> Yeah, not opposed to leave that part in :).
> >>>>>>
> >>>>>>>> The important piece of information in the comment is
> >>>>>>>> missing: The reason.
> >>>>>>> The reason for what? Why you want to know the next timeout?
> >>>>>>> That's the caller's business. Or why we use
> >>>>>>> NEXT_TIMER_MAX_DELTA as the
> >> limit?
> >>>>>> Why we use the limit. IIRC it was explained in the last thread,
> >>>>>> just didn't make its way into the comment.
> >>>>> Earlier we have a comment on the #define MAX_TIMEOUT (new define
> >>>>> added for a
> >>>> purpose, so the comment described the puspose).
> >>>>> Now we uses the generic #define NEX_TIMER_MAX_DELTA
> >>>>> (include/linux/timer.h),
> >>>> so removed the comment.
> >>>>
> >>>> Ah, ok. Just saying, if you comment on some mechanism, like you did
> >>>> here, please also include the reasoning behind it. For example
> >>>>
> >>>> Do foo if x is true.
> >>>>
> >>>> isn't particularly helpful. However
> >>>>
> >>>> Do foo if x is true because the bar API will break with high
> >>>> values
> >>>>
> >>>> is very helpful. It includes the action and reason of the code :).
> >>>> Alternatively, to me the same as above would be
> >>>>
> >>>> /* bar API will break with high values */
> >>>> if (x)
> >>>> do(foo)
> >>>>
> >>>> because in this case the code is the action description. Either
> >>>> variant works fine for me.
> >>> Ok :)
> >>>
> >>>>>>>>> +void kvmppc_watchdog_func(unsigned long data) {
> >>>>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>>>>>>> + u32 tsr, new_tsr;
> >>>>>>>>> + int final;
> >>>>>>>>> +
> >>>>>>>>> + do {
> >>>>>>>>> + new_tsr = tsr = vcpu->arch.tsr;
> >>>>>>>>> + final = 0;
> >>>>>>>>> +
> >>>>>>>>> + /* Time out event */
> >>>>>>>>> + if (tsr & TSR_ENW) {
> >>>>>>>>> + if (tsr & TSR_WIS) {
> >>>>>>>>> + new_tsr = (tsr & ~TCR_WRC_MASK) |
> >>>>>>>>> + (vcpu->arch.tcr & TCR_WRC_MASK);
> >>>>>>>>> + vcpu->arch.tcr &= ~TCR_WRC_MASK;
> >>>>>>>> Can't we just poke the vcpu to exit the VM and do the above on its own?
> >>>>>>> We've discussed this before. TSR updates are done via atomics,
> >>>>>>> and we send a request for the vcpu to act on the result. This
> >>>>>>> is how the decrementer works.
> >>>>>>>
> >>>>>>> http://www.spinics.net/lists/kvm-ppc/msg03169.html
> >>>>>> Yeah, the major difference to the dec is the atomicity of the
> >>>>>> whole thing. Dec changes one bit to enable the interrupt line.
> >>>>>> The final expiration is more complex.
> >>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
> >>>> Final expiration sets TCR. TSR should be ok.
> >>> It is clearing some TCR bits :)
> >>>
> >>> Let us move the TCR clearing to userspace (please see below response
> >>> ^^). Then
> >> it is just setting TSR. Right?
> >>
> >> Then TSR is still set, right? So we don't set it, it just keeps on
> >> being 1. We need to trigger another expired event in that if branch now.
> > I am sorry Alex but I did not get you.
> >
> > What I meant was that you were having concern that dec is atomic while
> watchdog final expiration (complex) is not atomic, but if we move the TCR.WRC
> clearing to userspace on final expiration then are you ok with current code?
>
> If we move the final expiration TCR.WRC clearing out of that branch, you don't
> have any condition left to check if we are in the final expiration path. So you
> need to set some other bit somewhere (eventually pending_exceptions probably)
> that indicates that we need to return to user space.
Thanks Alex...
We still have TSR.WRS (TCR.WRC is different if you are getting confused with that), we can use that as a condition for final expiration (the way we are using currently in code).
-Bharat
>
>
> Alex
>
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 14:35 ` Alexander Graf
@ 2012-07-17 16:27 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 16:27 UTC (permalink / raw)
To: Alexander Graf
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 09:35 AM, Alexander Graf wrote:
> On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>>
>>> -----Original Message-----
>>> From: kvm-ppc-owner@vger.kernel.org
>>> [mailto:kvm-ppc-owner@vger.kernel.org] On
>>> Behalf Of Alexander Graf
>>> Sent: Tuesday, July 17, 2012 7:31 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi
>>> Kivity
>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>
>>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>
>>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>>> Final expiration sets TCR. TSR should be ok.
>>>> It is clearing some TCR bits :)
>>>>
>>>> Let us move the TCR clearing to userspace (please see below response
>>>> ^^). Then
>>> it is just setting TSR. Right?
>>>
>>> Then TSR is still set, right? So we don't set it, it just keeps on
>>> being 1. We
>>> need to trigger another expired event in that if branch now.
>> I am sorry Alex but I did not get you.
>>
>> What I meant was that you were having concern that dec is atomic while
>> watchdog final expiration (complex) is not atomic, but if we move the
>> TCR.WRC clearing to userspace on final expiration then are you ok with
>> current code?
>
> If we move the final expiration TCR.WRC clearing out of that branch, you
> don't have any condition left to check if we are in the final expiration
> path.
I don't follow.
Determining whether we're on final expiration is based only on the
previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
to determine whether we need to actually exit to QEMU (as opposed to a
final expiration with no action, which still should result in the timer
being stopped), but I don't see any problematic races there as long as
we don't try to update TCR from that context.
> So you need to set some other bit somewhere (eventually
> pending_exceptions probably) that indicates that we need to return to
> user space.
We should do that to simplify other parts of the code, but I'm not sure
what it has to do with TCR[WRC].
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 16:27 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 16:27 UTC (permalink / raw)
To: Alexander Graf
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 09:35 AM, Alexander Graf wrote:
> On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>>
>>> -----Original Message-----
>>> From: kvm-ppc-owner@vger.kernel.org
>>> [mailto:kvm-ppc-owner@vger.kernel.org] On
>>> Behalf Of Alexander Graf
>>> Sent: Tuesday, July 17, 2012 7:31 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi
>>> Kivity
>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>
>>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>
>>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>>> Final expiration sets TCR. TSR should be ok.
>>>> It is clearing some TCR bits :)
>>>>
>>>> Let us move the TCR clearing to userspace (please see below response
>>>> ^^). Then
>>> it is just setting TSR. Right?
>>>
>>> Then TSR is still set, right? So we don't set it, it just keeps on
>>> being 1. We
>>> need to trigger another expired event in that if branch now.
>> I am sorry Alex but I did not get you.
>>
>> What I meant was that you were having concern that dec is atomic while
>> watchdog final expiration (complex) is not atomic, but if we move the
>> TCR.WRC clearing to userspace on final expiration then are you ok with
>> current code?
>
> If we move the final expiration TCR.WRC clearing out of that branch, you
> don't have any condition left to check if we are in the final expiration
> path.
I don't follow.
Determining whether we're on final expiration is based only on the
previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
to determine whether we need to actually exit to QEMU (as opposed to a
final expiration with no action, which still should result in the timer
being stopped), but I don't see any problematic races there as long as
we don't try to update TCR from that context.
> So you need to set some other bit somewhere (eventually
> pending_exceptions probably) that indicates that we need to return to
> user space.
We should do that to simplify other parts of the code, but I'm not sure
what it has to do with TCR[WRC].
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 11:31 ` Bhushan Bharat-R65777
@ 2012-07-17 16:37 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 16:37 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>> - !!(v->arch.pending_exceptions) ||
>>>> - v->requests;
>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>> + !!(v->arch.pending_exceptions) ||
>>>> + v->requests;
>>>> +
>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>
>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>> event occured?
>>
>> It's the other way around -- it's always runnable when a watchdog exit is
>> pending. It's like a pending exception.
>
> With the above check, Are we trying to handle the case where watchdog
> interrupt bit in pending_exception is cleared by guest after final
> expiry but before the qemu exit?
No, we're just trying to test the actual condition we want to exit on.
The watchdog interrupt might be masked (either with WIE or CE).
> And we want that if TSR.WRS update
> wins the race with clearing of watchdog interrupt condition from
> guest then anyways let QEMU exit with reason KVM_EXIT_WDT?
What race? If ENW and WIS are both set when the watchdog timer fires,
it's a final expiration. It's irrelevant what happens to WIS after that
point, before enforcement kicks in.
> What if we do not allow guest clear watchdog interrupt condition if
> final expiry already happened?
What would that solve?
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 16:37 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 16:37 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>> - !!(v->arch.pending_exceptions) ||
>>>> - v->requests;
>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>> + !!(v->arch.pending_exceptions) ||
>>>> + v->requests;
>>>> +
>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>
>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>> event occured?
>>
>> It's the other way around -- it's always runnable when a watchdog exit is
>> pending. It's like a pending exception.
>
> With the above check, Are we trying to handle the case where watchdog
> interrupt bit in pending_exception is cleared by guest after final
> expiry but before the qemu exit?
No, we're just trying to test the actual condition we want to exit on.
The watchdog interrupt might be masked (either with WIE or CE).
> And we want that if TSR.WRS update
> wins the race with clearing of watchdog interrupt condition from
> guest then anyways let QEMU exit with reason KVM_EXIT_WDT?
What race? If ENW and WIS are both set when the watchdog timer fires,
it's a final expiration. It's irrelevant what happens to WIS after that
point, before enforcement kicks in.
> What if we do not allow guest clear watchdog interrupt condition if
> final expiry already happened?
What would that solve?
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 16:27 ` Scott Wood
@ 2012-07-17 16:51 ` Alexander Graf
-1 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 16:51 UTC (permalink / raw)
To: Scott Wood
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 06:27 PM, Scott Wood wrote:
> On 07/17/2012 09:35 AM, Alexander Graf wrote:
>> On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On
>>>> Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 7:31 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>>>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi
>>>> Kivity
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>>
>>>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>>>> Final expiration sets TCR. TSR should be ok.
>>>>> It is clearing some TCR bits :)
>>>>>
>>>>> Let us move the TCR clearing to userspace (please see below response
>>>>> ^^). Then
>>>> it is just setting TSR. Right?
>>>>
>>>> Then TSR is still set, right? So we don't set it, it just keeps on
>>>> being 1. We
>>>> need to trigger another expired event in that if branch now.
>>> I am sorry Alex but I did not get you.
>>>
>>> What I meant was that you were having concern that dec is atomic while
>>> watchdog final expiration (complex) is not atomic, but if we move the
>>> TCR.WRC clearing to userspace on final expiration then are you ok with
>>> current code?
>> If we move the final expiration TCR.WRC clearing out of that branch, you
>> don't have any condition left to check if we are in the final expiration
>> path.
> I don't follow.
>
> Determining whether we're on final expiration is based only on the
> previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
> to determine whether we need to actually exit to QEMU (as opposed to a
> final expiration with no action, which still should result in the timer
> being stopped), but I don't see any problematic races there as long as
> we don't try to update TCR from that context.
Yeah, what I was trying to say is that I would like to keep the TSR
state unmodified for the final expiry case. If user space decides to not
act upon it, it should be able to call into VCPU_RUN and have the same
behavior as if the watchdog never expired.
So there needs to be a check whether we return to user space based on
TCR, yes. But we shouldn't modify TSR or TCR in that branch. We only
trigger an event saying "return to user space with a watchdog expired exit".
Then user space can decide whether to act on it. If it decides to ignore
it, we don't have an oddball TSR.WRS state lingering around that'd need
clearing.
That way we can put a single if (watchdog_enabled) on the qemu bubble-up
event injection. If it's not enabled, it'd be as if TCR.WRC is 0 and the
watchdog expired without action. If it's enabled, user space can decide
to fire it up again if it deems it necessary.
Alex
>
>> So you need to set some other bit somewhere (eventually
>> pending_exceptions probably) that indicates that we need to return to
>> user space.
> We should do that to simplify other parts of the code, but I'm not sure
> what it has to do with TCR[WRC].
>
> -Scott
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 16:51 ` Alexander Graf
0 siblings, 0 replies; 38+ messages in thread
From: Alexander Graf @ 2012-07-17 16:51 UTC (permalink / raw)
To: Scott Wood
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 06:27 PM, Scott Wood wrote:
> On 07/17/2012 09:35 AM, Alexander Graf wrote:
>> On 07/17/2012 04:13 PM, Bhushan Bharat-R65777 wrote:
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On
>>>> Behalf Of Alexander Graf
>>>> Sent: Tuesday, July 17, 2012 7:31 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>; <kvm@vger.kernel.org>;
>>>> <bharatb.yadav@gmail.com>; Benjamin Herrenschmidt; Kumar Gala; Avi
>>>> Kivity
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 03:15 PM, Bhushan Bharat-R65777 wrote:
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>>> Sent: Tuesday, July 17, 2012 6:22 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: Wood Scott-B07421; <kvm-ppc@vger.kernel.org>;
>>>>>> <kvm@vger.kernel.org>; <bharatb.yadav@gmail.com>; Benjamin
>>>>>> Herrenschmidt; Kumar Gala; Avi Kivity
>>>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>>>
>>>>>> On 07/17/2012 11:57 AM, Bhushan Bharat-R65777 wrote:
>>>>>>> Is not setting the TSR.WRS atomic here (cmpxchg() will handle this)?
>>>>>> Final expiration sets TCR. TSR should be ok.
>>>>> It is clearing some TCR bits :)
>>>>>
>>>>> Let us move the TCR clearing to userspace (please see below response
>>>>> ^^). Then
>>>> it is just setting TSR. Right?
>>>>
>>>> Then TSR is still set, right? So we don't set it, it just keeps on
>>>> being 1. We
>>>> need to trigger another expired event in that if branch now.
>>> I am sorry Alex but I did not get you.
>>>
>>> What I meant was that you were having concern that dec is atomic while
>>> watchdog final expiration (complex) is not atomic, but if we move the
>>> TCR.WRC clearing to userspace on final expiration then are you ok with
>>> current code?
>> If we move the final expiration TCR.WRC clearing out of that branch, you
>> don't have any condition left to check if we are in the final expiration
>> path.
> I don't follow.
>
> Determining whether we're on final expiration is based only on the
> previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
> to determine whether we need to actually exit to QEMU (as opposed to a
> final expiration with no action, which still should result in the timer
> being stopped), but I don't see any problematic races there as long as
> we don't try to update TCR from that context.
Yeah, what I was trying to say is that I would like to keep the TSR
state unmodified for the final expiry case. If user space decides to not
act upon it, it should be able to call into VCPU_RUN and have the same
behavior as if the watchdog never expired.
So there needs to be a check whether we return to user space based on
TCR, yes. But we shouldn't modify TSR or TCR in that branch. We only
trigger an event saying "return to user space with a watchdog expired exit".
Then user space can decide whether to act on it. If it decides to ignore
it, we don't have an oddball TSR.WRS state lingering around that'd need
clearing.
That way we can put a single if (watchdog_enabled) on the qemu bubble-up
event injection. If it's not enabled, it'd be as if TCR.WRC is 0 and the
watchdog expired without action. If it's enabled, user space can decide
to fire it up again if it deems it necessary.
Alex
>
>> So you need to set some other bit somewhere (eventually
>> pending_exceptions probably) that indicates that we need to return to
>> user space.
> We should do that to simplify other parts of the code, but I'm not sure
> what it has to do with TCR[WRC].
>
> -Scott
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 16:37 ` Scott Wood
@ 2012-07-17 16:56 ` Bhushan Bharat-R65777
-1 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 16:56 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogVHVlc2RheSwgSnVseSAxNywgMjAxMiAxMDowOCBQTQ0KPiBUbzogQmh1c2hh
biBCaGFyYXQtUjY1Nzc3DQo+IENjOiBXb29kIFNjb3R0LUIwNzQyMTsgQWxleGFuZGVyIEdyYWY7
IGt2bS1wcGNAdmdlci5rZXJuZWwub3JnOw0KPiBrdm1Admdlci5rZXJuZWwub3JnOyBiaGFyYXRi
LnlhZGF2QGdtYWlsLmNvbTsgQmVuamFtaW4gSGVycmVuc2NobWlkdDsgS3VtYXIgR2FsYQ0KPiBT
dWJqZWN0OiBSZTogW1BBVENIIDIvMiB2Ml0gS1ZNOiBQUEM6IGJvb2tlOiBBZGQgd2F0Y2hkb2cg
ZW11bGF0aW9uDQo+IA0KPiBPbiAwNy8xNy8yMDEyIDA2OjMxIEFNLCBCaHVzaGFuIEJoYXJhdC1S
NjU3Nzcgd3JvdGU6DQo+ID4+Pj4gIGludCBrdm1fYXJjaF92Y3B1X3J1bm5hYmxlKHN0cnVjdCBr
dm1fdmNwdSAqdikgIHsNCj4gPj4+PiAtICAgIHJldHVybiAhKHYtPmFyY2guc2hhcmVkLT5tc3Ig
JiBNU1JfV0UpIHx8DQo+ID4+Pj4gLSAgICAgICAgICAgISEodi0+YXJjaC5wZW5kaW5nX2V4Y2Vw
dGlvbnMpIHx8DQo+ID4+Pj4gLSAgICAgICAgICAgdi0+cmVxdWVzdHM7DQo+ID4+Pj4gKyAgICBi
b29sIHJldCA9ICEodi0+YXJjaC5zaGFyZWQtPm1zciAmIE1TUl9XRSkgfHwNCj4gPj4+PiArICAg
ICAgICAgICAhISh2LT5hcmNoLnBlbmRpbmdfZXhjZXB0aW9ucykgfHwNCj4gPj4+PiArICAgICAg
ICAgICB2LT5yZXF1ZXN0czsNCj4gPj4+PiArDQo+ID4+Pj4gKyAgICByZXQgPSByZXQgfHwga3Zt
cHBjX2dldF90c3Jfd3JjKHYpOw0KPiA+Pj4NCj4gPj4+IFdoeSBkbyB5b3UgbmVlZCB0byBkZWNs
YXJlIHRoZSBjcHUgYXMgbm9uLXJ1bm5hYmxlIHdoZW4gYSB3YXRjaGRvZw0KPiA+Pj4gZXZlbnQg
b2NjdXJlZD8NCj4gPj4NCj4gPj4gSXQncyB0aGUgb3RoZXIgd2F5IGFyb3VuZCAtLSBpdCdzIGFs
d2F5cyBydW5uYWJsZSB3aGVuIGEgd2F0Y2hkb2cNCj4gPj4gZXhpdCBpcyBwZW5kaW5nLiAgSXQn
cyBsaWtlIGEgcGVuZGluZyBleGNlcHRpb24uDQo+ID4NCj4gPiBXaXRoIHRoZSBhYm92ZSBjaGVj
aywgQXJlIHdlIHRyeWluZyB0byBoYW5kbGUgdGhlIGNhc2Ugd2hlcmUgd2F0Y2hkb2cNCj4gPiBp
bnRlcnJ1cHQgYml0IGluIHBlbmRpbmdfZXhjZXB0aW9uIGlzIGNsZWFyZWQgYnkgZ3Vlc3QgYWZ0
ZXIgZmluYWwNCj4gPiBleHBpcnkgYnV0IGJlZm9yZSB0aGUgcWVtdSBleGl0Pw0KPiANCj4gTm8s
IHdlJ3JlIGp1c3QgdHJ5aW5nIHRvIHRlc3QgdGhlIGFjdHVhbCBjb25kaXRpb24gd2Ugd2FudCB0
byBleGl0IG9uLg0KPiBUaGUgd2F0Y2hkb2cgaW50ZXJydXB0IG1pZ2h0IGJlIG1hc2tlZCAoZWl0
aGVyIHdpdGggV0lFIG9yIENFKS4NCg0KSWYgdGhlIGludGVycnVwdCBpcyBtYXNrZWQgdGhlbiBz
dGlsbCB0aGUgcGVuZGluZ19leGNlcHRpb24gd2lsbCBiZSBzZXQuIEJ1dCBpbnRlcnJ1cHQgd2ls
bCBub3QgYmUgZGVsaXZlcmVkIHRvIGd1ZXN0IGFuZCBGaW5hbCBleHBpcnkgY2FuIHN0aWxsIGNh
dXNlIHJlc2V0IGV0YyAhISBSaWdodD8NCg0KPiANCj4gPiBBbmQgd2Ugd2FudCB0aGF0IGlmIFRT
Ui5XUlMgdXBkYXRlDQo+ID4gd2lucyB0aGUgcmFjZSB3aXRoIGNsZWFyaW5nIG9mIHdhdGNoZG9n
IGludGVycnVwdCBjb25kaXRpb24gZnJvbSBndWVzdA0KPiA+IHRoZW4gYW55d2F5cyBsZXQgUUVN
VSBleGl0IHdpdGggcmVhc29uIEtWTV9FWElUX1dEVD8NCj4gDQo+IFdoYXQgcmFjZT8gIElmIEVO
VyBhbmQgV0lTIGFyZSBib3RoIHNldCB3aGVuIHRoZSB3YXRjaGRvZyB0aW1lciBmaXJlcywgaXQn
cyBhDQo+IGZpbmFsIGV4cGlyYXRpb24uICBJdCdzIGlycmVsZXZhbnQgd2hhdCBoYXBwZW5zIHRv
IFdJUyBhZnRlciB0aGF0IHBvaW50LCBiZWZvcmUNCj4gZW5mb3JjZW1lbnQga2lja3MgaW4uDQoN
CldoYXQgSSB3YXMgdGhpbmtpbmcgb2Ygd2FzIGlmIHdlIGNhbiByZW1vdmUgdGhpcyBjaGVjayBp
biB2Y3B1X3J1bm5hYmxlKCkgYW5kIHVzZSB3YXRjaGRvZyBiaXQgaW4gcGVuZGluZ19leGNlcHRp
b24gdG8gcXVhbGlmeSB2Y3B1X3J1bm5hYmxlKCkNCg0KPiANCj4gPiBXaGF0IGlmIHdlIGRvIG5v
dCBhbGxvdyBndWVzdCBjbGVhciB3YXRjaGRvZyBpbnRlcnJ1cHQgY29uZGl0aW9uIGlmDQo+ID4g
ZmluYWwgZXhwaXJ5IGFscmVhZHkgaGFwcGVuZWQ/DQo+IA0KPiBXaGF0IHdvdWxkIHRoYXQgc29s
dmU/DQoNClJlbW92aW5nICAiIHJldCB8fCBrdm1wcGNfZ2V0X3Rzcl93cmModik7IiAgdGhpcyBm
cm9tIHZjcHVfcnVubmFibGUoKS4NCg0KVGhhbmtzDQotQmhhcmF0DQo+IA0KPiAtU2NvdHQNCg=
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 16:56 ` Bhushan Bharat-R65777
0 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 16:56 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, July 17, 2012 10:08 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
> >>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
> >>>> - return !(v->arch.shared->msr & MSR_WE) ||
> >>>> - !!(v->arch.pending_exceptions) ||
> >>>> - v->requests;
> >>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> >>>> + !!(v->arch.pending_exceptions) ||
> >>>> + v->requests;
> >>>> +
> >>>> + ret = ret || kvmppc_get_tsr_wrc(v);
> >>>
> >>> Why do you need to declare the cpu as non-runnable when a watchdog
> >>> event occured?
> >>
> >> It's the other way around -- it's always runnable when a watchdog
> >> exit is pending. It's like a pending exception.
> >
> > With the above check, Are we trying to handle the case where watchdog
> > interrupt bit in pending_exception is cleared by guest after final
> > expiry but before the qemu exit?
>
> No, we're just trying to test the actual condition we want to exit on.
> The watchdog interrupt might be masked (either with WIE or CE).
If the interrupt is masked then still the pending_exception will be set. But interrupt will not be delivered to guest and Final expiry can still cause reset etc !! Right?
>
> > And we want that if TSR.WRS update
> > wins the race with clearing of watchdog interrupt condition from guest
> > then anyways let QEMU exit with reason KVM_EXIT_WDT?
>
> What race? If ENW and WIS are both set when the watchdog timer fires, it's a
> final expiration. It's irrelevant what happens to WIS after that point, before
> enforcement kicks in.
What I was thinking of was if we can remove this check in vcpu_runnable() and use watchdog bit in pending_exception to qualify vcpu_runnable()
>
> > What if we do not allow guest clear watchdog interrupt condition if
> > final expiry already happened?
>
> What would that solve?
Removing " ret || kvmppc_get_tsr_wrc(v);" this from vcpu_runnable().
Thanks
-Bharat
>
> -Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 16:56 ` Bhushan Bharat-R65777
@ 2012-07-17 17:00 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 17:00 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 11:56 AM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Tuesday, July 17, 2012 10:08 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>>> - !!(v->arch.pending_exceptions) ||
>>>>>> - v->requests;
>>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>>> + !!(v->arch.pending_exceptions) ||
>>>>>> + v->requests;
>>>>>> +
>>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>>>
>>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>>> event occured?
>>>>
>>>> It's the other way around -- it's always runnable when a watchdog
>>>> exit is pending. It's like a pending exception.
>>>
>>> With the above check, Are we trying to handle the case where watchdog
>>> interrupt bit in pending_exception is cleared by guest after final
>>> expiry but before the qemu exit?
>>
>> No, we're just trying to test the actual condition we want to exit on.
>> The watchdog interrupt might be masked (either with WIE or CE).
>
> If the interrupt is masked then still the pending_exception will be set.
Not if it's masked by WIE -- and even when masked by CE, it's a bug that
we currently consider the vcpu runnable. We shouldn't depend on that bug.
> But interrupt will not be delivered to guest and Final expiry can still cause reset etc !! Right?
Not sure what your point is -- yes, if the interrupt is masked it will
not be delivered to the guest, and the enforcement mechanism of the
watchdog is still active.
>>> And we want that if TSR.WRS update
>>> wins the race with clearing of watchdog interrupt condition from guest
>>> then anyways let QEMU exit with reason KVM_EXIT_WDT?
>>
>> What race? If ENW and WIS are both set when the watchdog timer fires, it's a
>> final expiration. It's irrelevant what happens to WIS after that point, before
>> enforcement kicks in.
>
> What I was thinking of was if we can remove this check in vcpu_runnable() and use watchdog bit in pending_exception to qualify vcpu_runnable()
No, we want a new bit.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 17:00 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 17:00 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 11:56 AM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Tuesday, July 17, 2012 10:08 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>>> - !!(v->arch.pending_exceptions) ||
>>>>>> - v->requests;
>>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>>> + !!(v->arch.pending_exceptions) ||
>>>>>> + v->requests;
>>>>>> +
>>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>>>
>>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>>> event occured?
>>>>
>>>> It's the other way around -- it's always runnable when a watchdog
>>>> exit is pending. It's like a pending exception.
>>>
>>> With the above check, Are we trying to handle the case where watchdog
>>> interrupt bit in pending_exception is cleared by guest after final
>>> expiry but before the qemu exit?
>>
>> No, we're just trying to test the actual condition we want to exit on.
>> The watchdog interrupt might be masked (either with WIE or CE).
>
> If the interrupt is masked then still the pending_exception will be set.
Not if it's masked by WIE -- and even when masked by CE, it's a bug that
we currently consider the vcpu runnable. We shouldn't depend on that bug.
> But interrupt will not be delivered to guest and Final expiry can still cause reset etc !! Right?
Not sure what your point is -- yes, if the interrupt is masked it will
not be delivered to the guest, and the enforcement mechanism of the
watchdog is still active.
>>> And we want that if TSR.WRS update
>>> wins the race with clearing of watchdog interrupt condition from guest
>>> then anyways let QEMU exit with reason KVM_EXIT_WDT?
>>
>> What race? If ENW and WIS are both set when the watchdog timer fires, it's a
>> final expiration. It's irrelevant what happens to WIS after that point, before
>> enforcement kicks in.
>
> What I was thinking of was if we can remove this check in vcpu_runnable() and use watchdog bit in pending_exception to qualify vcpu_runnable()
No, we want a new bit.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 17:00 ` Scott Wood
@ 2012-07-17 17:10 ` Bhushan Bharat-R65777
-1 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 17:10 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogV29vZCBTY290dC1CMDc0
MjENCj4gU2VudDogVHVlc2RheSwgSnVseSAxNywgMjAxMiAxMDozMSBQTQ0KPiBUbzogQmh1c2hh
biBCaGFyYXQtUjY1Nzc3DQo+IENjOiBXb29kIFNjb3R0LUIwNzQyMTsgQWxleGFuZGVyIEdyYWY7
IGt2bS1wcGNAdmdlci5rZXJuZWwub3JnOw0KPiBrdm1Admdlci5rZXJuZWwub3JnOyBiaGFyYXRi
LnlhZGF2QGdtYWlsLmNvbTsgQmVuamFtaW4gSGVycmVuc2NobWlkdDsgS3VtYXIgR2FsYQ0KPiBT
dWJqZWN0OiBSZTogW1BBVENIIDIvMiB2Ml0gS1ZNOiBQUEM6IGJvb2tlOiBBZGQgd2F0Y2hkb2cg
ZW11bGF0aW9uDQo+IA0KPiBPbiAwNy8xNy8yMDEyIDExOjU2IEFNLCBCaHVzaGFuIEJoYXJhdC1S
NjU3Nzcgd3JvdGU6DQo+ID4NCj4gPg0KPiA+PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0K
PiA+PiBGcm9tOiBXb29kIFNjb3R0LUIwNzQyMQ0KPiA+PiBTZW50OiBUdWVzZGF5LCBKdWx5IDE3
LCAyMDEyIDEwOjA4IFBNDQo+ID4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPj4gQ2M6
IFdvb2QgU2NvdHQtQjA3NDIxOyBBbGV4YW5kZXIgR3JhZjsga3ZtLXBwY0B2Z2VyLmtlcm5lbC5v
cmc7DQo+ID4+IGt2bUB2Z2VyLmtlcm5lbC5vcmc7IGJoYXJhdGIueWFkYXZAZ21haWwuY29tOyBC
ZW5qYW1pbiBIZXJyZW5zY2htaWR0Ow0KPiA+PiBLdW1hciBHYWxhDQo+ID4+IFN1YmplY3Q6IFJl
OiBbUEFUQ0ggMi8yIHYyXSBLVk06IFBQQzogYm9va2U6IEFkZCB3YXRjaGRvZyBlbXVsYXRpb24N
Cj4gPj4NCj4gPj4gT24gMDcvMTcvMjAxMiAwNjozMSBBTSwgQmh1c2hhbiBCaGFyYXQtUjY1Nzc3
IHdyb3RlOg0KPiA+Pj4+Pj4gIGludCBrdm1fYXJjaF92Y3B1X3J1bm5hYmxlKHN0cnVjdCBrdm1f
dmNwdSAqdikgIHsNCj4gPj4+Pj4+IC0gICAgcmV0dXJuICEodi0+YXJjaC5zaGFyZWQtPm1zciAm
IE1TUl9XRSkgfHwNCj4gPj4+Pj4+IC0gICAgICAgICAgICEhKHYtPmFyY2gucGVuZGluZ19leGNl
cHRpb25zKSB8fA0KPiA+Pj4+Pj4gLSAgICAgICAgICAgdi0+cmVxdWVzdHM7DQo+ID4+Pj4+PiAr
ICAgIGJvb2wgcmV0ID0gISh2LT5hcmNoLnNoYXJlZC0+bXNyICYgTVNSX1dFKSB8fA0KPiA+Pj4+
Pj4gKyAgICAgICAgICAgISEodi0+YXJjaC5wZW5kaW5nX2V4Y2VwdGlvbnMpIHx8DQo+ID4+Pj4+
PiArICAgICAgICAgICB2LT5yZXF1ZXN0czsNCj4gPj4+Pj4+ICsNCj4gPj4+Pj4+ICsgICAgcmV0
ID0gcmV0IHx8IGt2bXBwY19nZXRfdHNyX3dyYyh2KTsNCj4gPj4+Pj4NCj4gPj4+Pj4gV2h5IGRv
IHlvdSBuZWVkIHRvIGRlY2xhcmUgdGhlIGNwdSBhcyBub24tcnVubmFibGUgd2hlbiBhIHdhdGNo
ZG9nDQo+ID4+Pj4+IGV2ZW50IG9jY3VyZWQ/DQo+ID4+Pj4NCj4gPj4+PiBJdCdzIHRoZSBvdGhl
ciB3YXkgYXJvdW5kIC0tIGl0J3MgYWx3YXlzIHJ1bm5hYmxlIHdoZW4gYSB3YXRjaGRvZw0KPiA+
Pj4+IGV4aXQgaXMgcGVuZGluZy4gIEl0J3MgbGlrZSBhIHBlbmRpbmcgZXhjZXB0aW9uLg0KPiA+
Pj4NCj4gPj4+IFdpdGggdGhlIGFib3ZlIGNoZWNrLCBBcmUgd2UgdHJ5aW5nIHRvIGhhbmRsZSB0
aGUgY2FzZSB3aGVyZQ0KPiA+Pj4gd2F0Y2hkb2cgaW50ZXJydXB0IGJpdCBpbiBwZW5kaW5nX2V4
Y2VwdGlvbiBpcyBjbGVhcmVkIGJ5IGd1ZXN0DQo+ID4+PiBhZnRlciBmaW5hbCBleHBpcnkgYnV0
IGJlZm9yZSB0aGUgcWVtdSBleGl0Pw0KPiA+Pg0KPiA+PiBObywgd2UncmUganVzdCB0cnlpbmcg
dG8gdGVzdCB0aGUgYWN0dWFsIGNvbmRpdGlvbiB3ZSB3YW50IHRvIGV4aXQgb24uDQo+ID4+IFRo
ZSB3YXRjaGRvZyBpbnRlcnJ1cHQgbWlnaHQgYmUgbWFza2VkIChlaXRoZXIgd2l0aCBXSUUgb3Ig
Q0UpLg0KPiA+DQo+ID4gSWYgdGhlIGludGVycnVwdCBpcyBtYXNrZWQgdGhlbiBzdGlsbCB0aGUg
cGVuZGluZ19leGNlcHRpb24gd2lsbCBiZSBzZXQuDQo+IA0KPiBOb3QgaWYgaXQncyBtYXNrZWQg
YnkgV0lFIC0tIGFuZCBldmVuIHdoZW4gbWFza2VkIGJ5IENFLCBpdCdzIGEgYnVnIHRoYXQgd2UN
Cj4gY3VycmVudGx5IGNvbnNpZGVyIHRoZSB2Y3B1IHJ1bm5hYmxlLiAgV2Ugc2hvdWxkbid0IGRl
cGVuZCBvbiB0aGF0IGJ1Zy4NCg0KU2NvdHQgY2FuIHlvdSBwbGVhc2UgZGVzY3JpYmUgd2hhdCBp
cyBidWc/IFdoYXQgSSByZW1lbWJlciBpcyB0aGF0IGlmIHZjcHUgaXMgbm90IHJ1bi1hYmxlIHRo
ZW4gd2UgaGFsdCB2Y3B1IGFuZCBjYW5ub3QgY2F1c2UgcWVtdSBleGl0IGFsc28uDQoNClRoYW5r
cw0KLUJoYXJhdA0KDQo
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 17:10 ` Bhushan Bharat-R65777
0 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 17:10 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, July 17, 2012 10:31 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>
> On 07/17/2012 11:56 AM, Bhushan Bharat-R65777 wrote:
> >
> >
> >> -----Original Message-----
> >> From: Wood Scott-B07421
> >> Sent: Tuesday, July 17, 2012 10:08 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
> >> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt;
> >> Kumar Gala
> >> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
> >>
> >> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
> >>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
> >>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
> >>>>>> - !!(v->arch.pending_exceptions) ||
> >>>>>> - v->requests;
> >>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
> >>>>>> + !!(v->arch.pending_exceptions) ||
> >>>>>> + v->requests;
> >>>>>> +
> >>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
> >>>>>
> >>>>> Why do you need to declare the cpu as non-runnable when a watchdog
> >>>>> event occured?
> >>>>
> >>>> It's the other way around -- it's always runnable when a watchdog
> >>>> exit is pending. It's like a pending exception.
> >>>
> >>> With the above check, Are we trying to handle the case where
> >>> watchdog interrupt bit in pending_exception is cleared by guest
> >>> after final expiry but before the qemu exit?
> >>
> >> No, we're just trying to test the actual condition we want to exit on.
> >> The watchdog interrupt might be masked (either with WIE or CE).
> >
> > If the interrupt is masked then still the pending_exception will be set.
>
> Not if it's masked by WIE -- and even when masked by CE, it's a bug that we
> currently consider the vcpu runnable. We shouldn't depend on that bug.
Scott can you please describe what is bug? What I remember is that if vcpu is not run-able then we halt vcpu and cannot cause qemu exit also.
Thanks
-Bharat
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 17:10 ` Bhushan Bharat-R65777
@ 2012-07-17 17:25 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 17:25 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 12:10 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Tuesday, July 17, 2012 10:31 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 11:56 AM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Wood Scott-B07421
>>>> Sent: Tuesday, July 17, 2012 10:08 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>>>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt;
>>>> Kumar Gala
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>>>>> - !!(v->arch.pending_exceptions) ||
>>>>>>>> - v->requests;
>>>>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>>>>> + !!(v->arch.pending_exceptions) ||
>>>>>>>> + v->requests;
>>>>>>>> +
>>>>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>>>>>
>>>>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>>>>> event occured?
>>>>>>
>>>>>> It's the other way around -- it's always runnable when a watchdog
>>>>>> exit is pending. It's like a pending exception.
>>>>>
>>>>> With the above check, Are we trying to handle the case where
>>>>> watchdog interrupt bit in pending_exception is cleared by guest
>>>>> after final expiry but before the qemu exit?
>>>>
>>>> No, we're just trying to test the actual condition we want to exit on.
>>>> The watchdog interrupt might be masked (either with WIE or CE).
>>>
>>> If the interrupt is masked then still the pending_exception will be set.
>>
>> Not if it's masked by WIE -- and even when masked by CE, it's a bug that we
>> currently consider the vcpu runnable. We shouldn't depend on that bug.
>
> Scott can you please describe what is bug?
If an interrupt is masked by EE, CE, ME, etc. it is still in
pending_exceptions, so runnable still returns true, and we can't go idle.
> What I remember is that if
> vcpu is not run-able then we halt vcpu and cannot cause qemu exit
> also.
I agree that we want to be considered runnable if we have a final
expiration with an action. What I disagree with is using the same
pending_exceptions bit as is used for the ordinary watchdog interrupt.
They're not the same thing.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 17:25 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 17:25 UTC (permalink / raw)
To: Bhushan Bharat-R65777
Cc: Wood Scott-B07421, Alexander Graf, kvm-ppc@vger.kernel.org,
kvm@vger.kernel.org, bharatb.yadav@gmail.com,
Benjamin Herrenschmidt, Kumar Gala
On 07/17/2012 12:10 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Tuesday, July 17, 2012 10:31 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt; Kumar Gala
>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>
>> On 07/17/2012 11:56 AM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Wood Scott-B07421
>>>> Sent: Tuesday, July 17, 2012 10:08 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; Alexander Graf; kvm-ppc@vger.kernel.org;
>>>> kvm@vger.kernel.org; bharatb.yadav@gmail.com; Benjamin Herrenschmidt;
>>>> Kumar Gala
>>>> Subject: Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
>>>>
>>>> On 07/17/2012 06:31 AM, Bhushan Bharat-R65777 wrote:
>>>>>>>> int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) {
>>>>>>>> - return !(v->arch.shared->msr & MSR_WE) ||
>>>>>>>> - !!(v->arch.pending_exceptions) ||
>>>>>>>> - v->requests;
>>>>>>>> + bool ret = !(v->arch.shared->msr & MSR_WE) ||
>>>>>>>> + !!(v->arch.pending_exceptions) ||
>>>>>>>> + v->requests;
>>>>>>>> +
>>>>>>>> + ret = ret || kvmppc_get_tsr_wrc(v);
>>>>>>>
>>>>>>> Why do you need to declare the cpu as non-runnable when a watchdog
>>>>>>> event occured?
>>>>>>
>>>>>> It's the other way around -- it's always runnable when a watchdog
>>>>>> exit is pending. It's like a pending exception.
>>>>>
>>>>> With the above check, Are we trying to handle the case where
>>>>> watchdog interrupt bit in pending_exception is cleared by guest
>>>>> after final expiry but before the qemu exit?
>>>>
>>>> No, we're just trying to test the actual condition we want to exit on.
>>>> The watchdog interrupt might be masked (either with WIE or CE).
>>>
>>> If the interrupt is masked then still the pending_exception will be set.
>>
>> Not if it's masked by WIE -- and even when masked by CE, it's a bug that we
>> currently consider the vcpu runnable. We shouldn't depend on that bug.
>
> Scott can you please describe what is bug?
If an interrupt is masked by EE, CE, ME, etc. it is still in
pending_exceptions, so runnable still returns true, and we can't go idle.
> What I remember is that if
> vcpu is not run-able then we halt vcpu and cannot cause qemu exit
> also.
I agree that we want to be considered runnable if we have a final
expiration with an action. What I disagree with is using the same
pending_exceptions bit as is used for the ordinary watchdog interrupt.
They're not the same thing.
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 17:25 ` Scott Wood
@ 2012-07-17 17:29 ` Bhushan Bharat-R65777
-1 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 17:29 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
PiA+Pg0KPiA+PiBOb3QgaWYgaXQncyBtYXNrZWQgYnkgV0lFIC0tIGFuZCBldmVuIHdoZW4gbWFz
a2VkIGJ5IENFLCBpdCdzIGEgYnVnDQo+ID4+IHRoYXQgd2UgY3VycmVudGx5IGNvbnNpZGVyIHRo
ZSB2Y3B1IHJ1bm5hYmxlLiAgV2Ugc2hvdWxkbid0IGRlcGVuZCBvbiB0aGF0DQo+IGJ1Zy4NCj4g
Pg0KPiA+IFNjb3R0IGNhbiB5b3UgcGxlYXNlIGRlc2NyaWJlIHdoYXQgaXMgYnVnPw0KPiANCj4g
SWYgYW4gaW50ZXJydXB0IGlzIG1hc2tlZCBieSBFRSwgQ0UsIE1FLCBldGMuIGl0IGlzIHN0aWxs
IGluIHBlbmRpbmdfZXhjZXB0aW9ucywNCj4gc28gcnVubmFibGUgc3RpbGwgcmV0dXJucyB0cnVl
LCBhbmQgd2UgY2FuJ3QgZ28gaWRsZS4NCg0KSG1tLCBvay4NCg0KPiANCj4gPiBXaGF0IEkgcmVt
ZW1iZXIgaXMgdGhhdCBpZg0KPiA+IHZjcHUgaXMgbm90IHJ1bi1hYmxlIHRoZW4gd2UgaGFsdCB2
Y3B1IGFuZCBjYW5ub3QgY2F1c2UgcWVtdSBleGl0DQo+ID4gYWxzby4NCj4gDQo+IEkgYWdyZWUg
dGhhdCB3ZSB3YW50IHRvIGJlIGNvbnNpZGVyZWQgcnVubmFibGUgaWYgd2UgaGF2ZSBhIGZpbmFs
IGV4cGlyYXRpb24NCj4gd2l0aCBhbiBhY3Rpb24uICBXaGF0IEkgZGlzYWdyZWUgd2l0aCBpcyB1
c2luZyB0aGUgc2FtZSBwZW5kaW5nX2V4Y2VwdGlvbnMgYml0DQo+IGFzIGlzIHVzZWQgZm9yIHRo
ZSBvcmRpbmFyeSB3YXRjaGRvZyBpbnRlcnJ1cHQuDQo+IFRoZXkncmUgbm90IHRoZSBzYW1lIHRo
aW5nLg0KDQpOb3cgSSBhZ3JlZSB3aXRoIHlvdXIgZGlzYWdyZWVtZW50IDopDQoNClRoYW5rcw0K
LUJoYXJhdA0KDQo
^ permalink raw reply [flat|nested] 38+ messages in thread
* RE: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 17:29 ` Bhushan Bharat-R65777
0 siblings, 0 replies; 38+ messages in thread
From: Bhushan Bharat-R65777 @ 2012-07-17 17:29 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Alexander Graf, kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
bharatb.yadav@gmail.com, Benjamin Herrenschmidt, Kumar Gala
> >>
> >> Not if it's masked by WIE -- and even when masked by CE, it's a bug
> >> that we currently consider the vcpu runnable. We shouldn't depend on that
> bug.
> >
> > Scott can you please describe what is bug?
>
> If an interrupt is masked by EE, CE, ME, etc. it is still in pending_exceptions,
> so runnable still returns true, and we can't go idle.
Hmm, ok.
>
> > What I remember is that if
> > vcpu is not run-able then we halt vcpu and cannot cause qemu exit
> > also.
>
> I agree that we want to be considered runnable if we have a final expiration
> with an action. What I disagree with is using the same pending_exceptions bit
> as is used for the ordinary watchdog interrupt.
> They're not the same thing.
Now I agree with your disagreement :)
Thanks
-Bharat
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
2012-07-17 16:51 ` Alexander Graf
@ 2012-07-17 18:00 ` Scott Wood
-1 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 18:00 UTC (permalink / raw)
To: Alexander Graf
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 11:51 AM, Alexander Graf wrote:
> On 07/17/2012 06:27 PM, Scott Wood wrote:
>> Determining whether we're on final expiration is based only on the
>> previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
>> to determine whether we need to actually exit to QEMU (as opposed to a
>> final expiration with no action, which still should result in the timer
>> being stopped), but I don't see any problematic races there as long as
>> we don't try to update TCR from that context.
>
> Yeah, what I was trying to say is that I would like to keep the TSR
> state unmodified for the final expiry case. If user space decides to not
> act upon it, it should be able to call into VCPU_RUN and have the same
> behavior as if the watchdog never expired.
I agree, it would be racy for QEMU to have to clear TSR if it's not
doing a full reset. And that fact is making me wonder if I should have
listened to you and kept TSR owned by the vcpu thread. :-)
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation
@ 2012-07-17 18:00 ` Scott Wood
0 siblings, 0 replies; 38+ messages in thread
From: Scott Wood @ 2012-07-17 18:00 UTC (permalink / raw)
To: Alexander Graf
Cc: Bhushan Bharat-R65777, Wood Scott-B07421,
<kvm-ppc@vger.kernel.org>, <kvm@vger.kernel.org>,
<bharatb.yadav@gmail.com>, Benjamin Herrenschmidt,
Kumar Gala, Avi Kivity
On 07/17/2012 11:51 AM, Alexander Graf wrote:
> On 07/17/2012 06:27 PM, Scott Wood wrote:
>> Determining whether we're on final expiration is based only on the
>> previous value of TSR[ENW,WIS] when the timer expires. We look at TCR
>> to determine whether we need to actually exit to QEMU (as opposed to a
>> final expiration with no action, which still should result in the timer
>> being stopped), but I don't see any problematic races there as long as
>> we don't try to update TCR from that context.
>
> Yeah, what I was trying to say is that I would like to keep the TSR
> state unmodified for the final expiry case. If user space decides to not
> act upon it, it should be able to call into VCPU_RUN and have the same
> behavior as if the watchdog never expired.
I agree, it would be racy for QEMU to have to clear TSR if it's not
doing a full reset. And that fact is making me wonder if I should have
listened to you and kept TSR owned by the vcpu thread. :-)
-Scott
^ permalink raw reply [flat|nested] 38+ messages in thread
end of thread, other threads:[~2012-07-17 18:00 UTC | newest]
Thread overview: 38+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-07-09 10:34 [PATCH 2/2 v2] KVM: PPC: booke: Add watchdog emulation Bharat Bhushan
2012-07-09 10:46 ` Bharat Bhushan
2012-07-16 17:18 ` Alexander Graf
2012-07-16 17:18 ` Alexander Graf
2012-07-17 1:02 ` Scott Wood
2012-07-17 1:02 ` Scott Wood
2012-07-17 7:20 ` Alexander Graf
2012-07-17 7:20 ` Alexander Graf
2012-07-17 9:57 ` Bhushan Bharat-R65777
2012-07-17 12:51 ` Alexander Graf
2012-07-17 12:51 ` Alexander Graf
2012-07-17 13:15 ` Bhushan Bharat-R65777
2012-07-17 14:01 ` Alexander Graf
2012-07-17 14:01 ` Alexander Graf
2012-07-17 14:13 ` Bhushan Bharat-R65777
2012-07-17 14:35 ` Alexander Graf
2012-07-17 14:35 ` Alexander Graf
2012-07-17 16:10 ` Bhushan Bharat-R65777
2012-07-17 16:27 ` Scott Wood
2012-07-17 16:27 ` Scott Wood
2012-07-17 16:51 ` Alexander Graf
2012-07-17 16:51 ` Alexander Graf
2012-07-17 18:00 ` Scott Wood
2012-07-17 18:00 ` Scott Wood
2012-07-17 11:31 ` Bhushan Bharat-R65777
2012-07-17 11:31 ` Bhushan Bharat-R65777
2012-07-17 16:37 ` Scott Wood
2012-07-17 16:37 ` Scott Wood
2012-07-17 16:56 ` Bhushan Bharat-R65777
2012-07-17 16:56 ` Bhushan Bharat-R65777
2012-07-17 17:00 ` Scott Wood
2012-07-17 17:00 ` Scott Wood
2012-07-17 17:10 ` Bhushan Bharat-R65777
2012-07-17 17:10 ` Bhushan Bharat-R65777
2012-07-17 17:25 ` Scott Wood
2012-07-17 17:25 ` Scott Wood
2012-07-17 17:29 ` Bhushan Bharat-R65777
2012-07-17 17:29 ` Bhushan Bharat-R65777
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.