public inbox for kvm-ppc@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
@ 2012-06-01 10:20 Paul Mackerras
  2012-06-06 12:28 ` Avi Kivity
  2018-01-29 23:51 ` [PATCH] KVM: PPC: Book3S HV: Drop locks before reading guest memory Paul Mackerras
  0 siblings, 2 replies; 6+ messages in thread
From: Paul Mackerras @ 2012-06-01 10:20 UTC (permalink / raw)
  To: Alexander Graf, kvm-ppc; +Cc: kvm

At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
This is not good, since kvmppc_pin_guest_page() calls down_read() and
get_user_pages_fast(), both of which can sleep.  This bug was introduced
in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
registration more robust").

This arranges to drop those spinlocks before calling
kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the
vcore lock in kvmppc_run_core() means we have to set the vcore_state
field to VCORE_RUNNING before we drop the lock, so that other vcpus
won't try to run this vcore.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
Since this bug is in Linus' tree, and it can cause a scheduling while
atomic bug message, can we send this to Linus for inclusion in 3.5,
after review of course?

 arch/powerpc/kvm/book3s_hv.c |   96 +++++++++++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d084e41..83e929e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -268,24 +268,45 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 	return err;
 }
 
-static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 {
+	struct kvm *kvm = vcpu->kvm;
 	void *va;
 	unsigned long nb;
+	unsigned long gpa;
 
-	vpap->update_pending = 0;
-	va = NULL;
-	if (vpap->next_gpa) {
-		va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
-		if (nb < vpap->len) {
-			/*
-			 * If it's now too short, it must be that userspace
-			 * has changed the mappings underlying guest memory,
-			 * so unregister the region.
-			 */
+	/*
+	 * We need to pin the page pointed to by vpap->next_gpa,
+	 * but we can't call kvmppc_pin_guest_page under the lock
+	 * as it does get_user_pages() and down_read().  So we
+	 * have to drop the lock, pin the page, then get the lock
+	 * again and check that a new area didn't get registered
+	 * in the meantime.
+	 */
+	for (;;) {
+		gpa = vpap->next_gpa;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		va = NULL;
+		nb = 0;
+		if (gpa)
+			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+		spin_lock(&vcpu->arch.vpa_update_lock);
+	if (gpa == vpap->next_gpa)
+			break;
+		/* sigh... unpin that one and try again */
+		if (va)
 			kvmppc_unpin_guest_page(kvm, va);
-			va = NULL;
-		}
+	}
+
+	vpap->update_pending = 0;
+	if (va && nb < vpap->len) {
+		/*
+		 * If it's now too short, it must be that userspace
+		 * has changed the mappings underlying guest memory,
+		 * so unregister the region.
+		 */
+		kvmppc_unpin_guest_page(kvm, va);
+		va = NULL;
 	}
 	if (vpap->pinned_addr)
 		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
@@ -296,20 +317,18 @@ static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
 
 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
-
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	if (vcpu->arch.vpa.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
 		init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
 	}
 	if (vcpu->arch.dtl.update_pending) {
-		kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
 		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
 		vcpu->arch.dtl_index = 0;
 	}
 	if (vcpu->arch.slb_shadow.update_pending)
-		kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 }
 
@@ -800,12 +819,39 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
-	int ptid, i;
+	int ptid, i, need_vpa_update;
 
 	/* don't start if any threads have a signal pending */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+	need_vpa_update = 0;
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		if (signal_pending(vcpu->arch.run_task))
 			return 0;
+		need_vpa_update |= vcpu->arch.vpa.update_pending |
+			vcpu->arch.slb_shadow.update_pending |
+			vcpu->arch.dtl.update_pending;
+	}
+
+	/*
+	 * Initialize *vc, in particular vc->vcore_state, so we can
+	 * drop the vcore lock if necessary.
+	 */
+	vc->n_woken = 0;
+	vc->nap_count = 0;
+	vc->entry_exit_count = 0;
+	vc->vcore_state = VCORE_RUNNING;
+	vc->in_guest = 0;
+	vc->napping_threads = 0;
+
+	/*
+	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
+	 * which can't be called with any spinlocks held.
+	 */
+	if (need_vpa_update) {
+		spin_unlock(&vc->lock);
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			kvmppc_update_vpas(vcpu);
+		spin_lock(&vc->lock);
+	}
 
 	/*
 	 * Make sure we are running on thread 0, and that
@@ -838,20 +884,10 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
 
-	vc->n_woken = 0;
-	vc->nap_count = 0;
-	vc->entry_exit_count = 0;
-	vc->vcore_state = VCORE_RUNNING;
 	vc->stolen_tb += mftb() - vc->preempt_tb;
-	vc->in_guest = 0;
 	vc->pcpu = smp_processor_id();
-	vc->napping_threads = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
-		if (vcpu->arch.vpa.update_pending ||
-		    vcpu->arch.slb_shadow.update_pending ||
-		    vcpu->arch.dtl.update_pending)
-			kvmppc_update_vpas(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 	/* Grab any remaining hw threads so they can't go into the kernel */
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
  2012-06-01 10:20 [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page Paul Mackerras
@ 2012-06-06 12:28 ` Avi Kivity
  2012-06-06 15:52   ` Alexander Graf
  2018-01-29 23:51 ` [PATCH] KVM: PPC: Book3S HV: Drop locks before reading guest memory Paul Mackerras
  1 sibling, 1 reply; 6+ messages in thread
From: Avi Kivity @ 2012-06-06 12:28 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: Alexander Graf, kvm-ppc, kvm

On 06/01/2012 01:20 PM, Paul Mackerras wrote:
> At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
> with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
> This is not good, since kvmppc_pin_guest_page() calls down_read() and
> get_user_pages_fast(), both of which can sleep.  This bug was introduced
> in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
> registration more robust").
> 
> This arranges to drop those spinlocks before calling
> kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the
> vcore lock in kvmppc_run_core() means we have to set the vcore_state
> field to VCORE_RUNNING before we drop the lock, so that other vcpus
> won't try to run this vcore.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> Since this bug is in Linus' tree, and it can cause a scheduling while
> atomic bug message, can we send this to Linus for inclusion in 3.5,
> after review of course?
> 

Sure, Alex?


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
  2012-06-06 12:28 ` Avi Kivity
@ 2012-06-06 15:52   ` Alexander Graf
  2012-06-19 10:50     ` Alexander Graf
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Graf @ 2012-06-06 15:52 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Paul Mackerras, kvm-ppc, kvm

On 06/06/2012 02:28 PM, Avi Kivity wrote:
> On 06/01/2012 01:20 PM, Paul Mackerras wrote:
>> At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
>> with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
>> This is not good, since kvmppc_pin_guest_page() calls down_read() and
>> get_user_pages_fast(), both of which can sleep.  This bug was introduced
>> in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
>> registration more robust").
>>
>> This arranges to drop those spinlocks before calling
>> kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the
>> vcore lock in kvmppc_run_core() means we have to set the vcore_state
>> field to VCORE_RUNNING before we drop the lock, so that other vcpus
>> won't try to run this vcore.
>>
>> Signed-off-by: Paul Mackerras<paulus@samba.org>
>> ---
>> Since this bug is in Linus' tree, and it can cause a scheduling while
>> atomic bug message, can we send this to Linus for inclusion in 3.5,
>> after review of course?
>>
> Sure, Alex?

Yup, reviewed and tested. Looks ready to go into the tree to me. Can you 
pull it in the short way please?


Alex

Acked-by: Alexander Graf <agraf@suse.de>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
  2012-06-06 15:52   ` Alexander Graf
@ 2012-06-19 10:50     ` Alexander Graf
  2012-06-19 12:04       ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Graf @ 2012-06-19 10:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Paul Mackerras, kvm-ppc, kvm


On 06.06.2012, at 17:52, Alexander Graf wrote:

> On 06/06/2012 02:28 PM, Avi Kivity wrote:
>> On 06/01/2012 01:20 PM, Paul Mackerras wrote:
>>> At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
>>> with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
>>> This is not good, since kvmppc_pin_guest_page() calls down_read() and
>>> get_user_pages_fast(), both of which can sleep.  This bug was introduced
>>> in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
>>> registration more robust").
>>> 
>>> This arranges to drop those spinlocks before calling
>>> kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the
>>> vcore lock in kvmppc_run_core() means we have to set the vcore_state
>>> field to VCORE_RUNNING before we drop the lock, so that other vcpus
>>> won't try to run this vcore.
>>> 
>>> Signed-off-by: Paul Mackerras<paulus@samba.org>
>>> ---
>>> Since this bug is in Linus' tree, and it can cause a scheduling while
>>> atomic bug message, can we send this to Linus for inclusion in 3.5,
>>> after review of course?
>>> 
>> Sure, Alex?
> 
> Yup, reviewed and tested. Looks ready to go into the tree to me. Can you pull it in the short way please?

Avi?


Alex


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
  2012-06-19 10:50     ` Alexander Graf
@ 2012-06-19 12:04       ` Avi Kivity
  0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2012-06-19 12:04 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Paul Mackerras, kvm-ppc, kvm

On 06/19/2012 01:50 PM, Alexander Graf wrote:
> 
> On 06.06.2012, at 17:52, Alexander Graf wrote:
> 
>> On 06/06/2012 02:28 PM, Avi Kivity wrote:
>>> On 06/01/2012 01:20 PM, Paul Mackerras wrote:
>>>> At the moment we call kvmppc_pin_guest_page() in kvmppc_update_vpa()
>>>> with two spinlocks held: the vcore lock and the vcpu->vpa_update_lock.
>>>> This is not good, since kvmppc_pin_guest_page() calls down_read() and
>>>> get_user_pages_fast(), both of which can sleep.  This bug was introduced
>>>> in 2e25aa5f ("KVM: PPC: Book3S HV: Make virtual processor area
>>>> registration more robust").
>>>> 
>>>> This arranges to drop those spinlocks before calling
>>>> kvmppc_pin_guest_page() and re-take them afterwards.  Dropping the
>>>> vcore lock in kvmppc_run_core() means we have to set the vcore_state
>>>> field to VCORE_RUNNING before we drop the lock, so that other vcpus
>>>> won't try to run this vcore.
>>>> 
>>>> Signed-off-by: Paul Mackerras<paulus@samba.org>
>>>> ---
>>>> Since this bug is in Linus' tree, and it can cause a scheduling while
>>>> atomic bug message, can we send this to Linus for inclusion in 3.5,
>>>> after review of course?
>>>> 
>>> Sure, Alex?
>> 
>> Yup, reviewed and tested. Looks ready to go into the tree to me. Can you pull it in the short way please?
> 
> Avi?

Sorry!  Applied to master, will push upstream shortly.

-- 
error compiling committee.c: too many arguments to function



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] KVM: PPC: Book3S HV: Drop locks before reading guest memory
  2012-06-01 10:20 [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page Paul Mackerras
  2012-06-06 12:28 ` Avi Kivity
@ 2018-01-29 23:51 ` Paul Mackerras
  1 sibling, 0 replies; 6+ messages in thread
From: Paul Mackerras @ 2018-01-29 23:51 UTC (permalink / raw)
  To: kvm, kvm-ppc

Running with CONFIG_DEBUG_ATOMIC_SLEEP reveals that HV KVM tries to
read guest memory, in order to emulate guest instructions, while
preempt is disabled and a vcore lock is held.  This occurs in
kvmppc_handle_exit_hv(), called from post_guest_process(), when
emulating guest doorbell instructions on POWER9 systems, and also
when checking whether we have hit a hypervisor breakpoint.
Reading guest memory can cause a page fault and thus cause the
task to sleep, so we need to avoid reading guest memory while
holding a spinlock or when preempt is disabled.

To fix this, we move the preempt_enable() in kvmppc_run_core() to
before the loop that calls post_guest_process() for each vcore that
has just run, and we drop and re-take the vcore lock around the calls
to kvmppc_emulate_debug_inst() and kvmppc_emulate_doorbell_instr().

Dropping the lock is safe with respect to the iteration over the
runnable vcpus in post_guest_process(); for_each_runnable_thread
is actually safe to use locklessly.  It is possible for a vcpu
to become runnable and add itself to the runnable_threads array
(code near the beginning of kvmppc_run_vcpu()) and then get included
in the iteration in post_guest_process despite the fact that it
has not just run.  This is benign because vcpu->arch.trap and
vcpu->arch.ceded will be zero.

Cc: stable@vger.kernel.org # v4.13+
Fixes: 579006944e0d ("KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9")
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e5f81fc..aa6130b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1008,8 +1008,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *tvcpu;
 
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
-		return EMULATE_FAIL;
 	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
 		return RESUME_GUEST;
 	if (get_op(inst) != 31)
@@ -1059,6 +1057,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+/* Called with vcpu->arch.vcore->lock held */
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -1179,7 +1178,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				swab32(vcpu->arch.emul_inst) :
 				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+			/* Need vcore unlocked to call kvmppc_get_last_inst */
+			spin_unlock(&vcpu->arch.vcore->lock);
 			r = kvmppc_emulate_debug_inst(run, vcpu);
+			spin_lock(&vcpu->arch.vcore->lock);
 		} else {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -1194,8 +1196,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
 		r = EMULATE_FAIL;
-		if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+		if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
+		    cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/* Need vcore unlocked to call kvmppc_get_last_inst */
+			spin_unlock(&vcpu->arch.vcore->lock);
 			r = kvmppc_emulate_doorbell_instr(vcpu);
+			spin_lock(&vcpu->arch.vcore->lock);
+		}
 		if (r == EMULATE_FAIL) {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -2946,13 +2953,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 
+	preempt_enable();
+
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
 		pvc = core_info.vc[sub];
 		post_guest_process(pvc, pvc == vc);
 	}
 
 	spin_lock(&vc->lock);
-	preempt_enable();
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-01-29 23:51 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-01 10:20 [PATCH] KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page Paul Mackerras
2012-06-06 12:28 ` Avi Kivity
2012-06-06 15:52   ` Alexander Graf
2012-06-19 10:50     ` Alexander Graf
2012-06-19 12:04       ` Avi Kivity
2018-01-29 23:51 ` [PATCH] KVM: PPC: Book3S HV: Drop locks before reading guest memory Paul Mackerras

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox