* [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
@ 2012-06-22 22:45 Jan Kiszka
2012-06-22 22:55 ` Jan Kiszka
2012-06-22 22:59 ` Anthony Liguori
0 siblings, 2 replies; 23+ messages in thread
From: Jan Kiszka @ 2012-06-22 22:45 UTC (permalink / raw)
To: qemu-devel; +Cc: Liu Ping Fan, Alexander Graf, Anthony Liguori
This sketches a possible path to get rid of the iothread lock on vmexits
in KVM mode. On x86, the in-kernel irqchip has to be used because we
otherwise need to synchronize APIC and other per-cpu state accesses
that could be changed concurrently. Not yet fully analyzed is the NMI
injection path in the absence of an APIC.
s390x should be fine without specific locking as their pre/post-run
callbacks are empty. Power requires locking for the pre-run callback.
This patch is untested, but a similar version was successfully used in
an x86 setup with a network I/O path that needed no central iothread
locking anymore (required special MMIO exit handling).
---
kvm-all.c | 18 ++++++++++++++++--
target-i386/kvm.c | 7 +++++++
target-ppc/kvm.c | 4 ++++
3 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/kvm-all.c b/kvm-all.c
index f8e4328..9c3e26f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
return EXCP_HLT;
}
+ qemu_mutex_unlock_iothread();
+
do {
if (env->kvm_vcpu_dirty) {
kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
@@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
*/
qemu_cpu_kick_self();
}
- qemu_mutex_unlock_iothread();
run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
- qemu_mutex_lock_iothread();
kvm_arch_post_run(env, run);
+ /* TODO: push coalesced mmio flushing to the point where we access
+ * devices that are using it (currently VGA and E1000). */
+ qemu_mutex_lock_iothread();
kvm_flush_coalesced_mmio_buffer();
+ qemu_mutex_unlock_iothread();
if (run_ret < 0) {
if (run_ret == -EINTR || run_ret == -EAGAIN) {
@@ -1499,19 +1503,23 @@ int kvm_cpu_exec(CPUArchState *env)
switch (run->exit_reason) {
case KVM_EXIT_IO:
DPRINTF("handle_io\n");
+ qemu_mutex_lock_iothread();
kvm_handle_io(run->io.port,
(uint8_t *)run + run->io.data_offset,
run->io.direction,
run->io.size,
run->io.count);
+ qemu_mutex_unlock_iothread();
ret = 0;
break;
case KVM_EXIT_MMIO:
DPRINTF("handle_mmio\n");
+ qemu_mutex_lock_iothread();
cpu_physical_memory_rw(run->mmio.phys_addr,
run->mmio.data,
run->mmio.len,
run->mmio.is_write);
+ qemu_mutex_unlock_iothread();
ret = 0;
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
@@ -1520,7 +1528,9 @@ int kvm_cpu_exec(CPUArchState *env)
break;
case KVM_EXIT_SHUTDOWN:
DPRINTF("shutdown\n");
+ qemu_mutex_lock_iothread();
qemu_system_reset_request();
+ qemu_mutex_unlock_iothread();
ret = EXCP_INTERRUPT;
break;
case KVM_EXIT_UNKNOWN:
@@ -1533,11 +1543,15 @@ int kvm_cpu_exec(CPUArchState *env)
break;
default:
DPRINTF("kvm_arch_handle_exit\n");
+ qemu_mutex_lock_iothread();
ret = kvm_arch_handle_exit(env, run);
+ qemu_mutex_unlock_iothread();
break;
}
} while (ret == 0);
+ qemu_mutex_lock_iothread();
+
if (ret < 0) {
cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
vm_stop(RUN_STATE_INTERNAL_ERROR);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 0d0d8f6..0ad64d1 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1631,7 +1631,10 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
/* Inject NMI */
if (env->interrupt_request & CPU_INTERRUPT_NMI) {
+ qemu_mutex_lock_iothread();
env->interrupt_request &= ~CPU_INTERRUPT_NMI;
+ qemu_mutex_unlock_iothread();
+
DPRINTF("injected NMI\n");
ret = kvm_vcpu_ioctl(env, KVM_NMI);
if (ret < 0) {
@@ -1641,6 +1644,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
}
if (!kvm_irqchip_in_kernel()) {
+ qemu_mutex_lock_iothread();
+
/* Force the VCPU out of its inner loop to process any INIT requests
* or pending TPR access reports. */
if (env->interrupt_request &
@@ -1682,6 +1687,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
DPRINTF("setting tpr\n");
run->cr8 = cpu_get_apic_tpr(env->apic_state);
+
+ qemu_mutex_unlock_iothread();
}
}
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index c09cc39..60d91a5 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -471,6 +471,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
int r;
unsigned irq;
+ qemu_mutex_lock_iothread();
+
/* PowerPC QEMU tracks the various core input pins (interrupt, critical
* interrupt, reset, etc) in PPC-specific env->irq_input_state. */
if (!cap_interrupt_level &&
@@ -497,6 +499,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
/* We don't know if there are more interrupts pending after this. However,
* the guest will return to userspace in the course of handling this one
* anyways, so we will get a chance to deliver the rest. */
+
+ qemu_mutex_unlock_iothread();
}
void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
--
1.7.3.4
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-22 22:45 [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop Jan Kiszka
@ 2012-06-22 22:55 ` Jan Kiszka
2012-06-23 0:22 ` Marcelo Tosatti
2012-06-22 22:59 ` Anthony Liguori
1 sibling, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-06-22 22:55 UTC (permalink / raw)
To: qemu-devel
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, Alexander Graf, Avi Kivity,
Anthony Liguori
Should have declared this [RFC] in the subject and CC'ed kvm...
On 2012-06-23 00:45, Jan Kiszka wrote:
> This sketches a possible path to get rid of the iothread lock on vmexits
> in KVM mode. On x86, the in-kernel irqchip has to be used because we
> we otherwise need to synchronize APIC and other per-cpu state accesses
> that could be changed concurrently. Not yet fully analyzed is the NMI
> injection path in the absence of an APIC.
>
> s390x should be fine without specific locking as their pre/post-run
> callbacks are empty. Power requires locking for the pre-run callback.
>
> This patch is untested, but a similar version was successfully used in
> an x86 setup with a network I/O path that needed no central iothread
> locking anymore (required special MMIO exit handling).
> ---
> kvm-all.c | 18 ++++++++++++++++--
> target-i386/kvm.c | 7 +++++++
> target-ppc/kvm.c | 4 ++++
> 3 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..9c3e26f 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
> return EXCP_HLT;
> }
>
> + qemu_mutex_unlock_iothread();
> +
> do {
> if (env->kvm_vcpu_dirty) {
> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
> */
> qemu_cpu_kick_self();
> }
> - qemu_mutex_unlock_iothread();
>
> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> - qemu_mutex_lock_iothread();
> kvm_arch_post_run(env, run);
>
> + /* TODO: push coalesced mmio flushing to the point where we access
> + * devices that are using it (currently VGA and E1000). */
> + qemu_mutex_lock_iothread();
> kvm_flush_coalesced_mmio_buffer();
> + qemu_mutex_unlock_iothread();
>
> if (run_ret < 0) {
> if (run_ret == -EINTR || run_ret == -EAGAIN) {
> @@ -1499,19 +1503,23 @@ int kvm_cpu_exec(CPUArchState *env)
> switch (run->exit_reason) {
> case KVM_EXIT_IO:
> DPRINTF("handle_io\n");
> + qemu_mutex_lock_iothread();
> kvm_handle_io(run->io.port,
> (uint8_t *)run + run->io.data_offset,
> run->io.direction,
> run->io.size,
> run->io.count);
> + qemu_mutex_unlock_iothread();
> ret = 0;
> break;
> case KVM_EXIT_MMIO:
> DPRINTF("handle_mmio\n");
> + qemu_mutex_lock_iothread();
> cpu_physical_memory_rw(run->mmio.phys_addr,
> run->mmio.data,
> run->mmio.len,
> run->mmio.is_write);
> + qemu_mutex_unlock_iothread();
> ret = 0;
> break;
> case KVM_EXIT_IRQ_WINDOW_OPEN:
> @@ -1520,7 +1528,9 @@ int kvm_cpu_exec(CPUArchState *env)
> break;
> case KVM_EXIT_SHUTDOWN:
> DPRINTF("shutdown\n");
> + qemu_mutex_lock_iothread();
> qemu_system_reset_request();
> + qemu_mutex_unlock_iothread();
> ret = EXCP_INTERRUPT;
> break;
> case KVM_EXIT_UNKNOWN:
> @@ -1533,11 +1543,15 @@ int kvm_cpu_exec(CPUArchState *env)
> break;
> default:
> DPRINTF("kvm_arch_handle_exit\n");
> + qemu_mutex_lock_iothread();
> ret = kvm_arch_handle_exit(env, run);
> + qemu_mutex_unlock_iothread();
> break;
> }
> } while (ret == 0);
>
> + qemu_mutex_lock_iothread();
> +
> if (ret < 0) {
> cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
> vm_stop(RUN_STATE_INTERNAL_ERROR);
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 0d0d8f6..0ad64d1 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1631,7 +1631,10 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
>
> /* Inject NMI */
> if (env->interrupt_request & CPU_INTERRUPT_NMI) {
> + qemu_mutex_lock_iothread();
> env->interrupt_request &= ~CPU_INTERRUPT_NMI;
> + qemu_mutex_unlock_iothread();
> +
> DPRINTF("injected NMI\n");
> ret = kvm_vcpu_ioctl(env, KVM_NMI);
> if (ret < 0) {
> @@ -1641,6 +1644,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
> }
>
> if (!kvm_irqchip_in_kernel()) {
> + qemu_mutex_lock_iothread();
> +
> /* Force the VCPU out of its inner loop to process any INIT requests
> * or pending TPR access reports. */
> if (env->interrupt_request &
> @@ -1682,6 +1687,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
>
> DPRINTF("setting tpr\n");
> run->cr8 = cpu_get_apic_tpr(env->apic_state);
> +
> + qemu_mutex_unlock_iothread();
> }
> }
>
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index c09cc39..60d91a5 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -471,6 +471,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
> int r;
> unsigned irq;
>
> + qemu_mutex_lock_iothread();
> +
> /* PowerPC QEMU tracks the various core input pins (interrupt, critical
> * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
> if (!cap_interrupt_level &&
> @@ -497,6 +499,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
> /* We don't know if there are more interrupts pending after this. However,
> * the guest will return to userspace in the course of handling this one
> * anyways, so we will get a chance to deliver the rest. */
> +
> + qemu_mutex_unlock_iothread();
> }
>
> void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
>
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-22 22:55 ` Jan Kiszka
@ 2012-06-23 0:22 ` Marcelo Tosatti
2012-06-23 9:06 ` Marcelo Tosatti
2012-06-23 9:22 ` Jan Kiszka
0 siblings, 2 replies; 23+ messages in thread
From: Marcelo Tosatti @ 2012-06-23 0:22 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Alexander Graf, Avi Kivity,
Anthony Liguori
On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
> Should have declared this [RFC] in the subject and CC'ed kvm...
>
> On 2012-06-23 00:45, Jan Kiszka wrote:
> > This sketches a possible path to get rid of the iothread lock on vmexits
> > in KVM mode. On x86, the in-kernel irqchip has to be used because we
> > we otherwise need to synchronize APIC and other per-cpu state accesses
> > that could be changed concurrently. Not yet fully analyzed is the NMI
> > injection path in the absence of an APIC.
> >
> > s390x should be fine without specific locking as their pre/post-run
> > callbacks are empty. Power requires locking for the pre-run callback.
> >
> > This patch is untested, but a similar version was successfully used in
> > an x86 setup with a network I/O path that needed no central iothread
> > locking anymore (required special MMIO exit handling).
> > ---
> > kvm-all.c | 18 ++++++++++++++++--
> > target-i386/kvm.c | 7 +++++++
> > target-ppc/kvm.c | 4 ++++
> > 3 files changed, 27 insertions(+), 2 deletions(-)
> >
> > diff --git a/kvm-all.c b/kvm-all.c
> > index f8e4328..9c3e26f 100644
> > --- a/kvm-all.c
> > +++ b/kvm-all.c
> > @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
> > return EXCP_HLT;
> > }
> >
> > + qemu_mutex_unlock_iothread();
> > +
> > do {
> > if (env->kvm_vcpu_dirty) {
> > kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
> > @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
> > */
> > qemu_cpu_kick_self();
> > }
> > - qemu_mutex_unlock_iothread();
> >
> > run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
> >
> > - qemu_mutex_lock_iothread();
> > kvm_arch_post_run(env, run);
target-i386/kvm.c
void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
{
if (run->if_flag) {
env->eflags |= IF_MASK;
} else {
env->eflags &= ~IF_MASK;
}
cpu_set_apic_tpr(env->apic_state, run->cr8);
cpu_set_apic_base(env->apic_state, run->apic_base);
}
Clearly there is no structure to any of the writes around the writes
in x86's kvm_arch_post_run, so it is unsafe.
In kvm_flush_coalesced_mmio_buffer, however, the first and last pointers
can be read safely without the global lock (so you could move the lock
after reading run->exit_reason, in that case).
> > + /* TODO: push coalesced mmio flushing to the point where we access
> > + * devices that are using it (currently VGA and E1000). */
> > + qemu_mutex_lock_iothread();
> > kvm_flush_coalesced_mmio_buffer();
> > + qemu_mutex_unlock_iothread();
But you have to flush first to then figure out which device the
coalesced mmio belongs to (don't get that comment).
It would be good to move the global lock acquisition to as late
as possible, but acquiring/reacquiring is not very useful
(can result in a slowdown, actually).
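The empty-ring fast path hinted at here (reading the ring's first/last
indices before deciding whether to take the lock) can be sketched as
follows. The struct layout mirrors the kernel's coalesced-MMIO ring
header only loosely, and the locking calls are shown as comments; all
names are illustrative stand-ins, not QEMU's real API:

```c
#include <stdint.h>

/* Simplified stand-in for the coalesced MMIO ring header; only the two
 * index fields matter for this sketch. */
struct mmio_ring {
    uint32_t first;   /* consumer index, advanced by userspace */
    uint32_t last;    /* producer index, advanced by the kernel */
};

/* The indices can be compared without the global lock: equal indices
 * mean the ring is empty and there is nothing to flush. */
static int ring_is_empty(const struct mmio_ring *ring)
{
    return ring->first == ring->last;
}

static int flushes;   /* counts simulated flushes taken under the lock */

/* Suggested fast path: skip the lock round-trip entirely when the ring
 * is empty, and only lock around an actual flush. */
static void maybe_flush(struct mmio_ring *ring)
{
    if (ring_is_empty(ring)) {
        return;                    /* hot path: no lock taken */
    }
    /* qemu_mutex_lock_iothread(); */
    ring->first = ring->last;      /* consume all pending entries */
    flushes++;
    /* qemu_mutex_unlock_iothread(); */
}
```

The point of the sketch is only the ordering: the unlocked index read is
a cheap filter, and the lock is paid for only on the (rare) non-empty
case.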
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-23 0:22 ` Marcelo Tosatti
@ 2012-06-23 9:06 ` Marcelo Tosatti
2012-06-23 11:45 ` Jan Kiszka
2012-06-23 9:22 ` Jan Kiszka
1 sibling, 1 reply; 23+ messages in thread
From: Marcelo Tosatti @ 2012-06-23 9:06 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Alexander Graf, Avi Kivity,
Anthony Liguori
On Fri, Jun 22, 2012 at 09:22:59PM -0300, Marcelo Tosatti wrote:
> On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
> > Should have declared this [RFC] in the subject and CC'ed kvm...
> >
> > On 2012-06-23 00:45, Jan Kiszka wrote:
> > > This sketches a possible path to get rid of the iothread lock on vmexits
> > > in KVM mode. On x86, the in-kernel irqchip has to be used because we
> > > we otherwise need to synchronize APIC and other per-cpu state accesses
> > > that could be changed concurrently. Not yet fully analyzed is the NMI
> > > injection path in the absence of an APIC.
> > >
> > > s390x should be fine without specific locking as their pre/post-run
> > > callbacks are empty. Power requires locking for the pre-run callback.
> > >
> > > This patch is untested, but a similar version was successfully used in
> > > an x86 setup with a network I/O path that needed no central iothread
> > > locking anymore (required special MMIO exit handling).
> > > ---
> > > kvm-all.c | 18 ++++++++++++++++--
> > > target-i386/kvm.c | 7 +++++++
> > > target-ppc/kvm.c | 4 ++++
> > > 3 files changed, 27 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/kvm-all.c b/kvm-all.c
> > > index f8e4328..9c3e26f 100644
> > > --- a/kvm-all.c
> > > +++ b/kvm-all.c
> > > @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
> > > return EXCP_HLT;
> > > }
> > >
> > > + qemu_mutex_unlock_iothread();
> > > +
> > > do {
> > > if (env->kvm_vcpu_dirty) {
> > > kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
> > > @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
> > > */
> > > qemu_cpu_kick_self();
> > > }
> > > - qemu_mutex_unlock_iothread();
> > >
> > > run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
> > >
> > > - qemu_mutex_lock_iothread();
> > > kvm_arch_post_run(env, run);
>
> target-i386/kvm.c
>
> void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
> {
> if (run->if_flag) {
> env->eflags |= IF_MASK;
> } else {
> env->eflags &= ~IF_MASK;
> }
> cpu_set_apic_tpr(env->apic_state, run->cr8);
> cpu_set_apic_base(env->apic_state, run->apic_base);
> }
>
> Clearly there is no structure to any of the writes around the writes
> in x86's kvm_arch_post_run, so it is unsafe.
No access protocol to the CPUState and apic devices (who can write when,
who can read when).
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-23 9:06 ` Marcelo Tosatti
@ 2012-06-23 11:45 ` Jan Kiszka
2012-06-24 8:49 ` Avi Kivity
2012-06-24 13:34 ` liu ping fan
0 siblings, 2 replies; 23+ messages in thread
From: Jan Kiszka @ 2012-06-23 11:45 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Liu Ping Fan, kvm, qemu-devel, Alexander Graf, Avi Kivity,
Anthony Liguori
On 2012-06-23 11:06, Marcelo Tosatti wrote:
> On Fri, Jun 22, 2012 at 09:22:59PM -0300, Marcelo Tosatti wrote:
>> On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
>>> Should have declared this [RFC] in the subject and CC'ed kvm...
>>>
>>> On 2012-06-23 00:45, Jan Kiszka wrote:
>>>> This sketches a possible path to get rid of the iothread lock on vmexits
>>>> in KVM mode. On x86, the in-kernel irqchip has to be used because we
>>>> we otherwise need to synchronize APIC and other per-cpu state accesses
>>>> that could be changed concurrently. Not yet fully analyzed is the NMI
>>>> injection path in the absence of an APIC.
>>>>
>>>> s390x should be fine without specific locking as their pre/post-run
>>>> callbacks are empty. Power requires locking for the pre-run callback.
>>>>
>>>> This patch is untested, but a similar version was successfully used in
>>>> an x86 setup with a network I/O path that needed no central iothread
>>>> locking anymore (required special MMIO exit handling).
>>>> ---
>>>> kvm-all.c | 18 ++++++++++++++++--
>>>> target-i386/kvm.c | 7 +++++++
>>>> target-ppc/kvm.c | 4 ++++
>>>> 3 files changed, 27 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/kvm-all.c b/kvm-all.c
>>>> index f8e4328..9c3e26f 100644
>>>> --- a/kvm-all.c
>>>> +++ b/kvm-all.c
>>>> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
>>>> return EXCP_HLT;
>>>> }
>>>>
>>>> + qemu_mutex_unlock_iothread();
>>>> +
>>>> do {
>>>> if (env->kvm_vcpu_dirty) {
>>>> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
>>>> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
>>>> */
>>>> qemu_cpu_kick_self();
>>>> }
>>>> - qemu_mutex_unlock_iothread();
>>>>
>>>> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>>>>
>>>> - qemu_mutex_lock_iothread();
>>>> kvm_arch_post_run(env, run);
>>
>> target-i386/kvm.c
>>
>> void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
>> {
>> if (run->if_flag) {
>> env->eflags |= IF_MASK;
>> } else {
>> env->eflags &= ~IF_MASK;
>> }
>> cpu_set_apic_tpr(env->apic_state, run->cr8);
>> cpu_set_apic_base(env->apic_state, run->apic_base);
>> }
>>
>> Clearly there is no structure to any of the writes around the writes
>> in x86's kvm_arch_post_run, so it is unsafe.
>
> No access protocol to the CPUState and apic devices (who can write when,
> who can read when).
>
Hmm, we may need the iothread lock around cpu_set_apic_tpr for
!kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
can be put there as well.
With in-kernel irqchip, there is no such need. Also, no one accesses
eflags outside of the vcpu thread, independent of the irqchip mode.
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-23 11:45 ` Jan Kiszka
@ 2012-06-24 8:49 ` Avi Kivity
2012-06-24 14:08 ` Jan Kiszka
2012-06-24 13:34 ` liu ping fan
1 sibling, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 8:49 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, qemu-devel, Alexander Graf,
Anthony Liguori
On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>
> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
> can be put there as well.
>
> With in-kernel irqchip, there is no such need. Also, no one accesses
> eflags outside of the vcpu thread, independent of the irqchip mode.
In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
injection needs to be done atomically, but currently we check the tpr
from the injecting thread, which means the cpu thread can race with it.
We need to move the check to the vcpu thread so that the guest vcpu is
halted.
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 8:49 ` Avi Kivity
@ 2012-06-24 14:08 ` Jan Kiszka
2012-06-24 14:31 ` Avi Kivity
2012-07-06 17:16 ` Jan Kiszka
0 siblings, 2 replies; 23+ messages in thread
From: Jan Kiszka @ 2012-06-24 14:08 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, qemu-devel, Alexander Graf,
Anthony Liguori
On 2012-06-24 10:49, Avi Kivity wrote:
> On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>>
>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>> can be put there as well.
>>
>> With in-kernel irqchip, there is no such need. Also, no one accesses
>> eflags outside of the vcpu thread, independent of the irqchip mode.
>
> In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
> injection needs to be done atomically, but currently we check the tpr
> from the injecting thread, which means the cpu thread can race with it.
> We need to move the check to the vcpu thread so that the guest vcpu is
> halted.
So apic_set_irq basically needs to be deferred to vcpu context, right?
Will have a look.
Jan
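Deferring the injection to vcpu context, as discussed above, amounts to
a small per-vcpu work queue: the injecting thread records the vector
and kicks the vcpu, and the vcpu thread drains the queue at a safe
point in its run loop. The sketch below uses hypothetical names and a
bare array; real code would need proper synchronization around the
indices:

```c
#define APIC_QUEUE_LEN 8

/* Pending interrupt vectors queued by other threads for the vcpu. */
static int pending[APIC_QUEUE_LEN];
static int head, tail;
static int kicked;   /* stands in for signalling/kicking the vcpu */

/* Called from an injecting thread (under the BQL): instead of touching
 * per-cpu APIC state directly, record the vector and kick the vcpu. */
static void apic_set_irq_deferred(int vector)
{
    pending[tail++ % APIC_QUEUE_LEN] = vector;
    kicked = 1;                 /* would be a qemu_cpu_kick()-style call */
}

/* Called from the vcpu thread at a safe point, before re-entering the
 * guest; returns how many vectors were delivered. */
static int apic_drain_pending(int *delivered, int max)
{
    int n = 0;
    while (head != tail && n < max) {
        delivered[n++] = pending[head++ % APIC_QUEUE_LEN];
    }
    kicked = 0;
    return n;
}
```

This keeps all actual APIC state manipulation in the vcpu thread, which
is the property the thread is after.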
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:08 ` Jan Kiszka
@ 2012-06-24 14:31 ` Avi Kivity
2012-07-06 17:16 ` Jan Kiszka
1 sibling, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 14:31 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, Gleb Natapov, kvm, Marcelo Tosatti, qemu-devel,
Alexander Graf, Anthony Liguori
On 06/24/2012 05:08 PM, Jan Kiszka wrote:
> On 2012-06-24 10:49, Avi Kivity wrote:
>> On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>>>
>>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>>> can be put there as well.
>>>
>>> With in-kernel irqchip, there is no such need. Also, no one accesses
>>> eflags outside of the vcpu thread, independent of the irqchip mode.
>>
>> In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
>> injection needs to be done atomically, but currently we check the tpr
>> from the injecting thread, which means the cpu thread can race with it.
>> We need to move the check to the vcpu thread so that the guest vcpu is
>> halted.
>
> So apic_set_irq basically needs to be deferred to vcpu context, right?
> Will have a look.
Correct. IIRC, the kernel's 0a5fff192388d2 made the problem much worse,
but did not create it. It was either Vista or XP-64 which triggered the
problem reliably. Copying Gleb in case he remembers more.
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:08 ` Jan Kiszka
2012-06-24 14:31 ` Avi Kivity
@ 2012-07-06 17:16 ` Jan Kiszka
2012-07-06 18:06 ` Jan Kiszka
1 sibling, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-07-06 17:16 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, qemu-devel, Alexander Graf,
Anthony Liguori
On 2012-06-24 16:08, Jan Kiszka wrote:
> On 2012-06-24 10:49, Avi Kivity wrote:
>> On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>>>
>>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>>> can be put there as well.
>>>
>>> With in-kernel irqchip, there is no such need. Also, no one accesses
>>> eflags outside of the vcpu thread, independent of the irqchip mode.
>>
>> In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
>> injection needs to be done atomically, but currently we check the tpr
>> from the injecting thread, which means the cpu thread can race with it.
>> We need to move the check to the vcpu thread so that the guest vcpu is
>> halted.
>
> So apic_set_irq basically needs to be deferred to vcpu context, right?
> Will have a look.
Tried to wrap my head around this, but only found different issues
(patches under construction).
First of all, a simple run_on_cpu doesn't work as it may drop the BQL
at unexpected points inside device models.
Then I thought about what could actually race here: The testing of the
userspace TPR value under BQL vs. some modification by the CPU while in
KVM mode. So we may either inject while the CPU is trying to prevent
this - harmless as it happens on real hw as well - or not inject while
the CPU is enabling this. But the latter is quickly resolved because all
such TPR changes in userspace APIC mode are trapped and then processed
under BQL. At that point we will also reevaluate the pending interrupts
and inject what was deferred before (kvm_arch_post_run ->
cpu_set_apic_tpr -> apic_set_tpr -> apic_update_irq, or via
apic_mem_writel).
So where is a race?
Jan
--
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-07-06 17:16 ` Jan Kiszka
@ 2012-07-06 18:06 ` Jan Kiszka
2012-07-08 7:49 ` Avi Kivity
0 siblings, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-07-06 18:06 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, Gleb Natapov, kvm, Marcelo Tosatti, qemu-devel,
Alexander Graf, Anthony Liguori
On 2012-07-06 19:16, Jan Kiszka wrote:
> On 2012-06-24 16:08, Jan Kiszka wrote:
>> On 2012-06-24 10:49, Avi Kivity wrote:
>>> On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>>>>
>>>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>>>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>>>> can be put there as well.
>>>>
>>>> With in-kernel irqchip, there is no such need. Also, no one accesses
>>>> eflags outside of the vcpu thread, independent of the irqchip mode.
>>>
>>> In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
>>> injection needs to be done atomically, but currently we check the tpr
>>> from the injecting thread, which means the cpu thread can race with it.
>>> We need to move the check to the vcpu thread so that the guest vcpu is
>>> halted.
>>
>> So apic_set_irq basically needs to be deferred to vcpu context, right?
>> Will have a look.
>
> Tried to wrap my head around this, but only found different issues
> (patches under construction).
>
> First of all, a simple run_on_cpu doesn't work as it may drop the BQL
> at unexpected points inside device models.
>
> Then I thought about what could actually race here: The testing of the
> userspace TPR value under BQL vs. some modification by the CPU while in
> KVM mode. So we may either inject while the CPU is trying to prevent
> this - harmless as it happens on real hw as well - or not inject while
Hmm, this could actually be a problem as the race window is extended
beyond the instruction that raises TPR. So we may generate unexpected
spurious interrupts for the guest. And that's because the userspace APIC
fails to update the CPU interrupt pin properly when the TPR is modified.
Here is the bug...
Jan
> the CPU is enabling this. But the latter is quickly resolved because all
> such TPR changes in userspace APIC mode are trapped and then processed
> under BQL. At that point we will also reevaluate the pending interrupts
> and inject what was deferred before (kvm_arch_post_run ->
> cpu_set_apic_tpr -> apic_set_tpr -> apic_update_irq, or via
> apic_mem_writel).
>
> So where is a race?
>
> Jan
>
--
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-07-06 18:06 ` Jan Kiszka
@ 2012-07-08 7:49 ` Avi Kivity
0 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2012-07-08 7:49 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, Gleb Natapov, kvm, Marcelo Tosatti, qemu-devel,
Alexander Graf, Anthony Liguori
On 07/06/2012 09:06 PM, Jan Kiszka wrote:
> On 2012-07-06 19:16, Jan Kiszka wrote:
>> On 2012-06-24 16:08, Jan Kiszka wrote:
>>> On 2012-06-24 10:49, Avi Kivity wrote:
>>>> On 06/23/2012 02:45 PM, Jan Kiszka wrote:
>>>>>
>>>>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>>>>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>>>>> can be put there as well.
>>>>>
>>>>> With in-kernel irqchip, there is no such need. Also, no one accesses
>>>>> eflags outside of the vcpu thread, independent of the irqchip mode.
>>>>
>>>> In fact !kvm_irqchip_in_kernel() is broken wrt the tpr. Interrupt
>>>> injection needs to be done atomically, but currently we check the tpr
>>>> from the injecting thread, which means the cpu thread can race with it.
>>>> We need to move the check to the vcpu thread so that the guest vcpu is
>>>> halted.
>>>
>>> So apic_set_irq basically needs to be deferred to vcpu context, right?
>>> Will have a look.
>>
>> Tried to wrap my head around this, but only found different issues
>> (patches under construction).
>>
>> First of all, a simple run_on_cpu doesn't work as it may drop the BQL
>> at unexpected points inside device models.
>>
>> Then I thought about what could actually race here: The testing of the
>> userspace TPR value under BQL vs. some modification by the CPU while in
>> KVM mode. So we may either inject while the CPU is trying to prevent
>> this - harmless as it happens on real hw as well - or not inject while
>
> Hmm, this could actually be a problem as the race window is extended
> beyond the instruction that raises TPR. So we may generate unexpected
> spurious interrupts for the guest. And that's because the userspace APIC
> fails to update the CPU interrupt pin properly when the TPR is modified.
> Here is the bug...
Exactly.
To avoid dropping the lock we can do something similar to kvm - queue a
request to reevaluate IRR, then signal that vcpu to exit. No need to
drop the lock. Possibly a check in apic_update_irq() whether we're
running in the vcpu thread; if not, signal and return. That function's
effects are asynchronous if run outside the vcpu, so it's a safe change.
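A minimal sketch of that scheme, with the thread check and the kick
stubbed out as plain flags (all names illustrative, not QEMU's actual
functions):

```c
static int current_is_vcpu_thread;   /* real code would compare thread ids */
static int irr_eval_requested;       /* pending "reevaluate IRR" request */
static int irr_evaluations;          /* counts synchronous evaluations */

static void apic_evaluate_irr(void)
{
    irr_evaluations++;               /* real code would scan IRR vs. TPR */
}

/* apic_update_irq(): synchronous only in the vcpu thread; any other
 * caller just raises a request and returns, making the effect
 * asynchronous as described above. */
static void apic_update_irq(void)
{
    if (!current_is_vcpu_thread) {
        irr_eval_requested = 1;      /* would also signal/kick the vcpu */
        return;
    }
    apic_evaluate_irr();
}

/* Run by the vcpu thread when it notices the request on loop entry. */
static void vcpu_process_requests(void)
{
    if (irr_eval_requested) {
        irr_eval_requested = 0;
        apic_evaluate_irr();
    }
}
```

The attraction of this design is that no caller ever has to drop the
BQL: out-of-thread callers pay only a flag write and a kick.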
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-23 11:45 ` Jan Kiszka
2012-06-24 8:49 ` Avi Kivity
@ 2012-06-24 13:34 ` liu ping fan
2012-06-24 14:08 ` Jan Kiszka
1 sibling, 1 reply; 23+ messages in thread
From: liu ping fan @ 2012-06-24 13:34 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, qemu-devel, Alexander Graf,
Avi Kivity, Anthony Liguori
On Sat, Jun 23, 2012 at 7:45 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2012-06-23 11:06, Marcelo Tosatti wrote:
>> On Fri, Jun 22, 2012 at 09:22:59PM -0300, Marcelo Tosatti wrote:
>>> On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
>>>> Should have declared this [RFC] in the subject and CC'ed kvm...
>>>>
>>>> On 2012-06-23 00:45, Jan Kiszka wrote:
>>>>> This sketches a possible path to get rid of the iothread lock on vmexits
>>>>> in KVM mode. On x86, the the in-kernel irqchips has to be used because
>>>>> we otherwise need to synchronize APIC and other per-cpu state accesses
>>>>> that could be changed concurrently. Not yet fully analyzed is the NMI
>>>>> injection path in the absence of an APIC.
>>>>>
>>>>> s390x should be fine without specific locking as their pre/post-run
>>>>> callbacks are empty. Power requires locking for the pre-run callback.
>>>>>
>>>>> This patch is untested, but a similar version was successfully used in
>>>>> a x86 setup with a network I/O path that needed no central iothread
>>>>> locking anymore (required special MMIO exit handling).
>>>>> ---
>>>>> kvm-all.c | 18 ++++++++++++++++--
>>>>> target-i386/kvm.c | 7 +++++++
>>>>> target-ppc/kvm.c | 4 ++++
>>>>> 3 files changed, 27 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/kvm-all.c b/kvm-all.c
>>>>> index f8e4328..9c3e26f 100644
>>>>> --- a/kvm-all.c
>>>>> +++ b/kvm-all.c
>>>>> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
>>>>> return EXCP_HLT;
>>>>> }
>>>>>
>>>>> + qemu_mutex_unlock_iothread();
>>>>> +
>>>>> do {
>>>>> if (env->kvm_vcpu_dirty) {
>>>>> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
>>>>> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
>>>>> */
>>>>> qemu_cpu_kick_self();
>>>>> }
>>>>> - qemu_mutex_unlock_iothread();
>>>>>
>>>>> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>>>>>
>>>>> - qemu_mutex_lock_iothread();
>>>>> kvm_arch_post_run(env, run);
>>>
>>> target-i386/kvm.c
>>>
>>> void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
>>> {
>>> if (run->if_flag) {
>>> env->eflags |= IF_MASK;
>>> } else {
>>> env->eflags &= ~IF_MASK;
>>> }
>>> cpu_set_apic_tpr(env->apic_state, run->cr8);
>>> cpu_set_apic_base(env->apic_state, run->apic_base);
>>> }
>>>
>>> Clearly there is no structure to any of the writes around the writes
>>> in x86's kvm_arch_post_run, so it is unsafe.
>>
>> No access protocol to the CPUState and apic devices (who can write when,
>> who can read when).
>>
>
> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
> can be put there as well.
>
As to !kvm_irqchip_in_kernel(), I think there are quite a few fields in
CPUState (interrupt_request, eflags, halted, exit_request, exception_injected)
that we must care about. They depend heavily on env->apic_state, so we
should treat them as one atomic context, that is to say, while these
fields of env are being updated, we must not allow apic_state to change.
And that is why I introduced cpu_lock in the patch "[PATCH 2/2] kvm:
use per-cpu lock to free vcpu thread out of the big lock"
Regards,
pingfan
> With in-kernel irqchip, there is no such need. Also, no one accesses
> eflags outside of the vcpu thread, independent of the irqchip mode.
>
> Jan
>
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 13:34 ` liu ping fan
@ 2012-06-24 14:08 ` Jan Kiszka
2012-06-24 14:35 ` Avi Kivity
0 siblings, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-06-24 14:08 UTC (permalink / raw)
To: liu ping fan
Cc: Liu Ping Fan, kvm, Marcelo Tosatti, qemu-devel, Alexander Graf,
Avi Kivity, Anthony Liguori
On 2012-06-24 15:34, liu ping fan wrote:
> On Sat, Jun 23, 2012 at 7:45 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
>> On 2012-06-23 11:06, Marcelo Tosatti wrote:
>>> On Fri, Jun 22, 2012 at 09:22:59PM -0300, Marcelo Tosatti wrote:
>>>> On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
>>>>> Should have declared this [RFC] in the subject and CC'ed kvm...
>>>>>
>>>>> On 2012-06-23 00:45, Jan Kiszka wrote:
>>>>>> This sketches a possible path to get rid of the iothread lock on vmexits
>>>>>> in KVM mode. On x86, the the in-kernel irqchips has to be used because
>>>>>> we otherwise need to synchronize APIC and other per-cpu state accesses
>>>>>> that could be changed concurrently. Not yet fully analyzed is the NMI
>>>>>> injection path in the absence of an APIC.
>>>>>>
>>>>>> s390x should be fine without specific locking as their pre/post-run
>>>>>> callbacks are empty. Power requires locking for the pre-run callback.
>>>>>>
>>>>>> This patch is untested, but a similar version was successfully used in
>>>>>> a x86 setup with a network I/O path that needed no central iothread
>>>>>> locking anymore (required special MMIO exit handling).
>>>>>> ---
>>>>>> kvm-all.c | 18 ++++++++++++++++--
>>>>>> target-i386/kvm.c | 7 +++++++
>>>>>> target-ppc/kvm.c | 4 ++++
>>>>>> 3 files changed, 27 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/kvm-all.c b/kvm-all.c
>>>>>> index f8e4328..9c3e26f 100644
>>>>>> --- a/kvm-all.c
>>>>>> +++ b/kvm-all.c
>>>>>> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
>>>>>> return EXCP_HLT;
>>>>>> }
>>>>>>
>>>>>> + qemu_mutex_unlock_iothread();
>>>>>> +
>>>>>> do {
>>>>>> if (env->kvm_vcpu_dirty) {
>>>>>> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
>>>>>> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
>>>>>> */
>>>>>> qemu_cpu_kick_self();
>>>>>> }
>>>>>> - qemu_mutex_unlock_iothread();
>>>>>>
>>>>>> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>>>>>>
>>>>>> - qemu_mutex_lock_iothread();
>>>>>> kvm_arch_post_run(env, run);
>>>>
>>>> target-i386/kvm.c
>>>>
>>>> void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
>>>> {
>>>> if (run->if_flag) {
>>>> env->eflags |= IF_MASK;
>>>> } else {
>>>> env->eflags &= ~IF_MASK;
>>>> }
>>>> cpu_set_apic_tpr(env->apic_state, run->cr8);
>>>> cpu_set_apic_base(env->apic_state, run->apic_base);
>>>> }
>>>>
>>>> Clearly there is no structure to any of the writes around the writes
>>>> in x86's kvm_arch_post_run, so it is unsafe.
>>>
>>> No access protocol to the CPUState and apic devices (who can write when,
>>> who can read when).
>>>
>>
>> Hmm, we may need the iothread lock around cpu_set_apic_tpr for
>> !kvm_irqchip_in_kernel(). And as we are at it, apic_base manipulation
>> can be put there as well.
>>
> As to !kvm_irqchip_in_kernel(), I think there are quite a few fields in
> CPUState (interrupt_request, eflags, halted, exit_request, exception_injected)
> that we must care about. They depend heavily on env->apic_state, so we
> should treat them as one atomic context, that is to say, while these
> fields of env are being updated, we must not allow apic_state to change.
> And that is why I introduced cpu_lock in the patch "[PATCH 2/2] kvm:
> use per-cpu lock to free vcpu thread out of the big lock"
This per-cpu lock optimizes the slow path (!kvm_irqchip_in_kernel) at the
expense of the fast path. It's better to optimize for the in-kernel
irqchip first. One day, when the key problems are solved, we may look at
the userspace irqchip code path as well. Until then, BQL protection for
those paths is perfectly fine.
As a first step, I will post a series later that gets rid of
kvm_flush_coalesced_mmio_buffer in the common vmexit path.
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:08 ` Jan Kiszka
@ 2012-06-24 14:35 ` Avi Kivity
2012-06-24 14:40 ` Jan Kiszka
0 siblings, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 14:35 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 06/24/2012 05:08 PM, Jan Kiszka wrote:
> As a first step, I will post a series later that gets rid of
> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
If you defer this, I can think of two places that need to flush:
- anything that accesses those memory areas (such as DMA to the
framebuffer, or updating the display)
- anything that modifies the memory map and possibly changes flushed
addresses
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:35 ` Avi Kivity
@ 2012-06-24 14:40 ` Jan Kiszka
2012-06-24 14:46 ` Avi Kivity
0 siblings, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-06-24 14:40 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 2012-06-24 16:35, Avi Kivity wrote:
> On 06/24/2012 05:08 PM, Jan Kiszka wrote:
>> As a first step, I will post a series later that gets rid of
>> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
>
> If you defer this, I can think of two places that need to flush:
> - anything that accesses those memory areas (such as DMA to the
> framebuffer, or updating the display)
- anything that accesses related areas (in case of VGA: PIO accesses to
the control ports). I'm providing memory_region_set_flush_coalesced that
allows to flush on non-coalesced region accesses as well. Some PIO
accesses unfortunately still need open-coded
qemu_flush_coalesced_mmio_buffer as they do not use memory regions yet.
> - anything that modifies the memory map and possibly changes flushed
> addresses
Good point, need to address this.
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:40 ` Jan Kiszka
@ 2012-06-24 14:46 ` Avi Kivity
2012-06-24 14:51 ` Jan Kiszka
0 siblings, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 14:46 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 06/24/2012 05:40 PM, Jan Kiszka wrote:
> On 2012-06-24 16:35, Avi Kivity wrote:
>> On 06/24/2012 05:08 PM, Jan Kiszka wrote:
>>> As a first step, I will post a series later that gets rid of
>>> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
>>
>> If you defer this, I can think of two places that need to flush:
>> - anything that accesses those memory areas (such as DMA to the
>> framebuffer, or updating the display)
>
> - anything that accesses related areas (in case of VGA: PIO accesses to
> the control ports). I'm providing memory_region_set_flush_coalesced that
> allows to flush on non-coalesced region accesses as well. Some PIO
> accesses unfortunately still need open-coded
> qemu_flush_coalesced_mmio_buffer as they do not use memory regions yet.
Framebuffer access will bypass the MemoryRegionOps callbacks, did you
intend to hook those?
I'm not sure the problem is general enough to merit a check in our
generic mmio dispatch code (granted, now it has a check in the vcpu exit
path which is much worse).
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:46 ` Avi Kivity
@ 2012-06-24 14:51 ` Jan Kiszka
2012-06-24 14:56 ` Avi Kivity
0 siblings, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-06-24 14:51 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 2012-06-24 16:46, Avi Kivity wrote:
> On 06/24/2012 05:40 PM, Jan Kiszka wrote:
>> On 2012-06-24 16:35, Avi Kivity wrote:
>>> On 06/24/2012 05:08 PM, Jan Kiszka wrote:
>>>> As a first step, I will post a series later that gets rid of
>>>> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
>>>
>>> If you defer this, I can think of two places that need to flush:
>>> - anything that accesses those memory areas (such as DMA to the
>>> framebuffer, or updating the display)
>>
>> - anything that accesses related areas (in case of VGA: PIO accesses to
>> the control ports). I'm providing memory_region_set_flush_coalesced that
>> allows to flush on non-coalesced region accesses as well. Some PIO
>> accesses unfortunately still need open-coded
>> qemu_flush_coalesced_mmio_buffer as they do not use memory regions yet.
>
> Framebuffer access will bypass the MemoryRegionOps callbacks, did you
> intend to hook those?
Are there really cases where the framebuffer is accessible both via MMIO
and RAM-like mappings at the same time? If so, the current flushing on
vmexit wouldn't help either, as the direct mappings would not trigger exits.
Or what do you mean?
>
> I'm not sure the problem is general enough to merit a check in our
> generic mmio dispatch code (granted, now it has a check in the vcpu exit
> path which is much worse).
The current situation is indeed much worse.
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:51 ` Jan Kiszka
@ 2012-06-24 14:56 ` Avi Kivity
2012-06-24 14:58 ` Jan Kiszka
0 siblings, 1 reply; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 14:56 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 06/24/2012 05:51 PM, Jan Kiszka wrote:
> On 2012-06-24 16:46, Avi Kivity wrote:
>> On 06/24/2012 05:40 PM, Jan Kiszka wrote:
>>> On 2012-06-24 16:35, Avi Kivity wrote:
>>>> On 06/24/2012 05:08 PM, Jan Kiszka wrote:
>>>>> As a first step, I will post a series later that gets rid of
>>>>> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
>>>>
>>>> If you defer this, I can think of two places that need to flush:
>>>> - anything that accesses those memory areas (such as DMA to the
>>>> framebuffer, or updating the display)
>>>
>>> - anything that accesses related areas (in case of VGA: PIO accesses to
>>> the control ports). I'm providing memory_region_set_flush_coalesced that
>>> allows to flush on non-coalesced region accesses as well. Some PIO
>>> accesses unfortunately still need open-coded
>>> qemu_flush_coalesced_mmio_buffer as they do not use memory regions yet.
>>
>> Framebuffer access will bypass the MemoryRegionOps callbacks, did you
>> intend to hook those?
>
> Are there really cases where the framebuffer is accessible both via MMIO
> and RAM-like mappings at the same time? If so, the current flushing on
> vmexit wouldn't help either, as the direct mappings would not trigger exits.
> Or what do you mean?
I meant accesses by the display code to put the framebuffer in front of
the user's eyes. Those just access the memory directly. We could wrap
them with memory_region_get/put_ram_ptr() (as Xen requires anyway) and
have those issue the flush.
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:56 ` Avi Kivity
@ 2012-06-24 14:58 ` Jan Kiszka
2012-06-24 14:59 ` Avi Kivity
0 siblings, 1 reply; 23+ messages in thread
From: Jan Kiszka @ 2012-06-24 14:58 UTC (permalink / raw)
To: Avi Kivity
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 2012-06-24 16:56, Avi Kivity wrote:
> On 06/24/2012 05:51 PM, Jan Kiszka wrote:
>> On 2012-06-24 16:46, Avi Kivity wrote:
>>> On 06/24/2012 05:40 PM, Jan Kiszka wrote:
>>>> On 2012-06-24 16:35, Avi Kivity wrote:
>>>>> On 06/24/2012 05:08 PM, Jan Kiszka wrote:
>>>>>> As a first step, I will post a series later that gets rid of
>>>>>> kvm_flush_coalesced_mmio_buffer in the common vmexit path.
>>>>>
>>>>> If you defer this, I can think of two places that need to flush:
>>>>> - anything that accesses those memory areas (such as DMA to the
>>>>> framebuffer, or updating the display)
>>>>
>>>> - anything that accesses related areas (in case of VGA: PIO accesses to
>>>> the control ports). I'm providing memory_region_set_flush_coalesced that
>>>> allows to flush on non-coalesced region accesses as well. Some PIO
>>>> accesses unfortunately still need open-coded
>>>> qemu_flush_coalesced_mmio_buffer as they do not use memory regions yet.
>>>
>>> Framebuffer access will bypass the MemoryRegionOps callbacks, did you
>>> intend to hook those?
>>
>> Are there really cases where the framebuffer is accessible both via MMIO
>> and RAM-like mappings at the same time? If so, the current flushing on
>> vmexit wouldn't help either, as the direct mappings would not trigger exits.
>> Or what do you mean?
>
> I meant accesses by the display code to put the framebuffer in front of
> the user's eyes. Those just access the memory directly. We could wrap
> them with memory_region_get/put_ram_ptr() (as Xen requires anyway) and
> have those issue the flush.
That's a non-issue; we already have to flush on display updates. See
e.g. vga_update_display.
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-24 14:58 ` Jan Kiszka
@ 2012-06-24 14:59 ` Avi Kivity
0 siblings, 0 replies; 23+ messages in thread
From: Avi Kivity @ 2012-06-24 14:59 UTC (permalink / raw)
To: Jan Kiszka
Cc: Liu Ping Fan, kvm, qemu-devel, Marcelo Tosatti, Alexander Graf,
liu ping fan, Anthony Liguori
On 06/24/2012 05:58 PM, Jan Kiszka wrote:
>>> Are there really cases where the framebuffer is accessible both via MMIO
>>> and RAM-like mappings at the same time? If so, the current flushing on
>>> vmexit wouldn't help either, as the direct mappings would not trigger exits.
>>> Or what do you mean?
>>
>> I meant accesses by the display code to put the framebuffer in front of
>> the user's eyes. Those just access the memory directly. We could wrap
>> them with memory_region_get/put_ram_ptr() (as Xen requires anyway) and
>> have those issue the flush.
>
> That's a non issue, we already have to flush on display updates. See
> e.g. vga_update_display.
>
Ah, of course.
--
error compiling committee.c: too many arguments to function
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-23 0:22 ` Marcelo Tosatti
2012-06-23 9:06 ` Marcelo Tosatti
@ 2012-06-23 9:22 ` Jan Kiszka
1 sibling, 0 replies; 23+ messages in thread
From: Jan Kiszka @ 2012-06-23 9:22 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Liu Ping Fan, kvm, qemu-devel, Alexander Graf, Avi Kivity,
Anthony Liguori
On 2012-06-23 02:22, Marcelo Tosatti wrote:
> On Sat, Jun 23, 2012 at 12:55:49AM +0200, Jan Kiszka wrote:
>> Should have declared this [RFC] in the subject and CC'ed kvm...
>>
>> On 2012-06-23 00:45, Jan Kiszka wrote:
>>> This sketches a possible path to get rid of the iothread lock on vmexits
>>> in KVM mode. On x86, the the in-kernel irqchips has to be used because
>>> we otherwise need to synchronize APIC and other per-cpu state accesses
>>> that could be changed concurrently. Not yet fully analyzed is the NMI
>>> injection path in the absence of an APIC.
>>>
>>> s390x should be fine without specific locking as their pre/post-run
>>> callbacks are empty. Power requires locking for the pre-run callback.
>>>
>>> This patch is untested, but a similar version was successfully used in
>>> a x86 setup with a network I/O path that needed no central iothread
>>> locking anymore (required special MMIO exit handling).
>>> ---
>>> kvm-all.c | 18 ++++++++++++++++--
>>> target-i386/kvm.c | 7 +++++++
>>> target-ppc/kvm.c | 4 ++++
>>> 3 files changed, 27 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/kvm-all.c b/kvm-all.c
>>> index f8e4328..9c3e26f 100644
>>> --- a/kvm-all.c
>>> +++ b/kvm-all.c
>>> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
>>> return EXCP_HLT;
>>> }
>>>
>>> + qemu_mutex_unlock_iothread();
>>> +
>>> do {
>>> if (env->kvm_vcpu_dirty) {
>>> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
>>> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
>>> */
>>> qemu_cpu_kick_self();
>>> }
>>> - qemu_mutex_unlock_iothread();
>>>
>>> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>>>
>>> - qemu_mutex_lock_iothread();
>>> kvm_arch_post_run(env, run);
>
> target-i386/kvm.c
>
> void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
> {
> if (run->if_flag) {
> env->eflags |= IF_MASK;
> } else {
> env->eflags &= ~IF_MASK;
> }
> cpu_set_apic_tpr(env->apic_state, run->cr8);
> cpu_set_apic_base(env->apic_state, run->apic_base);
> }
>
> Clearly there is no structure to any of the writes around the writes
> in x86's kvm_arch_post_run, so it is unsafe.
Can't parse this yet.
None of the fields touched above should be modified outside of the vcpu
thread context (as long as that thread is inside the inner loop).
Therefore, it should be safe to run that function without any lock. Am
I missing something?
>
> In kvm_flush_coalesced_mmio_buffer, however, the first and last pointers
> can be read safely without the global lock (so you could move the lock
> after reading run->exit_reason, in that case).
>
>>> + /* TODO: push coalesced mmio flushing to the point where we access
>>> + * devices that are using it (currently VGA and E1000). */
>>> + qemu_mutex_lock_iothread();
>>> kvm_flush_coalesced_mmio_buffer();
>>> + qemu_mutex_unlock_iothread();
>
> But you have to flush first to then figure out which device the
> coalesced mmio belongs to (don't get that comment).
kvm_flush must not be called unconditionally on vmexit, that is my
point. I'm playing with the idea to tag memory regions that require
flushing (as they are coalescing themselves or logically depend on
coalesced regions). Then we would flush in the memory layer once a read
or write is about to be performed on such a region.
BTW, two more users arrived in the meantime: the G364 framebuffer and
the i82378 PCI-ISA bridge (not sure yet what requests that bridge
coalesces, if it's only VGA, but it looks a bit fishy).
Jan
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-22 22:45 [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop Jan Kiszka
2012-06-22 22:55 ` Jan Kiszka
@ 2012-06-22 22:59 ` Anthony Liguori
2012-06-23 9:11 ` Jan Kiszka
1 sibling, 1 reply; 23+ messages in thread
From: Anthony Liguori @ 2012-06-22 22:59 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Liu Ping Fan, qemu-devel, Alexander Graf
On 06/22/2012 05:45 PM, Jan Kiszka wrote:
> This sketches a possible path to get rid of the iothread lock on vmexits
> in KVM mode. On x86, the the in-kernel irqchips has to be used because
> we otherwise need to synchronize APIC and other per-cpu state accesses
> that could be changed concurrently. Not yet fully analyzed is the NMI
> injection path in the absence of an APIC.
>
> s390x should be fine without specific locking as their pre/post-run
> callbacks are empty. Power requires locking for the pre-run callback.
>
> This patch is untested, but a similar version was successfully used in
> a x86 setup with a network I/O path that needed no central iothread
> locking anymore (required special MMIO exit handling).
> ---
> kvm-all.c | 18 ++++++++++++++++--
> target-i386/kvm.c | 7 +++++++
> target-ppc/kvm.c | 4 ++++
> 3 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..9c3e26f 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
> return EXCP_HLT;
> }
>
> + qemu_mutex_unlock_iothread();
> +
> do {
> if (env->kvm_vcpu_dirty) {
> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
> */
> qemu_cpu_kick_self();
> }
> - qemu_mutex_unlock_iothread();
>
> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> - qemu_mutex_lock_iothread();
> kvm_arch_post_run(env, run);
>
> + /* TODO: push coalesced mmio flushing to the point where we access
> + * devices that are using it (currently VGA and E1000). */
> + qemu_mutex_lock_iothread();
> kvm_flush_coalesced_mmio_buffer();
> + qemu_mutex_unlock_iothread();
>
> if (run_ret< 0) {
> if (run_ret == -EINTR || run_ret == -EAGAIN) {
> @@ -1499,19 +1503,23 @@ int kvm_cpu_exec(CPUArchState *env)
> switch (run->exit_reason) {
> case KVM_EXIT_IO:
> DPRINTF("handle_io\n");
> + qemu_mutex_lock_iothread();
> kvm_handle_io(run->io.port,
> (uint8_t *)run + run->io.data_offset,
> run->io.direction,
> run->io.size,
> run->io.count);
> + qemu_mutex_unlock_iothread();
> ret = 0;
> break;
> case KVM_EXIT_MMIO:
> DPRINTF("handle_mmio\n");
> + qemu_mutex_lock_iothread();
> cpu_physical_memory_rw(run->mmio.phys_addr,
> run->mmio.data,
> run->mmio.len,
> run->mmio.is_write);
> + qemu_mutex_unlock_iothread();
> ret = 0;
> break;
> case KVM_EXIT_IRQ_WINDOW_OPEN:
> @@ -1520,7 +1528,9 @@ int kvm_cpu_exec(CPUArchState *env)
> break;
> case KVM_EXIT_SHUTDOWN:
> DPRINTF("shutdown\n");
> + qemu_mutex_lock_iothread();
> qemu_system_reset_request();
> + qemu_mutex_unlock_iothread();
> ret = EXCP_INTERRUPT;
> break;
> case KVM_EXIT_UNKNOWN:
> @@ -1533,11 +1543,15 @@ int kvm_cpu_exec(CPUArchState *env)
> break;
> default:
> DPRINTF("kvm_arch_handle_exit\n");
> + qemu_mutex_lock_iothread();
> ret = kvm_arch_handle_exit(env, run);
> + qemu_mutex_unlock_iothread();
> break;
> }
> } while (ret == 0);
>
> + qemu_mutex_lock_iothread();
> +
> if (ret< 0) {
> cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
> vm_stop(RUN_STATE_INTERNAL_ERROR);
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 0d0d8f6..0ad64d1 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1631,7 +1631,10 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
>
> /* Inject NMI */
> if (env->interrupt_request& CPU_INTERRUPT_NMI) {
Strictly speaking, wouldn't we need to use testbit() and setbit()? I would
expect that at the very least a barrier would be needed.
Looks pretty nice overall. I'll need to apply and spend some time carefully
walking through the code.
Thanks for sharing!
Regards,
Anthony Liguori
> + qemu_mutex_lock_iothread();
> env->interrupt_request&= ~CPU_INTERRUPT_NMI;
> + qemu_mutex_unlock_iothread();
> +
> DPRINTF("injected NMI\n");
> ret = kvm_vcpu_ioctl(env, KVM_NMI);
> if (ret< 0) {
> @@ -1641,6 +1644,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
> }
>
> if (!kvm_irqchip_in_kernel()) {
> + qemu_mutex_lock_iothread();
> +
> /* Force the VCPU out of its inner loop to process any INIT requests
> * or pending TPR access reports. */
> if (env->interrupt_request&
> @@ -1682,6 +1687,8 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
>
> DPRINTF("setting tpr\n");
> run->cr8 = cpu_get_apic_tpr(env->apic_state);
> +
> + qemu_mutex_unlock_iothread();
> }
> }
>
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index c09cc39..60d91a5 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -471,6 +471,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
> int r;
> unsigned irq;
>
> + qemu_mutex_lock_iothread();
> +
> /* PowerPC QEMU tracks the various core input pins (interrupt, critical
> * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
> if (!cap_interrupt_level&&
> @@ -497,6 +499,8 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
> /* We don't know if there are more interrupts pending after this. However,
> * the guest will return to userspace in the course of handling this one
> * anyways, so we will get a chance to deliver the rest. */
> +
> + qemu_mutex_unlock_iothread();
> }
>
> void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
* Re: [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop
2012-06-22 22:59 ` Anthony Liguori
@ 2012-06-23 9:11 ` Jan Kiszka
0 siblings, 0 replies; 23+ messages in thread
From: Jan Kiszka @ 2012-06-23 9:11 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Liu Ping Fan, qemu-devel, Alexander Graf
On 2012-06-23 00:59, Anthony Liguori wrote:
> On 06/22/2012 05:45 PM, Jan Kiszka wrote:
>> This sketches a possible path to get rid of the iothread lock on vmexits
>> in KVM mode. On x86, the the in-kernel irqchips has to be used because
>> we otherwise need to synchronize APIC and other per-cpu state accesses
>> that could be changed concurrently. Not yet fully analyzed is the NMI
>> injection path in the absence of an APIC.
>>
>> s390x should be fine without specific locking as their pre/post-run
>> callbacks are empty. Power requires locking for the pre-run callback.
>>
>> This patch is untested, but a similar version was successfully used in
>> a x86 setup with a network I/O path that needed no central iothread
>> locking anymore (required special MMIO exit handling).
>> ---
>> kvm-all.c | 18 ++++++++++++++++--
>> target-i386/kvm.c | 7 +++++++
>> target-ppc/kvm.c | 4 ++++
>> 3 files changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/kvm-all.c b/kvm-all.c
>> index f8e4328..9c3e26f 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -1460,6 +1460,8 @@ int kvm_cpu_exec(CPUArchState *env)
>> return EXCP_HLT;
>> }
>>
>> + qemu_mutex_unlock_iothread();
>> +
>> do {
>> if (env->kvm_vcpu_dirty) {
>> kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
>> @@ -1476,14 +1478,16 @@ int kvm_cpu_exec(CPUArchState *env)
>> */
>> qemu_cpu_kick_self();
>> }
>> - qemu_mutex_unlock_iothread();
>>
>> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>>
>> - qemu_mutex_lock_iothread();
>> kvm_arch_post_run(env, run);
>>
>> + /* TODO: push coalesced mmio flushing to the point where we
>> access
>> + * devices that are using it (currently VGA and E1000). */
>> + qemu_mutex_lock_iothread();
>> kvm_flush_coalesced_mmio_buffer();
>> + qemu_mutex_unlock_iothread();
>>
>> if (run_ret< 0) {
>> if (run_ret == -EINTR || run_ret == -EAGAIN) {
>> @@ -1499,19 +1503,23 @@ int kvm_cpu_exec(CPUArchState *env)
>> switch (run->exit_reason) {
>> case KVM_EXIT_IO:
>> DPRINTF("handle_io\n");
>> + qemu_mutex_lock_iothread();
>> kvm_handle_io(run->io.port,
>> (uint8_t *)run + run->io.data_offset,
>> run->io.direction,
>> run->io.size,
>> run->io.count);
>> + qemu_mutex_unlock_iothread();
>> ret = 0;
>> break;
>> case KVM_EXIT_MMIO:
>> DPRINTF("handle_mmio\n");
>> + qemu_mutex_lock_iothread();
>> cpu_physical_memory_rw(run->mmio.phys_addr,
>> run->mmio.data,
>> run->mmio.len,
>> run->mmio.is_write);
>> + qemu_mutex_unlock_iothread();
>> ret = 0;
>> break;
>> case KVM_EXIT_IRQ_WINDOW_OPEN:
>> @@ -1520,7 +1528,9 @@ int kvm_cpu_exec(CPUArchState *env)
>> break;
>> case KVM_EXIT_SHUTDOWN:
>> DPRINTF("shutdown\n");
>> + qemu_mutex_lock_iothread();
>> qemu_system_reset_request();
>> + qemu_mutex_unlock_iothread();
>> ret = EXCP_INTERRUPT;
>> break;
>> case KVM_EXIT_UNKNOWN:
>> @@ -1533,11 +1543,15 @@ int kvm_cpu_exec(CPUArchState *env)
>> break;
>> default:
>> DPRINTF("kvm_arch_handle_exit\n");
>> + qemu_mutex_lock_iothread();
>> ret = kvm_arch_handle_exit(env, run);
>> + qemu_mutex_unlock_iothread();
>> break;
>> }
>> } while (ret == 0);
>>
>> + qemu_mutex_lock_iothread();
>> +
>> if (ret < 0) {
>> cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
>> vm_stop(RUN_STATE_INTERNAL_ERROR);
>> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
>> index 0d0d8f6..0ad64d1 100644
>> --- a/target-i386/kvm.c
>> +++ b/target-i386/kvm.c
>> @@ -1631,7 +1631,10 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
>>
>> /* Inject NMI */
>> if (env->interrupt_request & CPU_INTERRUPT_NMI) {
>
> Strictly speaking, wouldn't we need to use testbit() and setbit()? I
> would expect in the very least a barrier would be needed.
I need to think about this as well. We have ignored it so far; I only
noticed it while hacking up this patch.
>
> Looks pretty nice overall. I'll need to apply and spend some time
> carefully walking through the code.
Without getting the coalesced mmio flushing out of the way, it does not
buy us that much yet. But I have some idea...
Jan
Thread overview: 23+ messages
2012-06-22 22:45 [Qemu-devel] [PATCH] kvm: First step to push iothread lock out of inner run loop Jan Kiszka
2012-06-22 22:55 ` Jan Kiszka
2012-06-23 0:22 ` Marcelo Tosatti
2012-06-23 9:06 ` Marcelo Tosatti
2012-06-23 11:45 ` Jan Kiszka
2012-06-24 8:49 ` Avi Kivity
2012-06-24 14:08 ` Jan Kiszka
2012-06-24 14:31 ` Avi Kivity
2012-07-06 17:16 ` Jan Kiszka
2012-07-06 18:06 ` Jan Kiszka
2012-07-08 7:49 ` Avi Kivity
2012-06-24 13:34 ` liu ping fan
2012-06-24 14:08 ` Jan Kiszka
2012-06-24 14:35 ` Avi Kivity
2012-06-24 14:40 ` Jan Kiszka
2012-06-24 14:46 ` Avi Kivity
2012-06-24 14:51 ` Jan Kiszka
2012-06-24 14:56 ` Avi Kivity
2012-06-24 14:58 ` Jan Kiszka
2012-06-24 14:59 ` Avi Kivity
2012-06-23 9:22 ` Jan Kiszka
2012-06-22 22:59 ` Anthony Liguori
2012-06-23 9:11 ` Jan Kiszka