[patch 0/3] save/restore in-progress PIO

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [patch 0/3] save/restore in-progress PIO
@ 2010-01-28 19:03 Marcelo Tosatti
  2010-01-28 19:03 ` [patch 1/3] KVM: x86: add ioctls to get/set PIO state Marcelo Tosatti
                   ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-01-28 19:03 UTC (permalink / raw)
  To: kvm; +Cc: quintela, jan.kiszka

qemu patches against uq/master.

MMIO suffers from the same problem.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-01-28 19:03 [patch 0/3] save/restore in-progress PIO Marcelo Tosatti
@ 2010-01-28 19:03 ` Marcelo Tosatti
  2010-02-04 19:16   ` Avi Kivity
  2010-01-28 19:03 ` [patch 2/3] uqmaster: save/restore pio state Marcelo Tosatti
  2010-01-28 19:03 ` [patch 3/3] uqmaster: save/restore PIO page Marcelo Tosatti
  2 siblings, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-01-28 19:03 UTC (permalink / raw)
  To: kvm; +Cc: quintela, Marcelo Tosatti

[-- Attachment #1: kernel-get-set-pio --]
[-- Type: text/plain, Size: 3748 bytes --]

A vcpu can be stopped after handling IO in userspace,
but before returning to kernel to finish processing.

Add ioctls to get/set the PIO state.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f46b79f..c1b2b8c 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -284,4 +284,18 @@ struct kvm_vcpu_events {
 	__u32 reserved[10];
 };
 
+struct kvm_pio_request {
+	__u64 guest_gva;
+	__u32 count;
+	__u32 cur_count;
+	__u16 port;
+	__u8 size;
+	__u8 in;
+	__u8 string;
+	__u8 down;
+	__u8 rep;
+	__u8 pad;
+};
+
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1522337..28f31e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -222,18 +222,6 @@ struct kvm_pv_mmu_op_buffer {
 	char buf[512] __aligned(sizeof(long));
 };
 
-struct kvm_pio_request {
-	unsigned long count;
-	int cur_count;
-	gva_t guest_gva;
-	int in;
-	int port;
-	int size;
-	int string;
-	int down;
-	int rep;
-};
-
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ac8672f..99d991a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1575,6 +1575,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_PIO:
+		r = KVM_PIO_PAGE_OFFSET;
+		break;
 	case KVM_CAP_VAPIC:
 		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
 		break;
@@ -2175,6 +2178,26 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_pio(struct kvm_vcpu *vcpu,
+				       struct kvm_pio_request *pio)
+{
+	vcpu_load(vcpu);
+	memcpy(pio, &vcpu->arch.pio, sizeof(struct kvm_pio_request));
+	vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_pio(struct kvm_vcpu *vcpu,
+				      struct kvm_pio_request *pio)
+{
+	if (!pio->string && pio->size > 4)
+		return -EINVAL;
+
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.pio, pio, sizeof(struct kvm_pio_request));
+	vcpu_put(vcpu);
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2353,6 +2376,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
 		break;
 	}
+	case KVM_SET_VCPU_PIO: {
+		struct kvm_pio_request pio;
+
+		r = -EFAULT;
+		if (copy_from_user(&pio, argp, sizeof(struct kvm_pio_request)))
+			break;
+
+		r = kvm_vcpu_ioctl_x86_set_pio(vcpu, &pio);
+		break;
+	}
+	case KVM_GET_VCPU_PIO: {
+		struct kvm_pio_request pio;
+
+		kvm_vcpu_ioctl_x86_get_pio(vcpu, &pio);
+		
+		r = -EFAULT;
+		if (copy_to_user(argp, &pio, sizeof(struct kvm_pio_request)))
+			break;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4c4937e..0b56d41 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -500,6 +500,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_HYPERV 44
 #define KVM_CAP_HYPERV_VAPIC 45
 #define KVM_CAP_HYPERV_SPIN 46
+#define KVM_CAP_PIO 47
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -686,6 +687,8 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
+#define KVM_GET_VCPU_PIO          _IOR(KVMIO,  0xa1, struct kvm_pio_request)
+#define KVM_SET_VCPU_PIO          _IOW(KVMIO,  0xa2, struct kvm_pio_request)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 



^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-01-28 19:03 ` [patch 1/3] KVM: x86: add ioctls to get/set PIO state Marcelo Tosatti
@ 2010-02-04 19:16   ` Avi Kivity
  2010-02-04 21:36     ` Marcelo Tosatti
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2010-02-04 19:16 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela

On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> A vcpu can be stopped after handling IO in userspace,
> but before returning to kernel to finish processing.
>
>    

Is this strictly needed?  If we teach qemu to migrate before executing 
the pio request, I think we'll be all right?  should work at least for 
IN/INS, not sure about OUT/OUTS.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-04 19:16   ` Avi Kivity
@ 2010-02-04 21:36     ` Marcelo Tosatti
  2010-02-04 21:46       ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-04 21:36 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela

On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
> On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> >A vcpu can be stopped after handling IO in userspace,
> >but before returning to kernel to finish processing.
> >
> 
> Is this strictly needed?  If we teach qemu to migrate before
> executing the pio request, I think we'll be all right?  should work
> at least for IN/INS, not sure about OUT/OUTS.

It would be nice (instead of more state to keep track of between
kernel/user) but the drawbacks i see are:

You'd have to add a limitation so that any IN which was processed
by device emulation has to re-entry kernel to complete it (so it
complicates vcpu stop in userspace).

And for OUTS larger than page size (== arch->pio_data size) you need to
know the current position to continue it on the destination (or roll
back the entire effect of the instruction in device emulation, and RIP).

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-04 21:36     ` Marcelo Tosatti
@ 2010-02-04 21:46       ` Avi Kivity
  2010-02-04 22:12         ` Marcelo Tosatti
  2010-02-08 22:41         ` Marcelo Tosatti
  0 siblings, 2 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-04 21:46 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela

On 02/04/2010 11:36 PM, Marcelo Tosatti wrote:
> On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
>    
>> On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
>>      
>>> A vcpu can be stopped after handling IO in userspace,
>>> but before returning to kernel to finish processing.
>>>
>>>        
>> Is this strictly needed?  If we teach qemu to migrate before
>> executing the pio request, I think we'll be all right?  should work
>> at least for IN/INS, not sure about OUT/OUTS.
>>      
> It would be nice (instead of more state to keep track of between
> kernel/user) but the drawbacks i see are:
>
> You'd have to add a limitation so that any IN which was processed
> by device emulation has to re-entry kernel to complete it (so it
> complicates vcpu stop in userspace).
>
>    

You could fix that by moving the IN emulation to before guest entry.  It 
complicates the vcpu loop a bit, but is backwards compatible and all that.

> And for OUTS larger than page size (== arch->pio_data size) you need to
> know the current position to continue it on the destination (or roll
> back the entire effect of the instruction in device emulation, and RIP).
>    

What to you mean by current position?

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-04 21:46       ` Avi Kivity
@ 2010-02-04 22:12         ` Marcelo Tosatti
  2010-02-04 22:45           ` Marcelo Tosatti
  2010-02-08 22:41         ` Marcelo Tosatti
  1 sibling, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-04 22:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela

On Thu, Feb 04, 2010 at 11:46:25PM +0200, Avi Kivity wrote:
> On 02/04/2010 11:36 PM, Marcelo Tosatti wrote:
> >On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
> >>On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> >>>A vcpu can be stopped after handling IO in userspace,
> >>>but before returning to kernel to finish processing.
> >>>
> >>Is this strictly needed?  If we teach qemu to migrate before
> >>executing the pio request, I think we'll be all right?  should work
> >>at least for IN/INS, not sure about OUT/OUTS.
> >It would be nice (instead of more state to keep track of between
> >kernel/user) but the drawbacks i see are:
> >
> >You'd have to add a limitation so that any IN which was processed
> >by device emulation has to re-entry kernel to complete it (so it
> >complicates vcpu stop in userspace).
> >
> 
> You could fix that by moving the IN emulation to before guest entry.
> It complicates the vcpu loop a bit, but is backwards compatible and
> all that.
> 
> >And for OUTS larger than page size (== arch->pio_data size) you need to
> >know the current position to continue it on the destination (or roll
> >back the entire effect of the instruction in device emulation, and RIP).
> 
> What to you mean by current position?

outs larger than PAGE_SIZE is processed in (size / PAGE_SIZE) exits to
userspace, because thats the size of the pio_data buffer, right?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-04 22:12         ` Marcelo Tosatti
@ 2010-02-04 22:45           ` Marcelo Tosatti
  0 siblings, 0 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-04 22:45 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela

On Thu, Feb 04, 2010 at 08:12:07PM -0200, Marcelo Tosatti wrote:
> > >>On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> > >>>A vcpu can be stopped after handling IO in userspace,
> > >>>but before returning to kernel to finish processing.
> > >>>
> > >>Is this strictly needed?  If we teach qemu to migrate before
> > >>executing the pio request, I think we'll be all right?  should work
> > >>at least for IN/INS, not sure about OUT/OUTS.
> > >It would be nice (instead of more state to keep track of between
> > >kernel/user) but the drawbacks i see are:
> > >
> > >You'd have to add a limitation so that any IN which was processed
> > >by device emulation has to re-entry kernel to complete it (so it
> > >complicates vcpu stop in userspace).
> > >
> > 
> > You could fix that by moving the IN emulation to before guest entry.
> > It complicates the vcpu loop a bit, but is backwards compatible and
> > all that.
> > 
> > >And for OUTS larger than page size (== arch->pio_data size) you need to
> > >know the current position to continue it on the destination (or roll
> > >back the entire effect of the instruction in device emulation, and RIP).
> > 
> > What to you mean by current position?
> 
> outs larger than PAGE_SIZE is processed in (size / PAGE_SIZE) exits to
> userspace, because thats the size of the pio_data buffer, right?

Nevermind, the count is in ecx which is migrated. OK i'll look into 
your suggestion.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-04 21:46       ` Avi Kivity
  2010-02-04 22:12         ` Marcelo Tosatti
@ 2010-02-08 22:41         ` Marcelo Tosatti
  2010-02-09  6:38           ` Avi Kivity
  1 sibling, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-08 22:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela

On Thu, Feb 04, 2010 at 11:46:25PM +0200, Avi Kivity wrote:
> On 02/04/2010 11:36 PM, Marcelo Tosatti wrote:
> >On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
> >>On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> >>>A vcpu can be stopped after handling IO in userspace,
> >>>but before returning to kernel to finish processing.
> >>>
> >>Is this strictly needed?  If we teach qemu to migrate before
> >>executing the pio request, I think we'll be all right?  should work
> >>at least for IN/INS, not sure about OUT/OUTS.
> >It would be nice (instead of more state to keep track of between
> >kernel/user) but the drawbacks i see are:
> >
> >You'd have to add a limitation so that any IN which was processed
> >by device emulation has to re-entry kernel to complete it (so it
> >complicates vcpu stop in userspace).
> >
> 
> You could fix that by moving the IN emulation to before guest entry.
> It complicates the vcpu loop a bit, but is backwards compatible and
> all that.

Under such scheme, to avoid a stream of IN's from temporarily blocking
vcpu stop capability, you'd have to requeue a signal to stop the vcpu
(so the next IN in the stream is not executed, but complete_pio does).

Or not process the stop signal in the first place (new state for main
loop, "pending pio/mmio").

Or even just copy the result from QEMU device to RAX in userspace, which
is somewhat nasty since you'd have either userspace or kernel finishing
the op.

For REP OUTS larger than page size, the current position is held in RCX,
but complete_pio uses vcpu->arch.pio.cur_count and count to hold the
position. So you either make it possible to writeback vcpu->arch.pio
to the kernel, or wait for the operation to finish (with similar
complications regarding signal processing).

As i see it, the benefit of backward compatibility is not worthwhile
compared to the complications introduced to vcpu loop processing (and
potential for damaging vcpu stop -> vcpu stopped latency).

Are you certain its worth avoiding the restore ioctl for pio/mmio?

> >And for OUTS larger than page size (== arch->pio_data size) you need to
> >know the current position to continue it on the destination (or roll
> >back the entire effect of the instruction in device emulation, and RIP).
> 
> What to you mean by current position?
> 
> -- 
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-08 22:41         ` Marcelo Tosatti
@ 2010-02-09  6:38           ` Avi Kivity
  2010-02-09 18:23             ` Marcelo Tosatti
  2010-02-09 20:58             ` qemu-kvm: do not allow vcpu stop with in progress PIO Marcelo Tosatti
  0 siblings, 2 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-09  6:38 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela

On 02/09/2010 12:41 AM, Marcelo Tosatti wrote:
> On Thu, Feb 04, 2010 at 11:46:25PM +0200, Avi Kivity wrote:
>    
>> On 02/04/2010 11:36 PM, Marcelo Tosatti wrote:
>>      
>>> On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
>>>        
>>>> On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
>>>>          
>>>>> A vcpu can be stopped after handling IO in userspace,
>>>>> but before returning to kernel to finish processing.
>>>>>
>>>>>            
>>>> Is this strictly needed?  If we teach qemu to migrate before
>>>> executing the pio request, I think we'll be all right?  should work
>>>> at least for IN/INS, not sure about OUT/OUTS.
>>>>          
>>> It would be nice (instead of more state to keep track of between
>>> kernel/user) but the drawbacks i see are:
>>>
>>> You'd have to add a limitation so that any IN which was processed
>>> by device emulation has to re-entry kernel to complete it (so it
>>> complicates vcpu stop in userspace).
>>>
>>>        
>> You could fix that by moving the IN emulation to before guest entry.
>> It complicates the vcpu loop a bit, but is backwards compatible and
>> all that.
>>      
> Under such scheme, to avoid a stream of IN's from temporarily blocking
> vcpu stop capability, you'd have to requeue a signal to stop the vcpu
> (so the next IN in the stream is not executed, but complete_pio does).
>
> Or not process the stop signal in the first place (new state for main
> loop, "pending pio/mmio").
>    

Why?  you would handle stops exactly the same way:

vcpu_loop:
      while running:
          process_last_in()
          run_vcpu()
          handle_exit_except_in()

An IN that is stopped would simply be unprocessed, and the next entry, 
if at a new host, will simply re-execute it.

> Or even just copy the result from QEMU device to RAX in userspace, which
> is somewhat nasty since you'd have either userspace or kernel finishing
> the op.
>    

Definitely bad.

> For REP OUTS larger than page size, the current position is held in RCX,
> but complete_pio uses vcpu->arch.pio.cur_count and count to hold the
> position. So you either make it possible to writeback vcpu->arch.pio
> to the kernel, or wait for the operation to finish (with similar
> complications regarding signal processing).
>    

RCX is always consistent, no?  So if we migrate in the middle of REP 
OUTS, the operation will restart at the correct place?

> As i see it, the benefit of backward compatibility is not worthwhile
> compared to the complications introduced to vcpu loop processing (and
> potential for damaging vcpu stop ->  vcpu stopped latency).
>
> Are you certain its worth avoiding the restore ioctl for pio/mmio?
>    

First, let's see if it's feasible or not.  If it's feasible, it's 
probably just a matter of rearranging things to get userspace sane.  A 
small price to pay for backward compatibility.


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 1/3] KVM: x86: add ioctls to get/set PIO state
  2010-02-09  6:38           ` Avi Kivity
@ 2010-02-09 18:23             ` Marcelo Tosatti
  2010-02-09 20:58             ` qemu-kvm: do not allow vcpu stop with in progress PIO Marcelo Tosatti
  1 sibling, 0 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-09 18:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela

On Tue, Feb 09, 2010 at 08:38:56AM +0200, Avi Kivity wrote:
> On 02/09/2010 12:41 AM, Marcelo Tosatti wrote:
> >On Thu, Feb 04, 2010 at 11:46:25PM +0200, Avi Kivity wrote:
> >>On 02/04/2010 11:36 PM, Marcelo Tosatti wrote:
> >>>On Thu, Feb 04, 2010 at 09:16:47PM +0200, Avi Kivity wrote:
> >>>>On 01/28/2010 09:03 PM, Marcelo Tosatti wrote:
> >>>>>A vcpu can be stopped after handling IO in userspace,
> >>>>>but before returning to kernel to finish processing.
> >>>>>
> >>>>Is this strictly needed?  If we teach qemu to migrate before
> >>>>executing the pio request, I think we'll be all right?  should work
> >>>>at least for IN/INS, not sure about OUT/OUTS.
> >>>It would be nice (instead of more state to keep track of between
> >>>kernel/user) but the drawbacks i see are:
> >>>
> >>>You'd have to add a limitation so that any IN which was processed
> >>>by device emulation has to re-entry kernel to complete it (so it
> >>>complicates vcpu stop in userspace).
> >>>
> >>You could fix that by moving the IN emulation to before guest entry.
> >>It complicates the vcpu loop a bit, but is backwards compatible and
> >>all that.
> >Under such scheme, to avoid a stream of IN's from temporarily blocking
> >vcpu stop capability, you'd have to requeue a signal to stop the vcpu
> >(so the next IN in the stream is not executed, but complete_pio does).
> >
> >Or not process the stop signal in the first place (new state for main
> >loop, "pending pio/mmio").
> 
> Why?  you would handle stops exactly the same way:
> 
> vcpu_loop:
>      while running:
>          process_last_in()
>          run_vcpu()
>          handle_exit_except_in()
> 
> An IN that is stopped would simply be unprocessed, and the next
> entry, if at a new host, will simply re-execute it.

Its not so simple.

The kernel advances RIP before exiting to userspace with EXIT_IO (for
IN). So simply skipping an IN exit is not possible.

In the case of an IN, you have to make sure kernel re-entry is performed
(to complete the operation). This is what complicates vcpu stop (you
need a new state which says "do not stop vcpu, re-enter kernel first").

And then you must re-raise the stop signal before entering the kernel.

Does that make sense?

> >Or even just copy the result from QEMU device to RAX in userspace, which
> >is somewhat nasty since you'd have either userspace or kernel finishing
> >the op.
> 
> Definitely bad.
> 
> >For REP OUTS larger than page size, the current position is held in RCX,
> >but complete_pio uses vcpu->arch.pio.cur_count and count to hold the
> >position. So you either make it possible to writeback vcpu->arch.pio
> >to the kernel, or wait for the operation to finish (with similar
> >complications regarding signal processing).
> 
> RCX is always consistent, no?  So if we migrate in the middle of REP
> OUTS, the operation will restart at the correct place?

On a second though, yeah, the state held in vcpu->arch.pio will be 
reinstatiated on the destination with updates values from RCX.

> >As i see it, the benefit of backward compatibility is not worthwhile
> >compared to the complications introduced to vcpu loop processing (and
> >potential for damaging vcpu stop ->  vcpu stopped latency).
> >
> >Are you certain its worth avoiding the restore ioctl for pio/mmio?
> 
> First, let's see if it's feasible or not.  If it's feasible, it's
> probably just a matter of rearranging things to get userspace sane.
> A small price to pay for backward compatibility.
> 
> 
> -- 
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-09  6:38           ` Avi Kivity
  2010-02-09 18:23             ` Marcelo Tosatti
@ 2010-02-09 20:58             ` Marcelo Tosatti
  2010-02-10  7:02               ` Avi Kivity
  2010-02-18 13:24               ` qemu-kvm: do not allow vcpu stop with in progress PIO Avi Kivity
  1 sibling, 2 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-09 20:58 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela, Gleb Natapov


You're right... this should be enough to avoid a stop with uncomplete
PIO (and this is what happens for MMIO already). The signal will not
be dequeued, so KVM will complete_pio and exit before entering with
-EAGAIN. Please review and queue for stable.

qemu upstream needs a bit more work.

-------

Re-enter the kernel to complete in progress PIO. Otherwise the 
operation can be lost during migration.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm/qemu-kvm.c
===================================================================
--- qemu-kvm.orig/qemu-kvm.c
+++ qemu-kvm/qemu-kvm.c
@@ -967,6 +967,7 @@ int kvm_run(CPUState *env)
                                 run->io.direction,
                                 run->io.size,
                                 run->io.count);
+            r = 0;
             break;
         case KVM_EXIT_DEBUG:
             r = handle_debug(env);

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-09 20:58             ` qemu-kvm: do not allow vcpu stop with in progress PIO Marcelo Tosatti
@ 2010-02-10  7:02               ` Avi Kivity
  2010-02-10 16:25                 ` Marcelo Tosatti
  2010-02-18 13:24               ` qemu-kvm: do not allow vcpu stop with in progress PIO Avi Kivity
  1 sibling, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2010-02-10  7:02 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela, Gleb Natapov

On 02/09/2010 10:58 PM, Marcelo Tosatti wrote:
> You're right... this should be enough to avoid a stop with uncomplete
> PIO (and this is what happens for MMIO already). The signal will not
> be dequeued, so KVM will complete_pio and exit before entering with
> -EAGAIN. Please review and queue for stable.
>
>    

Not right enough.  This is very fragile, we depend on the kernel 
noticing the signal after completing pio but before starting execution.  
I don't think we guarantee that.

Maybe we should turn complete_pio/complete_mmio to an ioctl, so that we 
can control what happens exactly.  Or maybe it's simplest to document it 
as a feature and guarantee it.  There's some merit in it - only guest 
execution is the nonatomic part, so we only interrupt that.

> qemu upstream needs a bit more work.
>    

Could be as simple as raising a blocked exception that is unmasked by 
kvm, then entering the guest.

> -------
>
> Re-enter the kernel to complete in progress PIO. Otherwise the
> operation can be lost during migration.
>
> Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
>
> Index: qemu-kvm/qemu-kvm.c
> ===================================================================
> --- qemu-kvm.orig/qemu-kvm.c
> +++ qemu-kvm/qemu-kvm.c
> @@ -967,6 +967,7 @@ int kvm_run(CPUState *env)
>                                   run->io.direction,
>                                   run->io.size,
>                                   run->io.count);
> +            r = 0;
>               break;
>           case KVM_EXIT_DEBUG:
>               r = handle_debug(env);
>    


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10  7:02               ` Avi Kivity
@ 2010-02-10 16:25                 ` Marcelo Tosatti
  2010-02-10 16:40                   ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-10 16:25 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela, Gleb Natapov

On Wed, Feb 10, 2010 at 09:02:00AM +0200, Avi Kivity wrote:
> On 02/09/2010 10:58 PM, Marcelo Tosatti wrote:
> >You're right... this should be enough to avoid a stop with uncomplete
> >PIO (and this is what happens for MMIO already). The signal will not
> >be dequeued, so KVM will complete_pio and exit before entering with
> >-EAGAIN. Please review and queue for stable.
> >
> 
> Not right enough.  This is very fragile, we depend on the kernel
> noticing the signal after completing pio but before starting
> execution.  I don't think we guarantee that.

As long as the signal is blocked, we do (and for older kernels too).

> Maybe we should turn complete_pio/complete_mmio to an ioctl, so that
> we can control what happens exactly.  Or maybe it's simplest to
> document it as a feature and guarantee it.  There's some merit in it
> - only guest execution is the nonatomic part, so we only interrupt
> that.

Right. So would you like a patch to x86.c to comment on this, on top of
complete_pio / mmio completion?

> >qemu upstream needs a bit more work.
> 
> Could be as simple as raising a blocked exception that is unmasked
> by kvm, then entering the guest.

The vcpu inner loop is not atomic in upstream as it is in qemu-kvm. It
breaks out to process pending events way too easily.

> 
> >-------
> >
> >Re-enter the kernel to complete in progress PIO. Otherwise the
> >operation can be lost during migration.
> >
> >Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
> >
> >Index: qemu-kvm/qemu-kvm.c
> >===================================================================
> >--- qemu-kvm.orig/qemu-kvm.c
> >+++ qemu-kvm/qemu-kvm.c
> >@@ -967,6 +967,7 @@ int kvm_run(CPUState *env)
> >                                  run->io.direction,
> >                                  run->io.size,
> >                                  run->io.count);
> >+            r = 0;
> >              break;
> >          case KVM_EXIT_DEBUG:
> >              r = handle_debug(env);
> 
> 
> -- 
> Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 16:25                 ` Marcelo Tosatti
@ 2010-02-10 16:40                   ` Avi Kivity
  2010-02-10 16:52                     ` Alexander Graf
  2010-02-13 18:10                     ` KVM: add doc note about PIO/MMIO completion API Marcelo Tosatti
  0 siblings, 2 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-10 16:40 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela, Gleb Natapov, Alexander Graf

On 02/10/2010 06:25 PM, Marcelo Tosatti wrote:
> On Wed, Feb 10, 2010 at 09:02:00AM +0200, Avi Kivity wrote:
>    
>> On 02/09/2010 10:58 PM, Marcelo Tosatti wrote:
>>      
>>> You're right... this should be enough to avoid a stop with uncomplete
>>> PIO (and this is what happens for MMIO already). The signal will not
>>> be dequeued, so KVM will complete_pio and exit before entering with
>>> -EAGAIN. Please review and queue for stable.
>>>
>>>        
>> Not right enough.  This is very fragile, we depend on the kernel
>> noticing the signal after completing pio but before starting
>> execution.  I don't think we guarantee that.
>>      
> As long as the signal is blocked, we do (and for older kernels too).
>    

I meant, that was not an intentional part of the design, but rather a 
side effect of the implementation.  We can pretend it was all part of a 
master plan and document it, though.

>> Maybe we should turn complete_pio/complete_mmio to an ioctl, so that
>> we can control what happens exactly.  Or maybe it's simplest to
>> document it as a feature and guarantee it.  There's some merit in it
>> - only guest execution is the nonatomic part, so we only interrupt
>> that.
>>      
> Right. So would you like a patch to x86.c to comment on this, on top of
> complete_pio / mmio completion?
>    

Documentation/kvm/api.txt.  Note it's not x86 specific.  Alex, can you 
check if ppc complies?

>>> qemu upstream needs a bit more work.
>>>        
>> Could be as simple as raising a blocked exception that is unmasked
>> by kvm, then entering the guest.
>>      
> The vcpu inner loop is not atomic in upstream as it is in qemu-kvm. It
> breaks out to process pending events way too easily.
>    

Hmm.  We can add an explicit call to KVM_RUN.

Note we need to loop there.  My 16-byte mmio patches (which never saw 
the light of day) split 16-byte mmios into two 8-byte mmios issued back 
to back, and we have to be prepared for that eventuality.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 16:40                   ` Avi Kivity
@ 2010-02-10 16:52                     ` Alexander Graf
  2010-02-10 17:01                       ` Avi Kivity
  2010-02-13 18:10                     ` KVM: add doc note about PIO/MMIO completion API Marcelo Tosatti
  1 sibling, 1 reply; 28+ messages in thread
From: Alexander Graf @ 2010-02-10 16:52 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, quintela, Gleb Natapov

Avi Kivity wrote:
> On 02/10/2010 06:25 PM, Marcelo Tosatti wrote:
>> On Wed, Feb 10, 2010 at 09:02:00AM +0200, Avi Kivity wrote:
>>   
>>> On 02/09/2010 10:58 PM, Marcelo Tosatti wrote:
>>>     
>>>> You're right... this should be enough to avoid a stop with uncomplete
>>>> PIO (and this is what happens for MMIO already). The signal will not
>>>> be dequeued, so KVM will complete_pio and exit before entering with
>>>> -EAGAIN. Please review and queue for stable.
>>>>
>>>>        
>>> Not right enough.  This is very fragile, we depend on the kernel
>>> noticing the signal after completing pio but before starting
>>> execution.  I don't think we guarantee that.
>>>      
>> As long as the signal is blocked, we do (and for older kernels too).
>>    
>
> I meant, that was not an intentional part of the design, but rather a
> side effect of the implementation.  We can pretend it was all part of
> a master plan and document it, though.
>
>>> Maybe we should turn complete_pio/complete_mmio to an ioctl, so that
>>> we can control what happens exactly.  Or maybe it's simplest to
>>> document it as a feature and guarantee it.  There's some merit in it
>>> - only guest execution is the nonatomic part, so we only interrupt
>>> that.
>>>      
>> Right. So would you like a patch to x86.c to comment on this, on top of
>> complete_pio / mmio completion?
>>    
>
> Documentation/kvm/api.txt.  Note it's not x86 specific.  Alex, can you
> check if ppc complies?

Hrm, trying to read the thread I'm still somewhat lost. What exactly do
you want to document?


Alex

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 16:52                     ` Alexander Graf
@ 2010-02-10 17:01                       ` Avi Kivity
  2010-02-10 17:03                         ` Alexander Graf
  2010-02-10 17:07                         ` Gleb Natapov
  0 siblings, 2 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-10 17:01 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Marcelo Tosatti, kvm, quintela, Gleb Natapov

On 02/10/2010 06:52 PM, Alexander Graf wrote:
>
> Hrm, trying to read the thread I'm still somewhat lost. What exactly do
> you want to document?
>
>    

The problem: if KVM_RUN exits with KVM_EXIT_MMIO or KVM_EXIT_IO, then 
the internal state is inconsistent.  The instruction is only half 
completed, and we need to reissue KVM_RUN to complete it.

However, if we're migrating, then we don't want to execute any more 
guest code.  Luckily, if you KVM_RUN with a pending signal, then the 
pending mmio or io will be completed, and then, if the pending signal is 
unmasked in kvm's signal mask, KVM_RUN will exit immediately.

I would like to document the fact that the signal check happens between 
the mmio completion and guest entry, and the above sequence as a way to 
get consistent state after mmio.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 17:01                       ` Avi Kivity
@ 2010-02-10 17:03                         ` Alexander Graf
  2010-02-10 17:08                           ` Avi Kivity
  2010-02-10 17:07                         ` Gleb Natapov
  1 sibling, 1 reply; 28+ messages in thread
From: Alexander Graf @ 2010-02-10 17:03 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, quintela, Gleb Natapov

Avi Kivity wrote:
> On 02/10/2010 06:52 PM, Alexander Graf wrote:
>>
>> Hrm, trying to read the thread I'm still somewhat lost. What exactly do
>> you want to document?
>>
>>    
>
> The problem: if KVM_RUN exits with KVM_EXIT_MMIO or KVM_EXIT_IO, then
> the internal state is inconsistent.  The instruction is only half
> completed, and we need to reissue KVM_RUN to complete it.
>
> However, if we're migrating, then we don't want to execute any more
> guest code.  Luckily, if you KVM_RUN with a pending signal, then the
> pending mmio or io will be completed, and then, if the pending signal
> is unmasked in kvm's signal mask, KVM_RUN will exit immediately.
>
> I would like to document the fact that the signal check happens
> between the mmio completion and guest entry, and the above sequence as
> a way to get consistent state after mmio.
>

I see. Yes, that works for PPC Book3S too. We check for signals on the
beginning of vcpu_run. I'm not sure about BookE though.

Either way - wouldn't it make more sense to just move the check to
generic code?


Alex

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 17:03                         ` Alexander Graf
@ 2010-02-10 17:08                           ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-10 17:08 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Marcelo Tosatti, kvm, quintela, Gleb Natapov

On 02/10/2010 07:03 PM, Alexander Graf wrote:
>
> I see. Yes, that works for PPC Book3S too. We check for signals on the
> beginning of vcpu_run. I'm not sure about BookE though.
>
> Either way - wouldn't it make more sense to just move the check to
> generic code?
>    

Well, the check happens very deep in x86 code (vcpu_enter_guest(), 
during the critical section switching into guest mode).  It should be 
possible to refactor it into a skeleton that does this (and checks 
vcpu->requests etc.) and calls arch code to do the actual work.  A small 
gain for a lot of churn, but maybe worthwhile.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 17:01                       ` Avi Kivity
  2010-02-10 17:03                         ` Alexander Graf
@ 2010-02-10 17:07                         ` Gleb Natapov
  2010-02-10 17:08                           ` Avi Kivity
  1 sibling, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2010-02-10 17:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Alexander Graf, Marcelo Tosatti, kvm, quintela

On Wed, Feb 10, 2010 at 07:01:46PM +0200, Avi Kivity wrote:
> On 02/10/2010 06:52 PM, Alexander Graf wrote:
> >
> >Hrm, trying to read the thread I'm still somewhat lost. What exactly do
> >you want to document?
> >
> 
> The problem: if KVM_RUN exits with KVM_EXIT_MMIO or KVM_EXIT_IO,
> then the internal state is inconsistent.  The instruction is only
> half completed, and we need to reissue KVM_RUN to complete it.
> 
> However, if we're migrating, then we don't want to execute any more
> guest code.  Luckily, if you KVM_RUN with a pending signal, then the
> pending mmio or io will be completed, and then, if the pending
> signal is unmasked in kvm's signal mask, KVM_RUN will exit
> immediately.
> 
Can we be sure that pending signal checking will not be introduces
somewhere in ioctl generic code before kvm specific code is called?

> I would like to document the fact that the signal check happens
> between the mmio completion and guest entry, and the above sequence
> as a way to get consistent state after mmio.
> 
> -- 
> error compiling committee.c: too many arguments to function

--
			Gleb.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-10 17:07                         ` Gleb Natapov
@ 2010-02-10 17:08                           ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-10 17:08 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Alexander Graf, Marcelo Tosatti, kvm, quintela

On 02/10/2010 07:07 PM, Gleb Natapov wrote:
> On Wed, Feb 10, 2010 at 07:01:46PM +0200, Avi Kivity wrote:
>    
>> On 02/10/2010 06:52 PM, Alexander Graf wrote:
>>      
>>> Hrm, trying to read the thread I'm still somewhat lost. What exactly do
>>> you want to document?
>>>
>>>        
>> The problem: if KVM_RUN exits with KVM_EXIT_MMIO or KVM_EXIT_IO,
>> then the internal state is inconsistent.  The instruction is only
>> half completed, and we need to reissue KVM_RUN to complete it.
>>
>> However, if we're migrating, then we don't want to execute any more
>> guest code.  Luckily, if you KVM_RUN with a pending signal, then the
>> pending mmio or io will be completed, and then, if the pending
>> signal is unmasked in kvm's signal mask, KVM_RUN will exit
>> immediately.
>>
>>      
> Can we be sure that pending signal checking will not be introduces
> somewhere in ioctl generic code before kvm specific code is called?
>    

I think only blocking syscalls and ioctls can be expected to return -EINTR.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* KVM: add doc note about PIO/MMIO completion API
  2010-02-10 16:40                   ` Avi Kivity
  2010-02-10 16:52                     ` Alexander Graf
@ 2010-02-13 18:10                     ` Marcelo Tosatti
  2010-02-14  8:17                       ` Avi Kivity
  1 sibling, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-02-13 18:10 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, quintela, Gleb Natapov, Alexander Graf


How's that? Feel free to upgrade it.

------


Document that partially emulated instructions leave the guest state
unconsistent, and that the kernel must complete operations before
checking for pending signals.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index c6416a3..ec0e72b 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -820,6 +820,11 @@ executed a memory-mapped I/O instruction which could not be satisfied
 by kvm.  The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
 
+NOTE: For KVM_EXIT_IO and KVM_EXIT_MMIO, the corresponding operations
+are complete (and guest state is consistent) only after userspace has
+re-entered the kernel with KVM_RUN. The kernel side must first finish
+uncomplete operations and then check for pending signals.
+
 		/* KVM_EXIT_HYPERCALL */
 		struct {
 			__u64 nr;

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: KVM: add doc note about PIO/MMIO completion API
  2010-02-13 18:10                     ` KVM: add doc note about PIO/MMIO completion API Marcelo Tosatti
@ 2010-02-14  8:17                       ` Avi Kivity
  2010-02-17  9:22                         ` Avi Kivity
  0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2010-02-14  8:17 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela, Gleb Natapov, Alexander Graf

On 02/13/2010 08:10 PM, Marcelo Tosatti wrote:
> How's that? Feel free to upgrade it.
>
> ------
>
>
> Document that partially emulated instructions leave the guest state
> unconsistent, and that the kernel must complete operations before
> checking for pending signals.
>
> Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
>
> diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
> index c6416a3..ec0e72b 100644
> --- a/Documentation/kvm/api.txt
> +++ b/Documentation/kvm/api.txt
> @@ -820,6 +820,11 @@ executed a memory-mapped I/O instruction which could not be satisfied
>   by kvm.  The 'data' member contains the written data if 'is_write' is
>   true, and should be filled by application code otherwise.
>
> +NOTE: For KVM_EXIT_IO and KVM_EXIT_MMIO, the corresponding operations
> +are complete (and guest state is consistent) only after userspace has
> +re-entered the kernel with KVM_RUN. The kernel side must first finish
> +uncomplete operations and then check for pending signals.
> +
>    

Well, s/must/will/, the document is written from userspace's point of view.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: KVM: add doc note about PIO/MMIO completion API
  2010-02-14  8:17                       ` Avi Kivity
@ 2010-02-17  9:22                         ` Avi Kivity
  0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-17  9:22 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela, Gleb Natapov, Alexander Graf

On 02/14/2010 10:17 AM, Avi Kivity wrote:
>> --- a/Documentation/kvm/api.txt
>> +++ b/Documentation/kvm/api.txt
>> @@ -820,6 +820,11 @@ executed a memory-mapped I/O instruction which 
>> could not be satisfied
>>   by kvm.  The 'data' member contains the written data if 'is_write' is
>>   true, and should be filled by application code otherwise.
>>
>> +NOTE: For KVM_EXIT_IO and KVM_EXIT_MMIO, the corresponding operations
>> +are complete (and guest state is consistent) only after userspace has
>> +re-entered the kernel with KVM_RUN. The kernel side must first finish
>> +uncomplete operations and then check for pending signals.
>> +
>
>
> Well, s/must/will/, the document is written from userspace's point of 
> view.
>

Applied with this change, thanks.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: qemu-kvm: do not allow vcpu stop with in progress PIO
  2010-02-09 20:58             ` qemu-kvm: do not allow vcpu stop with in progress PIO Marcelo Tosatti
  2010-02-10  7:02               ` Avi Kivity
@ 2010-02-18 13:24               ` Avi Kivity
  1 sibling, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2010-02-18 13:24 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela, Gleb Natapov

On 02/09/2010 10:58 PM, Marcelo Tosatti wrote:
> You're right... this should be enough to avoid a stop with uncomplete
> PIO (and this is what happens for MMIO already). The signal will not
> be dequeued, so KVM will complete_pio and exit before entering with
> -EAGAIN. Please review and queue for stable.
>
> qemu upstream needs a bit more work.
>
> -------
>
> Re-enter the kernel to complete in progress PIO. Otherwise the
> operation can be lost during migration.
>
>    

Thanks - applied to master and 0.12.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 2/3] uqmaster: save/restore pio state
  2010-01-28 19:03 [patch 0/3] save/restore in-progress PIO Marcelo Tosatti
  2010-01-28 19:03 ` [patch 1/3] KVM: x86: add ioctls to get/set PIO state Marcelo Tosatti
@ 2010-01-28 19:03 ` Marcelo Tosatti
  2010-01-28 19:03 ` [patch 3/3] uqmaster: save/restore PIO page Marcelo Tosatti
  2 siblings, 0 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-01-28 19:03 UTC (permalink / raw)
  To: kvm; +Cc: quintela, Marcelo Tosatti

[-- Attachment #1: kvm-get-put-pio --]
[-- Type: text/plain, Size: 6012 bytes --]

Save/restore in-kernel KVM PIO state. This is necessary to allow
migration with in-progress PIO operation.

FIXME: adjust CPUState and VMState version.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm/target-i386/cpu.h
===================================================================
--- qemu-kvm.orig/target-i386/cpu.h
+++ qemu-kvm/target-i386/cpu.h
@@ -570,6 +570,18 @@ typedef struct {
     uint64_t mask;
 } MTRRVar;
 
+typedef struct {
+    uint64_t guest_gva;
+    uint32_t count;
+    uint32_t cur_count;
+    uint16_t port;
+    uint8_t size;
+    uint8_t in;
+    uint8_t string;
+    uint8_t down;
+    uint8_t rep;
+} KVMPIOState;
+
 #define CPU_NB_REGS64 16
 #define CPU_NB_REGS32 8
 
@@ -702,7 +714,9 @@ typedef struct CPUX86State {
     uint8_t has_error_code;
     uint32_t sipi_vector;
     uint32_t cpuid_kvm_features;
-    
+
+    KVMPIOState kvm_pio;
+
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
Index: qemu-kvm/target-i386/kvm.c
===================================================================
--- qemu-kvm.orig/target-i386/kvm.c
+++ qemu-kvm/target-i386/kvm.c
@@ -289,6 +289,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
     env->interrupt_injected = -1;
     env->nmi_injected = 0;
     env->nmi_pending = 0;
+    memset(&env->kvm_pio, 0, sizeof(KVMPIOState));
 }
 
 static int kvm_has_msr_star(CPUState *env)
@@ -837,6 +838,57 @@ static int kvm_get_vcpu_events(CPUState 
     return 0;
 }
 
+static int kvm_put_pio(CPUState *env)
+{
+#ifdef KVM_CAP_PIO
+    struct kvm_pio_request pio;
+
+    if (!kvm_has_vcpu_pio())
+        return 0;
+
+    pio.count = env->kvm_pio.count;
+    pio.cur_count = env->kvm_pio.cur_count;
+    pio.guest_gva = env->kvm_pio.guest_gva;
+    pio.in = env->kvm_pio.in;
+    pio.port = env->kvm_pio.port;
+    pio.size = env->kvm_pio.size;
+    pio.string = env->kvm_pio.string;
+    pio.down = env->kvm_pio.down;
+    pio.rep = env->kvm_pio.rep;
+
+    return kvm_vcpu_ioctl(env, KVM_SET_VCPU_PIO, &pio);
+#else
+    return 0;
+#endif
+}
+
+static int kvm_get_pio(CPUState *env)
+{
+#ifdef KVM_CAP_PIO
+    struct kvm_pio_request pio;
+    int ret;
+
+    if (!kvm_has_vcpu_pio())
+        return 0;
+
+    ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_PIO, &pio);
+    if (ret < 0)
+        return ret;
+
+    env->kvm_pio.count = pio.count;
+    env->kvm_pio.cur_count = pio.cur_count;
+    env->kvm_pio.guest_gva = pio.guest_gva;
+    env->kvm_pio.in = pio.in;
+    env->kvm_pio.port = pio.port;
+    env->kvm_pio.size = pio.size;
+    env->kvm_pio.string = pio.string;
+    env->kvm_pio.down = pio.down;
+    env->kvm_pio.rep = pio.rep;
+#endif
+
+    return 0;
+}
+
 int kvm_arch_put_registers(CPUState *env)
 {
     int ret;
@@ -865,6 +917,10 @@ int kvm_arch_put_registers(CPUState *env
     if (ret < 0)
         return ret;
 
+    ret = kvm_put_pio(env);
+    if (ret < 0)
+        return ret;
+
     return 0;
 }
 
@@ -896,6 +952,10 @@ int kvm_arch_get_registers(CPUState *env
     if (ret < 0)
         return ret;
 
+    ret = kvm_get_pio(env);
+    if (ret < 0)
+        return ret;
+
     return 0;
 }
 
Index: qemu-kvm/target-i386/machine.c
===================================================================
--- qemu-kvm.orig/target-i386/machine.c
+++ qemu-kvm/target-i386/machine.c
@@ -62,6 +62,34 @@ static const VMStateDescription vmstate_
 #define VMSTATE_MTRR_VARS(_field, _state, _n, _v)                    \
     VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar)
 
+static const VMStateDescription vmstate_kvm_pio = {
+    .name = "kvm_pio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT64(guest_gva, KVMPIOState),
+        VMSTATE_UINT32(count, KVMPIOState),
+        VMSTATE_UINT32(cur_count, KVMPIOState),
+        VMSTATE_UINT8(in, KVMPIOState),
+        VMSTATE_UINT16(port, KVMPIOState),
+        VMSTATE_UINT8(size, KVMPIOState),
+        VMSTATE_UINT8(string, KVMPIOState),
+        VMSTATE_UINT8(down, KVMPIOState),
+        VMSTATE_UINT8(rep, KVMPIOState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#define VMSTATE_KVM_PIO(_field, _state, _version) {                 \
+    .name       = (stringify(_field)),                              \
+    .version_id = _version,                                         \
+    .size       = sizeof(KVMPIOState),                              \
+    .vmsd       = &vmstate_kvm_pio,                                 \
+    .flags      = VMS_STRUCT,                                       \
+    .offset     = vmstate_offset_value(_state, _field, KVMPIOState),\
+}
+
 static void put_fpreg_error(QEMUFile *f, void *opaque, size_t size)
 {
     fprintf(stderr, "call put_fpreg() with invalid arguments\n");
@@ -464,6 +492,9 @@ static const VMStateDescription vmstate_
         /* KVM pvclock msr */
         VMSTATE_UINT64_V(system_time_msr, CPUState, 11),
         VMSTATE_UINT64_V(wall_clock_msr, CPUState, 11),
+
+        VMSTATE_KVM_PIO(kvm_pio, CPUState, 11),
+
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
     }
Index: qemu-kvm/kvm.h
===================================================================
--- qemu-kvm.orig/kvm.h
+++ qemu-kvm/kvm.h
@@ -48,6 +48,7 @@ int kvm_set_migration_log(int enable);
 
 int kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
+int kvm_has_vcpu_pio(void);
 
 void kvm_setup_guest_memory(void *start, size_t size);
 
Index: qemu-kvm/kvm-all.c
===================================================================
--- qemu-kvm.orig/kvm-all.c
+++ qemu-kvm/kvm-all.c
@@ -881,6 +881,17 @@ int kvm_has_sync_mmu(void)
 #endif
 }
 
+int kvm_has_vcpu_pio(void)
+{
+#ifdef KVM_CAP_PIO
+    KVMState *s = kvm_state;
+
+    return kvm_check_extension(s, KVM_CAP_PIO);
+#else
+    return 0;
+#endif
+}
+
 int kvm_has_vcpu_events(void)
 {
     return kvm_state->vcpu_events;



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 3/3] uqmaster: save/restore PIO page
  2010-01-28 19:03 [patch 0/3] save/restore in-progress PIO Marcelo Tosatti
  2010-01-28 19:03 ` [patch 1/3] KVM: x86: add ioctls to get/set PIO state Marcelo Tosatti
  2010-01-28 19:03 ` [patch 2/3] uqmaster: save/restore pio state Marcelo Tosatti
@ 2010-01-28 19:03 ` Marcelo Tosatti
  2010-01-28 20:24   ` Anthony Liguori
  2 siblings, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2010-01-28 19:03 UTC (permalink / raw)
  To: kvm; +Cc: quintela, Marcelo Tosatti

[-- Attachment #1: kvm-save-pio-page --]
[-- Type: text/plain, Size: 3217 bytes --]

KVM uses a page mapped by userspace as a buffer for PIO.

Save/restore it.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm/target-i386/cpu.h
===================================================================
--- qemu-kvm.orig/target-i386/cpu.h
+++ qemu-kvm/target-i386/cpu.h
@@ -716,6 +716,7 @@ typedef struct CPUX86State {
     uint32_t cpuid_kvm_features;
 
     KVMPIOState kvm_pio;
+    uint8_t kvm_pio_page[4096];
 
     /* in order to simplify APIC support, we leave this pointer to the
        user */
Index: qemu-kvm/target-i386/machine.c
===================================================================
--- qemu-kvm.orig/target-i386/machine.c
+++ qemu-kvm/target-i386/machine.c
@@ -350,6 +350,8 @@ static void cpu_pre_save(void *opaque)
     int i;
 
     cpu_synchronize_state(env);
+    if (kvm_enabled())
+        kvm_get_pio_page(env);
 
     /* FPU */
     env->fpus_vmstate = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
@@ -378,6 +380,9 @@ static int cpu_post_load(void *opaque, i
     CPUState *env = opaque;
     int i;
 
+    if (kvm_enabled())
+        kvm_put_pio_page(env);
+
     /* XXX: restore FPU round state */
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
     env->fpus = env->fpus_vmstate & ~0x3800;
@@ -494,6 +499,7 @@ static const VMStateDescription vmstate_
         VMSTATE_UINT64_V(wall_clock_msr, CPUState, 11),
 
         VMSTATE_KVM_PIO(kvm_pio, CPUState, 11),
+        VMSTATE_UINT8_ARRAY_V(kvm_pio_page, CPUState, TARGET_PAGE_SIZE, 11),
 
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
Index: qemu-kvm/kvm.h
===================================================================
--- qemu-kvm.orig/kvm.h
+++ qemu-kvm/kvm.h
@@ -132,6 +132,9 @@ uint32_t kvm_arch_get_supported_cpuid(CP
                                       int reg);
 void kvm_cpu_synchronize_state(CPUState *env);
 
+void kvm_put_pio_page(CPUState *env);
+void kvm_get_pio_page(CPUState *env);
+
 /* generic hooks - to be moved/refactored once there are more users */
 
 static inline void cpu_synchronize_state(CPUState *env)
Index: qemu-kvm/target-i386/kvm.c
===================================================================
--- qemu-kvm.orig/target-i386/kvm.c
+++ qemu-kvm/target-i386/kvm.c
@@ -889,6 +889,39 @@ static int kvm_get_pio(CPUState *env)
     return 0;
 }
 
+static int kvm_pio_page_offset(CPUState *env)
+{
+    int ret = 0;
+
+#ifdef KVM_CAP_PIO
+    ret = kvm_check_extension(env->kvm_state, KVM_CAP_PIO);
+#endif
+
+    return ret;
+}
+
+void kvm_put_pio_page(CPUState *env)
+{
+    uint8_t *pio_page;
+    int pio_page_off = kvm_pio_page_offset(env);
+
+    if (pio_page_off) {
+        pio_page = (uint8_t *)env->kvm_run + (pio_page_off * TARGET_PAGE_SIZE);
+        memcpy(pio_page, env->kvm_pio_page, TARGET_PAGE_SIZE);
+    }
+}
+
+void kvm_get_pio_page(CPUState *env)
+{
+    uint8_t *pio_page;
+    int pio_page_off = kvm_pio_page_offset(env);
+
+    if (pio_page_off) {
+        pio_page = (uint8_t *)env->kvm_run + (pio_page_off * TARGET_PAGE_SIZE);
+        memcpy(env->kvm_pio_page, pio_page, TARGET_PAGE_SIZE);
+    }
+}
+
 int kvm_arch_put_registers(CPUState *env)
 {
     int ret;



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] uqmaster: save/restore PIO page
  2010-01-28 19:03 ` [patch 3/3] uqmaster: save/restore PIO page Marcelo Tosatti
@ 2010-01-28 20:24   ` Anthony Liguori
  2010-01-28 21:10     ` Marcelo Tosatti
  0 siblings, 1 reply; 28+ messages in thread
From: Anthony Liguori @ 2010-01-28 20:24 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, quintela

On 01/28/2010 01:03 PM, Marcelo Tosatti wrote:
> KVM uses a page mapped by userspace as a buffer for PIO.
>
> Save/restore it.
>
> Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
>
> Index: qemu-kvm/target-i386/cpu.h
> ===================================================================
> --- qemu-kvm.orig/target-i386/cpu.h
> +++ qemu-kvm/target-i386/cpu.h
> @@ -716,6 +716,7 @@ typedef struct CPUX86State {
>       uint32_t cpuid_kvm_features;
>
>       KVMPIOState kvm_pio;
> +    uint8_t kvm_pio_page[4096];
>    

TARGET_PAGE_SIZE?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] uqmaster: save/restore PIO page
  2010-01-28 20:24   ` Anthony Liguori
@ 2010-01-28 21:10     ` Marcelo Tosatti
  0 siblings, 0 replies; 28+ messages in thread
From: Marcelo Tosatti @ 2010-01-28 21:10 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm, quintela

On Thu, Jan 28, 2010 at 02:24:28PM -0600, Anthony Liguori wrote:
> On 01/28/2010 01:03 PM, Marcelo Tosatti wrote:
> >KVM uses a page mapped by userspace as a buffer for PIO.
> >
> >Save/restore it.
> >
> >Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
> >
> >Index: qemu-kvm/target-i386/cpu.h
> >===================================================================
> >--- qemu-kvm.orig/target-i386/cpu.h
> >+++ qemu-kvm/target-i386/cpu.h
> >@@ -716,6 +716,7 @@ typedef struct CPUX86State {
> >      uint32_t cpuid_kvm_features;
> >
> >      KVMPIOState kvm_pio;
> >+    uint8_t kvm_pio_page[4096];
> 
> TARGET_PAGE_SIZE?

Yep, will do on next respin.


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2010-02-18 13:24 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-28 19:03 [patch 0/3] save/restore in-progress PIO Marcelo Tosatti
2010-01-28 19:03 ` [patch 1/3] KVM: x86: add ioctls to get/set PIO state Marcelo Tosatti
2010-02-04 19:16   ` Avi Kivity
2010-02-04 21:36     ` Marcelo Tosatti
2010-02-04 21:46       ` Avi Kivity
2010-02-04 22:12         ` Marcelo Tosatti
2010-02-04 22:45           ` Marcelo Tosatti
2010-02-08 22:41         ` Marcelo Tosatti
2010-02-09  6:38           ` Avi Kivity
2010-02-09 18:23             ` Marcelo Tosatti
2010-02-09 20:58             ` qemu-kvm: do not allow vcpu stop with in progress PIO Marcelo Tosatti
2010-02-10  7:02               ` Avi Kivity
2010-02-10 16:25                 ` Marcelo Tosatti
2010-02-10 16:40                   ` Avi Kivity
2010-02-10 16:52                     ` Alexander Graf
2010-02-10 17:01                       ` Avi Kivity
2010-02-10 17:03                         ` Alexander Graf
2010-02-10 17:08                           ` Avi Kivity
2010-02-10 17:07                         ` Gleb Natapov
2010-02-10 17:08                           ` Avi Kivity
2010-02-13 18:10                     ` KVM: add doc note about PIO/MMIO completion API Marcelo Tosatti
2010-02-14  8:17                       ` Avi Kivity
2010-02-17  9:22                         ` Avi Kivity
2010-02-18 13:24               ` qemu-kvm: do not allow vcpu stop with in progress PIO Avi Kivity
2010-01-28 19:03 ` [patch 2/3] uqmaster: save/restore pio state Marcelo Tosatti
2010-01-28 19:03 ` [patch 3/3] uqmaster: save/restore PIO page Marcelo Tosatti
2010-01-28 20:24   ` Anthony Liguori
2010-01-28 21:10     ` Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox