From: David Gibson <david@gibson.dropbear.id.au>
To: "Cédric Le Goater" <clg@kaod.org>
Cc: kvm-ppc@vger.kernel.org, Paul Mackerras <paulus@samba.org>,
kvm@vger.kernel.org
Subject: Re: [RFC PATCH v4 17/17] KVM: PPC: Book3S HV: XIVE: introduce a 'release' device operation
Date: Mon, 15 Apr 2019 13:32:19 +1000 [thread overview]
Message-ID: <20190415033219.GC32705@umbus.fritz.box> (raw)
In-Reply-To: <20190409141347.3029-2-clg@kaod.org>
[-- Attachment #1: Type: text/plain, Size: 7109 bytes --]
On Tue, Apr 09, 2019 at 04:13:47PM +0200, Cédric Le Goater wrote:
> When the VM boots, the CAS negotiation process determines which
> interrupt mode to use and invokes a machine reset. At that time, any
> links to the previous KVM interrupt device should be 'destroyed'
> before the new chosen one is created.
>
> To perform the necessary cleanups in KVM, we extend the KVM device
> interface with a new 'release' operation which is called when the file
> descriptor of the device is closed.
>
> Such operations are defined for the XICS-on-XIVE and the XIVE native
> KVM devices. They clear the vCPU interrupt presenters that could be
> attached and then destroy the device.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> include/linux/kvm_host.h | 1 +
> arch/powerpc/kvm/book3s_xive.c | 50 +++++++++++++++++++++++++--
> arch/powerpc/kvm/book3s_xive_native.c | 23 ++++++++++++
> virt/kvm/kvm_main.c | 13 +++++++
> 4 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 831d963451d8..3b444620d8fc 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1246,6 +1246,7 @@ struct kvm_device_ops {
> long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
> unsigned long arg);
> int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
> + void (*release)(struct kvm_device *dev);
> };
>
> void kvm_device_get(struct kvm_device *dev);
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index 4d4e1730de84..ba777db849d7 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -1100,11 +1100,19 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
> void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
> {
> struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> - struct kvmppc_xive *xive = xc->xive;
> + struct kvmppc_xive *xive;
> int i;
>
> + if (!kvmppc_xics_enabled(vcpu))
> + return;
> +
> + if (!xc)
> + return;
> +
> pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
>
> + xive = xc->xive;
> +
> /* Ensure no interrupt is still routed to that VP */
> xc->valid = false;
> kvmppc_xive_disable_vcpu_interrupts(vcpu);
> @@ -1141,6 +1149,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
> }
> /* Free the VP */
> kfree(xc);
> +
> + /* Cleanup the vcpu */
> + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
> + vcpu->arch.xive_vcpu = NULL;
> }
>
> int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
> @@ -1158,7 +1170,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
> }
> if (xive->kvm != vcpu->kvm)
> return -EPERM;
> - if (vcpu->arch.irq_type)
> + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
> return -EBUSY;
> if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
> pr_devel("Duplicate !\n");
> @@ -1855,6 +1867,39 @@ static void kvmppc_xive_free(struct kvm_device *dev)
> kfree(dev);
> }
>
> +static void kvmppc_xive_release(struct kvm_device *dev)
> +{
> + struct kvmppc_xive *xive = dev->private;
> + struct kvm *kvm = xive->kvm;
> + struct kvm_vcpu *vcpu;
> + int i;
> +
> + pr_devel("Releasing xive device\n");
> +
> + /*
> + * When releasing the KVM device fd, the vCPUs can still be
> + * running and we should clean up the vCPU interrupt
> + * presenters first.
> + */
> + if (atomic_read(&kvm->online_vcpus) != 0) {
What prevents online_vcpus from becoming non-zero after this test, but
before the kvmppc_xive_free()?
Is the test actually necessary? The operations below should be safe
even if there are no online cpus, yes?
> + /*
> + * call kick_all_cpus_sync() to ensure that all CPUs
> + * have executed any pending interrupts
> + */
> + if (is_kvmppc_hv_enabled(kvm))
> + kick_all_cpus_sync();
> +
> + /*
> + * TODO: There is still a race window with the early
> + * checks in kvmppc_native_connect_vcpu()
> + */
That's... not reassuring. What are the consequences of that race, and
what do you plan to do about it?
> + kvm_for_each_vcpu(i, vcpu, kvm)
> + kvmppc_xive_cleanup_vcpu(vcpu);
> + }
> +
> + kvmppc_xive_free(dev);
> +}
> +
> struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
> {
> struct kvmppc_xive *xive;
> @@ -2043,6 +2088,7 @@ struct kvm_device_ops kvm_xive_ops = {
> .name = "kvm-xive",
> .create = kvmppc_xive_create,
> .init = kvmppc_xive_init,
> + .release = kvmppc_xive_release,
> .destroy = kvmppc_xive_free,
> .set_attr = xive_set_attr,
> .get_attr = xive_get_attr,
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> index 092db0efe628..629da7bf2a89 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -996,6 +996,28 @@ static void kvmppc_xive_native_free(struct kvm_device *dev)
> kfree(dev);
> }
>
> +static void kvmppc_xive_native_release(struct kvm_device *dev)
> +{
> + struct kvmppc_xive *xive = dev->private;
> + struct kvm *kvm = xive->kvm;
> + struct kvm_vcpu *vcpu;
> + int i;
> +
> + pr_devel("Releasing xive native device\n");
> +
> + /*
> + * When releasing the KVM device fd, the vCPUs can still be
> + * running and we should clean up the vCPU interrupt
> + * presenters first.
> + */
> + if (atomic_read(&kvm->online_vcpus) != 0) {
Likewise here.
> + kvm_for_each_vcpu(i, vcpu, kvm)
> + kvmppc_xive_native_cleanup_vcpu(vcpu);
> + }
> +
> + kvmppc_xive_native_free(dev);
> +}
> +
> static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
> {
> struct kvmppc_xive *xive;
> @@ -1187,6 +1209,7 @@ struct kvm_device_ops kvm_xive_native_ops = {
> .name = "kvm-xive-native",
> .create = kvmppc_xive_native_create,
> .init = kvmppc_xive_native_init,
> + .release = kvmppc_xive_native_release,
> .destroy = kvmppc_xive_native_free,
> .set_attr = kvmppc_xive_native_set_attr,
> .get_attr = kvmppc_xive_native_get_attr,
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index ea2018ae1cd7..ea2619d5ca98 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2938,6 +2938,19 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
> struct kvm_device *dev = filp->private_data;
> struct kvm *kvm = dev->kvm;
>
> + if (!dev)
> + return -ENODEV;
> +
> + if (dev->kvm != kvm)
> + return -EPERM;
> +
> + if (dev->ops->release) {
> + mutex_lock(&kvm->lock);
> + list_del(&dev->vm_node);
> + dev->ops->release(dev);
> + mutex_unlock(&kvm->lock);
> + }
> +
Wasn't there a big comment that explained that release replaced
destroy somewhere?
> kvm_put_kvm(kvm);
> return 0;
> }
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2019-04-15 3:32 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-20 8:37 [PATCH v4 00/17] KVM: PPC: Book3S HV: add XIVE native exploitation mode Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 01/17] powerpc/xive: add OPAL extensions for the XIVE native exploitation support Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 02/17] KVM: PPC: Book3S HV: add a new KVM device for the XIVE native exploitation mode Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 03/17] KVM: PPC: Book3S HV: XIVE: introduce a new capability KVM_CAP_PPC_IRQ_XIVE Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 04/17] KVM: PPC: Book3S HV: XIVE: add a control to initialize a source Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 05/17] KVM: PPC: Book3S HV: XIVE: add a control to configure " Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 06/17] KVM: PPC: Book3S HV: XIVE: add controls for the EQ configuration Cédric Le Goater
2019-03-20 23:09 ` David Gibson
2019-03-21 8:48 ` Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 07/17] KVM: PPC: Book3S HV: XIVE: add a global reset control Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 08/17] KVM: PPC: Book3S HV: XIVE: add a control to sync the sources Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 09/17] KVM: PPC: Book3S HV: XIVE: add a control to dirty the XIVE EQ pages Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 10/17] KVM: PPC: Book3S HV: XIVE: add get/set accessors for the VP XIVE state Cédric Le Goater
2019-04-09 6:19 ` Paul Mackerras
2019-04-09 9:18 ` Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 11/17] KVM: introduce a 'mmap' method for KVM devices Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 12/17] KVM: PPC: Book3S HV: XIVE: add a TIMA mapping Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 13/17] KVM: PPC: Book3S HV: XIVE: add a mapping for the source ESB pages Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 14/17] KVM: PPC: Book3S HV: XIVE: add passthrough support Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 15/17] KVM: PPC: Book3S HV: XIVE: activate XIVE exploitation mode Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 16/17] KVM: introduce a KVM_DESTROY_DEVICE ioctl Cédric Le Goater
2019-04-09 14:12 ` Cédric Le Goater
2019-03-20 8:37 ` [PATCH v4 17/17] KVM: PPC: Book3S HV: XIVE: clear the vCPU interrupt presenters Cédric Le Goater
2019-04-09 14:13 ` [RFC PATCH v4.1 16/17] KVM: PPC: Book3S HV: XIVE: introduce a xive_devices array under the VM Cédric Le Goater
2019-04-09 14:13 ` [RFC PATCH v4 17/17] KVM: PPC: Book3S HV: XIVE: introduce a 'release' device operation Cédric Le Goater
2019-04-15 3:32 ` David Gibson [this message]
2019-04-15 9:25 ` Paul Mackerras
2019-04-15 13:56 ` Cédric Le Goater
2019-04-15 13:48 ` Cédric Le Goater
2019-04-17 2:05 ` David Gibson
2019-04-15 3:26 ` [RFC PATCH v4.1 16/17] KVM: PPC: Book3S HV: XIVE: introduce a xive_devices array under the VM David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190415033219.GC32705@umbus.fritz.box \
--to=david@gibson.dropbear.id.au \
--cc=clg@kaod.org \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox