All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pierre Morel <pmorel@linux.ibm.com>
To: Matthew Rosato <mjrosato@linux.ibm.com>, qemu-s390x@nongnu.org
Cc: farman@linux.ibm.com, kvm@vger.kernel.org,
	schnelle@linux.ibm.com, cohuck@redhat.com,
	richard.henderson@linaro.org, thuth@redhat.com,
	qemu-devel@nongnu.org, pasic@linux.ibm.com,
	alex.williamson@redhat.com, mst@redhat.com, pbonzini@redhat.com,
	david@redhat.com, borntraeger@linux.ibm.com
Subject: Re: [PATCH 07/12] s390x/pci: enable for load/store intepretation
Date: Wed, 15 Dec 2021 08:44:35 +0100	[thread overview]
Message-ID: <69925992-e6a0-5536-8190-00a435de72f0@linux.ibm.com> (raw)
In-Reply-To: <20211207210425.150923-8-mjrosato@linux.ibm.com>



On 12/7/21 22:04, Matthew Rosato wrote:
> Use the associated vfio feature ioctl to enable interpretation for devices
> when requested.  As part of this process, we must use the host function
> handle rather than a QEMU-generated one -- this is provided as part of the
> ioctl payload.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   hw/s390x/s390-pci-bus.c          | 69 +++++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-inst.c         | 63 ++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-vfio.c         | 55 +++++++++++++++++++++++++
>   include/hw/s390x/s390-pci-bus.h  |  1 +
>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>   5 files changed, 201 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 01b58ebc70..451bd32d92 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
> @@ -971,12 +971,57 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
>       }
>   }
>   
> +static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
> +{
> +    uint32_t idx;
> +    int rc;
> +
> +    rc = s390_pci_probe_interp(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    /*
> +     * The host device is in an enabled state, but the device must
> +     * begin as disabled for the guest so mask off the enable bit
> +     * from the passthrough handle.
> +     */

I think you should explain why the device must be seen disabled for the 
guest.
AFAIU it is because we need the guest to issue a CLP_ENABLE for us to 
activate interpretation.
This is due to the choice of activate/deactivate interpretation during 
device enable/disable.

Just curious: why not activate/deactivate interpretation on plug/unplug?

> +    pbdev->fh &= ~FH_MASK_ENABLE;
> +
> +    /* Next, see if the idx is already in-use */
> +    idx = pbdev->fh & FH_MASK_INDEX;
> +    if (pbdev->idx != idx) {
> +        if (s390_pci_find_dev_by_idx(s, idx)) {
> +            return -EINVAL;
> +        }
> +        /*
> +         * Update the idx entry with the passed through idx
> +         * If the relinquised idx is lower than next_idx, use it
> +         * to replace next_idx
> +         */
> +        g_hash_table_remove(s->zpci_table, &pbdev->idx);
> +        if (idx < s->next_idx) {
> +            s->next_idx = idx;
> +        }
> +        pbdev->idx = idx;
> +        g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
> +    }
> +
> +    return 0;
> +}
> +
>   static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>                                 Error **errp)
>   {
>       S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
>       PCIDevice *pdev = NULL;
>       S390PCIBusDevice *pbdev = NULL;
> +    int rc;
>   
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
>           PCIBridge *pb = PCI_BRIDGE(dev);
> @@ -1022,12 +1067,33 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>           set_pbdev_info(pbdev);
>   
>           if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> -            pbdev->fh |= FH_SHM_VFIO;
> +            /*
> +             * By default, interpretation is always requested; if the available
> +             * facilities indicate it is not available, fallback to the
> +             * intercept model.
> +             */
> +            if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
> +                    DPRINTF("zPCI interpretation facilities missing.\n");
> +                    pbdev->interp = false;
> +                }
> +            if (pbdev->interp) {
> +                rc = s390_pci_interp_plug(s, pbdev);
> +                if (rc) {
> +                    error_setg(errp, "zpci interp plug failed: %d", rc);
> +                    return;
> +                }
> +            }
>               pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
>               /* Fill in CLP information passed via the vfio region */
>               s390_pci_get_clp_info(pbdev);
> +            if (!pbdev->interp) {
> +                /* Do vfio passthrough but intercept for I/O */
> +                pbdev->fh |= FH_SHM_VFIO;
> +            }
>           } else {
>               pbdev->fh |= FH_SHM_EMUL;
> +            /* Always intercept emulated devices */
> +            pbdev->interp = false;
>           }
>   
>           if (s390_pci_msix_init(pbdev)) {
> @@ -1360,6 +1426,7 @@ static Property s390_pci_device_properties[] = {
>       DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
>       DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
>       DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
> +    DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> index 0cef7fbace..ba4017474e 100644
> --- a/hw/s390x/s390-pci-inst.c
> +++ b/hw/s390x/s390-pci-inst.c
> @@ -18,6 +18,7 @@
>   #include "sysemu/hw_accel.h"
>   #include "hw/s390x/s390-pci-inst.h"
>   #include "hw/s390x/s390-pci-bus.h"
> +#include "hw/s390x/s390-pci-vfio.h"
>   #include "hw/s390x/tod.h"
>   
>   #ifndef DEBUG_S390PCI_INST
> @@ -156,6 +157,47 @@ out:
>       return rc;
>   }
>   
> +static int clp_enable_interp(S390PCIBusDevice *pbdev)
> +{
> +    int rc;
> +
> +    rc = s390_pci_set_interp(pbdev, true);
> +    if (rc) {
> +        DPRINTF("Failed to enable interpretation\n");
> +        return rc;
> +    }
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        DPRINTF("Failed to update passthrough fh\n");
> +        return rc;
> +    }
> +    if (!(pbdev->fh & FH_MASK_ENABLE)) {
> +        DPRINTF("Passthrough handle is not enabled\n");
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +static int clp_disable_interp(S390PCIBusDevice *pbdev)
> +{
> +    int rc;
> +
> +    rc = s390_pci_set_interp(pbdev, false);
> +    if (rc) {
> +        DPRINTF("Failed to disable interpretation\n");
> +        return rc;
> +    }
> +
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        DPRINTF("Failed to update passthrough fh\n");
> +        return rc;
> +    }
> +
> +    return 0;
> +}
> +
>   int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>   {
>       ClpReqHdr *reqh;
> @@ -246,7 +288,19 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>                   goto out;
>               }
>   
> -            pbdev->fh |= FH_MASK_ENABLE;
> +            /*
> +             * If interpretation is specified, attempt to enable this now and
> +             * update with the host fh
> +             */
> +            if (pbdev->interp) {
> +                if (clp_enable_interp(pbdev)) {
> +                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
> +                    goto out;
> +                }
> +            } else {
> +                pbdev->fh |= FH_MASK_ENABLE;
> +            }
> +
>               pbdev->state = ZPCI_FS_ENABLED;
>               stl_p(&ressetpci->fh, pbdev->fh);
>               stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
> @@ -257,6 +311,13 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>                   goto out;
>               }
>               device_legacy_reset(DEVICE(pbdev));
> +            if (pbdev->interp) {
> +                if (clp_disable_interp(pbdev)) {
> +                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
> +                    goto out;
> +                }
> +            }
> +            /* Mask off the enabled bit for interpreted devices too */
>               pbdev->fh &= ~FH_MASK_ENABLE;
>               pbdev->state = ZPCI_FS_DISABLED;
>               stl_p(&ressetpci->fh, pbdev->fh);
> diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
> index 6f80a47e29..78093aaac7 100644
> --- a/hw/s390x/s390-pci-vfio.c
> +++ b/hw/s390x/s390-pci-vfio.c
> @@ -97,6 +97,61 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
>       }
>   }
>   
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
> +{
> +    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
> +    struct vfio_device_feature feat = {
> +        .argsz = sizeof(struct vfio_device_feature),
> +        .flags = VFIO_DEVICE_FEATURE_PROBE + VFIO_DEVICE_FEATURE_ZPCI_INTERP
> +    };
> +
> +    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, &feat);
> +}
> +
> +int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
> +{
> +    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
> +    g_autofree struct vfio_device_feature *feat;
> +    struct vfio_device_zpci_interp *data;
> +    int size;
> +
> +    size = sizeof(*feat) + sizeof(*data);
> +    feat = g_malloc0(size);
> +    feat->argsz = size;
> +    feat->flags = VFIO_DEVICE_FEATURE_SET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;

I personaly do not like the use of "+" instead of "|" to act on bitmap 
or flags. But... yes, my opinion.

> +
> +    data = (struct vfio_device_zpci_interp *)&feat->data;
> +    if (enable) {
> +        data->flags = VFIO_DEVICE_ZPCI_FLAG_INTERP;
> +    } else {
> +        data->flags = 0;
> +    }
> +
> +    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
> +}
> +
> +int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
> +{
> +    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
> +    g_autofree struct vfio_device_feature *feat;
> +    struct vfio_device_zpci_interp *data;
> +    int size, rc;
> +
> +    size = sizeof(*feat) + sizeof(*data);
> +    feat = g_malloc0(size);
> +    feat->argsz = size;
> +    feat->flags = VFIO_DEVICE_FEATURE_GET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
> +
> +    rc = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    data = (struct vfio_device_zpci_interp *)&feat->data;
> +    pbdev->fh = data->fh;
> +    return 0;
> +}
> +
>   static void s390_pci_read_base(S390PCIBusDevice *pbdev,
>                                  struct vfio_device_info *info)
>   {
> diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
> index da3cde2bb4..a9843dfe97 100644
> --- a/include/hw/s390x/s390-pci-bus.h
> +++ b/include/hw/s390x/s390-pci-bus.h
> @@ -350,6 +350,7 @@ struct S390PCIBusDevice {
>       IndAddr *indicator;
>       bool pci_unplug_request_processed;
>       bool unplug_requested;
> +    bool interp;
>       QTAILQ_ENTRY(S390PCIBusDevice) link;
>   };
>   
> diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
> index ff708aef50..42533e38f7 100644
> --- a/include/hw/s390x/s390-pci-vfio.h
> +++ b/include/hw/s390x/s390-pci-vfio.h
> @@ -20,6 +20,9 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
>   S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>                                             S390PCIBusDevice *pbdev);
>   void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev);
> +int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable);
> +int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev);
>   void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
>   #else
>   static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
> @@ -33,6 +36,18 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>   }
>   static inline void s390_pci_end_dma_count(S390pciState *s,
>                                             S390PCIDMACount *cnt) { }
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
> +{
> +    return -EINVAL;
> +}
>   static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
>   #endif
>   
> 

-- 
Pierre Morel
IBM Lab Boeblingen

  parent reply	other threads:[~2021-12-15  7:43 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-07 21:04 [PATCH 00/12] s390x/pci: zPCI interpretation support Matthew Rosato
2021-12-07 21:04 ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 01/12] s390x/pci: use a reserved ID for the default PCI group Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 10:30   ` Thomas Huth
2021-12-08 10:30     ` Thomas Huth
2021-12-07 21:04 ` [PATCH 02/12] s390x/pci: don't use hard-coded dma range in reg_ioat Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 10:32   ` Thomas Huth
2021-12-08 10:32     ` Thomas Huth
2021-12-07 21:04 ` [PATCH 03/12] s390x/pci: add supported DT information to clp response Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 04/12] Update linux headers Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 05/12] virtio-gpu: do not byteswap padding Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 06/12] target/s390x: add zpci-interp to cpu models Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 10:16   ` Christian Borntraeger
2021-12-08 10:16     ` Christian Borntraeger
2021-12-08 18:00     ` Matthew Rosato
2021-12-08 18:00       ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 07/12] s390x/pci: enable for load/store intepretation Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 10:56   ` Thomas Huth
2021-12-08 10:56     ` Thomas Huth
2021-12-15  7:44   ` Pierre Morel [this message]
2021-12-15 16:32     ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 08/12] s390x/pci: don't fence interpreted devices without MSI-X Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 11:04   ` Thomas Huth
2021-12-08 11:04     ` Thomas Huth
2021-12-15  6:26   ` Pierre Morel
2021-12-15  6:26     ` Pierre Morel
2021-12-07 21:04 ` [PATCH 09/12] s390x/pci: enable adapter event notification for interpreted devices Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-08 11:29   ` Thomas Huth
2021-12-08 11:29     ` Thomas Huth
2021-12-08 19:09     ` Matthew Rosato
2021-12-08 19:09       ` Matthew Rosato
2021-12-07 21:04 ` [PATCH 10/12] s390x/pci: use I/O Address Translation assist when interpreting Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-16  8:03   ` Pierre Morel
2021-12-16  8:03     ` Pierre Morel
2021-12-07 21:04 ` [PATCH 11/12] s390x/pci: use dtsm provided from vfio capabilities for interpreted devices Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-15  7:47   ` Pierre Morel
2021-12-15  7:47     ` Pierre Morel
2021-12-07 21:04 ` [PATCH 12/12] s390x/pci: let intercept devices have separate PCI groups Matthew Rosato
2021-12-07 21:04   ` Matthew Rosato
2021-12-16  8:15   ` Pierre Morel
2021-12-16 15:16     ` Matthew Rosato
2021-12-17  9:56       ` Pierre Morel
2021-12-15  7:35 ` [PATCH 00/12] s390x/pci: zPCI interpretation support Pierre Morel
2021-12-15  7:35   ` Pierre Morel
2021-12-15 15:53   ` Matthew Rosato
2021-12-15 15:53     ` Matthew Rosato
2021-12-17  9:17     ` Christian Borntraeger
2021-12-17  9:17       ` Christian Borntraeger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=69925992-e6a0-5536-8190-00a435de72f0@linux.ibm.com \
    --to=pmorel@linux.ibm.com \
    --cc=alex.williamson@redhat.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=farman@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=mjrosato@linux.ibm.com \
    --cc=mst@redhat.com \
    --cc=pasic@linux.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=schnelle@linux.ibm.com \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.