Re: [PATCH] [RFC] vpci: allow BAR write while mapped

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Stewart Hildebrand <stewart.hildebrand@amd.com>
Cc: xen-devel@lists.xenproject.org
Subject: Re: [PATCH] [RFC] vpci: allow BAR write while mapped
Date: Thu, 13 Mar 2025 18:48:21 +0100	[thread overview]
Message-ID: <Z9MaZVMtI86eAmDJ@macbook.local> (raw)
In-Reply-To: <20250312195019.382926-1-stewart.hildebrand@amd.com>

On Wed, Mar 12, 2025 at 03:50:17PM -0400, Stewart Hildebrand wrote:
> Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If firmware
> initialized the BAR to a bad address, Linux will try to write a new
> address to the BAR without disabling memory decoding. Allow the write
> by updating p2m right away in the vPCI BAR write handler.

IIRC it's only 32bit BARs that Linux will attempt to reposition
without toggling memory decoding off.  For 64bit BARs it will in
general (unless pci_dev->mmio_always_on is set) toggle memory decoding
off and then update the BAR registers.

> 
> Resolves: https://gitlab.com/xen-project/xen/-/issues/197
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> RFC: Currently the deferred mapping machinery supports only map or
>      unmap, not both. It might be better to rework the mapping machinery
>      to support unmap-then-map operations, but please let me know your
>      thoughts.
> RFC: This patch has not yet made an attempt to distinguish between
>      32-bit and 64-bit writes. It probably should.
> ---
>  xen/drivers/vpci/header.c | 65 +++++++++++++++++++++++++++++++--------
>  1 file changed, 53 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index ef6c965c081c..66adb2183cfe 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -173,7 +173,7 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
>          ASSERT_UNREACHABLE();
>  }
>  
> -bool vpci_process_pending(struct vcpu *v)
> +static bool process_pending(struct vcpu *v, bool need_lock)
>  {
>      struct pci_dev *pdev = v->vpci.pdev;
>      struct vpci_header *header = NULL;
> @@ -182,12 +182,14 @@ bool vpci_process_pending(struct vcpu *v)
>      if ( !pdev )
>          return false;
>  
> -    read_lock(&v->domain->pci_lock);
> +    if ( need_lock )
> +        read_lock(&v->domain->pci_lock);

The addition of need_lock would better be done in a pre-patch.

>  
>      if ( !pdev->vpci || (v->domain != pdev->domain) )
>      {
>          v->vpci.pdev = NULL;
> -        read_unlock(&v->domain->pci_lock);
> +        if ( need_lock )
> +            read_unlock(&v->domain->pci_lock);
>          return false;
>      }
>  
> @@ -209,17 +211,20 @@ bool vpci_process_pending(struct vcpu *v)
>  
>          if ( rc == -ERESTART )
>          {
> -            read_unlock(&v->domain->pci_lock);
> +            if ( need_lock )
> +                read_unlock(&v->domain->pci_lock);
>              return true;
>          }
>  
>          if ( rc )
>          {
> -            spin_lock(&pdev->vpci->lock);
> +            if ( need_lock )
> +                spin_lock(&pdev->vpci->lock);
>              /* Disable memory decoding unconditionally on failure. */
>              modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
>                              false);
> -            spin_unlock(&pdev->vpci->lock);
> +            if ( need_lock )
> +                spin_unlock(&pdev->vpci->lock);
>  
>              /* Clean all the rangesets */
>              for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> @@ -228,7 +233,8 @@ bool vpci_process_pending(struct vcpu *v)
>  
>              v->vpci.pdev = NULL;
>  
> -            read_unlock(&v->domain->pci_lock);
> +            if ( need_lock )
> +                read_unlock(&v->domain->pci_lock);
>  
>              if ( !is_hardware_domain(v->domain) )
>                  domain_crash(v->domain);
> @@ -238,15 +244,23 @@ bool vpci_process_pending(struct vcpu *v)
>      }
>      v->vpci.pdev = NULL;
>  
> -    spin_lock(&pdev->vpci->lock);
> +    if ( need_lock )
> +        spin_lock(&pdev->vpci->lock);
>      modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
> -    spin_unlock(&pdev->vpci->lock);
> +    if ( need_lock )
> +        spin_unlock(&pdev->vpci->lock);
>  
> -    read_unlock(&v->domain->pci_lock);
> +    if ( need_lock )
> +        read_unlock(&v->domain->pci_lock);
>  
>      return false;
>  }
>  
> +bool vpci_process_pending(struct vcpu *v)
> +{
> +    return process_pending(v, true);
> +}
> +
>  static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
>                              uint16_t cmd)
>  {
> @@ -565,6 +579,8 @@ static void cf_check bar_write(
>  {
>      struct vpci_bar *bar = data;
>      bool hi = false;
> +    bool reenable = false;
> +    uint32_t cmd = 0;
>  
>      ASSERT(is_hardware_domain(pdev->domain));
>  
> @@ -585,10 +601,31 @@ static void cf_check bar_write(
>      {
>          /* If the value written is the current one avoid printing a warning. */
>          if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
> +        {
>              gprintk(XENLOG_WARNING,
> -                    "%pp: ignored BAR %zu write while mapped\n",
> +                    "%pp: allowing BAR %zu write while mapped\n",
>                      &pdev->sbdf, bar - pdev->vpci->header.bars + hi);

If Xen now handles BARs writes with memory decoding enabled the
message can be removed.  It's only purpose was to signal this missing
handling.

> -        return;
> +            ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
> +            ASSERT(spin_is_locked(&pdev->vpci->lock));
> +            reenable = true;
> +            cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
> +            /*
> +             * Write-while-mapped: unmap the old BAR in p2m. We want this to
> +             * finish right away since the deferral machinery only supports
> +             * unmap OR map, not unmap-then-remap. Ultimately, it probably would
> +             * be better to defer the write-while-mapped case just like regular
> +             * BAR writes (but still only allow it for 32-bit BAR writes).
> +             */
> +            /* Disable memory decoding */
> +            modify_bars(pdev, cmd & ~PCI_COMMAND_MEMORY, false);

I think if the guest explicitly avoids toggling memory decoding Xen
should also to the same, and not touch the bit.

> +            /* Call process pending here to ensure P2M operations are done */
> +            while ( process_pending(current, false) )
> +            {
> +                /* Pre-empted, try again */
> +            }

I'm afraid this is not how I would expect this to be done.  We
explicitly do the p2m modifications in a deferred context to avoid
long running operations.  We should continue to do so to perform this
unmap + map operation.

I think you need to introduce a way to queue an operation that will do
a map + unmap in the deferred context processing, or signal that after
the currently queued operation finishes a new call to modify_bars()
should be issued.

It would be nice if we had a more generic way to queue guest vCPU p2m
(map and unmap) operations, but that's likely to require a much better
interface than what we currently have.

Thanks, Roger.

next prev parent reply	other threads:[~2025-03-13 17:48 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-12 19:50 [PATCH] [RFC] vpci: allow BAR write while mapped Stewart Hildebrand
2025-03-13 15:14 ` Alejandro Vallejo
2025-03-13 17:43   ` Stewart Hildebrand
2025-03-14 10:39     ` Alejandro Vallejo
2025-03-17 12:32       ` Roger Pau Monné
2025-03-17 13:49         ` Alejandro Vallejo
2025-03-13 17:58   ` Roger Pau Monné
2025-03-13 17:48 ` Roger Pau Monné [this message]
2025-03-13 18:28   ` Stewart Hildebrand
2025-03-14  8:04   ` Jan Beulich
2025-03-14  8:21     ` Roger Pau Monné
2025-03-14  8:26       ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z9MaZVMtI86eAmDJ@macbook.local \
    --to=roger.pau@citrix.com \
    --cc=stewart.hildebrand@amd.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.