From: Mykyta Poturai <Mykyta_Poturai@epam.com>
To: Jan Beulich <jbeulich@suse.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>,
"Stewart Hildebrand" <stewart.hildebrand@amd.com>,
"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Subject: Re: [PATCH v2 4/8] vpci: add a wait operation to the vpci vcpu pending actions
Date: Wed, 1 Apr 2026 07:59:48 +0000
Message-ID: <0275be7d-d1d9-47ee-bec2-bfbbb90b4bae@epam.com>
In-Reply-To: <f546ae19-2107-469e-847b-7a4cde2c95fd@suse.com>
On 3/31/26 17:55, Jan Beulich wrote:
> On 09.03.2026 12:08, Mykyta Poturai wrote:
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -175,76 +175,92 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
>>
>> bool vpci_process_pending(struct vcpu *v)
>> {
>> - const struct pci_dev *pdev = v->vpci.pdev;
>> - struct vpci_header *header = NULL;
>> - unsigned int i;
>> -
>> - if ( !pdev )
>> - return false;
>> -
>> - read_lock(&v->domain->pci_lock);
>> -
>> - if ( !pdev->vpci || (v->domain != pdev->domain) )
>> + switch ( v->vpci.task )
>> {
>> - v->vpci.pdev = NULL;
>> - read_unlock(&v->domain->pci_lock);
>> - return false;
>> - }
>> -
>> - header = &pdev->vpci->header;
>> - for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> + case MODIFY_MEMORY:
>> {
>> - struct vpci_bar *bar = &header->bars[i];
>> - struct rangeset *mem = v->vpci.bar_mem[i];
>> - struct map_data data = {
>> - .d = v->domain,
>> - .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
>> - .bar = bar,
>> - };
>> - int rc;
>> + const struct pci_dev *pdev = v->vpci.memory.pdev;
>> + struct vpci_header *header = NULL;
>> + unsigned int i;
>>
>> - if ( rangeset_is_empty(mem) )
>> - continue;
>> + if ( !pdev )
>> + break;
>>
>> - rc = rangeset_consume_ranges(mem, map_range, &data);
>> + read_lock(&v->domain->pci_lock);
>>
>> - if ( rc == -ERESTART )
>> + if ( !pdev->vpci || (v->domain != pdev->domain) )
>> {
>> + v->vpci.memory.pdev = NULL;
>> read_unlock(&v->domain->pci_lock);
>> - return true;
>> + break;
>> }
>>
>> - if ( rc )
>> + header = &pdev->vpci->header;
>> + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> {
>> - spin_lock(&pdev->vpci->lock);
>> - /* Disable memory decoding unconditionally on failure. */
>> - modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
>> - false);
>> - spin_unlock(&pdev->vpci->lock);
>> + struct vpci_bar *bar = &header->bars[i];
>> + struct rangeset *mem = v->vpci.bar_mem[i];
>> + struct map_data data = {
>> + .d = v->domain,
>> + .map = v->vpci.memory.cmd & PCI_COMMAND_MEMORY,
>> + .bar = bar,
>> + };
>> + int rc;
>> +
>> + if ( rangeset_is_empty(mem) )
>> + continue;
>>
>> - /* Clean all the rangesets */
>> - for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> - if ( !rangeset_is_empty(v->vpci.bar_mem[i]) )
>> - rangeset_purge(v->vpci.bar_mem[i]);
>> + rc = rangeset_consume_ranges(mem, map_range, &data);
>>
>> - v->vpci.pdev = NULL;
>> + if ( rc == -ERESTART )
>> + {
>> + read_unlock(&v->domain->pci_lock);
>> + return true;
>> + }
>>
>> - read_unlock(&v->domain->pci_lock);
>> + if ( rc )
>> + {
>> + spin_lock(&pdev->vpci->lock);
>> + /* Disable memory decoding unconditionally on failure. */
>> + modify_decoding(pdev, v->vpci.memory.cmd & ~PCI_COMMAND_MEMORY,
>> + false);
>> + spin_unlock(&pdev->vpci->lock);
>> +
>> + /* Clean all the rangesets */
>> + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>> + if ( !rangeset_is_empty(v->vpci.bar_mem[i]) )
>> + rangeset_purge(v->vpci.bar_mem[i]);
>> +
>> + v->vpci.memory.pdev = NULL;
>> +
>> + read_unlock(&v->domain->pci_lock);
>>
>> - if ( !is_hardware_domain(v->domain) )
>> - domain_crash(v->domain);
>> + if ( !is_hardware_domain(v->domain) )
>> + domain_crash(v->domain);
>>
>> - return false;
>> + break;
>> + }
>> }
>> - }
>> - v->vpci.pdev = NULL;
>> + v->vpci.memory.pdev = NULL;
>>
>> - spin_lock(&pdev->vpci->lock);
>> - modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
>> - spin_unlock(&pdev->vpci->lock);
>> + spin_lock(&pdev->vpci->lock);
>> + modify_decoding(pdev, v->vpci.memory.cmd, v->vpci.memory.rom_only);
>> + spin_unlock(&pdev->vpci->lock);
>>
>> - read_unlock(&v->domain->pci_lock);
>> + read_unlock(&v->domain->pci_lock);
>> +
>> + break;
>> + }
>> + case WAIT:
>> + if ( NOW() < v->vpci.wait.end )
>> + return true;
>> + v->vpci.wait.callback(v->vpci.wait.data);
>> + break;
>
> As just indicated in reply to patch 6, busy waiting isn't really acceptable.
> This is even more so when the waiting exceeds the typical length of a
> scheduling timeslice.
>
> In that other reply I said to put the vCPU to sleep, but you need to be careful
> there too: The domain may not expect its vCPU to not make any progress for such
> an extended period of time. This may need doing entirely differently: Once the
> command register was written, you may want to record the time after which
> accesses to the VF registers are permitted. Earlier accesses would simply be
> terminated. You may still additionally need a timer, in order to kick off BAR
> mapping after that time. (Yet better would be if the BAR mapping could be
> done during those 100ms. After all that may be a reason why this long a delay
> is specified: Firmware on the device may also require some time to set up the
> BARs accordingly.)
>
> Jan
I am not sure it would work that way. If we look at how Linux initializes SR-IOV: it writes the VFE and MSE bits, waits 100ms, and then expects the VFs to be operational. If they are not operational at that point, it considers the operation failed and removes all VFs. If we also wait 100ms before enabling access, the probability of the guest trying to access something before we allow it would be very high.

So I think there is no way to add VFs in Xen without blocking the guest's vCPU in some way. We could revert to the old variant and rely on the physdev op to add VFs one by one as they are discovered by Dom0; then we would not need to wait explicitly.

@Roger, are you okay with that?
Snippet from Linux:
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
        ...
        pci_iov_set_numvfs(dev, nr_virtfn);
        iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
        pci_cfg_access_lock(dev);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        msleep(100);
        pci_cfg_access_unlock(dev);

        rc = sriov_add_vfs(dev, initial);
        if (rc)
                goto err_pcibios;
        ...
}
--
Mykyta