From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:35197)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <jasowang@redhat.com>) id 1YdFeX-0005Ak-0A
	for qemu-devel@nongnu.org; Wed, 01 Apr 2015 06:13:42 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <jasowang@redhat.com>) id 1YdFeS-0007AG-O9
	for qemu-devel@nongnu.org; Wed, 01 Apr 2015 06:13:40 -0400
Received: from mx1.redhat.com ([209.132.183.28]:53550)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <jasowang@redhat.com>) id 1YdFeS-0007A7-FF
	for qemu-devel@nongnu.org; Wed, 01 Apr 2015 06:13:36 -0400
Date: Wed, 01 Apr 2015 18:12:18 +0800
From: Jason Wang <jasowang@redhat.com>
Message-Id: <1427883138.20573.0@smtp.corp.redhat.com>
In-Reply-To: <20150401110531-mutt-send-email-mst@redhat.com>
References: <1427876112-12615-1-git-send-email-jasowang@redhat.com>
	<1427876112-12615-18-git-send-email-jasowang@redhat.com>
	<20150401110531-mutt-send-email-mst@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8; format=flowed
Subject: Re: [Qemu-devel] [PATCH V5 17/18] pci: remove hard-coded bar size
 in msix_init_exclusive_bar()
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: cornelia.huck@de.ibm.com, Keith Busch <keith.busch@intel.com>, Kevin Wolf <kwolf@redhat.com>, qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>


On Wed, Apr 1, 2015 at 5:18 PM, Michael S. Tsirkin <mst@redhat.com> 
wrote:
> On Wed, Apr 01, 2015 at 04:15:11PM +0800, Jason Wang wrote:
>>  This patch lets msix_init_exclusive_bar() can calculate the bar and
>>  pba size according to the number of MSI-X vectors other than using a
>>  hard-coded limit 4096. This is needed to allow device to have more
>>  than 128 MSI_X vectors. An extra legacy_layout parameter was
>>  introduced to use legacy static 4096 bar size to keep the migration
>>  compatibility.
>>  
>>  Virtio device will be the first user for this.
>>  
>>  Cc: Keith Busch <keith.busch@intel.com>
>>  Cc: Kevin Wolf <kwolf@redhat.com>
>>  Cc: Stefan Hajnoczi <stefanha@redhat.com>
>>  Cc: Michael S. Tsirkin <mst@redhat.com>
>>  Signed-off-by: Jason Wang <jasowang@redhat.com>
>>  ---
>>   hw/block/nvme.c        |  2 +-
>>   hw/misc/ivshmem.c      |  2 +-
>>   hw/pci/msix.c          | 47 
>> +++++++++++++++++++++++++++++------------------
>>   hw/virtio/virtio-pci.c |  2 +-
>>   include/hw/pci/msix.h  |  2 +-
>>   5 files changed, 33 insertions(+), 22 deletions(-)
>>  
>>  diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>>  index 1e07166..9af6e1e 100644
>>  --- a/hw/block/nvme.c
>>  +++ b/hw/block/nvme.c
>>  @@ -787,7 +787,7 @@ static int nvme_init(PCIDevice *pci_dev)
>>       pci_register_bar(&n->parent_obj, 0,
>>           PCI_BASE_ADDRESS_SPACE_MEMORY | 
>> PCI_BASE_ADDRESS_MEM_TYPE_64,
>>           &n->iomem);
>>  -    msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4);
>>  +    msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4, 
>> true);
>>   
>>       id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
>>       id->ssvid = cpu_to_le16(pci_get_word(pci_conf + 
>> PCI_SUBSYSTEM_VENDOR_ID));
>>  diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
>>  index 5d272c8..6ae48ae 100644
>>  --- a/hw/misc/ivshmem.c
>>  +++ b/hw/misc/ivshmem.c
>>  @@ -631,7 +631,7 @@ static uint64_t ivshmem_get_size(IVShmemState * 
>> s) {
>>   
>>   static void ivshmem_setup_msi(IVShmemState * s)
>>   {
>>  -    if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
>>  +    if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, 
>> true)) {
>>           IVSHMEM_DPRINTF("msix initialization failed\n");
>>           exit(1);
>>       }
>>  diff --git a/hw/pci/msix.c b/hw/pci/msix.c
>>  index 24de260..8c6d8f3 100644
>>  --- a/hw/pci/msix.c
>>  +++ b/hw/pci/msix.c
>>  @@ -291,33 +291,44 @@ int msix_init(struct PCIDevice *dev, unsigned 
>> short nentries,
>>   }
>>   
>>   int msix_init_exclusive_bar(PCIDevice *dev, unsigned short 
>> nentries,
>>  -                            uint8_t bar_nr)
>>  +                            uint8_t bar_nr, bool legacy_layout)
>>   {
>>       int ret;
>>       char *name;
>>  -
>>  -    /*
>>  -     * Migration compatibility dictates that this remains a 4k
>>  -     * BAR with the vector table in the lower half and PBA in
>>  -     * the upper half.  Do not use these elsewhere!
>>  -     */
>>  -#define MSIX_EXCLUSIVE_BAR_SIZE 4096
>>  -#define MSIX_EXCLUSIVE_BAR_TABLE_OFFSET 0
>>  -#define MSIX_EXCLUSIVE_BAR_PBA_OFFSET (MSIX_EXCLUSIVE_BAR_SIZE / 2)
>>  -#define MSIX_EXCLUSIVE_CAP_OFFSET 0
>>  -
>>  -    if (nentries * PCI_MSIX_ENTRY_SIZE > 
>> MSIX_EXCLUSIVE_BAR_PBA_OFFSET) {
>>  -        return -EINVAL;
>>  +    uint32_t bar_size;
>>  +    uint32_t bar_pba_offset;
>>  +
>>  +    if (legacy_layout) {
>>  +        /*
>>  +         * Migration compatibility dictates that this remains a 4k
>>  +         * BAR with the vector table in the lower half and PBA in
>>  +         * the upper half.  Do not use these elsewhere!
>>  +         */
>>  +        bar_size = 4096;
>>  +        bar_pba_offset = bar_size / 2;
>>  +
>>  +        if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) {
>>  +            return -EINVAL;
>>  +        }
> 
> So this takes pains to behave in a compatible
> way when more than 128 vectors are requested.
> But since we only had up to 64 VQs, why does
> some a configuration make sense?

This question could also be asked for the code even without this patch. 
Spec does not clarify this and if we think vectors>=128 is not a valid 
configuration with only 64 VQs, qemu should fail and quit instead of a 
warning. Unfortunately we don't do this and leave a chance for user to 
use it.

> 
> 
> I suggest we just ignore this configuration
> for migration compatibility.
> 

Consider this only affects that calling qemu with vectors more than 128 
for 64 VQs limitation. I'm fine with this.

> 
> 
>>  +    } else {
>>  +        bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE;
>>  +        bar_size = bar_pba_offset + (nentries / 8 + 1) * 8;
>>  +        if (bar_size & (bar_size - 1)) {
>>  +            bar_size = 1 << qemu_fls(bar_size);
>>  +        }
>>  +        if (bar_size < 4096) {
>>  +            bar_size = 4096;
>>  +        }
>>       }
> 
> Don't duplicate code like this please.
> Just do this:
>         /*
> 	 * We always did it like this, so we have to keep doing this to
> 	 * avoid breaking migration.
> 	 */
> 	if (bar_pba_offset < 4096 / 2)
> 		bar_pba_offset =  4096 / 2;
> 
> 
> Will save a ton of churn.


Ok, so you're suggesting to use the same layout as 2.3 for vectors <= 
128? Should work.

> 
> 
> with 2 above suggestions you no longer
> need to worry about machine compatibility.
> 
>>   
>>       name = g_strdup_printf("%s-msix", dev->name);
>>  -    memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), 
>> name, MSIX_EXCLUSIVE_BAR_SIZE);
>>  +    memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), 
>> name, bar_size);
>>       g_free(name);
>>   
>>       ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, 
>> bar_nr,
>>  -                    MSIX_EXCLUSIVE_BAR_TABLE_OFFSET, 
>> &dev->msix_exclusive_bar,
>>  -                    bar_nr, MSIX_EXCLUSIVE_BAR_PBA_OFFSET,
>>  -                    MSIX_EXCLUSIVE_CAP_OFFSET);
>>  +                    0, &dev->msix_exclusive_bar,
>>  +                    bar_nr, bar_pba_offset,
>>  +                    0);
>>       if (ret) {
>>           return ret;
>>       }
>>  diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>>  index 02e3ce8..816a706 100644
>>  --- a/hw/virtio/virtio-pci.c
>>  +++ b/hw/virtio/virtio-pci.c
>>  @@ -937,7 +937,7 @@ static void 
>> virtio_pci_device_plugged(DeviceState *d)
>>       config[PCI_INTERRUPT_PIN] = 1;
>>   
>>       if (proxy->nvectors &&
>>  -        msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 
>> 1)) {
>>  +        msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 
>> 1, true)) {
>>           error_report("unable to init msix vectors to %" PRIu32,
>>                        proxy->nvectors);
>>           proxy->nvectors = 0;
>>  diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
>>  index 954d82b..6c19535 100644
>>  --- a/include/hw/pci/msix.h
>>  +++ b/include/hw/pci/msix.h
>>  @@ -11,7 +11,7 @@ int msix_init(PCIDevice *dev, unsigned short 
>> nentries,
>>                 unsigned table_offset, MemoryRegion *pba_bar,
>>                 uint8_t pba_bar_nr, unsigned pba_offset, uint8_t 
>> cap_pos);
>>   int msix_init_exclusive_bar(PCIDevice *dev, unsigned short 
>> nentries,
>>  -                            uint8_t bar_nr);
>>  +                            uint8_t bar_nr, bool legacy_layout);
>>   
>>   void msix_write_config(PCIDevice *dev, uint32_t address, uint32_t 
>> val, int len);
>>   
>>  -- 
>>  2.1.0
>