All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/1] vpci: Add resizable bar support
@ 2024-12-02  6:09 Jiqian Chen
  2024-12-09 13:59 ` Jan Beulich
  2024-12-10 11:30 ` Roger Pau Monné
  0 siblings, 2 replies; 27+ messages in thread
From: Jiqian Chen @ 2024-12-02  6:09 UTC (permalink / raw)
  To: xen-devel
  Cc: Roger Pau Monné, Andrew Cooper, Jan Beulich, Julien Grall,
	Stefano Stabellini, Huang Rui, Jiqian Chen

Some devices, like discrete GPU of amd, support resizable bar
capability, but vpci of Xen doesn't support this feature, so
they fail to resize bars and then cause probing failure.

According to PCIe spec, each bar that supports resizing has
two registers, PCI_REBAR_CAP and PCI_REBAR_CTRL. So, add
handlers for them to support resizing the size of BARs.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
Hi all,

v2->v1 changes:
*In rebar_ctrl_write, to check if memory decoding is enabled, and added
some checks for the type of Bar.
*Added vpci_hw_write32 to handle PCI_REBAR_CAP's write, since there is
no write limitation of dom0.
*And has many other minor modifications as well.

Best regards,
Jiqian Chen.
---
 xen/drivers/vpci/Makefile  |  2 +-
 xen/drivers/vpci/rebar.c   | 93 ++++++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c    |  6 +++
 xen/include/xen/pci_regs.h | 11 +++++
 xen/include/xen/vpci.h     |  2 +
 5 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 xen/drivers/vpci/rebar.c

diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index 1a1413b93e76..a7c8a30a8956 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1,2 +1,2 @@
-obj-y += vpci.o header.o
+obj-y += vpci.o header.o rebar.o
 obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
diff --git a/xen/drivers/vpci/rebar.c b/xen/drivers/vpci/rebar.c
new file mode 100644
index 000000000000..156e8d337426
--- /dev/null
+++ b/xen/drivers/vpci/rebar.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Author: Jiqian Chen <Jiqian.Chen@amd.com>
+ */
+
+#include <xen/hypercall.h>
+#include <xen/vpci.h>
+
+static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
+                                      unsigned int reg,
+                                      uint32_t val,
+                                      void *data)
+{
+    uint64_t size;
+    unsigned int index;
+    struct vpci_bar *bars = data;
+
+    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
+        return;
+
+    index = pci_conf_read32(pdev->sbdf, reg) & PCI_REBAR_CTRL_BAR_IDX;
+    if ( index >= PCI_HEADER_NORMAL_NR_BARS )
+        return;
+
+    if ( bars[index].type != VPCI_BAR_MEM64_LO &&
+         bars[index].type != VPCI_BAR_MEM32 )
+        return;
+
+    size = PCI_REBAR_CTRL_SIZE(val);
+    if ( !((size >> 20) &
+         MASK_EXTR(pci_conf_read32(pdev->sbdf, reg - 4), PCI_REBAR_CAP_SIZES)) )
+        gprintk(XENLOG_WARNING,
+                "%pp: new size %#lx for BAR%u isn't supported\n",
+                &pdev->sbdf, size, index);
+
+    bars[index].size = size;
+    bars[index].addr = 0;
+    bars[index].guest_addr = 0;
+    pci_conf_write32(pdev->sbdf, reg, val);
+}
+
+static int cf_check init_rebar(struct pci_dev *pdev)
+{
+    uint32_t ctrl;
+    unsigned int rebar_offset, nbars;
+
+    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
+
+    if ( !rebar_offset )
+        return 0;
+
+    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
+    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
+
+    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
+    {
+        int rc;
+
+        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
+                               rebar_offset + PCI_REBAR_CAP, 4, NULL);
+        if ( rc )
+        {
+            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
+                   &pdev->sbdf, rc);
+            break;
+        }
+
+        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
+                               rebar_offset + PCI_REBAR_CTRL, 4,
+                               pdev->vpci->header.bars);
+        if ( rc )
+        {
+            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
+                   &pdev->sbdf, rc);
+            break;
+        }
+    }
+
+    return 0;
+}
+REGISTER_VPCI_INIT(init_rebar, VPCI_PRIORITY_LOW);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 1e6aa5d799b9..3349b98389b8 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -232,6 +232,12 @@ void cf_check vpci_hw_write16(
     pci_conf_write16(pdev->sbdf, reg, val);
 }
 
+void cf_check vpci_hw_write32(
+    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
+{
+    pci_conf_write32(pdev->sbdf, reg, val);
+}
+
 int vpci_add_register_mask(struct vpci *vpci, vpci_read_t *read_handler,
                            vpci_write_t *write_handler, unsigned int offset,
                            unsigned int size, void *data, uint32_t ro_mask,
diff --git a/xen/include/xen/pci_regs.h b/xen/include/xen/pci_regs.h
index 250ba106dbd3..3fa6a9f8cad1 100644
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -459,6 +459,7 @@
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_REBAR	21	/* Resizable BAR */
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -541,6 +542,16 @@
 #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
 #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
 
+/* Resizable BARs */
+#define PCI_REBAR_CAP		4	/* capability register */
+#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
+#define PCI_REBAR_CTRL		8	/* control register */
+#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
+#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
+#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
+#define  PCI_REBAR_CTRL_SIZE(v) \
+            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
+
 /*
  * Hypertransport sub capability types
  *
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 41e7c3bc2791..72992e93cece 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -78,6 +78,8 @@ uint32_t cf_check vpci_hw_read32(
     const struct pci_dev *pdev, unsigned int reg, void *data);
 void cf_check vpci_hw_write16(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
+void cf_check vpci_hw_write32(
+    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
 
 /*
  * Check for pending vPCI operations on this vcpu. Returns true if the vcpu
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-02  6:09 [PATCH v2 1/1] vpci: Add resizable bar support Jiqian Chen
@ 2024-12-09 13:59 ` Jan Beulich
  2024-12-10  7:07   ` Chen, Jiqian
  2024-12-10 11:00   ` Roger Pau Monné
  2024-12-10 11:30 ` Roger Pau Monné
  1 sibling, 2 replies; 27+ messages in thread
From: Jan Beulich @ 2024-12-09 13:59 UTC (permalink / raw)
  To: Jiqian Chen
  Cc: Roger Pau Monné, Andrew Cooper, Julien Grall,
	Stefano Stabellini, Huang Rui, xen-devel

On 02.12.2024 07:09, Jiqian Chen wrote:
> --- /dev/null
> +++ b/xen/drivers/vpci/rebar.c
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */

Was this a deliberate decision? We default to GPL-2.0-only, I think.

> +/*
> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
> + */
> +
> +#include <xen/hypercall.h>
> +#include <xen/vpci.h>
> +
> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> +                                      unsigned int reg,
> +                                      uint32_t val,
> +                                      void *data)
> +{
> +    uint64_t size;
> +    unsigned int index;
> +    struct vpci_bar *bars = data;
> +
> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> +        return;

I don't think something like this can go uncommented. I don't think the
spec mandates to drop writes in this situation?

> +    index = pci_conf_read32(pdev->sbdf, reg) & PCI_REBAR_CTRL_BAR_IDX;
> +    if ( index >= PCI_HEADER_NORMAL_NR_BARS )
> +        return;
> +
> +    if ( bars[index].type != VPCI_BAR_MEM64_LO &&
> +         bars[index].type != VPCI_BAR_MEM32 )
> +        return;
> +
> +    size = PCI_REBAR_CTRL_SIZE(val);
> +    if ( !((size >> 20) &
> +         MASK_EXTR(pci_conf_read32(pdev->sbdf, reg - 4), PCI_REBAR_CAP_SIZES)) )

No such literal 4 please. What I think you mean is reg - PCI_REBAR_CTRL +
PCI_REBAR_CAP.

Also indentation is off (by 2) here.

> +        gprintk(XENLOG_WARNING,
> +                "%pp: new size %#lx for BAR%u isn't supported\n",
> +                &pdev->sbdf, size, index);
> +
> +    bars[index].size = size;
> +    bars[index].addr = 0;
> +    bars[index].guest_addr = 0;
> +    pci_conf_write32(pdev->sbdf, reg, val);
> +}
> +
> +static int cf_check init_rebar(struct pci_dev *pdev)
> +{
> +    uint32_t ctrl;
> +    unsigned int rebar_offset, nbars;
> +
> +    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
> +
> +    if ( !rebar_offset )
> +        return 0;
> +
> +    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
> +    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
> +
> +    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
> +    {
> +        int rc;
> +
> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
> +                               rebar_offset + PCI_REBAR_CAP, 4, NULL);

The capability register is r/o aiui. While permitting hwdom to write it is
fine, DomU-s shouldn't be permitted doing so, just in case. (An alternative
to making handler selection conditional here would be to bail early for the
!hwdom case, accompanied by a TODO comment. This would then also address
the lack of virtualization of the extended capability chain, as we may not
blindly expose all capabilities to DomU-s.)

> +        if ( rc )
> +        {
> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
> +                   &pdev->sbdf, rc);
> +            break;
> +        }
> +
> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
> +                               rebar_offset + PCI_REBAR_CTRL, 4,
> +                               pdev->vpci->header.bars);
> +        if ( rc )
> +        {
> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
> +                   &pdev->sbdf, rc);
> +            break;

Is it correct to keep the other handler installed? After all ...

> +        }
> +    }
> +
> +    return 0;

... you - imo sensibly - aren't communicating the error back up (to allow
the device to be used without BAR resizing.

> @@ -541,6 +542,16 @@
>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>  
> +/* Resizable BARs */
> +#define PCI_REBAR_CAP		4	/* capability register */
> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */

Misra demands that this have a U suffix.

> +#define PCI_REBAR_CTRL		8	/* control register */
> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
> +#define  PCI_REBAR_CTRL_SIZE(v) \
> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))

The literal 20 (appearing here the 2nd time) also wants hiding behind a
#define.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-09 13:59 ` Jan Beulich
@ 2024-12-10  7:07   ` Chen, Jiqian
  2024-12-10  7:17     ` Jan Beulich
  2024-12-10 11:00   ` Roger Pau Monné
  1 sibling, 1 reply; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-10  7:07 UTC (permalink / raw)
  To: Jan Beulich, Roger Pau Monné
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Chen, Jiqian

On 2024/12/9 21:59, Jan Beulich wrote:
> On 02.12.2024 07:09, Jiqian Chen wrote:
>> --- /dev/null
>> +++ b/xen/drivers/vpci/rebar.c
>> @@ -0,0 +1,93 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> 
> Was this a deliberate decision? We default to GPL-2.0-only, I think.
Will change to GPL-2.0-only.
What's the difference between GPL-2.0-only and GPL-2.0-or-later?

> 
>> +/*
>> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
>> + *
>> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
>> + */
>> +
>> +#include <xen/hypercall.h>
>> +#include <xen/vpci.h>
>> +
>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>> +                                      unsigned int reg,
>> +                                      uint32_t val,
>> +                                      void *data)
>> +{
>> +    uint64_t size;
>> +    unsigned int index;
>> +    struct vpci_bar *bars = data;
>> +
>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>> +        return;
> 
> I don't think something like this can go uncommented. I don't think the
> spec mandates to drop writes in this situation?
Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
such as the result of debugging with Roger in the previous version.
I will add the spec's sentences as comments here in next version.

> 
>> +    index = pci_conf_read32(pdev->sbdf, reg) & PCI_REBAR_CTRL_BAR_IDX;
>> +    if ( index >= PCI_HEADER_NORMAL_NR_BARS )
>> +        return;
>> +
>> +    if ( bars[index].type != VPCI_BAR_MEM64_LO &&
>> +         bars[index].type != VPCI_BAR_MEM32 )
>> +        return;
>> +
>> +    size = PCI_REBAR_CTRL_SIZE(val);
>> +    if ( !((size >> 20) &
>> +         MASK_EXTR(pci_conf_read32(pdev->sbdf, reg - 4), PCI_REBAR_CAP_SIZES)) )
> 
> No such literal 4 please. What I think you mean is reg - PCI_REBAR_CTRL +
> PCI_REBAR_CAP.
Yes, will change, thanks.

> 
> Also indentation is off (by 2) here.
> 
>> +        gprintk(XENLOG_WARNING,
>> +                "%pp: new size %#lx for BAR%u isn't supported\n",
>> +                &pdev->sbdf, size, index);
>> +
>> +    bars[index].size = size;
>> +    bars[index].addr = 0;
>> +    bars[index].guest_addr = 0;
>> +    pci_conf_write32(pdev->sbdf, reg, val);
>> +}
>> +
>> +static int cf_check init_rebar(struct pci_dev *pdev)
>> +{
>> +    uint32_t ctrl;
>> +    unsigned int rebar_offset, nbars;
>> +
>> +    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
>> +
>> +    if ( !rebar_offset )
>> +        return 0;
>> +
>> +    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
>> +    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
>> +
>> +    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
>> +    {
>> +        int rc;
>> +
>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
>> +                               rebar_offset + PCI_REBAR_CAP, 4, NULL);
> 
> The capability register is r/o aiui. While permitting hwdom to write it is
> fine, DomU-s shouldn't be permitted doing so, just in case. (An alternative
> to making handler selection conditional here would be to bail early for the
> !hwdom case, accompanied by a TODO comment. This would then also address
> the lack of virtualization of the extended capability chain, as we may not
> blindly expose all capabilities to DomU-s.)
Thanks, will add is_hwdom check and add "TODO" comment here.

> 
>> +        if ( rc )
>> +        {
>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>> +                   &pdev->sbdf, rc);
>> +            break;
>> +        }
>> +
>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>> +                               pdev->vpci->header.bars);
>> +        if ( rc )
>> +        {
>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>> +                   &pdev->sbdf, rc);
>> +            break;
> 
> Is it correct to keep the other handler installed? After all ...
Will change to "return rc;" here and above in next version.

> 
>> +        }
>> +    }
>> +
>> +    return 0;
> 
> ... you - imo sensibly - aren't communicating the error back up (to allow
> the device to be used without BAR resizing.
> 
>> @@ -541,6 +542,16 @@
>>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>>  
>> +/* Resizable BARs */
>> +#define PCI_REBAR_CAP		4	/* capability register */
>> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
> 
> Misra demands that this have a U suffix.
Do below PCI_REBAR_CTRL_BAR_IDX, PCI_REBAR_CTRL_NBAR_MASK and PCI_REBAR_CTRL_BAR_SIZE also need a U suffix?

> 
>> +#define PCI_REBAR_CTRL		8	/* control register */
>> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
>> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
>> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
>> +#define  PCI_REBAR_CTRL_SIZE(v) \
>> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
> 
> The literal 20 (appearing here the 2nd time) also wants hiding behind a
> #define.
OK, will add " #define PCI_REBAR_SIZE_UNIT_BYTES_LEN 20" to replace above two '20' case.

> 
> Jan

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10  7:07   ` Chen, Jiqian
@ 2024-12-10  7:17     ` Jan Beulich
  2024-12-10  7:57       ` Chen, Jiqian
  0 siblings, 1 reply; 27+ messages in thread
From: Jan Beulich @ 2024-12-10  7:17 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné

On 10.12.2024 08:07, Chen, Jiqian wrote:
> On 2024/12/9 21:59, Jan Beulich wrote:
>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>> --- /dev/null
>>> +++ b/xen/drivers/vpci/rebar.c
>>> @@ -0,0 +1,93 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>
>> Was this a deliberate decision? We default to GPL-2.0-only, I think.
> Will change to GPL-2.0-only.
> What's the difference between GPL-2.0-only and GPL-2.0-or-later?

As the name says, the latter includes any known or yet to be written newer
versions of the GPL.

>>> +/*
>>> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
>>> + *
>>> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
>>> + */
>>> +
>>> +#include <xen/hypercall.h>
>>> +#include <xen/vpci.h>
>>> +
>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>> +                                      unsigned int reg,
>>> +                                      uint32_t val,
>>> +                                      void *data)
>>> +{
>>> +    uint64_t size;
>>> +    unsigned int index;
>>> +    struct vpci_bar *bars = data;
>>> +
>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>> +        return;
>>
>> I don't think something like this can go uncommented. I don't think the
>> spec mandates to drop writes in this situation?
> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
> such as the result of debugging with Roger in the previous version.
> I will add the spec's sentences as comments here in next version.

What you quote from the spec may not be enough as a comment here. There's
no direct implication that the write would simply be dropped on the floor
if the bit is still set. So I think you want to go a little beyond just
quoting from the spec.

>>> +        if ( rc )
>>> +        {
>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>> +                   &pdev->sbdf, rc);
>>> +            break;
>>> +        }
>>> +
>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>> +                               pdev->vpci->header.bars);
>>> +        if ( rc )
>>> +        {
>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>> +                   &pdev->sbdf, rc);
>>> +            break;
>>
>> Is it correct to keep the other handler installed? After all ...
> Will change to "return rc;" here and above in next version.

I'm not convinced this is what we want, as per ...

>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>
>> ... you - imo sensibly - aren't communicating the error back up (to allow
>> the device to be used without BAR resizing.

... what I said here.

>>> @@ -541,6 +542,16 @@
>>>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>>>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>>>  
>>> +/* Resizable BARs */
>>> +#define PCI_REBAR_CAP		4	/* capability register */
>>> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
>>
>> Misra demands that this have a U suffix.
> Do below PCI_REBAR_CTRL_BAR_IDX, PCI_REBAR_CTRL_NBAR_MASK and PCI_REBAR_CTRL_BAR_SIZE also need a U suffix?

They may want to gain them for consistency, but they don't strictly need
them. I wanted to say "See the rest of the file", but it looks like the
file wasn't cleaned up yet Misra-wise.

>>> +#define PCI_REBAR_CTRL		8	/* control register */
>>> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
>>> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
>>> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
>>> +#define  PCI_REBAR_CTRL_SIZE(v) \
>>> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
>>
>> The literal 20 (appearing here the 2nd time) also wants hiding behind a
>> #define.
> OK, will add " #define PCI_REBAR_SIZE_UNIT_BYTES_LEN 20" to replace above two '20' case.

What is "UNIT_BYTES_LEN" there? There's nothing byte-ish here, I don't
think, 20 is simply the shift bias.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10  7:17     ` Jan Beulich
@ 2024-12-10  7:57       ` Chen, Jiqian
  2024-12-10  9:54         ` Jan Beulich
  0 siblings, 1 reply; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-10  7:57 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné,
	Chen, Jiqian

On 2024/12/10 15:17, Jan Beulich wrote:
> On 10.12.2024 08:07, Chen, Jiqian wrote:
>> On 2024/12/9 21:59, Jan Beulich wrote:
>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>> --- /dev/null
>>>> +++ b/xen/drivers/vpci/rebar.c
>>>> @@ -0,0 +1,93 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>>>
>>> Was this a deliberate decision? We default to GPL-2.0-only, I think.
>> Will change to GPL-2.0-only.
>> What's the difference between GPL-2.0-only and GPL-2.0-or-later?
> 
> As the name says, the latter includes any known or yet to be written newer
> versions of the GPL.
> 
>>>> +/*
>>>> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
>>>> + *
>>>> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
>>>> + */
>>>> +
>>>> +#include <xen/hypercall.h>
>>>> +#include <xen/vpci.h>
>>>> +
>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>>> +                                      unsigned int reg,
>>>> +                                      uint32_t val,
>>>> +                                      void *data)
>>>> +{
>>>> +    uint64_t size;
>>>> +    unsigned int index;
>>>> +    struct vpci_bar *bars = data;
>>>> +
>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>>> +        return;
>>>
>>> I don't think something like this can go uncommented. I don't think the
>>> spec mandates to drop writes in this situation?
>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
>> such as the result of debugging with Roger in the previous version.
>> I will add the spec's sentences as comments here in next version.
> 
> What you quote from the spec may not be enough as a comment here. There's
> no direct implication that the write would simply be dropped on the floor
> if the bit is still set. So I think you want to go a little beyond just
> quoting from the spec.
How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?

> 
>>>> +        if ( rc )
>>>> +        {
>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>>> +                   &pdev->sbdf, rc);
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>>> +                               pdev->vpci->header.bars);
>>>> +        if ( rc )
>>>> +        {
>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>>> +                   &pdev->sbdf, rc);
>>>> +            break;
>>>
>>> Is it correct to keep the other handler installed? After all ...
>> Will change to "return rc;" here and above in next version.
> 
> I'm not convinced this is what we want, as per ...
> 
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>
>>> ... you - imo sensibly - aren't communicating the error back up (to allow
>>> the device to be used without BAR resizing.
> 
> ... what I said here.
Sorry, I didn’t understand.
Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?

> 
>>>> @@ -541,6 +542,16 @@
>>>>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>>>>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>>>>  
>>>> +/* Resizable BARs */
>>>> +#define PCI_REBAR_CAP		4	/* capability register */
>>>> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
>>>
>>> Misra demands that this have a U suffix.
>> Do below PCI_REBAR_CTRL_BAR_IDX, PCI_REBAR_CTRL_NBAR_MASK and PCI_REBAR_CTRL_BAR_SIZE also need a U suffix?
> 
> They may want to gain them for consistency, but they don't strictly need
> them. I wanted to say "See the rest of the file", but it looks like the
> file wasn't cleaned up yet Misra-wise.
Yes, I noticed that the rest of the file didn't add U suffix too.
So, I just need to add U suffixes for my new macros?

> 
>>>> +#define PCI_REBAR_CTRL		8	/* control register */
>>>> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
>>>> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
>>>> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
>>>> +#define  PCI_REBAR_CTRL_SIZE(v) \
>>>> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
>>>
>>> The literal 20 (appearing here the 2nd time) also wants hiding behind a
>>> #define.
>> OK, will add " #define PCI_REBAR_SIZE_UNIT_BYTES_LEN 20" to replace above two '20' case.
> 
> What is "UNIT_BYTES_LEN" there? There's nothing byte-ish here, I don't
> think, 20 is simply the shift bias.
It's a naming problem. What I want to express here is that the basic unit is 1MB, which is 2^20 of bytes.
Since the spec has the definition about the value of the bar size bits of register:
BAR Size - This is an encoded value.
0	1 MB (2^20 bytes)
1	2 MB (2^21 bytes)
2	4 MB (2^22 bytes)
3	8 MB (2^23 bytes)
…
43	8 EB (2^63 bytes)
Do you have suggestion about this macro name?

> 
> Jan

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10  7:57       ` Chen, Jiqian
@ 2024-12-10  9:54         ` Jan Beulich
  2024-12-10 11:25           ` Roger Pau Monné
  2024-12-11  6:18           ` Chen, Jiqian
  0 siblings, 2 replies; 27+ messages in thread
From: Jan Beulich @ 2024-12-10  9:54 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné

On 10.12.2024 08:57, Chen, Jiqian wrote:
> On 2024/12/10 15:17, Jan Beulich wrote:
>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>>>> +                                      unsigned int reg,
>>>>> +                                      uint32_t val,
>>>>> +                                      void *data)
>>>>> +{
>>>>> +    uint64_t size;
>>>>> +    unsigned int index;
>>>>> +    struct vpci_bar *bars = data;
>>>>> +
>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>>>> +        return;
>>>>
>>>> I don't think something like this can go uncommented. I don't think the
>>>> spec mandates to drop writes in this situation?
>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
>>> such as the result of debugging with Roger in the previous version.
>>> I will add the spec's sentences as comments here in next version.
>>
>> What you quote from the spec may not be enough as a comment here. There's
>> no direct implication that the write would simply be dropped on the floor
>> if the bit is still set. So I think you want to go a little beyond just
>> quoting from the spec.
> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?

That'll be better, but imo still not enough to explain the outright ignoring
of the write.

>>>>> +        if ( rc )
>>>>> +        {
>>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>>>> +                   &pdev->sbdf, rc);
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>>>> +                               pdev->vpci->header.bars);
>>>>> +        if ( rc )
>>>>> +        {
>>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>>>> +                   &pdev->sbdf, rc);
>>>>> +            break;
>>>>
>>>> Is it correct to keep the other handler installed? After all ...
>>> Will change to "return rc;" here and above in next version.
>>
>> I'm not convinced this is what we want, as per ...
>>
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>
>>>> ... you - imo sensibly - aren't communicating the error back up (to allow
>>>> the device to be used without BAR resizing.
>>
>> ... what I said here.
> Sorry, I didn’t understand.
> Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?

No, if you return an error here, nothing else needs doing. However, I
question that returning an error here is good or even necessary. In
the event of an error, the device ought to still be usable, just
without the BAR-resizing capability.

>>>>> @@ -541,6 +542,16 @@
>>>>>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>>>>>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>>>>>  
>>>>> +/* Resizable BARs */
>>>>> +#define PCI_REBAR_CAP		4	/* capability register */
>>>>> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
>>>>
>>>> Misra demands that this have a U suffix.
>>> Do below PCI_REBAR_CTRL_BAR_IDX, PCI_REBAR_CTRL_NBAR_MASK and PCI_REBAR_CTRL_BAR_SIZE also need a U suffix?
>>
>> They may want to gain them for consistency, but they don't strictly need
>> them. I wanted to say "See the rest of the file", but it looks like the
>> file wasn't cleaned up yet Misra-wise.
> Yes, I noticed that the rest of the file didn't add U suffix too.
> So, I just need to add U suffixes for my new macros?

You only strictly need to add U to values with the top bit set.

>>>>> +#define PCI_REBAR_CTRL		8	/* control register */
>>>>> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
>>>>> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
>>>>> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
>>>>> +#define  PCI_REBAR_CTRL_SIZE(v) \
>>>>> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
>>>>
>>>> The literal 20 (appearing here the 2nd time) also wants hiding behind a
>>>> #define.
>>> OK, will add " #define PCI_REBAR_SIZE_UNIT_BYTES_LEN 20" to replace above two '20' case.
>>
>> What is "UNIT_BYTES_LEN" there? There's nothing byte-ish here, I don't
>> think, 20 is simply the shift bias.
> It's a naming problem. What I want to express here is that the basic unit is 1MB, which is 2^20 of bytes.
> Since the spec has the definition about the value of the bar size bits of register:
> BAR Size - This is an encoded value.
> 0	1 MB (2^20 bytes)
> 1	2 MB (2^21 bytes)
> 2	4 MB (2^22 bytes)
> 3	8 MB (2^23 bytes)
> …
> 43	8 EB (2^63 bytes)
> Do you have suggestion about this macro name?

PCI_REBAR_SIZE_BIAS? PCI_REBAR_SIZE_SHIFT_BIAS? PCI_REBAR_SIZE_SHIFT?

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-09 13:59 ` Jan Beulich
  2024-12-10  7:07   ` Chen, Jiqian
@ 2024-12-10 11:00   ` Roger Pau Monné
  2024-12-10 11:05     ` Jan Beulich
  2024-12-11  6:20     ` Chen, Jiqian
  1 sibling, 2 replies; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-10 11:00 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Jiqian Chen, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang Rui, xen-devel

On Mon, Dec 09, 2024 at 02:59:31PM +0100, Jan Beulich wrote:
> On 02.12.2024 07:09, Jiqian Chen wrote:
> > +static int cf_check init_rebar(struct pci_dev *pdev)
> > +{
> > +    uint32_t ctrl;
> > +    unsigned int rebar_offset, nbars;
> > +
> > +    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
> > +
> > +    if ( !rebar_offset )
> > +        return 0;
> > +
> > +    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
> > +    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
> > +
> > +    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
> > +    {
> > +        int rc;
> > +
> > +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
> > +                               rebar_offset + PCI_REBAR_CAP, 4, NULL);
> 
> The capability register is r/o aiui. While permitting hwdom to write it is
> fine, DomU-s shouldn't be permitted doing so, just in case. (An alternative
> to making handler selection conditional here would be to bail early for the
> !hwdom case, accompanied by a TODO comment. This would then also address
> the lack of virtualization of the extended capability chain, as we may not
> blindly expose all capabilities to DomU-s.)

I don't think we can safely expose this capability to domUs by
default, so my preference would be a returning an error in that case
(and printing a log message indicating ReBAR is not supported for
domUs).

Note it's already not exposed to domUs by not being part of
supported_caps in init_header().

Regards, Roger


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:00   ` Roger Pau Monné
@ 2024-12-10 11:05     ` Jan Beulich
  2024-12-11  6:20     ` Chen, Jiqian
  1 sibling, 0 replies; 27+ messages in thread
From: Jan Beulich @ 2024-12-10 11:05 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Jiqian Chen, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang Rui, xen-devel

On 10.12.2024 12:00, Roger Pau Monné wrote:
> On Mon, Dec 09, 2024 at 02:59:31PM +0100, Jan Beulich wrote:
>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>> +static int cf_check init_rebar(struct pci_dev *pdev)
>>> +{
>>> +    uint32_t ctrl;
>>> +    unsigned int rebar_offset, nbars;
>>> +
>>> +    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
>>> +
>>> +    if ( !rebar_offset )
>>> +        return 0;
>>> +
>>> +    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
>>> +    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
>>> +
>>> +    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
>>> +    {
>>> +        int rc;
>>> +
>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
>>> +                               rebar_offset + PCI_REBAR_CAP, 4, NULL);
>>
>> The capability register is r/o aiui. While permitting hwdom to write it is
>> fine, DomU-s shouldn't be permitted doing so, just in case. (An alternative
>> to making handler selection conditional here would be to bail early for the
>> !hwdom case, accompanied by a TODO comment. This would then also address
>> the lack of virtualization of the extended capability chain, as we may not
>> blindly expose all capabilities to DomU-s.)
> 
> I don't think we can safely expose this capability to domUs by
> default, so my preference would be a returning an error in that case
> (and printing a log message indicating ReBAR is not supported for
> domUs).

I understood Jiqian's recent reply to mean that that's what he's going to do.

> Note it's already not exposed to domUs by not being part of
> supported_caps in init_header().

Just to mention it - supported_caps is, aiui, about "traditional" caps only
anyway, not extended ones.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10  9:54         ` Jan Beulich
@ 2024-12-10 11:25           ` Roger Pau Monné
  2024-12-10 12:15             ` Jan Beulich
                               ` (2 more replies)
  2024-12-11  6:18           ` Chen, Jiqian
  1 sibling, 3 replies; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-10 11:25 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
> On 10.12.2024 08:57, Chen, Jiqian wrote:
> > On 2024/12/10 15:17, Jan Beulich wrote:
> >> On 10.12.2024 08:07, Chen, Jiqian wrote:
> >>> On 2024/12/9 21:59, Jan Beulich wrote:
> >>>> On 02.12.2024 07:09, Jiqian Chen wrote:
> >>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> >>>>> +                                      unsigned int reg,
> >>>>> +                                      uint32_t val,
> >>>>> +                                      void *data)
> >>>>> +{
> >>>>> +    uint64_t size;
> >>>>> +    unsigned int index;
> >>>>> +    struct vpci_bar *bars = data;
> >>>>> +
> >>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> >>>>> +        return;
> >>>>
> >>>> I don't think something like this can go uncommented. I don't think the
> >>>> spec mandates to drop writes in this situation?
> >>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
> >>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
> >>> such as the result of debugging with Roger in the previous version.
> >>> I will add the spec's sentences as comments here in next version.
> >>
> >> What you quote from the spec may not be enough as a comment here. There's
> >> no direct implication that the write would simply be dropped on the floor
> >> if the bit is still set. So I think you want to go a little beyond just
> >> quoting from the spec.
> > How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
> > Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
> 
> That'll be better, but imo still not enough to explain the outright ignoring
> of the write.

I think we might want to do something along the lines of:

uint64_t size = PCI_REBAR_CTRL_SIZE(val);
struct vpci_bar *bar = data;

if ( bar->enabled )
{
    if ( size == bar->size )
        return;

    /*
     * Refuse to resize a BAR while memory decoding is enabled, as
     * otherwise the size of the mapped region in the p2m would become
     * stale with the newly set BAR size, and the position of the BAR
     * would be reset to undefined.  Note the PCIe specification also
     * forbids resizing a BAR with memory decoding enabled.
     */
    gprintk(XENLOG_ERR,
            "%pp: refuse to resize BAR with memory decoding enabled\n",
	    &pci->sbdf);
    return;
}

Note this requires that the data parameter points to the BAR that
matches the ReBAR control register, this needs adjusting in
init_rebar().

> >>>>> +        if ( rc )
> >>>>> +        {
> >>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
> >>>>> +                   &pdev->sbdf, rc);
> >>>>> +            break;
> >>>>> +        }
> >>>>> +
> >>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
> >>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
> >>>>> +                               pdev->vpci->header.bars);
> >>>>> +        if ( rc )
> >>>>> +        {
> >>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
> >>>>> +                   &pdev->sbdf, rc);
> >>>>> +            break;
> >>>>
> >>>> Is it correct to keep the other handler installed? After all ...
> >>> Will change to "return rc;" here and above in next version.
> >>
> >> I'm not convinced this is what we want, as per ...
> >>
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +    return 0;
> >>>>
> >>>> ... you - imo sensibly - aren't communicating the error back up (to allow
> >>>> the device to be used without BAR resizing.
> >>
> >> ... what I said here.
> > Sorry, I didn’t understand.
> > Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?
> 
> No, if you return an error here, nothing else needs doing. However, I
> question that returning an error here is good or even necessary. In
> the event of an error, the device ought to still be usable, just
> without the BAR-resizing capability.

So you suggest that the capability should be hidden in that case?  We
have logic to hide capabilities, just not used for the hardware
domain.  It would need some extra wiring to be capable of hiding
failed capabilities.

Regards, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-02  6:09 [PATCH v2 1/1] vpci: Add resizable bar support Jiqian Chen
  2024-12-09 13:59 ` Jan Beulich
@ 2024-12-10 11:30 ` Roger Pau Monné
  2024-12-11  6:42   ` Chen, Jiqian
  1 sibling, 1 reply; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-10 11:30 UTC (permalink / raw)
  To: Jiqian Chen
  Cc: xen-devel, Andrew Cooper, Jan Beulich, Julien Grall,
	Stefano Stabellini, Huang Rui

On Mon, Dec 02, 2024 at 02:09:56PM +0800, Jiqian Chen wrote:
> Some devices, like discrete GPU of amd, support resizable bar
> capability, but vpci of Xen doesn't support this feature, so
> they fail to resize bars and then cause probing failure.
> 
> According to PCIe spec, each bar that supports resizing has
> two registers, PCI_REBAR_CAP and PCI_REBAR_CTRL. So, add
> handlers for them to support resizing the size of BARs.
> 
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> Hi all,
> 
> v2->v1 changes:
> *In rebar_ctrl_write, to check if memory decoding is enabled, and added
> some checks for the type of Bar.
> *Added vpci_hw_write32 to handle PCI_REBAR_CAP's write, since there is
> no write limitation of dom0.
> *And has many other minor modifications as well.
> 
> Best regards,
> Jiqian Chen.
> ---
>  xen/drivers/vpci/Makefile  |  2 +-
>  xen/drivers/vpci/rebar.c   | 93 ++++++++++++++++++++++++++++++++++++++
>  xen/drivers/vpci/vpci.c    |  6 +++
>  xen/include/xen/pci_regs.h | 11 +++++
>  xen/include/xen/vpci.h     |  2 +
>  5 files changed, 113 insertions(+), 1 deletion(-)
>  create mode 100644 xen/drivers/vpci/rebar.c
> 
> diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
> index 1a1413b93e76..a7c8a30a8956 100644
> --- a/xen/drivers/vpci/Makefile
> +++ b/xen/drivers/vpci/Makefile
> @@ -1,2 +1,2 @@
> -obj-y += vpci.o header.o
> +obj-y += vpci.o header.o rebar.o
>  obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
> diff --git a/xen/drivers/vpci/rebar.c b/xen/drivers/vpci/rebar.c
> new file mode 100644
> index 000000000000..156e8d337426
> --- /dev/null
> +++ b/xen/drivers/vpci/rebar.c
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
> + */
> +
> +#include <xen/hypercall.h>
> +#include <xen/vpci.h>
> +
> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> +                                      unsigned int reg,
> +                                      uint32_t val,
> +                                      void *data)
> +{
> +    uint64_t size;
> +    unsigned int index;
> +    struct vpci_bar *bars = data;
> +
> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> +        return;
> +
> +    index = pci_conf_read32(pdev->sbdf, reg) & PCI_REBAR_CTRL_BAR_IDX;
> +    if ( index >= PCI_HEADER_NORMAL_NR_BARS )
> +        return;
> +
> +    if ( bars[index].type != VPCI_BAR_MEM64_LO &&
> +         bars[index].type != VPCI_BAR_MEM32 )
> +        return;

For those early returns that don't propagate the write to the
register, we need to log a message to notice the write has been
dropped, otherwise there's no way to identify such cases.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:25           ` Roger Pau Monné
@ 2024-12-10 12:15             ` Jan Beulich
  2024-12-10 12:54               ` Roger Pau Monné
  2024-12-11  6:37             ` Chen, Jiqian
  2024-12-11  7:57             ` Chen, Jiqian
  2 siblings, 1 reply; 27+ messages in thread
From: Jan Beulich @ 2024-12-10 12:15 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On 10.12.2024 12:25, Roger Pau Monné wrote:
> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
>> On 10.12.2024 08:57, Chen, Jiqian wrote:
>>> On 2024/12/10 15:17, Jan Beulich wrote:
>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>>>>>> +                                      unsigned int reg,
>>>>>>> +                                      uint32_t val,
>>>>>>> +                                      void *data)
>>>>>>> +{
>>>>>>> +    uint64_t size;
>>>>>>> +    unsigned int index;
>>>>>>> +    struct vpci_bar *bars = data;
>>>>>>> +
>>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>>>>>> +        return;
>>>>>>
>>>>>> I don't think something like this can go uncommented. I don't think the
>>>>>> spec mandates to drop writes in this situation?
>>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
>>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
>>>>> such as the result of debugging with Roger in the previous version.
>>>>> I will add the spec's sentences as comments here in next version.
>>>>
>>>> What you quote from the spec may not be enough as a comment here. There's
>>>> no direct implication that the write would simply be dropped on the floor
>>>> if the bit is still set. So I think you want to go a little beyond just
>>>> quoting from the spec.
>>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
>>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
>>
>> That'll be better, but imo still not enough to explain the outright ignoring
>> of the write.
> 
> I think we might want to do something along the lines of:
> 
> uint64_t size = PCI_REBAR_CTRL_SIZE(val);
> struct vpci_bar *bar = data;
> 
> if ( bar->enabled )
> {
>     if ( size == bar->size )
>         return;
> 
>     /*
>      * Refuse to resize a BAR while memory decoding is enabled, as
>      * otherwise the size of the mapped region in the p2m would become
>      * stale with the newly set BAR size, and the position of the BAR
>      * would be reset to undefined.  Note the PCIe specification also
>      * forbids resizing a BAR with memory decoding enabled.
>      */
>     gprintk(XENLOG_ERR,
>             "%pp: refuse to resize BAR with memory decoding enabled\n",
> 	    &pci->sbdf);
>     return;
> }
> 
> Note this requires that the data parameter points to the BAR that
> matches the ReBAR control register, this needs adjusting in
> init_rebar().

SGTM.

>>>>>>> +        if ( rc )
>>>>>>> +        {
>>>>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>>>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>>>>>> +                               pdev->vpci->header.bars);
>>>>>>> +        if ( rc )
>>>>>>> +        {
>>>>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>> +            break;
>>>>>>
>>>>>> Is it correct to keep the other handler installed? After all ...
>>>>> Will change to "return rc;" here and above in next version.
>>>>
>>>> I'm not convinced this is what we want, as per ...
>>>>
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>
>>>>>> ... you - imo sensibly - aren't communicating the error back up (to allow
>>>>>> the device to be used without BAR resizing.
>>>>
>>>> ... what I said here.
>>> Sorry, I didn’t understand.
>>> Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?
>>
>> No, if you return an error here, nothing else needs doing. However, I
>> question that returning an error here is good or even necessary. In
>> the event of an error, the device ought to still be usable, just
>> without the BAR-resizing capability.
> 
> So you suggest that the capability should be hidden in that case?

Yes.

>  We
> have logic to hide capabilities, just not used for the hardware
> domain.  It would need some extra wiring to be capable of hiding
> failed capabilities.

Indeed.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 12:15             ` Jan Beulich
@ 2024-12-10 12:54               ` Roger Pau Monné
  0 siblings, 0 replies; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-10 12:54 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Tue, Dec 10, 2024 at 01:15:00PM +0100, Jan Beulich wrote:
> On 10.12.2024 12:25, Roger Pau Monné wrote:
> > On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
> >> On 10.12.2024 08:57, Chen, Jiqian wrote:
> >>> On 2024/12/10 15:17, Jan Beulich wrote:
> >>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
> >>>>> On 2024/12/9 21:59, Jan Beulich wrote:
> >>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
> >>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> >>>>>>> +                                      unsigned int reg,
> >>>>>>> +                                      uint32_t val,
> >>>>>>> +                                      void *data)
> >>>>>>> +{
> >>>>>>> +    uint64_t size;
> >>>>>>> +    unsigned int index;
> >>>>>>> +    struct vpci_bar *bars = data;
> >>>>>>> +
> >>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> >>>>>>> +        return;
> >>>>>>
> >>>>>> I don't think something like this can go uncommented. I don't think the
> >>>>>> spec mandates to drop writes in this situation?
> >>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
> >>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
> >>>>> such as the result of debugging with Roger in the previous version.
> >>>>> I will add the spec's sentences as comments here in next version.
> >>>>
> >>>> What you quote from the spec may not be enough as a comment here. There's
> >>>> no direct implication that the write would simply be dropped on the floor
> >>>> if the bit is still set. So I think you want to go a little beyond just
> >>>> quoting from the spec.
> >>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
> >>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
> >>
> >> That'll be better, but imo still not enough to explain the outright ignoring
> >> of the write.
> > 
> > I think we might want to do something along the lines of:
> > 
> > uint64_t size = PCI_REBAR_CTRL_SIZE(val);
> > struct vpci_bar *bar = data;
> > 
> > if ( bar->enabled )
> > {
> >     if ( size == bar->size )
> >         return;
> > 
> >     /*
> >      * Refuse to resize a BAR while memory decoding is enabled, as
> >      * otherwise the size of the mapped region in the p2m would become
> >      * stale with the newly set BAR size, and the position of the BAR
> >      * would be reset to undefined.  Note the PCIe specification also
> >      * forbids resizing a BAR with memory decoding enabled.
> >      */
> >     gprintk(XENLOG_ERR,
> >             "%pp: refuse to resize BAR with memory decoding enabled\n",
> > 	    &pci->sbdf);
> >     return;
> > }
> > 
> > Note this requires that the data parameter points to the BAR that
> > matches the ReBAR control register, this needs adjusting in
> > init_rebar().
> 
> SGTM.

One nit that I'm kind of unsure, is whether writing the ReBAR with the
same BAR size as it's currently set also causes the matching BAR
register to be reset.  I'm unsure whether dropping the write when size
== bar->size without logging a message is appropriate, as it could be
a change in behavior if the software expects the BAR position to be
reset.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10  9:54         ` Jan Beulich
  2024-12-10 11:25           ` Roger Pau Monné
@ 2024-12-11  6:18           ` Chen, Jiqian
  1 sibling, 0 replies; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  6:18 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné,
	Chen, Jiqian

On 2024/12/10 17:54, Jan Beulich wrote:
> On 10.12.2024 08:57, Chen, Jiqian wrote:
>> On 2024/12/10 15:17, Jan Beulich wrote:
>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>> On 02.12.2024 07:09, Jiqian Chen wrote: 
>>>>>> @@ -541,6 +542,16 @@
>>>>>>  #define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
>>>>>>  #define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
>>>>>>  
>>>>>> +/* Resizable BARs */
>>>>>> +#define PCI_REBAR_CAP		4	/* capability register */
>>>>>> +#define  PCI_REBAR_CAP_SIZES		0xFFFFFFF0  /* supported BAR sizes */
>>>>>
>>>>> Misra demands that this have a U suffix.
>>>> Do below PCI_REBAR_CTRL_BAR_IDX, PCI_REBAR_CTRL_NBAR_MASK and PCI_REBAR_CTRL_BAR_SIZE also need a U suffix?
>>>
>>> They may want to gain them for consistency, but they don't strictly need
>>> them. I wanted to say "See the rest of the file", but it looks like the
>>> file wasn't cleaned up yet Misra-wise.
>> Yes, I noticed that the rest of the file didn't add U suffix too.
>> So, I just need to add U suffixes for my new macros?
> 
> You only strictly need to add U to values with the top bit set.
Got it, thanks!

> 
>>>>>> +#define PCI_REBAR_CTRL		8	/* control register */
>>>>>> +#define  PCI_REBAR_CTRL_BAR_IDX	0x00000007  /* BAR index */
>>>>>> +#define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
>>>>>> +#define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
>>>>>> +#define  PCI_REBAR_CTRL_SIZE(v) \
>>>>>> +            (1UL << (MASK_EXTR(v, PCI_REBAR_CTRL_BAR_SIZE) + 20))
>>>>>
>>>>> The literal 20 (appearing here the 2nd time) also wants hiding behind a
>>>>> #define.
>>>> OK, will add " #define PCI_REBAR_SIZE_UNIT_BYTES_LEN 20" to replace above two '20' case.
>>>
>>> What is "UNIT_BYTES_LEN" there? There's nothing byte-ish here, I don't
>>> think, 20 is simply the shift bias.
>> It's a naming problem. What I want to express here is that the basic unit is 1MB, which is 2^20 of bytes.
>> Since the spec has the definition about the value of the bar size bits of register:
>> BAR Size - This is an encoded value.
>> 0	1 MB (2^20 bytes)
>> 1	2 MB (2^21 bytes)
>> 2	4 MB (2^22 bytes)
>> 3	8 MB (2^23 bytes)
>> …
>> 43	8 EB (2^63 bytes)
>> Do you have suggestion about this macro name?
> 
> PCI_REBAR_SIZE_BIAS? PCI_REBAR_SIZE_SHIFT_BIAS? PCI_REBAR_SIZE_SHIFT?
Will change to PCI_REBAR_SIZE_BIAS, thanks.

> 
> Jan

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:00   ` Roger Pau Monné
  2024-12-10 11:05     ` Jan Beulich
@ 2024-12-11  6:20     ` Chen, Jiqian
  1 sibling, 0 replies; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  6:20 UTC (permalink / raw)
  To: Roger Pau Monné, Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On 2024/12/10 19:00, Roger Pau Monné wrote:
> On Mon, Dec 09, 2024 at 02:59:31PM +0100, Jan Beulich wrote:
>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>> +static int cf_check init_rebar(struct pci_dev *pdev)
>>> +{
>>> +    uint32_t ctrl;
>>> +    unsigned int rebar_offset, nbars;
>>> +
>>> +    rebar_offset = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_REBAR);
>>> +
>>> +    if ( !rebar_offset )
>>> +        return 0;
>>> +
>>> +    ctrl = pci_conf_read32(pdev->sbdf, rebar_offset + PCI_REBAR_CTRL);
>>> +    nbars = MASK_EXTR(ctrl, PCI_REBAR_CTRL_NBAR_MASK);
>>> +
>>> +    for ( unsigned int i = 0; i < nbars; i++, rebar_offset += PCI_REBAR_CTRL )
>>> +    {
>>> +        int rc;
>>> +
>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, vpci_hw_write32,
>>> +                               rebar_offset + PCI_REBAR_CAP, 4, NULL);
>>
>> The capability register is r/o aiui. While permitting hwdom to write it is
>> fine, DomU-s shouldn't be permitted doing so, just in case. (An alternative
>> to making handler selection conditional here would be to bail early for the
>> !hwdom case, accompanied by a TODO comment. This would then also address
>> the lack of virtualization of the extended capability chain, as we may not
>> blindly expose all capabilities to DomU-s.)
> 
> I don't think we can safely expose this capability to domUs by
> default, so my preference would be a returning an error in that case
> (and printing a log message indicating ReBAR is not supported for
> domUs).

If I understand correctly.
I need to add below check in init_rebar():
    /* TODO */
    if ( !is_hardware_domain(pdev->domain) )
    {
        printk("ReBar is not supported for domUs\n");
        return -EOPNOTSUPP;
    }

> 
> Note it's already not exposed to domUs by not being part of
> supported_caps in init_header().
> 
> Regards, Roger

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:25           ` Roger Pau Monné
  2024-12-10 12:15             ` Jan Beulich
@ 2024-12-11  6:37             ` Chen, Jiqian
  2024-12-11  8:16               ` Roger Pau Monné
  2024-12-11  7:57             ` Chen, Jiqian
  2 siblings, 1 reply; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  6:37 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org, Chen, Jiqian

On 2024/12/10 19:25, Roger Pau Monné wrote:
> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
>> On 10.12.2024 08:57, Chen, Jiqian wrote:
>>> On 2024/12/10 15:17, Jan Beulich wrote:
>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>>>>>> +                                      unsigned int reg,
>>>>>>> +                                      uint32_t val,
>>>>>>> +                                      void *data)
>>>>>>> +{
>>>>>>> +    uint64_t size;
>>>>>>> +    unsigned int index;
>>>>>>> +    struct vpci_bar *bars = data;
>>>>>>> +
>>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>>>>>> +        return;
>>>>>>
>>>>>> I don't think something like this can go uncommented. I don't think the
>>>>>> spec mandates to drop writes in this situation?
>>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
>>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
>>>>> such as the result of debugging with Roger in the previous version.
>>>>> I will add the spec's sentences as comments here in next version.
>>>>
>>>> What you quote from the spec may not be enough as a comment here. There's
>>>> no direct implication that the write would simply be dropped on the floor
>>>> if the bit is still set. So I think you want to go a little beyond just
>>>> quoting from the spec.
>>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
>>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
>>
>> That'll be better, but imo still not enough to explain the outright ignoring
>> of the write.
> 
> I think we might want to do something along the lines of:
> 
> uint64_t size = PCI_REBAR_CTRL_SIZE(val);
> struct vpci_bar *bar = data;
> 
> if ( bar->enabled )
> {
>     if ( size == bar->size )
>         return;
> 
>     /*
>      * Refuse to resize a BAR while memory decoding is enabled, as
>      * otherwise the size of the mapped region in the p2m would become
>      * stale with the newly set BAR size, and the position of the BAR
>      * would be reset to undefined.  Note the PCIe specification also
>      * forbids resizing a BAR with memory decoding enabled.
>      */
>     gprintk(XENLOG_ERR,
>             "%pp: refuse to resize BAR with memory decoding enabled\n",
> 	    &pci->sbdf);
>     return;
> }
Thank you very much!

> 
> Note this requires that the data parameter points to the BAR that
> matches the ReBAR control register, this needs adjusting in
> init_rebar().
I think I can keep current implementation of init_rebar() and use bars[index] to get the corresponding BAR.

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:30 ` Roger Pau Monné
@ 2024-12-11  6:42   ` Chen, Jiqian
  0 siblings, 0 replies; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  6:42 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Jan Beulich,
	Julien Grall, Stefano Stabellini, Huang, Ray, Chen, Jiqian

On 2024/12/10 19:30, Roger Pau Monné wrote:
> On Mon, Dec 02, 2024 at 02:09:56PM +0800, Jiqian Chen wrote:
>> Some devices, like discrete GPU of amd, support resizable bar
>> capability, but vpci of Xen doesn't support this feature, so
>> they fail to resize bars and then cause probing failure.
>>
>> According to PCIe spec, each bar that supports resizing has
>> two registers, PCI_REBAR_CAP and PCI_REBAR_CTRL. So, add
>> handlers for them to support resizing the size of BARs.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>> Hi all,
>>
>> v2->v1 changes:
>> *In rebar_ctrl_write, to check if memory decoding is enabled, and added
>> some checks for the type of Bar.
>> *Added vpci_hw_write32 to handle PCI_REBAR_CAP's write, since there is
>> no write limitation of dom0.
>> *And has many other minor modifications as well.
>>
>> Best regards,
>> Jiqian Chen.
>> ---
>>  xen/drivers/vpci/Makefile  |  2 +-
>>  xen/drivers/vpci/rebar.c   | 93 ++++++++++++++++++++++++++++++++++++++
>>  xen/drivers/vpci/vpci.c    |  6 +++
>>  xen/include/xen/pci_regs.h | 11 +++++
>>  xen/include/xen/vpci.h     |  2 +
>>  5 files changed, 113 insertions(+), 1 deletion(-)
>>  create mode 100644 xen/drivers/vpci/rebar.c
>>
>> diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
>> index 1a1413b93e76..a7c8a30a8956 100644
>> --- a/xen/drivers/vpci/Makefile
>> +++ b/xen/drivers/vpci/Makefile
>> @@ -1,2 +1,2 @@
>> -obj-y += vpci.o header.o
>> +obj-y += vpci.o header.o rebar.o
>>  obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
>> diff --git a/xen/drivers/vpci/rebar.c b/xen/drivers/vpci/rebar.c
>> new file mode 100644
>> index 000000000000..156e8d337426
>> --- /dev/null
>> +++ b/xen/drivers/vpci/rebar.c
>> @@ -0,0 +1,93 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
>> + *
>> + * Author: Jiqian Chen <Jiqian.Chen@amd.com>
>> + */
>> +
>> +#include <xen/hypercall.h>
>> +#include <xen/vpci.h>
>> +
>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>> +                                      unsigned int reg,
>> +                                      uint32_t val,
>> +                                      void *data)
>> +{
>> +    uint64_t size;
>> +    unsigned int index;
>> +    struct vpci_bar *bars = data;
>> +
>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>> +        return;
>> +
>> +    index = pci_conf_read32(pdev->sbdf, reg) & PCI_REBAR_CTRL_BAR_IDX;
>> +    if ( index >= PCI_HEADER_NORMAL_NR_BARS )
>> +        return;
>> +
>> +    if ( bars[index].type != VPCI_BAR_MEM64_LO &&
>> +         bars[index].type != VPCI_BAR_MEM32 )
>> +        return;
> 
> For those early returns that don't propagate the write to the
> register, we need to log a message to notice the write has been
> dropped, otherwise there's no way to identify such cases.
Will add some warning logs in next version.

> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-10 11:25           ` Roger Pau Monné
  2024-12-10 12:15             ` Jan Beulich
  2024-12-11  6:37             ` Chen, Jiqian
@ 2024-12-11  7:57             ` Chen, Jiqian
  2024-12-11  8:25               ` Jan Beulich
  2024-12-11  8:40               ` Roger Pau Monné
  2 siblings, 2 replies; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  7:57 UTC (permalink / raw)
  To: Roger Pau Monné, Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On 2024/12/10 19:25, Roger Pau Monné wrote:
> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
>> On 10.12.2024 08:57, Chen, Jiqian wrote:
>>> On 2024/12/10 15:17, Jan Beulich wrote:
>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>>>> +        if ( rc )
>>>>>>> +        {
>>>>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>>>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>>>>>> +                               pdev->vpci->header.bars);
>>>>>>> +        if ( rc )
>>>>>>> +        {
>>>>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>> +            break;
>>>>>>
>>>>>> Is it correct to keep the other handler installed? After all ...
>>>>> Will change to "return rc;" here and above in next version.
>>>>
>>>> I'm not convinced this is what we want, as per ...
>>>>
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>
>>>>>> ... you - imo sensibly - aren't communicating the error back up (to allow
>>>>>> the device to be used without BAR resizing.
>>>>
>>>> ... what I said here.
>>> Sorry, I didn’t understand.
>>> Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?
>>
>> No, if you return an error here, nothing else needs doing. However, I
>> question that returning an error here is good or even necessary. In
>> the event of an error, the device ought to still be usable, just
>> without the BAR-resizing capability.
> 
> So you suggest that the capability should be hidden in that case?  We
> have logic to hide capabilities, just not used for the hardware
> domain.  It would need some extra wiring to be capable of hiding
> failed capabilities.
Can you give me a guidance on how to hide a failed capability?
What codes are current logic to hide capabilities?
Then maybe I can add a patch to implement it.

> 
> Regards, Roger.

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  6:37             ` Chen, Jiqian
@ 2024-12-11  8:16               ` Roger Pau Monné
  2024-12-11  9:36                 ` Chen, Jiqian
  0 siblings, 1 reply; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-11  8:16 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Wed, Dec 11, 2024 at 06:37:30AM +0000, Chen, Jiqian wrote:
> On 2024/12/10 19:25, Roger Pau Monné wrote:
> > On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
> >> On 10.12.2024 08:57, Chen, Jiqian wrote:
> >>> On 2024/12/10 15:17, Jan Beulich wrote:
> >>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
> >>>>> On 2024/12/9 21:59, Jan Beulich wrote:
> >>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
> >>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> >>>>>>> +                                      unsigned int reg,
> >>>>>>> +                                      uint32_t val,
> >>>>>>> +                                      void *data)
> >>>>>>> +{
> >>>>>>> +    uint64_t size;
> >>>>>>> +    unsigned int index;
> >>>>>>> +    struct vpci_bar *bars = data;
> >>>>>>> +
> >>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> >>>>>>> +        return;
> >>>>>>
> >>>>>> I don't think something like this can go uncommented. I don't think the
> >>>>>> spec mandates to drop writes in this situation?
> >>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
> >>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
> >>>>> such as the result of debugging with Roger in the previous version.
> >>>>> I will add the spec's sentences as comments here in next version.
> >>>>
> >>>> What you quote from the spec may not be enough as a comment here. There's
> >>>> no direct implication that the write would simply be dropped on the floor
> >>>> if the bit is still set. So I think you want to go a little beyond just
> >>>> quoting from the spec.
> >>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
> >>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
> >>
> >> That'll be better, but imo still not enough to explain the outright ignoring
> >> of the write.
> > 
> > I think we might want to do something along the lines of:
> > 
> > uint64_t size = PCI_REBAR_CTRL_SIZE(val);
> > struct vpci_bar *bar = data;
> > 
> > if ( bar->enabled )
> > {
> >     if ( size == bar->size )
> >         return;
> > 
> >     /*
> >      * Refuse to resize a BAR while memory decoding is enabled, as
> >      * otherwise the size of the mapped region in the p2m would become
> >      * stale with the newly set BAR size, and the position of the BAR
> >      * would be reset to undefined.  Note the PCIe specification also
> >      * forbids resizing a BAR with memory decoding enabled.
> >      */
> >     gprintk(XENLOG_ERR,
> >             "%pp: refuse to resize BAR with memory decoding enabled\n",
> > 	    &pci->sbdf);
> >     return;
> > }
> Thank you very much!
> 
> > 
> > Note this requires that the data parameter points to the BAR that
> > matches the ReBAR control register, this needs adjusting in
> > init_rebar().
> I think I can keep current implementation of init_rebar() and use bars[index] to get the corresponding BAR.

IMO it would be best if you can pass the corresponding bar struct into
the handler directly, as that will avoid having to do a PCI read just
to get the BAR index from PCI_REBAR_CTRL.  It should also avoid the
need for the index and BAR type checks in rebar_ctrl_write().

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  7:57             ` Chen, Jiqian
@ 2024-12-11  8:25               ` Jan Beulich
  2024-12-11  8:43                 ` Roger Pau Monné
  2024-12-11  8:40               ` Roger Pau Monné
  1 sibling, 1 reply; 27+ messages in thread
From: Jan Beulich @ 2024-12-11  8:25 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné

On 11.12.2024 08:57, Chen, Jiqian wrote:
> On 2024/12/10 19:25, Roger Pau Monné wrote:
>> So you suggest that the capability should be hidden in that case?  We
>> have logic to hide capabilities, just not used for the hardware
>> domain.  It would need some extra wiring to be capable of hiding
>> failed capabilities.
> Can you give me a guidance on how to hide a failed capability?
> What codes are current logic to hide capabilities?
> Then maybe I can add a patch to implement it.

It's really the other way around right now for "normal" capabilities:
We whitelist what we expose. See init_header()'s logic after checking
the PCI_STATUS_CAP_LIST bit. Actually past that block there's

        /* Extended capabilities read as zero, write ignore */
        rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
                               (void *)0);

I.e. no extended capabilities are exposed at all right now to DomU-s.
For Dom0 I guess we shouldn't use whitelisting, but the (extended)
capability list(s) would need similarly virtualizing to be able to
hide some.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  7:57             ` Chen, Jiqian
  2024-12-11  8:25               ` Jan Beulich
@ 2024-12-11  8:40               ` Roger Pau Monné
  2024-12-11  9:44                 ` Chen, Jiqian
  1 sibling, 1 reply; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-11  8:40 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Wed, Dec 11, 2024 at 07:57:29AM +0000, Chen, Jiqian wrote:
> On 2024/12/10 19:25, Roger Pau Monné wrote:
> > On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
> >> On 10.12.2024 08:57, Chen, Jiqian wrote:
> >>> On 2024/12/10 15:17, Jan Beulich wrote:
> >>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
> >>>>> On 2024/12/9 21:59, Jan Beulich wrote:
> >>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
> >>>>>>> +        if ( rc )
> >>>>>>> +        {
> >>>>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
> >>>>>>> +                   &pdev->sbdf, rc);
> >>>>>>> +            break;
> >>>>>>> +        }
> >>>>>>> +
> >>>>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
> >>>>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
> >>>>>>> +                               pdev->vpci->header.bars);
> >>>>>>> +        if ( rc )
> >>>>>>> +        {
> >>>>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
> >>>>>>> +                   &pdev->sbdf, rc);
> >>>>>>> +            break;
> >>>>>>
> >>>>>> Is it correct to keep the other handler installed? After all ...
> >>>>> Will change to "return rc;" here and above in next version.
> >>>>
> >>>> I'm not convinced this is what we want, as per ...
> >>>>
> >>>>>>> +        }
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    return 0;
> >>>>>>
> >>>>>> ... you - imo sensibly - aren't communicating the error back up (to allow
> >>>>>> the device to be used without BAR resizing.
> >>>>
> >>>> ... what I said here.
> >>> Sorry, I didn’t understand.
> >>> Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?
> >>
> >> No, if you return an error here, nothing else needs doing. However, I
> >> question that returning an error here is good or even necessary. In
> >> the event of an error, the device ought to still be usable, just
> >> without the BAR-resizing capability.
> > 
> > So you suggest that the capability should be hidden in that case?  We
> > have logic to hide capabilities, just not used for the hardware
> > domain.  It would need some extra wiring to be capable of hiding
> > failed capabilities.
> Can you give me a guidance on how to hide a failed capability?
> What codes are current logic to hide capabilities?

This was done by Stewart for the legacy PCI capabilities, but not
exactly to hide the capabilities that fail to init.  Take a look at
commit:

d830b0a7bc7e xen/vpci: header: filter PCI capabilities

However that was designed to expose a fixed set of capabilities,
always known when init_header() is executed.  If we want to hide
capabilities on failure we will need a bit more flexible interface I
think.

Ideally we would like to tie this to initialization hooks themselves,
so that in vpci_assign_device() an init function failing automatically
triggers the hiding of the failing capability.  That would need an
interface similar to:

#define REGISTER_VPCI_INIT(<capability id>, <function>, <priority>,
<pcie?>)

REGISTER_VPCI_INIT(PCI_CAP_ID_MSI, init_msi, VPCI_PRIORITY_LOW,
false);

And then in vpci_assign_device() any init function that has a
capability ID different than 0 and fails to initialize would lead to
the capability being masked.

It would be great to have an interface like this in place, because the
current error handling in vPCI is not great.  For the hardware domain
init functions failing will just lead to the device being fully
accessible by dom0 without any mediation.

Anyway, we don't do any of this for dom0 at the moment when MSI or
MSI-X fail to init, so I'm not sure it's fair to ask that you do this
for ReBAR.  It would be great if you want to, but it's not a trivial
amount of work.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  8:25               ` Jan Beulich
@ 2024-12-11  8:43                 ` Roger Pau Monné
  2024-12-11  8:53                   ` Jan Beulich
  0 siblings, 1 reply; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-11  8:43 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Wed, Dec 11, 2024 at 09:25:16AM +0100, Jan Beulich wrote:
> On 11.12.2024 08:57, Chen, Jiqian wrote:
> > On 2024/12/10 19:25, Roger Pau Monné wrote:
> >> So you suggest that the capability should be hidden in that case?  We
> >> have logic to hide capabilities, just not used for the hardware
> >> domain.  It would need some extra wiring to be capable of hiding
> >> failed capabilities.
> > Can you give me a guidance on how to hide a failed capability?
> > What codes are current logic to hide capabilities?
> > Then maybe I can add a patch to implement it.
> 
> It's really the other way around right now for "normal" capabilities:
> We whitelist what we expose. See init_header()'s logic after checking
> the PCI_STATUS_CAP_LIST bit. Actually past that block there's
> 
>         /* Extended capabilities read as zero, write ignore */
>         rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
>                                (void *)0);
> 
> I.e. no extended capabilities are exposed at all right now to DomU-s.
> For Dom0 I guess we shouldn't use whitelisting, but the (extended)
> capability list(s) would need similarly virtualizing to be able to
> hide some.

Given this capability is only to be exposed to the hw domain (at least
for now), I'm not sure it's fair to ask to add all this
infrastructure as part of adding the new capability.  It would be great
to have it, but doesn't seem fair when there's already MSI and MSI-X
implemented without such support.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  8:43                 ` Roger Pau Monné
@ 2024-12-11  8:53                   ` Jan Beulich
  2024-12-11 10:19                     ` Chen, Jiqian
  0 siblings, 1 reply; 27+ messages in thread
From: Jan Beulich @ 2024-12-11  8:53 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On 11.12.2024 09:43, Roger Pau Monné wrote:
> On Wed, Dec 11, 2024 at 09:25:16AM +0100, Jan Beulich wrote:
>> On 11.12.2024 08:57, Chen, Jiqian wrote:
>>> On 2024/12/10 19:25, Roger Pau Monné wrote:
>>>> So you suggest that the capability should be hidden in that case?  We
>>>> have logic to hide capabilities, just not used for the hardware
>>>> domain.  It would need some extra wiring to be capable of hiding
>>>> failed capabilities.
>>> Can you give me a guidance on how to hide a failed capability?
>>> What codes are current logic to hide capabilities?
>>> Then maybe I can add a patch to implement it.
>>
>> It's really the other way around right now for "normal" capabilities:
>> We whitelist what we expose. See init_header()'s logic after checking
>> the PCI_STATUS_CAP_LIST bit. Actually past that block there's
>>
>>         /* Extended capabilities read as zero, write ignore */
>>         rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
>>                                (void *)0);
>>
>> I.e. no extended capabilities are exposed at all right now to DomU-s.
>> For Dom0 I guess we shouldn't use whitelisting, but the (extended)
>> capability list(s) would need similarly virtualizing to be able to
>> hide some.
> 
> Given this capability is only to be exposed to the hw domain (at least
> for now), I'm not sure it's fair to ask to add all this
> infrastructure as part of adding the new capability.  It would be great
> to have it, but doesn't seem fair when there's already MSI and MSI-X
> implemented without such support.

Well, of course this can also be modeled after MSI/MSI-X, failing
assignment when initialization for a capability fails. Yet while for
MSI / MSI-X this feels okay-ish (considering that many devices now
can't even operate very well without either of the two), I'd expect
BAR resizing to not be something that drivers (typically) rely on.
"Typically" because iirc Jiqian said the AMD display driver actually
does.

Jan


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  8:16               ` Roger Pau Monné
@ 2024-12-11  9:36                 ` Chen, Jiqian
  2024-12-11  9:45                   ` Roger Pau Monné
  0 siblings, 1 reply; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  9:36 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org, Chen, Jiqian

On 2024/12/11 16:16, Roger Pau Monné wrote:
> On Wed, Dec 11, 2024 at 06:37:30AM +0000, Chen, Jiqian wrote:
>> On 2024/12/10 19:25, Roger Pau Monné wrote:
>>> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
>>>> On 10.12.2024 08:57, Chen, Jiqian wrote:
>>>>> On 2024/12/10 15:17, Jan Beulich wrote:
>>>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
>>>>>>>>> +                                      unsigned int reg,
>>>>>>>>> +                                      uint32_t val,
>>>>>>>>> +                                      void *data)
>>>>>>>>> +{
>>>>>>>>> +    uint64_t size;
>>>>>>>>> +    unsigned int index;
>>>>>>>>> +    struct vpci_bar *bars = data;
>>>>>>>>> +
>>>>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
>>>>>>>>> +        return;
>>>>>>>>
>>>>>>>> I don't think something like this can go uncommented. I don't think the
>>>>>>>> spec mandates to drop writes in this situation?
>>>>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
>>>>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
>>>>>>> such as the result of debugging with Roger in the previous version.
>>>>>>> I will add the spec's sentences as comments here in next version.
>>>>>>
>>>>>> What you quote from the spec may not be enough as a comment here. There's
>>>>>> no direct implication that the write would simply be dropped on the floor
>>>>>> if the bit is still set. So I think you want to go a little beyond just
>>>>>> quoting from the spec.
>>>>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
>>>>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
>>>>
>>>> That'll be better, but imo still not enough to explain the outright ignoring
>>>> of the write.
>>>
>>> I think we might want to do something along the lines of:
>>>
>>> uint64_t size = PCI_REBAR_CTRL_SIZE(val);
>>> struct vpci_bar *bar = data;
>>>
>>> if ( bar->enabled )
>>> {
>>>     if ( size == bar->size )
>>>         return;
>>>
>>>     /*
>>>      * Refuse to resize a BAR while memory decoding is enabled, as
>>>      * otherwise the size of the mapped region in the p2m would become
>>>      * stale with the newly set BAR size, and the position of the BAR
>>>      * would be reset to undefined.  Note the PCIe specification also
>>>      * forbids resizing a BAR with memory decoding enabled.
>>>      */
>>>     gprintk(XENLOG_ERR,
>>>             "%pp: refuse to resize BAR with memory decoding enabled\n",
>>> 	    &pci->sbdf);
>>>     return;
>>> }
>> Thank you very much!
>>
>>>
>>> Note this requires that the data parameter points to the BAR that
>>> matches the ReBAR control register, this needs adjusting in
>>> init_rebar().
>> I think I can keep current implementation of init_rebar() and use bars[index] to get the corresponding BAR.
> 
> IMO it would be best if you can pass the corresponding bar struct into
> the handler directly, as that will avoid having to do a PCI read just
> to get the BAR index from PCI_REBAR_CTRL.  It should also avoid the
> need for the index and BAR type checks in rebar_ctrl_write().
OK, if so, then I need to move the logic of getting index from PCI_REBAR_CTRL register and checking the BAR type into init_rebar(), right?

> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  8:40               ` Roger Pau Monné
@ 2024-12-11  9:44                 ` Chen, Jiqian
  0 siblings, 0 replies; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11  9:44 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org, Chen, Jiqian

On 2024/12/11 16:40, Roger Pau Monné wrote:
> On Wed, Dec 11, 2024 at 07:57:29AM +0000, Chen, Jiqian wrote:
>> On 2024/12/10 19:25, Roger Pau Monné wrote:
>>> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
>>>> On 10.12.2024 08:57, Chen, Jiqian wrote:
>>>>> On 2024/12/10 15:17, Jan Beulich wrote:
>>>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
>>>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
>>>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
>>>>>>>>> +        if ( rc )
>>>>>>>>> +        {
>>>>>>>>> +            printk("%pp: add register for PCI_REBAR_CAP failed (rc=%d)\n",
>>>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rebar_ctrl_write,
>>>>>>>>> +                               rebar_offset + PCI_REBAR_CTRL, 4,
>>>>>>>>> +                               pdev->vpci->header.bars);
>>>>>>>>> +        if ( rc )
>>>>>>>>> +        {
>>>>>>>>> +            printk("%pp: add register for PCI_REBAR_CTRL failed %d\n",
>>>>>>>>> +                   &pdev->sbdf, rc);
>>>>>>>>> +            break;
>>>>>>>>
>>>>>>>> Is it correct to keep the other handler installed? After all ...
>>>>>>> Will change to "return rc;" here and above in next version.
>>>>>>
>>>>>> I'm not convinced this is what we want, as per ...
>>>>>>
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return 0;
>>>>>>>>
>>>>>>>> ... you - imo sensibly - aren't communicating the error back up (to allow
>>>>>>>> the device to be used without BAR resizing.
>>>>>>
>>>>>> ... what I said here.
>>>>> Sorry, I didn’t understand.
>>>>> Do you mean it is not enough to return error code once a handler failed to be installed, I need to remove the already installed handlers?
>>>>
>>>> No, if you return an error here, nothing else needs doing. However, I
>>>> question that returning an error here is good or even necessary. In
>>>> the event of an error, the device ought to still be usable, just
>>>> without the BAR-resizing capability.
>>>
>>> So you suggest that the capability should be hidden in that case?  We
>>> have logic to hide capabilities, just not used for the hardware
>>> domain.  It would need some extra wiring to be capable of hiding
>>> failed capabilities.
>> Can you give me a guidance on how to hide a failed capability?
>> What codes are current logic to hide capabilities?
> 
> This was done by Stewart for the legacy PCI capabilities, but not
> exactly to hide the capabilities that fail to init.  Take a look at
> commit:
> 
> d830b0a7bc7e xen/vpci: header: filter PCI capabilities
> 
> However that was designed to expose a fixed set of capabilities,
> always known when init_header() is executed.  If we want to hide
> capabilities on failure we will need a bit more flexible interface I
> think.
> 
> Ideally we would like to tie this to initialization hooks themselves,
> so that in vpci_assign_device() an init function failing automatically
> triggers the hiding of the failing capability.  That would need an
> interface similar to:
> 
> #define REGISTER_VPCI_INIT(<capability id>, <function>, <priority>,
> <pcie?>)
> 
> REGISTER_VPCI_INIT(PCI_CAP_ID_MSI, init_msi, VPCI_PRIORITY_LOW,
> false);
> 
> And then in vpci_assign_device() any init function that has a
> capability ID different than 0 and fails to initialize would lead to
> the capability being masked.
> 
> It would be great to have an interface like this in place, because the
> current error handling in vPCI is not great.  For the hardware domain
> init functions failing will just lead to the device being fully
> accessible by dom0 without any mediation.
> 
> Anyway, we don't do any of this for dom0 at the moment when MSI or
> MSI-X fail to init, so I'm not sure it's fair to ask that you do this
> for ReBAR.  It would be great if you want to, but it's not a trivial
> amount of work.
Thanks!
It sounds like not easy to do.
But I will try to implement this if I have time.

> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  9:36                 ` Chen, Jiqian
@ 2024-12-11  9:45                   ` Roger Pau Monné
  0 siblings, 0 replies; 27+ messages in thread
From: Roger Pau Monné @ 2024-12-11  9:45 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Jan Beulich, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org

On Wed, Dec 11, 2024 at 09:36:00AM +0000, Chen, Jiqian wrote:
> On 2024/12/11 16:16, Roger Pau Monné wrote:
> > On Wed, Dec 11, 2024 at 06:37:30AM +0000, Chen, Jiqian wrote:
> >> On 2024/12/10 19:25, Roger Pau Monné wrote:
> >>> On Tue, Dec 10, 2024 at 10:54:43AM +0100, Jan Beulich wrote:
> >>>> On 10.12.2024 08:57, Chen, Jiqian wrote:
> >>>>> On 2024/12/10 15:17, Jan Beulich wrote:
> >>>>>> On 10.12.2024 08:07, Chen, Jiqian wrote:
> >>>>>>> On 2024/12/9 21:59, Jan Beulich wrote:
> >>>>>>>> On 02.12.2024 07:09, Jiqian Chen wrote:
> >>>>>>>>> +static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
> >>>>>>>>> +                                      unsigned int reg,
> >>>>>>>>> +                                      uint32_t val,
> >>>>>>>>> +                                      void *data)
> >>>>>>>>> +{
> >>>>>>>>> +    uint64_t size;
> >>>>>>>>> +    unsigned int index;
> >>>>>>>>> +    struct vpci_bar *bars = data;
> >>>>>>>>> +
> >>>>>>>>> +    if ( pci_conf_read16(pdev->sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY )
> >>>>>>>>> +        return;
> >>>>>>>>
> >>>>>>>> I don't think something like this can go uncommented. I don't think the
> >>>>>>>> spec mandates to drop writes in this situation?
> >>>>>>> Spec says: Software must clear the Memory Space Enable bit in the Command register before writing the BAR Size field.
> >>>>>>> This check is suggested by Roger and it really helps to prevent erroneous writes in this case,
> >>>>>>> such as the result of debugging with Roger in the previous version.
> >>>>>>> I will add the spec's sentences as comments here in next version.
> >>>>>>
> >>>>>> What you quote from the spec may not be enough as a comment here. There's
> >>>>>> no direct implication that the write would simply be dropped on the floor
> >>>>>> if the bit is still set. So I think you want to go a little beyond just
> >>>>>> quoting from the spec.
> >>>>> How about quoting Roger's previous words: " The memory decoding must be disabled before writing the BAR size field.
> >>>>> Otherwise changing the BAR size will lead to the active p2m mappings getting out of sync w.r.t. the new BAR size." ?
> >>>>
> >>>> That'll be better, but imo still not enough to explain the outright ignoring
> >>>> of the write.
> >>>
> >>> I think we might want to do something along the lines of:
> >>>
> >>> uint64_t size = PCI_REBAR_CTRL_SIZE(val);
> >>> struct vpci_bar *bar = data;
> >>>
> >>> if ( bar->enabled )
> >>> {
> >>>     if ( size == bar->size )
> >>>         return;
> >>>
> >>>     /*
> >>>      * Refuse to resize a BAR while memory decoding is enabled, as
> >>>      * otherwise the size of the mapped region in the p2m would become
> >>>      * stale with the newly set BAR size, and the position of the BAR
> >>>      * would be reset to undefined.  Note the PCIe specification also
> >>>      * forbids resizing a BAR with memory decoding enabled.
> >>>      */
> >>>     gprintk(XENLOG_ERR,
> >>>             "%pp: refuse to resize BAR with memory decoding enabled\n",
> >>> 	    &pci->sbdf);
> >>>     return;
> >>> }
> >> Thank you very much!
> >>
> >>>
> >>> Note this requires that the data parameter points to the BAR that
> >>> matches the ReBAR control register, this needs adjusting in
> >>> init_rebar().
> >> I think I can keep current implementation of init_rebar() and use bars[index] to get the corresponding BAR.
> > 
> > IMO it would be best if you can pass the corresponding bar struct into
> > the handler directly, as that will avoid having to do a PCI read just
> > to get the BAR index from PCI_REBAR_CTRL.  It should also avoid the
> > need for the index and BAR type checks in rebar_ctrl_write().
> OK, if so, then I need to move the logic of getting index from PCI_REBAR_CTRL register and checking the BAR type into init_rebar(), right?

Yes, I think that would be better, as then the check is done only once
at init rather than on every access.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11  8:53                   ` Jan Beulich
@ 2024-12-11 10:19                     ` Chen, Jiqian
  2024-12-11 10:25                       ` Jan Beulich
  0 siblings, 1 reply; 27+ messages in thread
From: Chen, Jiqian @ 2024-12-11 10:19 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Chen, Jiqian, Andrew Cooper, Julien Grall, Stefano Stabellini,
	Huang, Ray, xen-devel@lists.xenproject.org, Roger Pau Monné

On 2024/12/11 16:53, Jan Beulich wrote:
> On 11.12.2024 09:43, Roger Pau Monné wrote:
>> On Wed, Dec 11, 2024 at 09:25:16AM +0100, Jan Beulich wrote:
>>> On 11.12.2024 08:57, Chen, Jiqian wrote:
>>>> On 2024/12/10 19:25, Roger Pau Monné wrote:
>>>>> So you suggest that the capability should be hidden in that case?  We
>>>>> have logic to hide capabilities, just not used for the hardware
>>>>> domain.  It would need some extra wiring to be capable of hiding
>>>>> failed capabilities.
>>>> Can you give me a guidance on how to hide a failed capability?
>>>> What codes are current logic to hide capabilities?
>>>> Then maybe I can add a patch to implement it.
>>>
>>> It's really the other way around right now for "normal" capabilities:
>>> We whitelist what we expose. See init_header()'s logic after checking
>>> the PCI_STATUS_CAP_LIST bit. Actually past that block there's
>>>
>>>         /* Extended capabilities read as zero, write ignore */
>>>         rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
>>>                                (void *)0);
>>>
>>> I.e. no extended capabilities are exposed at all right now to DomU-s.
>>> For Dom0 I guess we shouldn't use whitelisting, but the (extended)
>>> capability list(s) would need similarly virtualizing to be able to
>>> hide some.
>>
>> Given this capability is only to be exposed to the hw domain (at least
>> for now), I'm not sure it's fair to ask to add all this
>> infrastructure as part of adding the new capability.  It would be great
>> to have it, but doesn't seem fair when there's already MSI and MSI-X
>> implemented without such support.
> 
> Well, of course this can also be modeled after MSI/MSI-X, failing
> assignment when initialization for a capability fails. Yet while for
> MSI / MSI-X this feels okay-ish (considering that many devices now
> can't even operate very well without either of the two), I'd expect
> BAR resizing to not be something that drivers (typically) rely on.
> "Typically" because iirc Jiqian said the AMD display driver actually
> does.
You mean what I said in last version? It is "amdgpu driver saves and restores the same pci state during initiazation without disabling memory decoding, that caused Roger's method not work".
And currently running amdgpu on Xen hypervisor has two scenarios, 
1. APU does not rely on ReBar capability, because APU's vram are all CPU accessible.
2. But for discrete GPU, they can't work based on current Xen codes, it needs resize Bar to make all vram CPU accessible. That is why I sent this patch to add Rebar support in Xen.

> 
> Jan

-- 
Best regards,
Jiqian Chen.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v2 1/1] vpci: Add resizable bar support
  2024-12-11 10:19                     ` Chen, Jiqian
@ 2024-12-11 10:25                       ` Jan Beulich
  0 siblings, 0 replies; 27+ messages in thread
From: Jan Beulich @ 2024-12-11 10:25 UTC (permalink / raw)
  To: Chen, Jiqian
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Huang, Ray,
	xen-devel@lists.xenproject.org, Roger Pau Monné

On 11.12.2024 11:19, Chen, Jiqian wrote:
> On 2024/12/11 16:53, Jan Beulich wrote:
>> On 11.12.2024 09:43, Roger Pau Monné wrote:
>>> On Wed, Dec 11, 2024 at 09:25:16AM +0100, Jan Beulich wrote:
>>>> On 11.12.2024 08:57, Chen, Jiqian wrote:
>>>>> On 2024/12/10 19:25, Roger Pau Monné wrote:
>>>>>> So you suggest that the capability should be hidden in that case?  We
>>>>>> have logic to hide capabilities, just not used for the hardware
>>>>>> domain.  It would need some extra wiring to be capable of hiding
>>>>>> failed capabilities.
>>>>> Can you give me a guidance on how to hide a failed capability?
>>>>> What codes are current logic to hide capabilities?
>>>>> Then maybe I can add a patch to implement it.
>>>>
>>>> It's really the other way around right now for "normal" capabilities:
>>>> We whitelist what we expose. See init_header()'s logic after checking
>>>> the PCI_STATUS_CAP_LIST bit. Actually past that block there's
>>>>
>>>>         /* Extended capabilities read as zero, write ignore */
>>>>         rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
>>>>                                (void *)0);
>>>>
>>>> I.e. no extended capabilities are exposed at all right now to DomU-s.
>>>> For Dom0 I guess we shouldn't use whitelisting, but the (extended)
>>>> capability list(s) would need similarly virtualizing to be able to
>>>> hide some.
>>>
>>> Given this capability is only to be exposed to the hw domain (at least
>>> for now), I'm not sure it's fair to ask to add all this
>>> infrastructure as part of adding the new capability.  It would be great
>>> to have it, but doesn't seem fair when there's already MSI and MSI-X
>>> implemented without such support.
>>
>> Well, of course this can also be modeled after MSI/MSI-X, failing
>> assignment when initialization for a capability fails. Yet while for
>> MSI / MSI-X this feels okay-ish (considering that many devices now
>> can't even operate very well without either of the two), I'd expect
>> BAR resizing to not be something that drivers (typically) rely on.
>> "Typically" because iirc Jiqian said the AMD display driver actually
>> does.
> You mean what I said in last version?

Yes.

Jan

> It is "amdgpu driver saves and restores the same pci state during initiazation without disabling memory decoding, that caused Roger's method not work".
> And currently running amdgpu on Xen hypervisor has two scenarios, 
> 1. APU does not rely on ReBar capability, because APU's vram are all CPU accessible.
> 2. But for discrete GPU, they can't work based on current Xen codes, it needs resize Bar to make all vram CPU accessible. That is why I sent this patch to add Rebar support in Xen.
> 
>>
>> Jan
> 



^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2024-12-11 10:25 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-02  6:09 [PATCH v2 1/1] vpci: Add resizable bar support Jiqian Chen
2024-12-09 13:59 ` Jan Beulich
2024-12-10  7:07   ` Chen, Jiqian
2024-12-10  7:17     ` Jan Beulich
2024-12-10  7:57       ` Chen, Jiqian
2024-12-10  9:54         ` Jan Beulich
2024-12-10 11:25           ` Roger Pau Monné
2024-12-10 12:15             ` Jan Beulich
2024-12-10 12:54               ` Roger Pau Monné
2024-12-11  6:37             ` Chen, Jiqian
2024-12-11  8:16               ` Roger Pau Monné
2024-12-11  9:36                 ` Chen, Jiqian
2024-12-11  9:45                   ` Roger Pau Monné
2024-12-11  7:57             ` Chen, Jiqian
2024-12-11  8:25               ` Jan Beulich
2024-12-11  8:43                 ` Roger Pau Monné
2024-12-11  8:53                   ` Jan Beulich
2024-12-11 10:19                     ` Chen, Jiqian
2024-12-11 10:25                       ` Jan Beulich
2024-12-11  8:40               ` Roger Pau Monné
2024-12-11  9:44                 ` Chen, Jiqian
2024-12-11  6:18           ` Chen, Jiqian
2024-12-10 11:00   ` Roger Pau Monné
2024-12-10 11:05     ` Jan Beulich
2024-12-11  6:20     ` Chen, Jiqian
2024-12-10 11:30 ` Roger Pau Monné
2024-12-11  6:42   ` Chen, Jiqian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.