qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>,
	qemu-devel@nongnu.org, david@gibson.dropbear.id.au
Cc: qemu-ppc@nongnu.org, agraf@suse.de, mdroth@linux.vnet.ibm.com
Subject: Re: [Qemu-devel] [PATCH v5 3/4] spapr_pci: enumerate and add PCI device tree
Date: Mon, 25 May 2015 19:51:27 +1000	[thread overview]
Message-ID: <5562F09F.3000808@ozlabs.ru> (raw)
In-Reply-To: <87d21p8du8.fsf@abhimanyu.in.ibm.com>

On 05/25/2015 02:45 PM, Nikunj A Dadhania wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>
>> On 05/19/2015 06:26 PM, Nikunj A Dadhania wrote:
>>> All the PCI enumeration and device node creation was off-loaded to
>>> SLOF. With PCI hotplug support, code needed to be added to add device
>>> node. This creates multiple copy of the code one in SLOF and other in
>>> hotplug code. To unify this, the patch adds the pci device node
>>> creation in Qemu. For backward compatibility, a flag
>>> "qemu,phb-enumerated" is added to the phb, suggesting to SLOF to not
>>> do device node creation.
>>>
>>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>>> [ Squashed Michael's drc_index changes ]
>>> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>>> ---
>>>    hw/ppc/spapr_pci.c | 188 ++++++++++++++++++++++++++++++++++++++++++-----------
>>>    1 file changed, 150 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 8b02a3e..12f1b9c 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -23,6 +23,7 @@
>>>     * THE SOFTWARE.
>>>     */
>>>    #include "hw/hw.h"
>>> +#include "hw/sysbus.h"
>>>    #include "hw/pci/pci.h"
>>>    #include "hw/pci/msi.h"
>>>    #include "hw/pci/msix.h"
>>> @@ -35,6 +36,7 @@
>>>    #include "qemu/error-report.h"
>>>    #include "qapi/qmp/qerror.h"
>>>
>>> +#include "hw/pci/pci_bridge.h"
>>>    #include "hw/pci/pci_bus.h"
>>>    #include "hw/ppc/spapr_drc.h"
>>>    #include "sysemu/device_tree.h"
>>> @@ -742,6 +744,31 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>        return &phb->iommu_as;
>>>    }
>>>
>>> +
>>> +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>>> +                                               PCIDevice *pdev)
>>> +{
>>> +    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>>> +    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>>> +                                    (phb->index << 16) |
>>> +                                    (busnr << 8) |
>>> +                                    pdev->devfn);
>>> +}
>>> +
>>> +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
>>> +                                            PCIDevice *pdev)
>>> +{
>>> +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
>>> +    sPAPRDRConnectorClass *drck;
>>> +
>>> +    if (!drc) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>>> +    return drck->get_index(drc);
>>> +}
>>> +
>>>    /* Macros to operate with address in OF binding to PCI */
>>>    #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
>>>    #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
>>> @@ -879,12 +906,13 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
>>>    }
>>>
>>>    static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>> -                                       int phb_index, int drc_index,
>>> +                                       sPAPRPHBState *sphb,
>>>                                           const char *drc_name)
>>>    {
>>>        ResourceProps rp;
>>>        bool is_bridge = false;
>>>        int pci_status;
>>> +    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
>>
>> Is this drc_index any different from the one which used to be passed to
>> this function? If no, then I do not see the point in changing the prototype
>> (or make another "this just makes code easier/nicer" patch).
>
> Its the same, I can have a separate patch. As I was changing this code
> the drc_index would need to be read in boot and hotplug code. So brought
> over the code here.
>
>> If yes, then it would be nice to see what the patch changed in this
>> regard in the commit log.
>>
>>
>>
>>>        if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
>>>            PCI_HEADER_TYPE_BRIDGE) {
>>> @@ -945,8 +973,13 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>>         * processed by OF beforehand
>>>         */
>>>        _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
>>> -    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
>>> -    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>>> +    if (drc_name) {
>>> +        _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name,
>>> +                         strlen(drc_name)));
>>> +    }
>>> +    if (drc_index) {
>>> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>>> +    }
>>>
>>>        _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>>>                              RESOURCE_CELLS_ADDRESS));
>>> @@ -963,30 +996,34 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>>        return 0;
>>>    }
>>>
>>> +typedef struct sPAPRFDT {
>>> +    void *fdt;
>>> +    int node_off;
>>> +    sPAPRPHBState *sphb;
>>> +} sPAPRFDT;
>>> +
>>>    /* create OF node for pci device and required OF DT properties */
>>> -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
>>> -                                       int drc_index, const char *drc_name,
>>> -                                       int *dt_offset)
>>> +static int spapr_create_pci_child_dt(PCIDevice *pdev, sPAPRFDT *p,
>>> +                                     const char *drc_name)
>>
>> Why s/dev/pdev/?
>
> PCIDev thats the only reason.


In this file, PCIDevice is called "dev", "pdev", "d", "pci_dev" so if you 
really want to change the name - do it once and for all occurrences OR do 
not do this at all :)


>
>>
>>
>>>    {
>>> -    void *fdt;
>>> -    int offset, ret, fdt_size;
>>> -    int slot = PCI_SLOT(dev->devfn);
>>> -    int func = PCI_FUNC(dev->devfn);
>>> -    char nodename[512];
>>> +    int offset, ret;
>>> +    char nodename[64];
>>
>> Why s/512/64/?
>
> Earlier this was called in recursion, so there was a comment in previous
> series to reduce this to lesser number.

I'd make a separate patch with s/512/64/ and also do s/sprintf/snprintf/ below.



>> This change and the one above hide what the patch really does to
>> spapr_create_pci_child_dt.
>>
>>
>>> +    int slot = PCI_SLOT(pdev->devfn);
>>> +    int func = PCI_FUNC(pdev->devfn);
>>>
>>> -    fdt = create_device_tree(&fdt_size);
>>>        if (func != 0) {
>>>            sprintf(nodename, "pci@%d,%d", slot, func);
>>>        } else {
>>>            sprintf(nodename, "pci@%d", slot);
>>>        }
>>> -    offset = fdt_add_subnode(fdt, 0, nodename);
>>> -    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
>>> +    offset = fdt_add_subnode(p->fdt, p->node_off, nodename);
>>> +    ret = spapr_populate_pci_child_dt(pdev, p->fdt, offset, p->sphb,
>>>                                          drc_name);
>>>        g_assert(!ret);
>>> -
>>> -    *dt_offset = offset;
>>> -    return fdt;
>>> +    if (ret) {
>>> +        return 0;
>>> +    }
>>> +    return offset;
>>>    }
>>>
>>>    static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>> @@ -996,24 +1033,26 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>>    {
>>>        sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>>>        DeviceState *dev = DEVICE(pdev);
>>> -    int drc_index = drck->get_index(drc);
>>>        const char *drc_name = drck->get_name(drc);
>>> -    void *fdt = NULL;
>>> -    int fdt_start_offset = 0;
>>> +    int fdt_start_offset = 0, fdt_size;
>>> +    sPAPRFDT s_fdt = {NULL, 0, NULL};
>>>
>>> -    /* boot-time devices get their device tree node created by SLOF, but for
>>> -     * hotplugged devices we need QEMU to generate it so the guest can fetch
>>> -     * it via RTAS
>>> -     */
>>>        if (dev->hotplugged) {
>>
>>
>> I understand the patch is not changing this but still while we are here -
>> spapr_phb_add_pci_device() is only called from spapr_phb_hot_plug_child(),
>> how can dev->hotplugged be not true in this function (if it cannot, you
>> could get rid of "out:"?
>
> It gets called even when the devices are added during boot.

Where else? I did grep and see just one call:

hw/ppc/spapr_pci.c:1087:static void 
spapr_phb_add_pci_device(sPAPRDRConnector *drc,
hw/ppc/spapr_pci.c:1166:    spapr_phb_add_pci_device(drc, phb, pdev, 
&local_err);



>
>>
>>> -        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
>>> -                                        &fdt_start_offset);
>>> +        s_fdt.fdt = create_device_tree(&fdt_size);
>>> +        s_fdt.sphb = phb;
>>> +        s_fdt.node_off = 0;
>>> +        fdt_start_offset = spapr_create_pci_child_dt(pdev, &s_fdt, drc_name);
>>> +        if (!fdt_start_offset) {
>>> +            error_setg(errp, "Failed to create pci child device tree node");
>>> +            goto out;
>>> +        }
>>>        }
>>>
>>>        drck->attach(drc, DEVICE(pdev),
>>> -                 fdt, fdt_start_offset, !dev->hotplugged, errp);
>>> +                 s_fdt.fdt, fdt_start_offset, !dev->hotplugged, errp);
>>> +out:
>>>        if (*errp) {
>>> -        g_free(fdt);
>>> +        g_free(s_fdt.fdt);
>>>        }
>>>    }
>>>
>>> @@ -1043,16 +1082,6 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
>>>        drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
>>>    }
>>>
>>> -static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>>> -                                               PCIDevice *pdev)
>>
>> Just adding forward declaration would make the patch shorter.
>
> Yes, I can do that.
>
>>
>>> -{
>>> -    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>>> -    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>>> -                                    (phb->index << 16) |
>>> -                                    (busnr << 8) |
>>> -                                    pdev->devfn);
>>> -}
>>> -
>>>    static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
>>>                                         DeviceState *plugged_dev, Error **errp)
>>>    {
>>> @@ -1482,6 +1511,75 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
>>>        return PCI_HOST_BRIDGE(dev);
>>>    }
>>>
>>> +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
>>> +                                          void *opaque)
>>> +{
>>> +    PCIBus *sec_bus;
>>> +    sPAPRFDT *p = opaque;
>>> +    int offset;
>>> +    sPAPRFDT s_fdt;
>>> +
>>> +    offset = spapr_create_pci_child_dt(pdev, p, NULL);
>>> +    if (!offset) {
>>> +        error_report("Failed to create pci child device tree node");
>>> +        return;
>>> +    }
>>> +
>>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
>>> +         PCI_HEADER_TYPE_BRIDGE)) {
>>> +        return;
>>> +    }
>>> +
>>> +    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>>> +    if (!sec_bus) {
>>> +        return;
>>> +    }
>>> +
>>> +    s_fdt.fdt = p->fdt;
>>> +    s_fdt.node_off = offset;
>>> +    s_fdt.sphb = p->sphb;
>>> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>> +                        spapr_populate_pci_devices_dt,
>>> +                        &s_fdt);
>>> +}
>>> +
>>> +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
>>> +                                           void *opaque)
>>> +{
>>> +    unsigned int *bus_no = opaque;
>>> +    unsigned int primary = *bus_no;
>>> +    unsigned int secondary;
>>> +    unsigned int subordinate = 0xff;
>>> +
>>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
>>> +         PCI_HEADER_TYPE_BRIDGE)) {
>>
>>
>> s/==/!=/ and "return" and no need in extra indent below.
>
> Right.
>
>>
>>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>>> +        secondary = *bus_no + 1;
>>
>>
>> (*bus_no)++;
>> secondary = *bus_no;
>>
>> and remove "bus_no = *bus_no + 1" below?
>> In fact, I do not need much sense in having "secondary" variable in this
>> function.
>>
>>> +        pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
>>> +        pci_default_write_config(pdev, PCI_SECONDARY_BUS, secondary, 1);
>>> +        pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, secondary, 1);
>>> +        *bus_no = *bus_no + 1;
>>> +        if (sec_bus) {
>>
>> same here? Just like you did in spapr_populate_pci_devices_dt(). I do not
>> insist though. But having less scopes just makes it easier/nicer to wrap
>> long lines in QEMU coding style (new line starts under "(").
>>
>>
>>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
>>> +            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>> +                                spapr_phb_pci_enumerate_bridge,
>>> +                                bus_no);
>>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
>>> +{
>>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>> +    unsigned int bus_no = 0;
>>> +
>>> +    pci_for_each_device(bus, pci_bus_num(bus),
>>> +                        spapr_phb_pci_enumerate_bridge,
>>> +                        &bus_no);
>>> +
>>> +}
>>> +
>>>    int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>                              uint32_t xics_phandle,
>>>                              void *fdt)
>>> @@ -1521,6 +1619,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>            cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
>>>        uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
>>>        sPAPRTCETable *tcet;
>>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>> +    sPAPRFDT s_fdt;
>>>
>>>        /* Start populating the FDT */
>>>        sprintf(nodename, "pci@%" PRIx64, phb->buid);
>>> @@ -1570,6 +1670,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>                     tcet->liobn, tcet->bus_offset,
>>>                     tcet->nb_table << tcet->page_shift);
>>>
>>> +    /* Walk the bridges and program the bus numbers*/
>>> +    spapr_phb_pci_enumerate(phb);
>>> +    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
>>
>>
>> Can we also add a hack here to scan for the "qemu,phb-enumerated" string in
>> the SLOF bin image?
>
> Really ? That would be ugly.


Well, chances that the binary image will have this line by accident are zero.

And I spent quite some time debugging SRIOV + VFIO when I realized that 
SLOF is old on the test machine where others used to debug too. It would be 
really nice to have a warning that something is wrong. May be extend 
"client-architecture-support" somehow or have some release/date signature 
in known place in SLOF... Thomas (?) also asked for this :)



>
>>
>>> +
>>> +    /* Populate tree nodes with PCI devices attached */
>>> +    s_fdt.fdt = fdt;
>>> +    s_fdt.node_off = bus_off;
>>> +    s_fdt.sphb = phb;
>>> +    pci_for_each_device(bus, pci_bus_num(bus),
>>> +                        spapr_populate_pci_devices_dt,
>>> +                        &s_fdt);
>>> +
>>>        ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
>>>                                    SPAPR_DR_CONNECTOR_TYPE_PCI);
>>>        if (ret) {
>>>




-- 
Alexey

  reply	other threads:[~2015-05-25  9:59 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-19  8:25 [Qemu-devel] [PATCH v5 0/4] spapr_pci: DT field fixes and PCI DT node creation in QEMU Nikunj A Dadhania
2015-05-19  8:25 ` [Qemu-devel] [PATCH v5 1/4] spapr_pci: encode missing 64-bit memory address space Nikunj A Dadhania
2015-05-19  8:26 ` [Qemu-devel] [PATCH v5 2/4] spapr_pci: encode class code including Prog IF register Nikunj A Dadhania
2015-05-19  8:26 ` [Qemu-devel] [PATCH v5 3/4] spapr_pci: enumerate and add PCI device tree Nikunj A Dadhania
2015-05-24 11:05   ` Alexey Kardashevskiy
2015-05-25  4:45     ` Nikunj A Dadhania
2015-05-25  9:51       ` Alexey Kardashevskiy [this message]
2015-05-25 10:23         ` Nikunj A Dadhania
2015-05-26  0:34           ` David Gibson
2015-05-19  8:26 ` [Qemu-devel] [PATCH v5 4/4] spapr_pci: populate ibm,loc-code Nikunj A Dadhania
2015-05-24 11:24   ` Alexey Kardashevskiy
2015-05-25  4:58     ` Nikunj A Dadhania
2015-05-25 10:06       ` Alexey Kardashevskiy
2015-05-25 10:27         ` Nikunj A Dadhania

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5562F09F.3000808@ozlabs.ru \
    --to=aik@ozlabs.ru \
    --cc=agraf@suse.de \
    --cc=david@gibson.dropbear.id.au \
    --cc=mdroth@linux.vnet.ibm.com \
    --cc=nikunj@linux.vnet.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).