From: Huang Rui <ray.huang@amd.com>
To: "Kuehling, Felix" <Felix.Kuehling@amd.com>
Cc: "amd-gfx@lists.freedesktop.org" <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH v4 2/2] drm/amdkfd: force raven as "dgpu" path (v4)
Date: Fri, 21 Aug 2020 11:53:17 +0800 [thread overview]
Message-ID: <20200821035317.GA337061@hr-amd> (raw)
In-Reply-To: <f266f414-94a2-795f-bcd4-2076eba38043@amd.com>
On Fri, Aug 21, 2020 at 10:41:17AM +0800, Kuehling, Felix wrote:
>
> Am 2020-08-20 um 4:40 a.m. schrieb Huang Rui:
> > We still have a few iommu issues which need to address, so force raven
> > as "dgpu" path for the moment.
> >
> > This is to add the fallback path to bypass IOMMU if IOMMU v2 is disabled
> > or ACPI CRAT table not correct.
> >
> > v2: Use ignore_crat parameter to decide whether it will go with IOMMUv2.
> > v3: Align with existed thunk, don't change the way of raven, only renoir
> > will use "dgpu" path by default.
> > v4: don't update global ignore_crat in the driver, and revise fallback
> > function if CRAT is broken.
> >
> > Signed-off-by: Huang Rui <ray.huang@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++-
> > drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 23 +++++++++++++++++++++--
> > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 ++++-
> > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 10 ++++++++--
> > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 20 ++++++++++++++++++++
> > 5 files changed, 57 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index a9a4319c24ae..189f9d7e190d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -684,11 +684,14 @@ MODULE_PARM_DESC(debug_largebar,
> > * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
> > * table to get information about AMD APUs. This option can serve as a workaround on
> > * systems with a broken CRAT table.
> > + *
> > + * Default is auto (according to asic type, iommu_v2, and crat table, to decide
> > + * whehter use CRAT)
> > */
> > int ignore_crat;
> > module_param(ignore_crat, int, 0444);
> > MODULE_PARM_DESC(ignore_crat,
> > - "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
> > + "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
> >
> > /**
> > * DOC: halt_if_hws_hang (int)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 59557e3e206a..a17cfc290072 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -22,6 +22,7 @@
> >
> > #include <linux/pci.h>
> > #include <linux/acpi.h>
> > +#include <asm/processor.h>
> > #include "kfd_crat.h"
> > #include "kfd_priv.h"
> > #include "kfd_topology.h"
> > @@ -740,6 +741,25 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> > return 0;
> > }
> >
> > +
> > +#ifdef CONFIG_ACPI
> > +
> > +bool kfd_ignore_crat(void)
> > +{
> > + bool ret;
> > +
> > + if (ignore_crat)
> > + return true;
> > +
> > +#ifndef KFD_SUPPORT_IOMMU_V2
> > + ret = true;
> > +#else
> > + ret = false;
> > +#endif
> > +
> > + return ret;
> > +}
> > +
> > /*
> > * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
> > * copies CRAT from ACPI (if available).
> > @@ -751,7 +771,6 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> > *
> > * Return 0 if successful else return error code
> > */
> > -#ifdef CONFIG_ACPI
> > int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
> > {
> > struct acpi_table_header *crat_table;
> > @@ -775,7 +794,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
> > return -EINVAL;
> > }
> >
> > - if (ignore_crat) {
> > + if (kfd_ignore_crat()) {
> > pr_info("CRAT table disabled by module option\n");
> > return -ENODATA;
> > }
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index 2c030c2b5b8d..fdf64d361be3 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -112,6 +112,7 @@ static const struct kfd_device_info carrizo_device_info = {
> > .num_xgmi_sdma_engines = 0,
> > .num_sdma_queues_per_engine = 2,
> > };
> > +#endif
> >
> > static const struct kfd_device_info raven_device_info = {
> > .asic_family = CHIP_RAVEN,
> > @@ -130,7 +131,6 @@ static const struct kfd_device_info raven_device_info = {
> > .num_xgmi_sdma_engines = 0,
> > .num_sdma_queues_per_engine = 2,
> > };
> > -#endif
> >
> > static const struct kfd_device_info hawaii_device_info = {
> > .asic_family = CHIP_HAWAII,
> > @@ -688,6 +688,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> > goto gws_error;
> > }
> >
> > + /* If CRAT is broken, won't set iommu enabled */
> > + kfd_double_confirm_iommu_support(kfd);
> > +
> > if (kfd_iommu_device_init(kfd)) {
> > dev_err(kfd_device, "Error initializing iommuv2\n");
> > goto device_iommu_error;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > index 82f955750e75..5b70fbe429f1 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > @@ -308,12 +308,14 @@ struct kfd_dev {
> >
> > /* xGMI */
> > uint64_t hive_id;
> > -
> > /* UUID */
> > uint64_t unique_id;
> >
> > bool pci_atomic_requested;
> >
> > + /* Use IOMMU v2 flag */
> > + bool use_iommu_v2;
> > +
> > /* SRAM ECC flag */
> > atomic_t sram_ecc_flag;
> >
> > @@ -1009,6 +1011,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
> > struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
> > int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
> > int kfd_numa_node_to_apic_id(int numa_node_id);
> > +void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
> >
> > /* Interrupts */
> > int kfd_interrupt_init(struct kfd_dev *dev);
> > @@ -1232,9 +1235,12 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
> > #endif
> > }
> >
> > +bool kfd_ignore_crat(void);
> > +
> > static inline bool kfd_device_use_iommu_v2(const struct kfd_dev *dev)
> > {
> > - return dev && dev->device_info->needs_iommu_device;
> > + return !kfd_ignore_crat() && dev && dev->use_iommu_v2 &&
> > + dev->device_info->needs_iommu_device;
>
> I think this could now be simplified:
>
> return dev && dev->use_iommu_v2;
>
> So maybe you don't need this function any more.
In Renoir, if ACPI CRAT from SBIOS is good, we may still use
dev->device_info->needs_iommu_device to confirm whether we should go dGPU.
>
>
> > }
> >
> > /* Debugfs */
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > index 4b29815e9205..8907b5317103 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > @@ -1579,6 +1579,26 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
> > return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
> > }
> >
> > +void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
> > +{
> > + struct kfd_topology_device *dev;
> > +
> > + unsigned temp = 0;
> > +
> > + down_read(&topology_lock);
> > +
> > + /* The cpu_cores_count and simd_count aren't zero at the same time in
> > + * APU node.
> > + */
> > + list_for_each_entry(dev, &topology_device_list, list)
> > + temp |= dev->node_props.cpu_cores_count *
> > + dev->node_props.simd_count;
>
> You shouldn't look at all GPUs, only at the GPU currently being
> initialized. Otherwise all your dGPUs in an APU system will also have
> use_iommu_v2 == true, which would be confusing.
>
> I'd do this in kfd_assign_gpu, because at that point you have access to
> the kfd_topology_device and the kfd_dev at the same time without having
> to add another loop.
>
Actually, I follow your comment to do it like this, however, we have to set
the use_iommu_v2 before kfd_iommu_device_init(). kfd_assign_gpu in kfd_topology_add_device()
is a little late.
Thanks,
Ray
> ...
> list_for_each_entry(dev, &topology_device_list, list) {
> /* Discrete GPUs need their own topology device list
> * entries. Don't assign them to CPU/APU nodes.
> */
> if (!gpu->device_info->needs_iommu_device &&
> dev->node_props.cpu_cores_count)
> continue;
>
> if (!dev->gpu && (dev->node_props.simd_count > 0)) {
> + if (dev->node_props.cpu_cores_count)
> + dev->use_iommu_v2 = true;
> ...
>
> Regards,
> Felix
>
>
> > +
> > + up_read(&topology_lock);
> > +
> > + gpu->use_iommu_v2 = temp ? true : false;
> > +}
> > +
> > #if defined(CONFIG_DEBUG_FS)
> >
> > int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
next prev parent reply other threads:[~2020-08-21 3:53 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-20 8:40 [PATCH v4 1/2] drm/amdkfd: abstract the iommu device checking with ignore_crat (v2) Huang Rui
2020-08-20 8:40 ` [PATCH v4 2/2] drm/amdkfd: force raven as "dgpu" path (v4) Huang Rui
2020-08-21 2:41 ` Felix Kuehling
2020-08-21 3:53 ` Huang Rui [this message]
2020-08-21 4:20 ` Felix Kuehling
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200821035317.GA337061@hr-amd \
--to=ray.huang@amd.com \
--cc=Felix.Kuehling@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.