* [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
@ 2023-03-28 18:36 Dan Williams
2023-03-29 5:22 ` Gregory Price
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Dan Williams @ 2023-03-28 18:36 UTC (permalink / raw)
To: linux-cxl
Cc: vishal.l.verma, ira.weiny, dave.jiang, alison.schofield,
Jonathan.Cameron
The find_cxl_root() helper is used to lookup root decoders and other CXL
platform topology information for a given endpoint. It turns out that
for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
is always NULL for the RCH topology case because it expects to find a
cxl_port at the host-bridge. RCH topologies only have the root cxl_port
object with the host-bridge as a dport. While there are no reports of
this being a problem to date, by inspection region enumeration should
crash as a result of this problem, and it does in a local unit test for
this scenario.
However, an observation that ever since:
commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
...all callers of find_cxl_root() occur after the memdev connection to
the port topology has been established. That means that find_cxl_root()
can be simplified to a walk of the endpoint port topology to the root.
Switch to that arrangement which also fixes the RCD bug.
Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/cxl/core/pmem.c | 6 +++---
drivers/cxl/core/port.c | 38 +++++++-------------------------------
drivers/cxl/core/region.c | 2 +-
drivers/cxl/cxl.h | 4 ++--
drivers/cxl/port.c | 2 +-
5 files changed, 14 insertions(+), 38 deletions(-)
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index c2e4b1093788..f8c38d997252 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
{
- struct cxl_port *port = find_cxl_root(start);
+ struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
struct device *dev;
if (!port)
@@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
struct device *dev;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
if (!cxl_nvb)
return -ENODEV;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 8ee6b6e2e2a4..4d1f9c5b5029 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev)
return false;
}
-/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
-static int match_root_child(struct device *dev, const void *match)
+struct cxl_port *find_cxl_root(struct cxl_port *port)
{
- const struct device *iter = NULL;
- struct cxl_dport *dport;
- struct cxl_port *port;
-
- if (!dev_is_cxl_root_child(dev))
- return 0;
-
- port = to_cxl_port(dev);
- iter = match;
- while (iter) {
- dport = cxl_find_dport_by_dev(port, iter);
- if (dport)
- break;
- iter = iter->parent;
- }
-
- return !!iter;
-}
+ struct cxl_port *iter = port;
-struct cxl_port *find_cxl_root(struct device *dev)
-{
- struct device *port_dev;
- struct cxl_port *root;
+ while (iter && !is_cxl_root(iter))
+ iter = to_cxl_port(iter->dev.parent);
- port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
- if (!port_dev)
+ if (!iter)
return NULL;
-
- root = to_cxl_port(port_dev->parent);
- get_device(&root->dev);
- put_device(port_dev);
- return root;
+ get_device(&iter->dev);
+ return iter;
}
EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f29028148806..808f23ec4e2b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2251,7 +2251,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
if (!cxl_nvb) {
cxlr_pmem = ERR_PTR(-ENODEV);
goto out;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f2b0962a552d..06ca12418f7d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport);
-struct cxl_port *find_cxl_root(struct device *dev);
+struct cxl_port *find_cxl_root(struct cxl_port *port);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
void cxl_bus_rescan(void);
void cxl_bus_drain(void);
@@ -758,7 +758,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 1049bb5ea496..f243132fb174 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
* This can't fail in practice as CXL root exit unregisters all
* descendant ports and that in turn synchronizes with cxl_port_probe()
*/
- root = find_cxl_root(&cxlmd->dev);
+ root = find_cxl_root(port);
/*
* Now that all endpoint decoders are successfully enumerated, try to
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-28 18:36 [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it Dan Williams
@ 2023-03-29 5:22 ` Gregory Price
2023-03-29 21:39 ` Dan Williams
2023-03-29 17:36 ` Dave Jiang
2023-03-30 17:35 ` Jonathan Cameron
2 siblings, 1 reply; 8+ messages in thread
From: Gregory Price @ 2023-03-29 5:22 UTC (permalink / raw)
To: Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield, Jonathan.Cameron
On Tue, Mar 28, 2023 at 11:36:17AM -0700, Dan Williams wrote:
> The find_cxl_root() helper is used to lookup root decoders and other CXL
> platform topology information for a given endpoint. It turns out that
> for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> is always NULL for the RCH topology case because it expects to find a
> cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> object with the host-bridge as a dport. While there are no reports of
> this being a problem to date, by inspection region enumeration should
> crash as a result of this problem, and it does in a local unit test for
> this scenario.
>
> However, an observation that ever since:
>
> commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
>
> ...all callers of find_cxl_root() occur after the memdev connection to
> the port topology has been established. That means that find_cxl_root()
> can be simplified to a walk of the endpoint port topology to the root.
> Switch to that arrangement which also fixes the RCD bug.
>
> Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
> drivers/cxl/core/pmem.c | 6 +++---
> drivers/cxl/core/port.c | 38 +++++++-------------------------------
> drivers/cxl/core/region.c | 2 +-
> drivers/cxl/cxl.h | 4 ++--
> drivers/cxl/port.c | 2 +-
> 5 files changed, 14 insertions(+), 38 deletions(-)
>
Testing this, want to make sure I'm seeing the correct results. As of
right now, I'm not seeing any regressions, however RCD/RCH combinations
don't appear to work as-expected when marked EFI_MEMORY_SP.
When not marked MEMORY_SP, we see the below topology, which I believe is
the correct result (mildly trimmed for brevity).
The memory is correctly onlined as expected at boot (single socket
system, memory is on node 1)
[user@host0 cxl]# numactl --hardware
available: 2 nodes (0-1)
node 1 cpus:
node 1 size: 128934 MB
node 1 free: 137 MB
When the CXL region is marked EFI_MEMORY_SP in bios, the memory is not
onlined - which is expected, and the topology is the same as before.
However, now attempts to online the memory in a region fail.
[root@amd0 ~]# numactl --hardware
available: 1 nodes (0)
[root@amd0 ~]# /data/ndctl/build/cxl/cxl create-region -t ram -d decoder0.0 -w 1 -g 4096 -m mem0
cxl region: collect_memdevs: no active memdevs found: decoder: decoder0.0 filter: mem0
cxl region: cmd_create_region: created 0 regions
As you can see in the topology below, the memory device is not attached
to a region, which seems to be correct as there's no step between the
root complex and the device?
However if we attempt to disable the memdev we get a failure
[root@amd0 ~]# /data/ndctl/build/cxl/cxl disable-memdev mem0
cxl memdev: action_disable: mem0 is part of an active region
cxl memdev: cmd_disable_memdev: disabled 0 mem
So the device becomes unusable in this configuration.
Is it expected that RCD's will fail when set to EFI_MEMORY_SP? If
that's the case, then this (and the other patch) look safe and do not
produce regression.
Just want to capture this behavior, as it appears there may be other
issues related to RCH/RCD combinations.
~Gregory
CXL Topology produced by ndctl:cxl (lastest version 76.x).
[user@host0 cxl]# ./cxl list -vvvv
[
{
"bus":"root0",
"provider":"ACPI.CXL",
"nr_dports":1,
"dports":[
{
"dport":"pci0000:3f",
"alias":"ACPI0016:00",
"id":4
}
],
"endpoints:root0":[
{
"endpoint":"endpoint1",
"host":"mem0",
"depth":1,
"memdev":{
"memdev":"mem0",
"ram_size":137438953472,
"numa_node":1, <------ absent in MEMORY_SP
"host":"0000:3f:00.0",
"partition_info":{ ... snip ... }
},
"decoders:endpoint1":[
{
"decoder":"decoder1.0",
"resource":0,
"size":137438953472,
"interleave_ways":1
}
]
}
],
"decoders:root0":[
{
"decoder":"decoder0.0",
"resource":70061654016,
"size":137438953472,
"interleave_ways":1,
"max_available_extent":137438953472,
"volatile_capable":true,
"nr_targets":1,
"targets":[
{
"target":"pci0000:3f",
"alias":"ACPI0016:00",
"position":0,
"id":4
}
]
}
]
}
]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-29 21:39 ` Dan Williams
@ 2023-03-29 10:27 ` Gregory Price
2023-03-29 22:21 ` Dan Williams
0 siblings, 1 reply; 8+ messages in thread
From: Gregory Price @ 2023-03-29 10:27 UTC (permalink / raw)
To: Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield, Jonathan.Cameron
On Wed, Mar 29, 2023 at 02:39:53PM -0700, Dan Williams wrote:
> Gregory Price wrote:
> > On Tue, Mar 28, 2023 at 11:36:17AM -0700, Dan Williams wrote:
> > > The find_cxl_root() helper is used to lookup root decoders and other CXL
> > > platform topology information for a given endpoint. It turns out that
> > > for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> > > is always NULL for the RCH topology case because it expects to find a
> > > cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> > > object with the host-bridge as a dport. While there are no reports of
> > > this being a problem to date, by inspection region enumeration should
> > > crash as a result of this problem, and it does in a local unit test for
> > > this scenario.
> > >
> > > However, an observation that ever since:
> > >
> > > commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
> > >
> > > ...all callers of find_cxl_root() occur after the memdev connection to
> > > the port topology has been established. That means that find_cxl_root()
> > > can be simplified to a walk of the endpoint port topology to the root.
> > > Switch to that arrangement which also fixes the RCD bug.
> > >
> > > Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> [..]
> > Is it expected that RCD's will fail when set to EFI_MEMORY_SP? If
> > that's the case, then this (and the other patch) look safe and do not
> > produce regression.
> >
> > Just want to capture this behavior, as it appears there may be other
> > issues related to RCH/RCD combinations.
>
> After I posted this fix testing revealed the need for a few more fixes,
> now posted. Most importantly for accessing range register defined
> regions is this new fix:
>
> http://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com
Are these patches on top of an unpushed cxl branch? I'm having a hard
time finding the base patch to apply these to.
Thanks for the help,
Gregory
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-29 22:21 ` Dan Williams
@ 2023-03-29 10:38 ` Gregory Price
0 siblings, 0 replies; 8+ messages in thread
From: Gregory Price @ 2023-03-29 10:38 UTC (permalink / raw)
To: Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield, Jonathan.Cameron
On Wed, Mar 29, 2023 at 03:21:32PM -0700, Dan Williams wrote:
> Gregory Price wrote:
> > On Wed, Mar 29, 2023 at 02:39:53PM -0700, Dan Williams wrote:
> > > Gregory Price wrote:
> > > > On Tue, Mar 28, 2023 at 11:36:17AM -0700, Dan Williams wrote:
> > > > > The find_cxl_root() helper is used to lookup root decoders and other CXL
> > > > > platform topology information for a given endpoint. It turns out that
> > > > > for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> > > > > is always NULL for the RCH topology case because it expects to find a
> > > > > cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> > > > > object with the host-bridge as a dport. While there are no reports of
> > > > > this being a problem to date, by inspection region enumeration should
> > > > > crash as a result of this problem, and it does in a local unit test for
> > > > > this scenario.
> > > > >
> > > > > However, an observation that ever since:
> > > > >
> > > > > commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
> > > > >
> > > > > ...all callers of find_cxl_root() occur after the memdev connection to
> > > > > the port topology has been established. That means that find_cxl_root()
> > > > > can be simplified to a walk of the endpoint port topology to the root.
> > > > > Switch to that arrangement which also fixes the RCD bug.
> > > > >
> > > > > Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> > > > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > > [..]
> > > > Is it expected that RCD's will fail when set to EFI_MEMORY_SP? If
> > > > that's the case, then this (and the other patch) look safe and do not
> > > > produce regression.
> > > >
> > > > Just want to capture this behavior, as it appears there may be other
> > > > issues related to RCH/RCD combinations.
> > >
> > > After I posted this fix testing revealed the need for a few more fixes,
> > > now posted. Most importantly for accessing range register defined
> > > regions is this new fix:
> > >
> > > http://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com
> >
> > Are these patches on top of an unpushed cxl branch? I'm having a hard
> > time finding the base patch to apply these to.
>
> Pushed a preview with all pending fixes applied here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/log/?h=for-6.3/fixes
o7 will give it all a spin tonight and have some tags for you
~Gregory
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-28 18:36 [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it Dan Williams
2023-03-29 5:22 ` Gregory Price
@ 2023-03-29 17:36 ` Dave Jiang
2023-03-30 17:35 ` Jonathan Cameron
2 siblings, 0 replies; 8+ messages in thread
From: Dave Jiang @ 2023-03-29 17:36 UTC (permalink / raw)
To: Dan Williams, linux-cxl
Cc: vishal.l.verma, ira.weiny, alison.schofield, Jonathan.Cameron
On 3/28/23 11:36 AM, Dan Williams wrote:
> The find_cxl_root() helper is used to lookup root decoders and other CXL
> platform topology information for a given endpoint. It turns out that
> for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> is always NULL for the RCH topology case because it expects to find a
> cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> object with the host-bridge as a dport. While there are no reports of
> this being a problem to date, by inspection region enumeration should
> crash as a result of this problem, and it does in a local unit test for
> this scenario.
>
> However, an observation that ever since:
>
> commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
>
> ...all callers of find_cxl_root() occur after the memdev connection to
> the port topology has been established. That means that find_cxl_root()
> can be simplified to a walk of the endpoint port topology to the root.
> Switch to that arrangement which also fixes the RCD bug.
>
> Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/cxl/core/pmem.c | 6 +++---
> drivers/cxl/core/port.c | 38 +++++++-------------------------------
> drivers/cxl/core/region.c | 2 +-
> drivers/cxl/cxl.h | 4 ++--
> drivers/cxl/port.c | 2 +-
> 5 files changed, 14 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
> index c2e4b1093788..f8c38d997252 100644
> --- a/drivers/cxl/core/pmem.c
> +++ b/drivers/cxl/core/pmem.c
> @@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
> return is_cxl_nvdimm_bridge(dev);
> }
>
> -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
> +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
> {
> - struct cxl_port *port = find_cxl_root(start);
> + struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
> struct device *dev;
>
> if (!port)
> @@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
> struct device *dev;
> int rc;
>
> - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
> + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
> if (!cxl_nvb)
> return -ENODEV;
>
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 8ee6b6e2e2a4..4d1f9c5b5029 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev)
> return false;
> }
>
> -/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
> -static int match_root_child(struct device *dev, const void *match)
> +struct cxl_port *find_cxl_root(struct cxl_port *port)
> {
> - const struct device *iter = NULL;
> - struct cxl_dport *dport;
> - struct cxl_port *port;
> -
> - if (!dev_is_cxl_root_child(dev))
> - return 0;
> -
> - port = to_cxl_port(dev);
> - iter = match;
> - while (iter) {
> - dport = cxl_find_dport_by_dev(port, iter);
> - if (dport)
> - break;
> - iter = iter->parent;
> - }
> -
> - return !!iter;
> -}
> + struct cxl_port *iter = port;
>
> -struct cxl_port *find_cxl_root(struct device *dev)
> -{
> - struct device *port_dev;
> - struct cxl_port *root;
> + while (iter && !is_cxl_root(iter))
> + iter = to_cxl_port(iter->dev.parent);
>
> - port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
> - if (!port_dev)
> + if (!iter)
> return NULL;
> -
> - root = to_cxl_port(port_dev->parent);
> - get_device(&root->dev);
> - put_device(port_dev);
> - return root;
> + get_device(&iter->dev);
> + return iter;
> }
> EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index f29028148806..808f23ec4e2b 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2251,7 +2251,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
> * bridge for one device is the same for all.
> */
> if (i == 0) {
> - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
> + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
> if (!cxl_nvb) {
> cxlr_pmem = ERR_PTR(-ENODEV);
> goto out;
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index f2b0962a552d..06ca12418f7d 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
> struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> resource_size_t component_reg_phys,
> struct cxl_dport *parent_dport);
> -struct cxl_port *find_cxl_root(struct device *dev);
> +struct cxl_port *find_cxl_root(struct cxl_port *port);
> int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
> void cxl_bus_rescan(void);
> void cxl_bus_drain(void);
> @@ -758,7 +758,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
> bool is_cxl_nvdimm(struct device *dev);
> bool is_cxl_nvdimm_bridge(struct device *dev);
> int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
> -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
> +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
>
> #ifdef CONFIG_CXL_REGION
> bool is_cxl_pmem_region(struct device *dev);
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 1049bb5ea496..f243132fb174 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
> * This can't fail in practice as CXL root exit unregisters all
> * descendant ports and that in turn synchronizes with cxl_port_probe()
> */
> - root = find_cxl_root(&cxlmd->dev);
> + root = find_cxl_root(port);
>
> /*
> * Now that all endpoint decoders are successfully enumerated, try to
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-29 5:22 ` Gregory Price
@ 2023-03-29 21:39 ` Dan Williams
2023-03-29 10:27 ` Gregory Price
0 siblings, 1 reply; 8+ messages in thread
From: Dan Williams @ 2023-03-29 21:39 UTC (permalink / raw)
To: Gregory Price, Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield, Jonathan.Cameron
Gregory Price wrote:
> On Tue, Mar 28, 2023 at 11:36:17AM -0700, Dan Williams wrote:
> > The find_cxl_root() helper is used to lookup root decoders and other CXL
> > platform topology information for a given endpoint. It turns out that
> > for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> > is always NULL for the RCH topology case because it expects to find a
> > cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> > object with the host-bridge as a dport. While there are no reports of
> > this being a problem to date, by inspection region enumeration should
> > crash as a result of this problem, and it does in a local unit test for
> > this scenario.
> >
> > However, an observation that ever since:
> >
> > commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
> >
> > ...all callers of find_cxl_root() occur after the memdev connection to
> > the port topology has been established. That means that find_cxl_root()
> > can be simplified to a walk of the endpoint port topology to the root.
> > Switch to that arrangement which also fixes the RCD bug.
> >
> > Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
[..]
> Is it expected that RCD's will fail when set to EFI_MEMORY_SP? If
> that's the case, then this (and the other patch) look safe and do not
> produce regression.
>
> Just want to capture this behavior, as it appears there may be other
> issues related to RCH/RCD combinations.
After I posted this fix testing revealed the need for a few more fixes,
now posted. Most importantly for accessing range register defined
regions is this new fix:
http://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-29 10:27 ` Gregory Price
@ 2023-03-29 22:21 ` Dan Williams
2023-03-29 10:38 ` Gregory Price
0 siblings, 1 reply; 8+ messages in thread
From: Dan Williams @ 2023-03-29 22:21 UTC (permalink / raw)
To: Gregory Price, Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield, Jonathan.Cameron
Gregory Price wrote:
> On Wed, Mar 29, 2023 at 02:39:53PM -0700, Dan Williams wrote:
> > Gregory Price wrote:
> > > On Tue, Mar 28, 2023 at 11:36:17AM -0700, Dan Williams wrote:
> > > > The find_cxl_root() helper is used to lookup root decoders and other CXL
> > > > platform topology information for a given endpoint. It turns out that
> > > > for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> > > > is always NULL for the RCH topology case because it expects to find a
> > > > cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> > > > object with the host-bridge as a dport. While there are no reports of
> > > > this being a problem to date, by inspection region enumeration should
> > > > crash as a result of this problem, and it does in a local unit test for
> > > > this scenario.
> > > >
> > > > However, an observation that ever since:
> > > >
> > > > commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
> > > >
> > > > ...all callers of find_cxl_root() occur after the memdev connection to
> > > > the port topology has been established. That means that find_cxl_root()
> > > > can be simplified to a walk of the endpoint port topology to the root.
> > > > Switch to that arrangement which also fixes the RCD bug.
> > > >
> > > > Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> > > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > [..]
> > > Is it expected that RCD's will fail when set to EFI_MEMORY_SP? If
> > > that's the case, then this (and the other patch) look safe and do not
> > > produce regression.
> > >
> > > Just want to capture this behavior, as it appears there may be other
> > > issues related to RCH/RCD combinations.
> >
> > After I posted this fix testing revealed the need for a few more fixes,
> > now posted. Most importantly for accessing range register defined
> > regions is this new fix:
> >
> > http://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com
>
> Are these patches on top of an unpushed cxl branch? I'm having a hard
> time finding the base patch to apply these to.
Pushed a preview with all pending fixes applied here:
https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/log/?h=for-6.3/fixes
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it
2023-03-28 18:36 [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it Dan Williams
2023-03-29 5:22 ` Gregory Price
2023-03-29 17:36 ` Dave Jiang
@ 2023-03-30 17:35 ` Jonathan Cameron
2 siblings, 0 replies; 8+ messages in thread
From: Jonathan Cameron @ 2023-03-30 17:35 UTC (permalink / raw)
To: Dan Williams
Cc: linux-cxl, vishal.l.verma, ira.weiny, dave.jiang,
alison.schofield
On Tue, 28 Mar 2023 11:36:17 -0700
Dan Williams <dan.j.williams@intel.com> wrote:
> The find_cxl_root() helper is used to lookup root decoders and other CXL
> platform topology information for a given endpoint. It turns out that
> for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev)
> is always NULL for the RCH topology case because it expects to find a
> cxl_port at the host-bridge. RCH topologies only have the root cxl_port
> object with the host-bridge as a dport. While there are no reports of
> this being a problem to date, by inspection region enumeration should
> crash as a result of this problem, and it does in a local unit test for
> this scenario.
>
> However, an observation that ever since:
>
> commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
>
> ...all callers of find_cxl_root() occur after the memdev connection to
> the port topology has been established. That means that find_cxl_root()
> can be simplified to a walk of the endpoint port topology to the root.
> Switch to that arrangement which also fixes the RCD bug.
>
> Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Even without the bug fix being needed (I've not chased that one through)
seems like a good simplification to me.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
> drivers/cxl/core/pmem.c | 6 +++---
> drivers/cxl/core/port.c | 38 +++++++-------------------------------
> drivers/cxl/core/region.c | 2 +-
> drivers/cxl/cxl.h | 4 ++--
> drivers/cxl/port.c | 2 +-
> 5 files changed, 14 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
> index c2e4b1093788..f8c38d997252 100644
> --- a/drivers/cxl/core/pmem.c
> +++ b/drivers/cxl/core/pmem.c
> @@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
> return is_cxl_nvdimm_bridge(dev);
> }
>
> -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
> +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
> {
> - struct cxl_port *port = find_cxl_root(start);
> + struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
> struct device *dev;
>
> if (!port)
> @@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
> struct device *dev;
> int rc;
>
> - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
> + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
> if (!cxl_nvb)
> return -ENODEV;
>
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 8ee6b6e2e2a4..4d1f9c5b5029 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev)
> return false;
> }
>
> -/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
> -static int match_root_child(struct device *dev, const void *match)
> +struct cxl_port *find_cxl_root(struct cxl_port *port)
> {
> - const struct device *iter = NULL;
> - struct cxl_dport *dport;
> - struct cxl_port *port;
> -
> - if (!dev_is_cxl_root_child(dev))
> - return 0;
> -
> - port = to_cxl_port(dev);
> - iter = match;
> - while (iter) {
> - dport = cxl_find_dport_by_dev(port, iter);
> - if (dport)
> - break;
> - iter = iter->parent;
> - }
> -
> - return !!iter;
> -}
> + struct cxl_port *iter = port;
>
> -struct cxl_port *find_cxl_root(struct device *dev)
> -{
> - struct device *port_dev;
> - struct cxl_port *root;
> + while (iter && !is_cxl_root(iter))
> + iter = to_cxl_port(iter->dev.parent);
>
> - port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
> - if (!port_dev)
> + if (!iter)
> return NULL;
> -
> - root = to_cxl_port(port_dev->parent);
> - get_device(&root->dev);
> - put_device(port_dev);
> - return root;
> + get_device(&iter->dev);
> + return iter;
> }
> EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index f29028148806..808f23ec4e2b 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2251,7 +2251,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
> * bridge for one device is the same for all.
> */
> if (i == 0) {
> - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
> + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
> if (!cxl_nvb) {
> cxlr_pmem = ERR_PTR(-ENODEV);
> goto out;
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index f2b0962a552d..06ca12418f7d 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
> struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
> resource_size_t component_reg_phys,
> struct cxl_dport *parent_dport);
> -struct cxl_port *find_cxl_root(struct device *dev);
> +struct cxl_port *find_cxl_root(struct cxl_port *port);
> int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
> void cxl_bus_rescan(void);
> void cxl_bus_drain(void);
> @@ -758,7 +758,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
> bool is_cxl_nvdimm(struct device *dev);
> bool is_cxl_nvdimm_bridge(struct device *dev);
> int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
> -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
> +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
>
> #ifdef CONFIG_CXL_REGION
> bool is_cxl_pmem_region(struct device *dev);
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 1049bb5ea496..f243132fb174 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
> * This can't fail in practice as CXL root exit unregisters all
> * descendant ports and that in turn synchronizes with cxl_port_probe()
> */
> - root = find_cxl_root(&cxlmd->dev);
> + root = find_cxl_root(port);
>
> /*
> * Now that all endpoint decoders are successfully enumerated, try to
>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-03-30 17:35 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-03-28 18:36 [PATCH] cxl/port: Fix find_cxl_root() for RCDs and simplify it Dan Williams
2023-03-29 5:22 ` Gregory Price
2023-03-29 21:39 ` Dan Williams
2023-03-29 10:27 ` Gregory Price
2023-03-29 22:21 ` Dan Williams
2023-03-29 10:38 ` Gregory Price
2023-03-29 17:36 ` Dave Jiang
2023-03-30 17:35 ` Jonathan Cameron
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox