Linux CXL
 help / color / mirror / Atom feed
* [PATCH v2 0/2] ] cxl: Move operations after memory is ready
@ 2023-05-18 17:31 Dave Jiang
  2023-05-18 17:31 ` [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info Dave Jiang
  2023-05-18 17:31 ` [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval Dave Jiang
  0 siblings, 2 replies; 7+ messages in thread
From: Dave Jiang @ 2023-05-18 17:31 UTC (permalink / raw)
  To: linux-cxl, dan.j.williams
  Cc: Jonathan Cameron, ira.weiny, vishal.l.verma, alison.schofield,
	Jonathan.Cameron

v2:
- Check both ranges for memory ready (Ira)
- Drop last 2 patches from previous series. Move media wait to pci_probe()
  before mailbox operations. (Dan)
- Fix up fixes tags. (Dan)

This series contains fixes broken out from the QTG ID series. It augments
the Memory_Info_Valid check and move the relevant operations that needs to
happen after memory is valid behind the check.

---

Dave Jiang (2):
      cxl: Wait Memory_Info_Valid before access memory related info
      cxl: Move cxl_await_media_ready() to before capacity info retrieval


 drivers/cxl/core/pci.c | 85 +++++++++++++++++++++++++++++++++++++-----
 drivers/cxl/cxlpci.h   |  2 +
 drivers/cxl/pci.c      |  6 +++
 drivers/cxl/port.c     |  6 ---
 4 files changed, 84 insertions(+), 15 deletions(-)

--


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info
  2023-05-18 17:31 [PATCH v2 0/2] ] cxl: Move operations after memory is ready Dave Jiang
@ 2023-05-18 17:31 ` Dave Jiang
  2023-05-18 19:01   ` Ira Weiny
  2023-05-18 17:31 ` [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval Dave Jiang
  1 sibling, 1 reply; 7+ messages in thread
From: Dave Jiang @ 2023-05-18 17:31 UTC (permalink / raw)
  To: linux-cxl, dan.j.williams
  Cc: Jonathan Cameron, ira.weiny, vishal.l.verma, alison.schofield,
	Jonathan.Cameron

CXL rev3.0 8.1.3.8.2 Memory_Info_valid field

The Memory_Info_Valid bit indicates that the CXL Range Size High and Size
Low registers are valid. The bit must be set within 1 second of reset
deassertion to the device. Check valid bit before we check the
Memory_Active bit when waiting for cxl_await_media_ready() to ensure that
the memory info is valid for consumption. Also ensures both DVSEC ranges
1 and 2 are ready if DVSEC Capability indicates they are both supported.

Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

---
v2:
- Check both ranges instead of just first offset. (Ira)
- Add to commit log. (Ira)
- Fix fixes tag. (Dan)

Before QTG series split:
v2:
- Check both ranges. (Jonathan)
---
 drivers/cxl/core/pci.c |   85 +++++++++++++++++++++++++++++++++++++++++++-----
 drivers/cxl/cxlpci.h   |    2 +
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 63f2f0b86fbc..9a38a6f91624 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -101,23 +101,57 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
 
-/*
- * Wait up to @media_ready_timeout for the device to report memory
- * active.
- */
-int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec;
+	bool valid = false;
+	int rc, i;
+	u32 temp;
+
+	if (id > CXL_DVSEC_RANGE_MAX)
+		return -EINVAL;
+
+	/* Check MEM INFO VALID bit first, give up after 1s */
+	i = 1;
+	do {
+		rc = pci_read_config_dword(pdev,
+					   d + CXL_DVSEC_RANGE_SIZE_LOW(id),
+					   &temp);
+		if (rc)
+			return rc;
+
+		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
+		if (valid)
+			break;
+		msleep(1000);
+	} while (i--);
+
+	if (!valid) {
+		dev_err(&pdev->dev,
+			"Timeout awaiting memory range %d valid after 1s.\n",
+			id);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	int d = cxlds->cxl_dvsec;
 	bool active = false;
-	u64 md_status;
 	int rc, i;
+	u32 temp;
 
-	for (i = media_ready_timeout; i; i--) {
-		u32 temp;
+	if (id > CXL_DVSEC_RANGE_MAX)
+		return -EINVAL;
 
+	/* Check MEM ACTIVE bit, up to 60s timeout by default */
+	for (i = media_ready_timeout; i; i--) {
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
 		if (rc)
 			return rc;
 
@@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
 		return -ETIMEDOUT;
 	}
 
+	return 0;
+}
+
+/*
+ * Wait up to @media_ready_timeout for the device to report memory
+ * active.
+ */
+int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec;
+	int rc, i, hdm_count;
+	u64 md_status;
+	u16 cap;
+
+	rc = pci_read_config_word(pdev,
+				  d + CXL_DVSEC_CAP_OFFSET, &cap);
+	if (rc)
+		return rc;
+
+	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+	for (i = 0; i < hdm_count; i++) {
+		rc = cxl_dvsec_mem_range_valid(cxlds, i);
+		if (rc)
+			return rc;
+	}
+
+	for (i = 0; i < hdm_count; i++) {
+		rc = cxl_dvsec_mem_range_active(cxlds, i);
+		if (rc)
+			return rc;
+	}
+
 	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
 	if (!CXLMDEV_READY(md_status))
 		return -EIO;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 0465ef963cd6..7c02e55b8042 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -31,6 +31,8 @@
 #define   CXL_DVSEC_RANGE_BASE_LOW(i)	(0x24 + (i * 0x10))
 #define     CXL_DVSEC_MEM_BASE_LOW_MASK	GENMASK(31, 28)
 
+#define CXL_DVSEC_RANGE_MAX		2
+
 /* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */
 #define CXL_DVSEC_FUNCTION_MAP					2
 



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval
  2023-05-18 17:31 [PATCH v2 0/2] ] cxl: Move operations after memory is ready Dave Jiang
  2023-05-18 17:31 ` [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info Dave Jiang
@ 2023-05-18 17:31 ` Dave Jiang
  2023-05-18 19:05   ` Ira Weiny
  2023-05-18 20:38   ` Dan Williams
  1 sibling, 2 replies; 7+ messages in thread
From: Dave Jiang @ 2023-05-18 17:31 UTC (permalink / raw)
  To: linux-cxl, dan.j.williams
  Cc: ira.weiny, vishal.l.verma, alison.schofield, Jonathan.Cameron

Move cxl_await_media_ready() to cxl_pci probe before driver starts issuing
IDENTIFY and retrieving memory device information to ensure that the
device is ready to provide the information.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/pci.c  |    6 ++++++
 drivers/cxl/port.c |    6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index ea38bd49b0cf..fc59ca79b2a0 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -708,6 +708,12 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
 
+	rc = cxl_await_media_ready(cxlds);
+	if (rc) {
+		dev_err(&pdev->dev, "Media not active (%d)\n", rc);
+		return rc;
+	}
+
 	rc = cxl_pci_setup_mailbox(cxlds);
 	if (rc)
 		return rc;
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index a49f5eb149f1..bfb948e00c42 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -108,12 +108,6 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
 	if (rc)
 		return rc;
 
-	rc = cxl_await_media_ready(cxlds);
-	if (rc) {
-		dev_err(&port->dev, "Media not active (%d)\n", rc);
-		return rc;
-	}
-
 	rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
 	if (rc)
 		return rc;



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info
  2023-05-18 17:31 ` [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info Dave Jiang
@ 2023-05-18 19:01   ` Ira Weiny
  2023-05-18 20:52     ` Dave Jiang
  0 siblings, 1 reply; 7+ messages in thread
From: Ira Weiny @ 2023-05-18 19:01 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl, dan.j.williams
  Cc: Jonathan Cameron, ira.weiny, vishal.l.verma, alison.schofield

Dave Jiang wrote:
> CXL rev3.0 8.1.3.8.2 Memory_Info_valid field
> 
> The Memory_Info_Valid bit indicates that the CXL Range Size High and Size
> Low registers are valid. The bit must be set within 1 second of reset
> deassertion to the device. Check valid bit before we check the
> Memory_Active bit when waiting for cxl_await_media_ready() to ensure that
> the memory info is valid for consumption. Also ensures both DVSEC ranges
> 1 and 2 are ready if DVSEC Capability indicates they are both supported.
> 
> Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active")
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> 
> ---
> v2:
> - Check both ranges instead of just first offset. (Ira)
> - Add to commit log. (Ira)
> - Fix fixes tag. (Dan)
> 

[snip]

> +
> +static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
>  {
>  	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>  	int d = cxlds->cxl_dvsec;
>  	bool active = false;
> -	u64 md_status;
>  	int rc, i;
> +	u32 temp;
>  
> -	for (i = media_ready_timeout; i; i--) {
> -		u32 temp;
> +	if (id > CXL_DVSEC_RANGE_MAX)
> +		return -EINVAL;
>  
> +	/* Check MEM ACTIVE bit, up to 60s timeout by default */
> +	for (i = media_ready_timeout; i; i--) {
>  		rc = pci_read_config_dword(
> -			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
> +			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);

I think this is still wrong.  I think this should be 'id' shouldn't it?
...

>  		if (rc)
>  			return rc;
>  
> @@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
>  		return -ETIMEDOUT;
>  	}
>  
> +	return 0;
> +}
> +
> +/*
> + * Wait up to @media_ready_timeout for the device to report memory
> + * active.
> + */
> +int cxl_await_media_ready(struct cxl_dev_state *cxlds)
> +{
> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
> +	int d = cxlds->cxl_dvsec;
> +	int rc, i, hdm_count;
> +	u64 md_status;
> +	u16 cap;
> +
> +	rc = pci_read_config_word(pdev,
> +				  d + CXL_DVSEC_CAP_OFFSET, &cap);
> +	if (rc)
> +		return rc;
> +
> +	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
> +	for (i = 0; i < hdm_count; i++) {
> +		rc = cxl_dvsec_mem_range_valid(cxlds, i);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	for (i = 0; i < hdm_count; i++) {
> +		rc = cxl_dvsec_mem_range_active(cxlds, i);

... Based on this 'i'...

Ira

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval
  2023-05-18 17:31 ` [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval Dave Jiang
@ 2023-05-18 19:05   ` Ira Weiny
  2023-05-18 20:38   ` Dan Williams
  1 sibling, 0 replies; 7+ messages in thread
From: Ira Weiny @ 2023-05-18 19:05 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl, dan.j.williams
  Cc: ira.weiny, vishal.l.verma, alison.schofield, Jonathan.Cameron

Dave Jiang wrote:
> Move cxl_await_media_ready() to cxl_pci probe before driver starts issuing
> IDENTIFY and retrieving memory device information to ensure that the
> device is ready to provide the information.
> 
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices")
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>

Reviewed-by: Ira Weiny <ira.weiny@intel.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval
  2023-05-18 17:31 ` [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval Dave Jiang
  2023-05-18 19:05   ` Ira Weiny
@ 2023-05-18 20:38   ` Dan Williams
  1 sibling, 0 replies; 7+ messages in thread
From: Dan Williams @ 2023-05-18 20:38 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl, dan.j.williams
  Cc: ira.weiny, vishal.l.verma, alison.schofield, Jonathan.Cameron

Dave Jiang wrote:
> Move cxl_await_media_ready() to cxl_pci probe before driver starts issuing
> IDENTIFY and retrieving memory device information to ensure that the
> device is ready to provide the information.
> 
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices")
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/cxl/pci.c  |    6 ++++++
>  drivers/cxl/port.c |    6 ------
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index ea38bd49b0cf..fc59ca79b2a0 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -708,6 +708,12 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>  
> +	rc = cxl_await_media_ready(cxlds);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Media not active (%d)\n", rc);
> +		return rc;
> +	}

So, now that I see this new failure mode here it raises another concern.
I think there is value in still trying to bring up the mailbox interface
even if media-ready never completes. For example, what if you need to
update-firmware to remediate the device?

The mailbox interface does not need the memdev to attach to the cxl_mem
driver to enable command submission, but the question becomes what do
you do once you have fixed up whatever condition caused the media to
fail?

I think the simplest answer is to just require tooling to reload cxl_pci
for that device to re-attempt init. A more complicated answer would be
to teach cxl_mem how to revalidate capacity after-the-fact, but at this
point I think the implementation is stuck with cxl_pci owning capacity
initialization.

So I think this looks like caching the the media-ready timeout condition
and teaching all subsequent code that cares about capacity info to fail
and continue. Like don't issue identify, make sure the sysfs capacity
values return 0, and don't let cxl_mem proceed with topology
enumeration.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info
  2023-05-18 19:01   ` Ira Weiny
@ 2023-05-18 20:52     ` Dave Jiang
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Jiang @ 2023-05-18 20:52 UTC (permalink / raw)
  To: Ira Weiny, linux-cxl, dan.j.williams
  Cc: Jonathan Cameron, vishal.l.verma, alison.schofield



On 5/18/23 12:01 PM, Ira Weiny wrote:
> Dave Jiang wrote:
>> CXL rev3.0 8.1.3.8.2 Memory_Info_valid field
>>
>> The Memory_Info_Valid bit indicates that the CXL Range Size High and Size
>> Low registers are valid. The bit must be set within 1 second of reset
>> deassertion to the device. Check valid bit before we check the
>> Memory_Active bit when waiting for cxl_await_media_ready() to ensure that
>> the memory info is valid for consumption. Also ensures both DVSEC ranges
>> 1 and 2 are ready if DVSEC Capability indicates they are both supported.
>>
>> Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active")
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>
>> ---
>> v2:
>> - Check both ranges instead of just first offset. (Ira)
>> - Add to commit log. (Ira)
>> - Fix fixes tag. (Dan)
>>
> 
> [snip]
> 
>> +
>> +static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
>>   {
>>   	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>>   	int d = cxlds->cxl_dvsec;
>>   	bool active = false;
>> -	u64 md_status;
>>   	int rc, i;
>> +	u32 temp;
>>   
>> -	for (i = media_ready_timeout; i; i--) {
>> -		u32 temp;
>> +	if (id > CXL_DVSEC_RANGE_MAX)
>> +		return -EINVAL;
>>   
>> +	/* Check MEM ACTIVE bit, up to 60s timeout by default */
>> +	for (i = media_ready_timeout; i; i--) {
>>   		rc = pci_read_config_dword(
>> -			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
>> +			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
> 
> I think this is still wrong.  I think this should be 'id' shouldn't it?

Yes. I was thinking of something else....

> ...
> 
>>   		if (rc)
>>   			return rc;
>>   
>> @@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
>>   		return -ETIMEDOUT;
>>   	}
>>   
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Wait up to @media_ready_timeout for the device to report memory
>> + * active.
>> + */
>> +int cxl_await_media_ready(struct cxl_dev_state *cxlds)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>> +	int d = cxlds->cxl_dvsec;
>> +	int rc, i, hdm_count;
>> +	u64 md_status;
>> +	u16 cap;
>> +
>> +	rc = pci_read_config_word(pdev,
>> +				  d + CXL_DVSEC_CAP_OFFSET, &cap);
>> +	if (rc)
>> +		return rc;
>> +
>> +	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
>> +	for (i = 0; i < hdm_count; i++) {
>> +		rc = cxl_dvsec_mem_range_valid(cxlds, i);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	for (i = 0; i < hdm_count; i++) {
>> +		rc = cxl_dvsec_mem_range_active(cxlds, i);
> 
> ... Based on this 'i'...
> 
> Ira

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-05-18 20:53 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-05-18 17:31 [PATCH v2 0/2] ] cxl: Move operations after memory is ready Dave Jiang
2023-05-18 17:31 ` [PATCH v2 1/2] cxl: Wait Memory_Info_Valid before access memory related info Dave Jiang
2023-05-18 19:01   ` Ira Weiny
2023-05-18 20:52     ` Dave Jiang
2023-05-18 17:31 ` [PATCH v2 2/2] cxl: Move cxl_await_media_ready() to before capacity info retrieval Dave Jiang
2023-05-18 19:05   ` Ira Weiny
2023-05-18 20:38   ` Dan Williams

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox