All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
@ 2025-10-14  6:28 Alison Schofield
  2025-10-14  8:47 ` kernel test robot
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Alison Schofield @ 2025-10-14  6:28 UTC (permalink / raw)
  To: Davidlohr Bueso, Jonathan Cameron, Dave Jiang, Alison Schofield,
	Vishal Verma, Ira Weiny, Dan Williams
  Cc: linux-cxl, Qing Huang

The CXL driver implementation of DPA->HPA address translation depends
on a region's starting address always being aligned to Host Bridge
Interleave Ways * 256MB. The driver follows the decode methods
defined in the CXL Spec[1] and expanded upon in the CXL Driver Writers
Guide[2], which describe bit manipulations based on power-of-2
alignment to translate a DPA to an HPA.

With the introduction of MOD3 interleave way support, platforms may
create regions at starting addresses that are not power-of-2 aligned.
This allows platforms to avoid gaps in the memory map, but addresses
within those regions cannot be translated using the existing bit
manipulation method.

Introduce an unaligned translation method for DPA->HPA that
reconstructs an HPA by restoring the address first at the port level
and then at the host bridge level.

[1] CXL Spec 3.2 8.2.4.20.13 Implementation Note Device Decoder Logic
[2] CXL Type 3 Memory Software Guide 1.1 2.13.25 DPA to HPA Translation

Suggested-by: Qing Huang <qing.huang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---

Changes in v2:
- Add 6 and 12 Host Bridge interleaves to decode_pos() (Jonathan)
- Limit the unalignment check to MOD3 regions
- Move the cache_size increment to a single place 
- Updated some in code comments
- Rebase on v6.18-rc1 

Changes in v1 (was RFC):
- Replace "/" with do_div() to quiet i386 build warning (lkp)
- Replace 'cxld->interleave_ways' with 'hbiw' for clarity
- Use div64_u64_rem() for alignment alignment
- Fix up a printk format specifier (lkp)
- Update code comments and commit log
- Rebase on v6.17-rc7


 drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 140 insertions(+), 7 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e14c1d305b22..3dc6f0ae9f19 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2934,13 +2934,124 @@ static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
 	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
 }
 
+static int decode_pos(int reg_ways, int hb_ways, int pos, int *pos_port,
+		      int *pos_hb)
+{
+	int devices_per_hb;
+
+	/*
+	 * Decode for 3-6-12 way interleaves as defined in the CXL
+	 * Spec 3.2 9.13.1.1 Legal Interleaving Configurations.
+	 */
+	switch (hb_ways) {
+	case 3: /* Supports 3-way, 6-way, or 12-way regions */
+		if (reg_ways != 3 && reg_ways != 6 && reg_ways != 12)
+			return -EINVAL;
+
+		devices_per_hb = reg_ways / 3;
+		break;
+
+	case 6: /* Supports 6-way or 12-way regions */
+		if (reg_ways != 6 && reg_ways != 12)
+			return -EINVAL;
+
+		devices_per_hb = reg_ways / 6;
+		break;
+
+	case 12: /* Supports 12-way regions */
+		if (reg_ways != 12)
+			return -EINVAL;
+
+		devices_per_hb = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* Calculate port and host bridge positions */
+	*pos_port = pos % devices_per_hb;
+	*pos_hb = pos / devices_per_hb;
+
+	return 0;
+}
+
+/*
+ * restore_parent() reconstruct the address in parent
+ *
+ * [mask] isolate the offset with the granularity
+ * [addr & ~mask] remove the offset leaving the aligned portion
+ * [* ways] distribute across all interleave ways
+ * [+ (pos * gran)] add the positional offset
+ * [+ (addr & mask)] restore the masked offset
+ */
+static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways)
+{
+	u64 mask = gran - 1;
+
+	return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask);
+}
+
+/*
+ * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
+ * start address is not aligned at Host Bridge Interleave Ways * 256MB.
+ *
+ * Unaligned start addresses only occur with MOD3 interleaves. All power-
+ * of-two interleaves are guaranteed aligned.
+ */
+static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
+				struct cxl_region_params *p, int pos, u64 dpa)
+{
+	int ways_port = p->interleave_ways / cxld->interleave_ways;
+	int gran_port = p->interleave_granularity;
+	int gran_hb = cxld->interleave_granularity;
+	int ways_hb = cxld->interleave_ways;
+	int pos_port, pos_hb, gran_shift;
+	u64 shifted, hpa, hpa_port = 0;
+
+	/* Decode an endpoint 'pos' into port and host-bridge components */
+	if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
+		dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
+			p->interleave_ways);
+		return ULLONG_MAX;
+	}
+	/* Restore the port parent address if needed */
+	if (gran_hb != gran_port)
+		hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
+	else
+		hpa_port = dpa;
+
+	/*
+	 * Complete the HPA reconstruction by restoring the address as if
+	 * each HB position is a candidate. Test against expected pos_hb
+	 * to confirm match.
+	 */
+	gran_shift = ilog2(gran_hb);
+	for (int index = 0; index < ways_hb; index++) {
+		hpa = restore_parent(hpa_port, index, gran_hb, ways_hb);
+		hpa += p->res->start;
+
+		shifted = hpa >> gran_shift;
+		if (do_div(shifted, ways_hb) == pos_hb)
+			return hpa;
+	}
+
+	dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
+		pos);
+	dev_dbg(&cxld->dev, "     port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
+		ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
+
+	return ULLONG_MAX;
+}
+
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled = NULL;
+	int hbiw = cxld->interleave_ways;
+	bool aligned;
 	u16 eig = 0;
 	u8 eiw = 0;
 	int pos;
@@ -2953,6 +3064,28 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	if (!cxled || cxlmd != cxled_to_memdev(cxled))
 		return ULLONG_MAX;
 
+	/* Remove the dpa base */
+	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+
+	/* Unaligned calc: MOD3 interleaves not hbiw * 256MB aligned */
+	if (!is_power_of_2(hbiw)) {
+		u64 rem;
+
+		div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
+		aligned = (rem == 0);
+		if (!aligned)
+			hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos,
+						   dpa_offset);
+		if (hpa == ULLONG_MAX)
+			return ULLONG_MAX;
+
+		goto skip_aligned;
+	}
+
+	/*
+	 * Aligned calc: all power-of-2 interleaves and MOD3 interleaves
+	 * that are aligned at hbiw * 256MB
+	 */
 	pos = cxled->pos;
 	ways_to_eiw(p->interleave_ways, &eiw);
 	granularity_to_eig(p->interleave_granularity, &eig);
@@ -2967,9 +3100,6 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	 * 8.2.4.19.13 Implementation Note: Device Decode Logic
 	 */
 
-	/* Remove the dpa base */
-	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
-
 	mask_upper = GENMASK_ULL(51, eig + 8);
 
 	if (eiw < 8) {
@@ -2985,7 +3115,10 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
 
 	/* Apply the hpa_offset to the region base address */
-	hpa = hpa_offset + p->res->start + p->cache_size;
+	hpa = hpa_offset + p->res->start;
+
+skip_aligned:
+	hpa += p->cache_size;
 
 	/* Root decoder translation overrides typical modulo decode */
 	if (has_hpa_to_spa(cxlrd))
@@ -2996,9 +3129,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
 		return ULLONG_MAX;
 	}
-
-	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
-	if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
+	/* Chunk check applies to aligned modulo decodes only */
+	if (aligned && !has_hpa_to_spa(cxlrd) &&
+	    !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
 		return ULLONG_MAX;
 
 	return hpa;

base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
  2025-10-14  6:28 [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions Alison Schofield
@ 2025-10-14  8:47 ` kernel test robot
  2025-10-14 18:03 ` Alison Schofield
  2025-10-29 12:03 ` Jonathan Cameron
  2 siblings, 0 replies; 5+ messages in thread
From: kernel test robot @ 2025-10-14  8:47 UTC (permalink / raw)
  To: Alison Schofield; +Cc: llvm, oe-kbuild-all

Hi Alison,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 3a8660878839faadb4f1a6dd72c3179c1df56787]

url:    https://github.com/intel-lab-lkp/linux/commits/Alison-Schofield/cxl-region-Translate-DPA-HPA-in-unaligned-MOD3-regions/20251014-143112
base:   3a8660878839faadb4f1a6dd72c3179c1df56787
patch link:    https://lore.kernel.org/r/20251014062850.727428-1-alison.schofield%40intel.com
patch subject: [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
config: loongarch-randconfig-001-20251014 (https://download.01.org/0day-ci/archive/20251014/202510141648.2pQWexpT-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251014/202510141648.2pQWexpT-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510141648.2pQWexpT-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/cxl/core/region.c:3076:7: warning: variable 'hpa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
    3076 |                 if (!aligned)
         |                     ^~~~~~~~
   drivers/cxl/core/region.c:3079:7: note: uninitialized use occurs here
    3079 |                 if (hpa == ULLONG_MAX)
         |                     ^~~
   drivers/cxl/core/region.c:3076:3: note: remove the 'if' if its condition is always true
    3076 |                 if (!aligned)
         |                 ^~~~~~~~~~~~~
    3077 |                         hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos,
   drivers/cxl/core/region.c:3049:57: note: initialize the variable 'hpa' to silence this warning
    3049 |         u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
         |                                                                ^
         |                                                                 = 0
   1 warning generated.


vim +3076 drivers/cxl/core/region.c

  3044	
  3045	u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
  3046			   u64 dpa)
  3047	{
  3048		struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
  3049		u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
  3050		struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
  3051		struct cxl_region_params *p = &cxlr->params;
  3052		struct cxl_endpoint_decoder *cxled = NULL;
  3053		int hbiw = cxld->interleave_ways;
  3054		bool aligned;
  3055		u16 eig = 0;
  3056		u8 eiw = 0;
  3057		int pos;
  3058	
  3059		for (int i = 0; i < p->nr_targets; i++) {
  3060			cxled = p->targets[i];
  3061			if (cxlmd == cxled_to_memdev(cxled))
  3062				break;
  3063		}
  3064		if (!cxled || cxlmd != cxled_to_memdev(cxled))
  3065			return ULLONG_MAX;
  3066	
  3067		/* Remove the dpa base */
  3068		dpa_offset = dpa - cxl_dpa_resource_start(cxled);
  3069	
  3070		/* Unaligned calc: MOD3 interleaves not hbiw * 256MB aligned */
  3071		if (!is_power_of_2(hbiw)) {
  3072			u64 rem;
  3073	
  3074			div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
  3075			aligned = (rem == 0);
> 3076			if (!aligned)
  3077				hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos,
  3078							   dpa_offset);
  3079			if (hpa == ULLONG_MAX)
  3080				return ULLONG_MAX;
  3081	
  3082			goto skip_aligned;
  3083		}
  3084	
  3085		/*
  3086		 * Aligned calc: all power-of-2 interleaves and MOD3 interleaves
  3087		 * that are aligned at hbiw * 256MB
  3088		 */
  3089		pos = cxled->pos;
  3090		ways_to_eiw(p->interleave_ways, &eiw);
  3091		granularity_to_eig(p->interleave_granularity, &eig);
  3092	
  3093		/*
  3094		 * The device position in the region interleave set was removed
  3095		 * from the offset at HPA->DPA translation. To reconstruct the
  3096		 * HPA, place the 'pos' in the offset.
  3097		 *
  3098		 * The placement of 'pos' in the HPA is determined by interleave
  3099		 * ways and granularity and is defined in the CXL Spec 3.0 Section
  3100		 * 8.2.4.19.13 Implementation Note: Device Decode Logic
  3101		 */
  3102	
  3103		mask_upper = GENMASK_ULL(51, eig + 8);
  3104	
  3105		if (eiw < 8) {
  3106			hpa_offset = (dpa_offset & mask_upper) << eiw;
  3107			hpa_offset |= pos << (eig + 8);
  3108		} else {
  3109			bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
  3110			bits_upper = bits_upper * 3;
  3111			hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
  3112		}
  3113	
  3114		/* The lower bits remain unchanged */
  3115		hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
  3116	
  3117		/* Apply the hpa_offset to the region base address */
  3118		hpa = hpa_offset + p->res->start;
  3119	
  3120	skip_aligned:
  3121		hpa += p->cache_size;
  3122	
  3123		/* Root decoder translation overrides typical modulo decode */
  3124		if (has_hpa_to_spa(cxlrd))
  3125			hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa);
  3126	
  3127		if (!cxl_resource_contains_addr(p->res, hpa)) {
  3128			dev_dbg(&cxlr->dev,
  3129				"Addr trans fail: hpa 0x%llx not in region\n", hpa);
  3130			return ULLONG_MAX;
  3131		}
  3132		/* Chunk check applies to aligned modulo decodes only */
  3133		if (aligned && !has_hpa_to_spa(cxlrd) &&
  3134		    !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
  3135			return ULLONG_MAX;
  3136	
  3137		return hpa;
  3138	}
  3139	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
  2025-10-14  6:28 [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions Alison Schofield
  2025-10-14  8:47 ` kernel test robot
@ 2025-10-14 18:03 ` Alison Schofield
  2025-10-29 12:03 ` Jonathan Cameron
  2 siblings, 0 replies; 5+ messages in thread
From: Alison Schofield @ 2025-10-14 18:03 UTC (permalink / raw)
  To: Davidlohr Bueso, Jonathan Cameron, Dave Jiang, Vishal Verma,
	Ira Weiny, Dan Williams
  Cc: linux-cxl, Qing Huang

On Mon, Oct 13, 2025 at 11:28:48PM -0700, Alison Schofield wrote:

snip

>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  		   u64 dpa)
>  {
>  	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
>  	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>  	struct cxl_region_params *p = &cxlr->params;
>  	struct cxl_endpoint_decoder *cxled = NULL;
> +	int hbiw = cxld->interleave_ways;
> +	bool aligned;
>  	u16 eig = 0;
>  	u8 eiw = 0;
>  	int pos;
> @@ -2953,6 +3064,28 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  	if (!cxled || cxlmd != cxled_to_memdev(cxled))
>  		return ULLONG_MAX;
>  
> +	/* Remove the dpa base */
> +	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
> +
> +	/* Unaligned calc: MOD3 interleaves not hbiw * 256MB aligned */
> +	if (!is_power_of_2(hbiw)) {
> +		u64 rem;
> +
> +		div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
> +		aligned = (rem == 0);
> +		if (!aligned)
> +			hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos,
> +						   dpa_offset);
> +		if (hpa == ULLONG_MAX)
> +			return ULLONG_MAX;
> +
> +		goto skip_aligned;
> +	}

I got a personal kernel test robot msg on the above and don't know why
it is not on the list here. hpa is used possibly uninitialized above,
because of the missing bracket around "if(aligned)"

Should be:
		if (!aligned) {
                        hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos,
                                                   dpa_offset);
                        if (hpa == ULLONG_MAX)
                                return ULLONG_MAX;

                        goto skip_aligned;
                }

There may be more flow improvements here. I'll take a look.


> +
> +	/*
> +	 * Aligned calc: all power-of-2 interleaves and MOD3 interleaves
> +	 * that are aligned at hbiw * 256MB
> +	 */
>  	pos = cxled->pos;
>  	ways_to_eiw(p->interleave_ways, &eiw);
>  	granularity_to_eig(p->interleave_granularity, &eig);
> @@ -2967,9 +3100,6 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  	 * 8.2.4.19.13 Implementation Note: Device Decode Logic
>  	 */
>  
> -	/* Remove the dpa base */
> -	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
> -
>  	mask_upper = GENMASK_ULL(51, eig + 8);
>  
>  	if (eiw < 8) {
> @@ -2985,7 +3115,10 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
>  
>  	/* Apply the hpa_offset to the region base address */
> -	hpa = hpa_offset + p->res->start + p->cache_size;
> +	hpa = hpa_offset + p->res->start;
> +
> +skip_aligned:
> +	hpa += p->cache_size;
>  
>  	/* Root decoder translation overrides typical modulo decode */
>  	if (has_hpa_to_spa(cxlrd))
> @@ -2996,9 +3129,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
>  		return ULLONG_MAX;
>  	}
> -
> -	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
> -	if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
> +	/* Chunk check applies to aligned modulo decodes only */
> +	if (aligned && !has_hpa_to_spa(cxlrd) &&
> +	    !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
>  		return ULLONG_MAX;
>  
>  	return hpa;
> 
> base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
> -- 
> 2.37.3
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
  2025-10-14  6:28 [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions Alison Schofield
  2025-10-14  8:47 ` kernel test robot
  2025-10-14 18:03 ` Alison Schofield
@ 2025-10-29 12:03 ` Jonathan Cameron
  2025-10-30  3:07   ` Alison Schofield
  2 siblings, 1 reply; 5+ messages in thread
From: Jonathan Cameron @ 2025-10-29 12:03 UTC (permalink / raw)
  To: Alison Schofield
  Cc: Davidlohr Bueso, Dave Jiang, Vishal Verma, Ira Weiny,
	Dan Williams, linux-cxl, Qing Huang

On Mon, 13 Oct 2025 23:28:48 -0700
Alison Schofield <alison.schofield@intel.com> wrote:

> The CXL driver implementation of DPA->HPA address translation depends
> on a region's starting address always being aligned to Host Bridge
> Interleave Ways * 256MB. The driver follows the decode methods
> defined in the CXL Spec[1] and expanded upon in the CXL Driver Writers
> Guide[2], which describe bit manipulations based on power-of-2
> alignment to translate a DPA to an HPA.
> 
> With the introduction of MOD3 interleave way support, platforms may
> create regions at starting addresses that are not power-of-2 aligned.
> This allows platforms to avoid gaps in the memory map, but addresses
> within those regions cannot be translated using the existing bit
> manipulation method.
> 
> Introduce an unaligned translation method for DPA->HPA that
> reconstructs an HPA by restoring the address first at the port level
> and then at the host bridge level.
> 
> [1] CXL Spec 3.2 8.2.4.20.13 Implementation Note Device Decoder Logic
> [2] CXL Type 3 Memory Software Guide 1.1 2.13.25 DPA to HPA Translation
> 
> Suggested-by: Qing Huang <qing.huang@intel.com>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Hi Alison,

A few minor inline you might want to tweak a little. 

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>

> ---
> 
> Changes in v2:
> - Add 6 and 12 Host Bridge interleaves to decode_pos() (Jonathan)
> - Limit the unalignment check to MOD3 regions
> - Move the cache_size increment to a single place 
> - Updated some in code comments
> - Rebase on v6.18-rc1 
> 
> Changes in v1 (was RFC):
> - Replace "/" with do_div() to quiet i386 build warning (lkp)
> - Replace 'cxld->interleave_ways' with 'hbiw' for clarity
> - Use div64_u64_rem() for alignment alignment
> - Fix up a printk format specifier (lkp)
> - Update code comments and commit log
> - Rebase on v6.17-rc7
> 
> 
>  drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 140 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index e14c1d305b22..3dc6f0ae9f19 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2934,13 +2934,124 @@ static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
>  	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
>  }
>  
> +static int decode_pos(int reg_ways, int hb_ways, int pos, int *pos_port,
> +		      int *pos_hb)
> +{
> +	int devices_per_hb;
> +
> +	/*
> +	 * Decode for 3-6-12 way interleaves as defined in the CXL
> +	 * Spec 3.2 9.13.1.1 Legal Interleaving Configurations.
> +	 */
> +	switch (hb_ways) {
> +	case 3: /* Supports 3-way, 6-way, or 12-way regions */

Does no harm I guess, but the comment seems a little unnecessary given
the line that immediately follows!

> +		if (reg_ways != 3 && reg_ways != 6 && reg_ways != 12)
> +			return -EINVAL;
> +
> +		devices_per_hb = reg_ways / 3;
You could drop this out of the switch as

	devices_per_hb = reg_ways / hb_ways;

Then the switch is simply checking for a valid config.

> +		break;
> +
> +	case 6: /* Supports 6-way or 12-way regions */
> +		if (reg_ways != 6 && reg_ways != 12)
> +			return -EINVAL;
> +
> +		devices_per_hb = reg_ways / 6;
> +		break;
> +
> +	case 12: /* Supports 12-way regions */
> +		if (reg_ways != 12)
> +			return -EINVAL;
> +
> +		devices_per_hb = 1;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	/* Calculate port and host bridge positions */
> +	*pos_port = pos % devices_per_hb;
> +	*pos_hb = pos / devices_per_hb;
> +
> +	return 0;
> +}

...

> +/*
> + * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
> + * start address is not aligned at Host Bridge Interleave Ways * 256MB.
> + *
> + * Unaligned start addresses only occur with MOD3 interleaves. All power-
> + * of-two interleaves are guaranteed aligned.
> + */
> +static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
> +				struct cxl_region_params *p, int pos, u64 dpa)
> +{
> +	int ways_port = p->interleave_ways / cxld->interleave_ways;
> +	int gran_port = p->interleave_granularity;
> +	int gran_hb = cxld->interleave_granularity;
> +	int ways_hb = cxld->interleave_ways;
> +	int pos_port, pos_hb, gran_shift;
> +	u64 shifted, hpa, hpa_port = 0;

Trivial but I really don't like mixing assignment and non assignments
in these.  Also can reduce scope of hpa and shifted which is probably a good
idea for readability.

> +
> +	/* Decode an endpoint 'pos' into port and host-bridge components */
> +	if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
> +		dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
> +			p->interleave_ways);
> +		return ULLONG_MAX;
> +	}

Trivial but I'd put a blank line here if you end up respinning.

> +	/* Restore the port parent address if needed */
> +	if (gran_hb != gran_port)
> +		hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
> +	else
> +		hpa_port = dpa;
> +
> +	/*
> +	 * Complete the HPA reconstruction by restoring the address as if
> +	 * each HB position is a candidate. Test against expected pos_hb
> +	 * to confirm match.
> +	 */
> +	gran_shift = ilog2(gran_hb);
> +	for (int index = 0; index < ways_hb; index++) {
Why not just i for the index?

		u64 hpa = restore_parent(hpa_port, index, gran_hb, ways_hb);

> +		hpa = restore_parent(hpa_port, index, gran_hb, ways_hb);
> +		hpa += p->res->start;
> +
> +		shifted = hpa >> gran_shift;
> +		if (do_div(shifted, ways_hb) == pos_hb)
> +			return hpa;
> +	}
> +
> +	dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
> +		pos);
> +	dev_dbg(&cxld->dev, "     port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
> +		ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
> +
> +	return ULLONG_MAX;
> +}
> +



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions
  2025-10-29 12:03 ` Jonathan Cameron
@ 2025-10-30  3:07   ` Alison Schofield
  0 siblings, 0 replies; 5+ messages in thread
From: Alison Schofield @ 2025-10-30  3:07 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Davidlohr Bueso, Dave Jiang, Vishal Verma, Ira Weiny,
	Dan Williams, linux-cxl, Qing Huang

On Wed, Oct 29, 2025 at 12:03:57PM +0000, Jonathan Cameron wrote:
> On Mon, 13 Oct 2025 23:28:48 -0700
> Alison Schofield <alison.schofield@intel.com> wrote:
> 
> > Suggested-by: Qing Huang <qing.huang@intel.com>
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> Hi Alison,
> 
> A few minor inline you might want to tweak a little. 
> 
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>

Thanks for the review...
> 
> >  drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 140 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index e14c1d305b22..3dc6f0ae9f19 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -2934,13 +2934,124 @@ static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
> >  	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
> >  }
> >  
> > +static int decode_pos(int reg_ways, int hb_ways, int pos, int *pos_port,
> > +		      int *pos_hb)
> > +{
> > +	int devices_per_hb;
> > +
> > +	/*
> > +	 * Decode for 3-6-12 way interleaves as defined in the CXL
> > +	 * Spec 3.2 9.13.1.1 Legal Interleaving Configurations.
> > +	 */
> > +	switch (hb_ways) {
> > +	case 3: /* Supports 3-way, 6-way, or 12-way regions */
> 
> Does no harm I guess, but the comment seems a little unnecessary given
> the line that immediately follows!

I've removed my excessive narration. Thanks.

> 
> > +		if (reg_ways != 3 && reg_ways != 6 && reg_ways != 12)
> > +			return -EINVAL;
> > +
> > +		devices_per_hb = reg_ways / 3;
> You could drop this out of the switch as
> 
> 	devices_per_hb = reg_ways / hb_ways;
> 
> Then the switch is simply checking for a valid config.

Nice, done!

> 
> > +		break;
> > +
> > +	case 6: /* Supports 6-way or 12-way regions */
> > +		if (reg_ways != 6 && reg_ways != 12)
> > +			return -EINVAL;
> > +
> > +		devices_per_hb = reg_ways / 6;
> > +		break;
> > +
> > +	case 12: /* Supports 12-way regions */
> > +		if (reg_ways != 12)
> > +			return -EINVAL;
> > +
> > +		devices_per_hb = 1;
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +	/* Calculate port and host bridge positions */
> > +	*pos_port = pos % devices_per_hb;
> > +	*pos_hb = pos / devices_per_hb;
> > +
> > +	return 0;
> > +}
> 
> ...
> 
> > +/*
> > + * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
> > + * start address is not aligned at Host Bridge Interleave Ways * 256MB.
> > + *
> > + * Unaligned start addresses only occur with MOD3 interleaves. All power-
> > + * of-two interleaves are guaranteed aligned.
> > + */
> > +static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
> > +				struct cxl_region_params *p, int pos, u64 dpa)
> > +{
> > +	int ways_port = p->interleave_ways / cxld->interleave_ways;
> > +	int gran_port = p->interleave_granularity;
> > +	int gran_hb = cxld->interleave_granularity;
> > +	int ways_hb = cxld->interleave_ways;
> > +	int pos_port, pos_hb, gran_shift;
> > +	u64 shifted, hpa, hpa_port = 0;
> 
> Trivial but I really don't like mixing assignment and non assignments
> in these.  Also can reduce scope of hpa and shifted which is probably a good
> idea for readability.

Done.
Moved shifted and hpa to their smaller scope.
Un-mixed assigns and non-assign declarations.

> 
> > +
> > +	/* Decode an endpoint 'pos' into port and host-bridge components */
> > +	if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
> > +		dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
> > +			p->interleave_ways);
> > +		return ULLONG_MAX;
> > +	}
> 
> Trivial but I'd put a blank line here if you end up respinning.

Done

> 
> > +	/* Restore the port parent address if needed */
> > +	if (gran_hb != gran_port)
> > +		hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
> > +	else
> > +		hpa_port = dpa;
> > +
> > +	/*
> > +	 * Complete the HPA reconstruction by restoring the address as if
> > +	 * each HB position is a candidate. Test against expected pos_hb
> > +	 * to confirm match.
> > +	 */
> > +	gran_shift = ilog2(gran_hb);
> > +	for (int index = 0; index < ways_hb; index++) {
> Why not just i for the index?

hmm...I was ready to just switch to i for brevity, but since you
asked 'why', I stared longer and realized it should be renamed 'pos'.
That is what it is and that matches the restore_parent() definition.

> 
> 		u64 hpa = restore_parent(hpa_port, index, gran_hb, ways_hb);
> 

Thanks for reviewing!
--Alison

> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-10-30  3:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-14  6:28 [PATCH v2] cxl/region: Translate DPA->HPA in unaligned MOD3 regions Alison Schofield
2025-10-14  8:47 ` kernel test robot
2025-10-14 18:03 ` Alison Schofield
2025-10-29 12:03 ` Jonathan Cameron
2025-10-30  3:07   ` Alison Schofield

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.