All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
To: Ben Widawsky <ben.widawsky@intel.com>
Cc: <linux-cxl@vger.kernel.org>,
	Alison Schofield <alison.schofield@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	"Ira Weiny" <ira.weiny@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>
Subject: Re: [PATCH 12/13] cxl/core/bus: Enumerate all HDM decoders
Date: Fri, 3 Sep 2021 18:43:47 +0100	[thread overview]
Message-ID: <20210903184347.00002b32@Huawei.com> (raw)
In-Reply-To: <20210902195017.2516472-13-ben.widawsky@intel.com>

On Thu, 2 Sep 2021 12:50:16 -0700
Ben Widawsky <ben.widawsky@intel.com> wrote:

> As of the CXL 2.0 specification, every port will have between 1 and 10
> HDM decoders available in hardware. These exist in the endpoint, switch,
> and top level hostbridges. HDM decoders are required for configuration
> CXL regions, and therefore enumerating them is an important first step.
> 
> As an example, the below has 4 decoders, a top level CFMWS decoder
> (0.0), a single decoder in a single host bridge (1.0), and two devices
> each with 1 decoder (2.0 and 3.0)
> 
> ├── decoder0.0 -> ../../../devices/platform/ACPI0017:00/root0/decoder0.0
> ├── decoder1.0 -> ../../../devices/platform/ACPI0017:00/root0/port1/decoder1.0
> ├── decoder2.0 -> ../../../devices/platform/ACPI0017:00/root0/port1/devport2/decoder2.0
> ├── decoder3.0 -> ../../../devices/platform/ACPI0017:00/root0/port1/devport3/decoder3.0
> 
> Additionally, attributes are added for a port:
> 
> /sys/bus/cxl/devices/port1
> ├── active_decoders
> ├── decoder_count
> ├── decoder_enabled
> ├── max_target_count
> ...
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>

Documentation/ABI/testing/sysfs-bus-cxl  needs updating.

Various minor things inline.


> ---
>  drivers/cxl/core/bus.c | 161 ++++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/cxl.h      |  54 ++++++++++++--
>  2 files changed, 209 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
> index d056dbd794a4..b75e42965e89 100644
> --- a/drivers/cxl/core/bus.c
> +++ b/drivers/cxl/core/bus.c
> @@ -43,6 +43,15 @@ struct attribute_group cxl_base_attribute_group = {
>  	.attrs = cxl_base_attributes,
>  };
>  
> +static ssize_t enabled_show(struct device *dev, struct device_attribute *attr,
> +			    char *buf)
> +{
> +	struct cxl_decoder *cxld = to_cxl_decoder(dev);
> +
> +	return sysfs_emit(buf, "%d\n", !!cxld->decoder_enabled);
> +}
> +static DEVICE_ATTR_RO(enabled);
> +
>  static ssize_t start_show(struct device *dev, struct device_attribute *attr,
>  			  char *buf)
>  {
> @@ -130,6 +139,7 @@ static ssize_t target_list_show(struct device *dev,
>  static DEVICE_ATTR_RO(target_list);
>  
>  static struct attribute *cxl_decoder_base_attrs[] = {
> +	&dev_attr_enabled.attr,
>  	&dev_attr_start.attr,
>  	&dev_attr_size.attr,
>  	&dev_attr_locked.attr,
> @@ -249,8 +259,48 @@ static void cxl_port_release(struct device *dev)
>  	kfree(port);
>  }
>  
> +static ssize_t active_decoders_show(struct device *dev,
> +				    struct device_attribute *attr, char *buf)
> +{
> +	struct cxl_port *port = to_cxl_port(dev);
> +
> +	return sysfs_emit(buf, "%*pbl\n", port->decoder_cap.count,
> +			  port->used_decoders);
> +}
> +static DEVICE_ATTR_RO(active_decoders);
> +
> +static ssize_t decoder_count_show(struct device *dev,
> +				  struct device_attribute *attr, char *buf)
> +{
> +	struct cxl_port *port = to_cxl_port(dev);
> +
> +	return sysfs_emit(buf, "%d\n", port->decoder_cap.count);
> +}
> +static DEVICE_ATTR_RO(decoder_count);
> +
> +static ssize_t max_target_count_show(struct device *dev,
> +				     struct device_attribute *attr, char *buf)
> +{
> +	struct cxl_port *port = to_cxl_port(dev);
> +
> +	return sysfs_emit(buf, "%d\n", port->decoder_cap.target_count);
> +}
> +static DEVICE_ATTR_RO(max_target_count);
> +
> +static struct attribute *cxl_port_caps_attributes[] = {
> +	&dev_attr_active_decoders.attr,
> +	&dev_attr_decoder_count.attr,
> +	&dev_attr_max_target_count.attr,
> +	NULL,
> +};
> +
> +struct attribute_group cxl_port_attribute_group = {
> +	.attrs = cxl_port_caps_attributes,
> +};
> +
>  static const struct attribute_group *cxl_port_attribute_groups[] = {
>  	&cxl_base_attribute_group,
> +	&cxl_port_attribute_group,
>  	NULL,
>  };
>  
> @@ -341,6 +391,107 @@ static int cxl_port_map_component_registers(struct cxl_port *port)
>  	return 0;
>  }
>  
> +static int port_populate_caps(struct cxl_port *port)
> +{
> +	void __iomem *hdm_decoder = port->regs.hdm_decoder;
> +	u32 hdm_cap;
> +
> +	hdm_cap = readl(hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
> +
> +	port->used_decoders = devm_bitmap_zalloc(&port->dev,
> +						 cxl_hdm_decoder_count(hdm_cap),
> +						 GFP_KERNEL);

Mentioned below. No advantage that I can see in dynamic allocation.
Also, not really populating anything..

> +	if (!port->used_decoders)
> +		return -ENOMEM;
> +
> +	port->decoder_cap.count = cxl_hdm_decoder_count(hdm_cap);
> +	port->decoder_cap.target_count =
> +		FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
> +	port->decoder_cap.interleave11_8 =
> +		FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap);
> +	port->decoder_cap.interleave14_12 =
> +		FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap);
> +
> +	return 0;
> +}
> +
> +static int cxl_port_enumerate_hdm_decoders(struct device *host,
> +					   struct cxl_port *port)
> +{
> +	void __iomem *hdm_decoder = port->regs.hdm_decoder;
> +	u32 hdm_ctrl;
> +	int i, rc = 0;
> +
> +	rc = port_populate_caps(port);
> +	if (rc)
> +		return rc;
> +
> +	if (port->decoder_cap.count == 0) {
> +		dev_warn(host, "Found no HDM decoders\n");

No HDM decoders found.

> +		return -ENODEV;
> +	}
> +
> +	for (i = 0; i < port->decoder_cap.count; i++) {
> +		enum cxl_decoder_type type = CXL_DECODER_EXPANDER;
> +		struct resource res = DEFINE_RES_MEM(0, 0);
> +		struct cxl_decoder *cxld;
> +		int iw = 0, ig = 0;
> +		u32 ctrl;
> +
> +		cxld = cxl_decoder_alloc(port, is_endpoint_decoder(host) ? 0 :
> +					 port->decoder_cap.target_count);
> +		if (IS_ERR(cxld)) {
> +			dev_warn(host, "Failed to allocate the decoder\n");
> +			return PTR_ERR(cxld);
> +		}
> +
> +		ctrl = readl(hdm_decoder + CXL_HDM_DECODER0_CTRL_OFFSET(i));
> +		cxld->decoder_enabled =
> +			!!FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl);
> +		/* If the decoder is already active, parse info */
> +		if (cxld->decoder_enabled) {
> +			set_bit(i, port->used_decoders);
> +			iw = cxl_hdm_decoder_iw(ctrl);
> +			ig = cxl_hdm_decoder_ig(ctrl);
> +			if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl) == 0)
> +				type = CXL_DECODER_ACCELERATOR;
> +			res.start = readl(hdm_decoder +
> +					  CXL_HDM_DECODER0_BASE_LOW_OFFSET(i));
> +			res.start |=
> +				(u64)readl(hdm_decoder +
> +					   CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i))
> +				<< 32;

Perhaps worth thinking about exposing if the decoder is locked as well?
Might be good to let userspace know that..

> +		}
> +
> +		cxld->target_type = type;
> +		cxld->res = res;
> +		cxld->interleave_ways = iw;
> +		cxld->interleave_granularity = ig;
> +
> +		rc = cxl_decoder_add(host, cxld, NULL);
> +		if (rc) {
> +			dev_warn(host, "Failed to add decoder (%d)\n", rc);
> +			kfree(cxld);
> +			goto out;
> +		}
> +	}
> +
> +	/*
> +	 * Enable CXL.mem decoding via MMIO for endpoint devices
> +	 *
> +	 * TODO: If a memory device was configured to participate in a region by
> +	 * system firmware via DVSEC, this will break that region.

That seems to me like fatal for this patch set until fixed.
I'm not sure I understand why it will break a region though as I'd assume it
would be already on?

> +	 */
> +	if (is_endpoint_decoder(host)) {
> +		hdm_ctrl = readl(hdm_decoder + CXL_HDM_DECODER_CTRL_OFFSET);
> +		writel(hdm_ctrl | CXL_HDM_DECODER_ENABLE,
> +		       hdm_decoder + CXL_HDM_DECODER_CTRL_OFFSET);
> +	}
> +
> +out:

direct returns are much easier to review...


> +	return rc;
> +}
> +
>  static struct cxl_port *cxl_port_alloc(struct device *uport,
>  				       resource_size_t component_reg_phys,
>  				       struct cxl_port *parent_port)
> @@ -432,8 +583,16 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
>  	/* Platform "switch" has no parent port or component registers */
>  	if (parent_port) {
>  		rc = cxl_port_map_component_registers(port);
> -		if (rc)
> +		if (rc) {
> +			dev_err(host, "Failed to map component registers\n");

*grump*  Unrelated change.  I guess you could sort of argue it's needed to
distinguish the different errors, but that's a stretch.


>  			return ERR_PTR(rc);
> +		}
> +
> +		rc = cxl_port_enumerate_hdm_decoders(host, port);
> +		if (rc) {
> +			dev_err(host, "Failed to enumerate HDM decoders\n");
> +			return ERR_PTR(rc);
> +		}
>  	}
>  
>  	return port;
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index e610fa9dd6c8..6759fe097e12 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -36,11 +36,19 @@
>  #define CXL_HDM_DECODER_CAP_OFFSET 0x0
>  #define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
>  #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
> -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10
> -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14
> -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18
> -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c
> -#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20
> +#define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
> +#define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
> +#define CXL_HDM_DECODER_CTRL_OFFSET 0x0
> +#define   CXL_HDM_DECODER_ENABLE BIT(1)
> +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x10 + (i) * 0x20)

If you are doing the macro to access higher decoders, don't keep
the DECODER0 naming.  DECODERX perhaps?

> +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x14 + (i) * 0x20)
> +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x18 + (i) * 0x20)
> +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x1c + (i) * 0x20)
> +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 + (i) * 0x20)
> +#define   CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
> +#define   CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
> +#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
> +#define   CXL_HDM_DECODER0_CTRL_TYPE BIT(12)

Perhaps name this bit to indicate that 1 == type 3 device. Otherwise
need a macro to define type2 and one for type3.

>  
>  static inline int cxl_hdm_decoder_count(u32 cap_hdr)
>  {
> @@ -49,6 +57,20 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr)
>  	return val ? val * 2 : 1;
>  }
>  
> +static inline int cxl_hdm_decoder_ig(u32 ctrl)
> +{
> +	int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl);
> +
> +	return 8 + val;
> +}
> +
> +static inline int cxl_hdm_decoder_iw(u32 ctrl)
> +{
> +	int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl);
> +
> +	return 1 << val;
I guess there isn't a strong reason to clamp this to values the
spec actually allows.  Perhaps a comment to say that for this and ig?
> +}
> +
>  /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
>  #define CXLDEV_CAP_ARRAY_OFFSET 0x0
>  #define   CXLDEV_CAP_ARRAY_CAP_ID 0
> @@ -188,6 +210,12 @@ enum cxl_decoder_type {
>   */
>  #define CXL_DECODER_MAX_INTERLEAVE 16
>  
> +/*
> + * Current specification goes up to 10 double that seems a reasonable
> + * software max for the foreseeable future

I'd stick to 10 until we have evidence otherwise... If you happen to
have a mysterious strong reason to go with 20 though I'm not that fussed
but I suspect others may have equally strong reasons to pick another number ;)

> + */
> +#define CXL_DECODER_MAX_COUNT 20
> +
>  /**
>   * struct cxl_decoder - CXL address range decode configuration
>   * @dev: this decoder's device
> @@ -197,6 +225,7 @@ enum cxl_decoder_type {
>   * @interleave_granularity: data stride per dport

Is it currently documented anywhere that ig is in 2**(ig) bytes?
Might be worth adding if not.

>   * @target_type: accelerator vs expander (type2 vs type3) selector
>   * @flags: memory type capabilities and locking
> + * @decoder_enabled: Is this decoder currently decoding
>   * @nr_targets: number of elements in @target
>   * @target: active ordered target list in current decoder configuration
>   */
> @@ -208,6 +237,7 @@ struct cxl_decoder {
>  	int interleave_granularity;
>  	enum cxl_decoder_type target_type;
>  	unsigned long flags;
> +	bool decoder_enabled;
>  	int nr_targets;
>  	struct cxl_dport *target[];
>  };
> @@ -255,6 +285,12 @@ struct cxl_walk_context {
>   * @decoder_ida: allocator for decoder ids
>   * @component_reg_phys: component register capability base address (optional)
>   * @regs: Mapped version of @component_reg_phys
> + * @used_decoders: Bitmap of currently active decoders for the port
> + * @decoder_cap: Capabilities of all decoders contained by the port
> + * @decoder_cap.count: Count of HDM decoders for the port
> + * @decoder_cap.target_count: Max number of interleaved downstream ports
> + * @decoder_cap.interleave11_8: Are address bits 11-8 available for interleave
> + * @decoder_cap.interleave14_12: Are address bits 14-12 available for interleave
>   */
>  struct cxl_port {
>  	struct device dev;
> @@ -264,6 +300,14 @@ struct cxl_port {
>  	struct ida decoder_ida;
>  	resource_size_t component_reg_phys;
>  	struct cxl_component_regs regs;
> +
> +	unsigned long *used_decoders;

Given it's not huge use appropriate macro to allocate
it directly here with the maximum of 10.
DECLARE_BITMAP() in similar fashion to patch 19 in Dan's
recent series.

https://lore.kernel.org/all/162982122744.1124374.6742215706893563515.stgit@dwillia2-desk3.amr.corp.intel.com/

(I'm liking lore having "all" now !)

> +	struct {
> +		int count;
> +		int target_count;
> +		bool interleave11_8;
> +		bool interleave14_12;
> +	} decoder_cap;
>  };
>  
>  /**


  reply	other threads:[~2021-09-03 17:43 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-02 19:50 [PATCH 00/13] Enumerate midlevel and endpoint decoders Ben Widawsky
2021-09-02 19:50 ` [PATCH 01/13] Documentation/cxl: Add bus internal docs Ben Widawsky
2021-09-03 14:05   ` Jonathan Cameron
2021-09-10 18:20     ` Dan Williams
2021-09-02 19:50 ` [PATCH 02/13] cxl/core/bus: Add kernel docs for decoder ops Ben Widawsky
2021-09-03 14:17   ` Jonathan Cameron
2021-09-10 18:51   ` Dan Williams
2021-09-11 17:25     ` Ben Widawsky
2021-09-02 19:50 ` [PATCH 03/13] cxl/core: Ignore interleave when adding decoders Ben Widawsky
2021-09-03 14:25   ` Jonathan Cameron
2021-09-10 19:00     ` Dan Williams
2021-09-11 17:30       ` Ben Widawsky
2021-09-02 19:50 ` [PATCH 04/13] cxl: Introduce endpoint decoders Ben Widawsky
2021-09-03 14:35   ` Jonathan Cameron
2021-09-13 16:19     ` Ben Widawsky
2021-09-10 19:19   ` Dan Williams
2021-09-13 16:11     ` Ben Widawsky
2021-09-13 22:07       ` Dan Williams
2021-09-13 23:19         ` Ben Widawsky
2021-09-14 21:16           ` Dan Williams
2021-09-02 19:50 ` [PATCH 05/13] cxl/pci: Disambiguate cxl_pci further from cxl_mem Ben Widawsky
2021-09-03 14:45   ` Jonathan Cameron
2021-09-10 19:27   ` Dan Williams
2021-09-02 19:50 ` [PATCH 06/13] cxl/mem: Introduce cxl_mem driver Ben Widawsky
2021-09-03 14:52   ` Jonathan Cameron
2021-09-10 21:32   ` Dan Williams
2021-09-13 16:46     ` Ben Widawsky
2021-09-13 19:37       ` Dan Williams
2021-09-02 19:50 ` [PATCH 07/13] cxl/memdev: Determine CXL.mem capability Ben Widawsky
2021-09-03 15:21   ` Jonathan Cameron
2021-09-13 19:01     ` Ben Widawsky
2021-09-10 21:59   ` Dan Williams
2021-09-13 22:10     ` Ben Widawsky
2021-09-14 22:42       ` Dan Williams
2021-09-14 22:55         ` Ben Widawsky
2021-09-02 19:50 ` [PATCH 08/13] cxl/mem: Add memdev as a port Ben Widawsky
2021-09-03 15:31   ` Jonathan Cameron
2021-09-10 23:09   ` Dan Williams
2021-09-02 19:50 ` [PATCH 09/13] cxl/pci: Retain map information in cxl_mem_probe Ben Widawsky
2021-09-10 23:12   ` Dan Williams
2021-09-10 23:45     ` Dan Williams
2021-09-02 19:50 ` [PATCH 10/13] cxl/core: Map component registers for ports Ben Widawsky
2021-09-02 22:41   ` Ben Widawsky
2021-09-02 22:42     ` Ben Widawsky
2021-09-03 16:14   ` Jonathan Cameron
2021-09-10 23:52     ` Dan Williams
2021-09-13  8:29       ` Jonathan Cameron
2021-09-10 23:44   ` Dan Williams
2021-09-02 19:50 ` [PATCH 11/13] cxl/core: Convert decoder range to resource Ben Widawsky
2021-09-03 16:16   ` Jonathan Cameron
2021-09-11  0:59   ` Dan Williams
2021-09-02 19:50 ` [PATCH 12/13] cxl/core/bus: Enumerate all HDM decoders Ben Widawsky
2021-09-03 17:43   ` Jonathan Cameron [this message]
2021-09-11  1:37     ` Dan Williams
2021-09-11  1:13   ` Dan Williams
2021-09-02 19:50 ` [PATCH 13/13] cxl/mem: Enumerate switch decoders Ben Widawsky
2021-09-03 17:56   ` Jonathan Cameron
2021-09-13 22:12     ` Ben Widawsky
2021-09-14 23:31   ` Dan Williams
2021-09-10 18:15 ` [PATCH 00/13] Enumerate midlevel and endpoint decoders Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210903184347.00002b32@Huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=alison.schofield@intel.com \
    --cc=ben.widawsky@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=ira.weiny@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=vishal.l.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.