public inbox for linux-cxl@vger.kernel.org
 help / color / mirror / Atom feed
From: "Cheatham, Benjamin" <benjamin.cheatham@amd.com>
To: Dave Jiang <dave.jiang@intel.com>, <nvdimm@lists.linux.dev>
Cc: <linux-cxl@vger.kernel.org>, <alison.schofield@intel.com>
Subject: Re: [ndctl PATCH v3 2/7] libcxl: Add CXL protocol errors
Date: Thu, 23 Oct 2025 15:15:14 -0500	[thread overview]
Message-ID: <d89d9d02-6b9a-4818-9f27-58f565237fe6@amd.com> (raw)
In-Reply-To: <bd50a175-0e4b-4c65-910d-df2d1ae52be8@intel.com>

On 10/21/2025 6:15 PM, Dave Jiang wrote:
> 
> 
> On 10/21/25 11:31 AM, Ben Cheatham wrote:
>> The v6.11 Linux kernel adds CXL protocl (CXL.cache & CXL.mem) error
>> injection for platforms that implement the error types as according to
>> the v6.5+ ACPI specification. The interface for injecting these errors
>> are provided by the kernel under the CXL debugfs. The relevant files in
>> the interface are the einj_types file, which provides the available CXL
>> error types for injection, and the einj_inject file, which injects the
>> error into a CXL VH root port or CXL RCH downstream port.
>>
>> Add a library API to retrieve the CXL error types and inject them. This
>> API will be used in a later commit by the 'cxl-inject-error' and
>> 'cxl-list' commands.
>>
>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
>> ---
>>  cxl/lib/libcxl.c   | 174 +++++++++++++++++++++++++++++++++++++++++++++
>>  cxl/lib/libcxl.sym |   5 ++
>>  cxl/lib/private.h  |  14 ++++
>>  cxl/libcxl.h       |  13 ++++
>>  4 files changed, 206 insertions(+)
>>
>> diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
>> index ea5831f..9486b0f 100644
>> --- a/cxl/lib/libcxl.c
>> +++ b/cxl/lib/libcxl.c
>> @@ -46,11 +46,13 @@ struct cxl_ctx {
>>  	void *userdata;
>>  	int memdevs_init;
>>  	int buses_init;
>> +	int perrors_init;
>>  	unsigned long timeout;
>>  	struct udev *udev;
>>  	struct udev_queue *udev_queue;
>>  	struct list_head memdevs;
>>  	struct list_head buses;
>> +	struct list_head perrors;
>>  	struct kmod_ctx *kmod_ctx;
>>  	struct daxctl_ctx *daxctl_ctx;
>>  	void *private_data;
>> @@ -205,6 +207,14 @@ static void free_bus(struct cxl_bus *bus, struct list_head *head)
>>  	free(bus);
>>  }
>>  
>> +static void free_protocol_error(struct cxl_protocol_error *perror,
>> +				struct list_head *head)
>> +{
>> +	if (head)
>> +		list_del_from(head, &perror->list);
> 
> I would go if (!head) return;
> 

Would that work? I think I would still need to free perror below.

>> +	free(perror);
>> +}
>> +
>>  /**
>>   * cxl_get_userdata - retrieve stored data pointer from library context
>>   * @ctx: cxl library context
>> @@ -328,6 +338,7 @@ CXL_EXPORT int cxl_new(struct cxl_ctx **ctx)
>>  	*ctx = c;
>>  	list_head_init(&c->memdevs);
>>  	list_head_init(&c->buses);
>> +	list_head_init(&c->perrors);
>>  	c->kmod_ctx = kmod_ctx;
>>  	c->daxctl_ctx = daxctl_ctx;
>>  	c->udev = udev;
>> @@ -369,6 +380,7 @@ CXL_EXPORT struct cxl_ctx *cxl_ref(struct cxl_ctx *ctx)
>>   */
>>  CXL_EXPORT void cxl_unref(struct cxl_ctx *ctx)
>>  {
>> +	struct cxl_protocol_error *perror, *_p;
>>  	struct cxl_memdev *memdev, *_d;
>>  	struct cxl_bus *bus, *_b;
>>  
>> @@ -384,6 +396,9 @@ CXL_EXPORT void cxl_unref(struct cxl_ctx *ctx)
>>  	list_for_each_safe(&ctx->buses, bus, _b, port.list)
>>  		free_bus(bus, &ctx->buses);
>>  
>> +	list_for_each_safe(&ctx->perrors, perror, _p, list)
>> +		free_protocol_error(perror, &ctx->perrors);
>> +
>>  	udev_queue_unref(ctx->udev_queue);
>>  	udev_unref(ctx->udev);
>>  	kmod_unref(ctx->kmod_ctx);
>> @@ -3416,6 +3431,165 @@ CXL_EXPORT int cxl_port_decoders_committed(struct cxl_port *port)
>>  	return port->decoders_committed;
>>  }
>>  
>> +const struct cxl_protocol_error cxl_protocol_errors[] = {
>> +	CXL_PROTOCOL_ERROR(12, "cache-correctable"),
>> +	CXL_PROTOCOL_ERROR(13, "cache-uncorrectable"),
>> +	CXL_PROTOCOL_ERROR(14, "cache-fatal"),
>> +	CXL_PROTOCOL_ERROR(15, "mem-correctable"),
>> +	CXL_PROTOCOL_ERROR(16, "mem-uncorrectable"),
>> +	CXL_PROTOCOL_ERROR(17, "mem-fatal")
>> +};
>> +
>> +static struct cxl_protocol_error *create_cxl_protocol_error(struct cxl_ctx *ctx,
>> +							    unsigned long n)
> 
> why unsigned long instead of int? are there that many errors?
>

No there aren't. I'll change it over to unsigned int instead.

>> +{
>> +	struct cxl_protocol_error *perror;
>> +
>> +	for (unsigned long i = 0; i < ARRAY_SIZE(cxl_protocol_errors); i++) {
>> +		if (n != BIT(cxl_protocol_errors[i].num))
>> +			continue;
>> +
>> +		perror = calloc(1, sizeof(*perror));
>> +		if (!perror)
>> +			return NULL;
>> +
>> +		*perror = cxl_protocol_errors[i];
>> +		perror->ctx = ctx;
>> +		return perror;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static void cxl_add_protocol_errors(struct cxl_ctx *ctx)
>> +{
>> +	struct cxl_protocol_error *perror;
>> +	char *path, *num, *save;
>> +	unsigned long n;
>> +	size_t path_len;
>> +	char buf[512];
> 
> Use SYSFS_ATTR_SIZE rather than 512

Wasn't aware of that, will do!

> 
>> +	int rc = 0;
>> +
>> +	if (!ctx->debugfs)
>> +		return;
>> +
>> +	path_len = strlen(ctx->debugfs) + 100;
>> +	path = calloc(1, path_len);
>> +	if (!path)
>> +		return;
>> +
>> +	snprintf(path, path_len, "%s/cxl/einj_types", ctx->debugfs);
>> +	rc = access(path, F_OK);
>> +	if (rc) {
>> +		err(ctx, "failed to access %s: %s\n", path, strerror(-rc));
> strerror(errno)? access() returns -1 and the actual error is in errno.

My bad, will update it (and elsewhere).

>> +		goto err;
>> +	}
>> +
>> +	rc = sysfs_read_attr(ctx, path, buf);
>> +	if (rc) {
>> +		err(ctx, "failed to read %s: %s\n", path, strerror(-rc));
>> +		goto err;
>> +	}
>> +
>> +	/*
>> +	 * The format of the output of the einj_types attr is:
>> +	 * <Error number in hex 1> <Error name 1>
>> +	 * <Error number in hex 2> <Error name 2>
>> +	 * ...
>> +	 *
>> +	 * We only need the number, so parse that and skip the rest of
>> +	 * the line.
>> +	 */
>> +	num = strtok_r(buf, " \n", &save);
>> +	while (num) {
>> +		n = strtoul(num, NULL, 16);
>> +		perror = create_cxl_protocol_error(ctx, n);
>> +		if (perror)
>> +			list_add(&ctx->perrors, &perror->list);
>> +
>> +		num = strtok_r(NULL, "\n", &save);
>> +		if (!num)
>> +			break;
>> +
>> +		num = strtok_r(NULL, " \n", &save);
>> +	}
>> +
>> +err:
>> +	free(path);
>> +}
>> +
>> +static void cxl_protocol_errors_init(struct cxl_ctx *ctx)
>> +{
>> +	if (ctx->perrors_init)
>> +		return;
>> +
>> +	ctx->perrors_init = 1;
>> +	cxl_add_protocol_errors(ctx);
>> +}
>> +
>> +CXL_EXPORT struct cxl_protocol_error *
>> +cxl_protocol_error_get_first(struct cxl_ctx *ctx)
>> +{
>> +	cxl_protocol_errors_init(ctx);
>> +
>> +	return list_top(&ctx->perrors, struct cxl_protocol_error, list);
>> +}
>> +
>> +CXL_EXPORT struct cxl_protocol_error *
>> +cxl_protocol_error_get_next(struct cxl_protocol_error *perror)
>> +{
>> +	struct cxl_ctx *ctx = perror->ctx;
>> +
>> +	return list_next(&ctx->perrors, perror, list);
>> +}
>> +
>> +CXL_EXPORT unsigned long
>> +cxl_protocol_error_get_num(struct cxl_protocol_error *perror)
>> +{
>> +	return perror->num;
>> +}
>> +
>> +CXL_EXPORT const char *
>> +cxl_protocol_error_get_str(struct cxl_protocol_error *perror)
>> +{
>> +	return perror->string;
>> +}
>> +
>> +CXL_EXPORT int cxl_dport_protocol_error_inject(struct cxl_dport *dport,
>> +					       unsigned long error)
>> +{
>> +	struct cxl_ctx *ctx = dport->port->ctx;
>> +	unsigned long path_len;
>> +	char buf[32] = { 0 };
>> +	char *path;
>> +	int rc;
>> +
>> +	if (!ctx->debugfs)
>> +		return -ENOENT;
>> +
>> +	path_len = strlen(ctx->debugfs) + 100;
>> +	path = calloc(path_len, sizeof(char));
>> +	if (!path)
>> +		return -ENOMEM;
>> +
>> +	snprintf(path, path_len, "%s/cxl/%s/einj_inject", ctx->debugfs,
>> +		 cxl_dport_get_devname(dport));
> 
> check return value

Yep, will do (elsewhere as well).

> 
>> +	rc = access(path, F_OK);
>> +	if (rc) {
>> +		err(ctx, "failed to access %s: %s\n", path, strerror(-rc));
> 
> errno
> 
>> +		free(path);
>> +		return rc;
> -errno instead of rc
> 
>> +	}
>> +
>> +	snprintf(buf, sizeof(buf), "0x%lx\n", error);
> 
> check return value?
> 
> DJ
>

  reply	other threads:[~2025-10-23 20:15 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-21 18:31 [ndctl PATCH v3 0/7] Add error injection support Ben Cheatham
2025-10-21 18:31 ` [ndctl PATCH v3 1/7] libcxl: Add debugfs path to CXL context Ben Cheatham
2025-10-21 22:55   ` Dave Jiang
2025-10-23 20:15     ` Cheatham, Benjamin
2025-10-21 18:31 ` [ndctl PATCH v3 2/7] libcxl: Add CXL protocol errors Ben Cheatham
2025-10-21 23:15   ` Dave Jiang
2025-10-23 20:15     ` Cheatham, Benjamin [this message]
2025-10-23 22:50       ` Dave Jiang
2025-10-21 18:31 ` [ndctl PATCH v3 3/7] libcxl: Add poison injection support Ben Cheatham
2025-10-21 23:44   ` Dave Jiang
2025-10-23 20:15     ` Cheatham, Benjamin
2025-10-21 18:31 ` [ndctl PATCH v3 4/7] cxl: Add inject-error command Ben Cheatham
2025-10-22 17:06   ` Dave Jiang
2025-10-23 20:15     ` Cheatham, Benjamin
2025-10-23 22:51       ` Dave Jiang
2025-10-21 18:31 ` [ndctl PATCH v3 5/7] cxl: Add clear-error command Ben Cheatham
2025-10-21 18:31 ` [ndctl PATCH v3 6/7] cxl/list: Add injectable errors in output Ben Cheatham
2025-10-22 17:18   ` Dave Jiang
2025-10-21 18:31 ` [ndctl PATCH v3 7/7] Documentation: Add docs for inject/clear-error commands Ben Cheatham
2025-10-22 17:22   ` Dave Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d89d9d02-6b9a-4818-9f27-58f565237fe6@amd.com \
    --to=benjamin.cheatham@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=nvdimm@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox