* [PATCH v5 1/7] libcxl: Add debugfs path to CXL context
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-17 4:26 ` Alison Schofield
2025-12-15 21:36 ` [PATCH v5 2/7] libcxl: Add CXL protocol errors Ben Cheatham
` (7 subsequent siblings)
8 siblings, 1 reply; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Find the CXL debugfs mount point and add it to the CXL library context.
This will be used by poison and protocol error library functions to
access the information presented by the filesystem.
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
cxl/lib/libcxl.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index cafde1c..71eff6d 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <dirent.h>
#include <unistd.h>
+#include <mntent.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -54,6 +55,7 @@ struct cxl_ctx {
struct kmod_ctx *kmod_ctx;
struct daxctl_ctx *daxctl_ctx;
void *private_data;
+ const char *debugfs;
};
static void free_pmem(struct cxl_pmem *pmem)
@@ -240,6 +242,28 @@ CXL_EXPORT void *cxl_get_private_data(struct cxl_ctx *ctx)
return ctx->private_data;
}
+/*
+ * get_debugfs_dir - locate the debugfs mount point
+ *
+ * Walks /proc/mounts and stops at the first entry whose filesystem type is
+ * "debugfs". Returns a heap-allocated copy of that entry's mount directory,
+ * or NULL when /proc/mounts cannot be opened, no debugfs entry is listed,
+ * or the copy cannot be allocated. The caller owns the returned string and
+ * releases it with free().
+ */
+static const char* get_debugfs_dir(void)
+{
+	char *debugfs_dir = NULL;
+	struct mntent *ent;
+	FILE *mntf;
+
+	mntf = setmntent("/proc/mounts", "r");
+	if (!mntf)
+		return NULL;
+
+	while ((ent = getmntent(mntf)) != NULL) {
+		if (!strcmp(ent->mnt_type, "debugfs")) {
+			/*
+			 * strdup() returns NULL on allocation failure, which
+			 * is passed back to the caller instead of being
+			 * written through.
+			 */
+			debugfs_dir = strdup(ent->mnt_dir);
+			break;
+		}
+	}
+
+	endmntent(mntf);
+	return debugfs_dir;
+}
+
/**
* cxl_new - instantiate a new library context
* @ctx: context to establish
@@ -295,6 +319,7 @@ CXL_EXPORT int cxl_new(struct cxl_ctx **ctx)
c->udev = udev;
c->udev_queue = udev_queue;
c->timeout = 5000;
+ c->debugfs = get_debugfs_dir();
return 0;
@@ -350,6 +375,7 @@ CXL_EXPORT void cxl_unref(struct cxl_ctx *ctx)
kmod_unref(ctx->kmod_ctx);
daxctl_unref(ctx->daxctl_ctx);
info(ctx, "context %p released\n", ctx);
+ free((void *)ctx->debugfs);
free(ctx);
}
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* Re: [PATCH v5 1/7] libcxl: Add debugfs path to CXL context
2025-12-15 21:36 ` [PATCH v5 1/7] libcxl: Add debugfs path to CXL context Ben Cheatham
@ 2025-12-17 4:26 ` Alison Schofield
2025-12-17 19:55 ` Cheatham, Benjamin
0 siblings, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:26 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:24PM -0600, Ben Cheatham wrote:
> Find the CXL debugfs mount point and add it to the CXL library context.
> This will be used by poison and procotol error library functions to
> access the information presented by the filesystem.
>
> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
> ---
> cxl/lib/libcxl.c | 26 ++++++++++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
> index cafde1c..71eff6d 100644
> --- a/cxl/lib/libcxl.c
> +++ b/cxl/lib/libcxl.c
> @@ -8,6 +8,7 @@
> #include <stdlib.h>
> #include <dirent.h>
> #include <unistd.h>
> +#include <mntent.h>
> #include <sys/mman.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> @@ -54,6 +55,7 @@ struct cxl_ctx {
> struct kmod_ctx *kmod_ctx;
> struct daxctl_ctx *daxctl_ctx;
> void *private_data;
> + const char *debugfs;
Do you want this const? Later we alloc and eventually free it.
> };
>
> static void free_pmem(struct cxl_pmem *pmem)
> @@ -240,6 +242,28 @@ CXL_EXPORT void *cxl_get_private_data(struct cxl_ctx *ctx)
> return ctx->private_data;
> }
>
> +static const char* get_debugfs_dir(void)
drop const above?
> +{
> + char *debugfs_dir = NULL;
> + struct mntent *ent;
> + FILE *mntf;
> +
> + mntf = setmntent("/proc/mounts", "r");
> + if (!mntf)
> + return NULL;
> +
> + while ((ent = getmntent(mntf)) != NULL) {
> + if (!strcmp(ent->mnt_type, "debugfs")) {
include <string.h>
> + debugfs_dir = calloc(strlen(ent->mnt_dir) + 1, 1);
> + strcpy(debugfs_dir, ent->mnt_dir);
perhaps -
debugfs_dir = strdup(ent->mnt_dir);
> + break;
> + }
> + }
> +
> + endmntent(mntf);
> + return debugfs_dir;
> +}
snip
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 1/7] libcxl: Add debugfs path to CXL context
2025-12-17 4:26 ` Alison Schofield
@ 2025-12-17 19:55 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:55 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:26 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:24PM -0600, Ben Cheatham wrote:
>> Find the CXL debugfs mount point and add it to the CXL library context.
>> This will be used by poison and procotol error library functions to
>> access the information presented by the filesystem.
>>
>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
>> ---
>> cxl/lib/libcxl.c | 26 ++++++++++++++++++++++++++
>> 1 file changed, 26 insertions(+)
>>
>> diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
>> index cafde1c..71eff6d 100644
>> --- a/cxl/lib/libcxl.c
>> +++ b/cxl/lib/libcxl.c
>> @@ -8,6 +8,7 @@
>> #include <stdlib.h>
>> #include <dirent.h>
>> #include <unistd.h>
>> +#include <mntent.h>
>> #include <sys/mman.h>
>> #include <sys/stat.h>
>> #include <sys/types.h>
>> @@ -54,6 +55,7 @@ struct cxl_ctx {
>> struct kmod_ctx *kmod_ctx;
>> struct daxctl_ctx *daxctl_ctx;
>> void *private_data;
>> + const char *debugfs;
>
> Do you want this const? Later we alloc and eventually free it.
I would expect it to only be initialized once, so my initial instinct was to mark
it const. The actual value it points to isn't const though, so it doesn't make too much
sense. I'll drop it.
>
>
>> };
>>
>> static void free_pmem(struct cxl_pmem *pmem)
>> @@ -240,6 +242,28 @@ CXL_EXPORT void *cxl_get_private_data(struct cxl_ctx *ctx)
>> return ctx->private_data;
>> }
>>
>> +static const char* get_debugfs_dir(void)
>
> drop const above?
Yeah, I'll get rid of it.
>
>
>
>> +{
>> + char *debugfs_dir = NULL;
>> + struct mntent *ent;
>> + FILE *mntf;
>> +
>> + mntf = setmntent("/proc/mounts", "r");
>> + if (!mntf)
>> + return NULL;
>> +
>> + while ((ent = getmntent(mntf)) != NULL) {
>> + if (!strcmp(ent->mnt_type, "debugfs")) {
>
> include <string.h>
Sure.
>
>
>> + debugfs_dir = calloc(strlen(ent->mnt_dir) + 1, 1);
>> + strcpy(debugfs_dir, ent->mnt_dir);
>
> perhaps -
> debugfs_dir = strdup(ent->mnt_dir);
>
I forgot about strdup() (I rarely do userspace C). I'll update it.
>
>
>
>> + break;
>> + }
>> + }
>> +
>> + endmntent(mntf);
>> + return debugfs_dir;
>> +}
>
> snip
>
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH v5 2/7] libcxl: Add CXL protocol errors
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
2025-12-15 21:36 ` [PATCH v5 1/7] libcxl: Add debugfs path to CXL context Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-17 4:31 ` Alison Schofield
2025-12-19 4:38 ` Alison Schofield
2025-12-15 21:36 ` [PATCH v5 3/7] libcxl: Add poison injection support Ben Cheatham
` (6 subsequent siblings)
8 siblings, 2 replies; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
The v6.11 Linux kernel adds CXL protocol (CXL.cache & CXL.mem) error
injection for platforms that implement the error types according to
the v6.5+ ACPI specification. The interface for injecting these errors
is provided by the kernel under the CXL debugfs. The relevant files in
the interface are the einj_types file, which provides the available CXL
error types for injection, and the einj_inject file, which injects the
error into a CXL VH root port or CXL RCH downstream port.
Add a library API to retrieve the CXL error types and inject them. This
API will be used in a later commit by the 'cxl-inject-error' and
'cxl-list' commands.
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
cxl/lib/libcxl.c | 193 +++++++++++++++++++++++++++++++++++++++++++++
cxl/lib/libcxl.sym | 5 ++
cxl/lib/private.h | 14 ++++
cxl/libcxl.h | 13 +++
4 files changed, 225 insertions(+)
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index 71eff6d..af34db0 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -47,11 +47,13 @@ struct cxl_ctx {
void *userdata;
int memdevs_init;
int buses_init;
+ int perrors_init;
unsigned long timeout;
struct udev *udev;
struct udev_queue *udev_queue;
struct list_head memdevs;
struct list_head buses;
+ struct list_head perrors;
struct kmod_ctx *kmod_ctx;
struct daxctl_ctx *daxctl_ctx;
void *private_data;
@@ -206,6 +208,14 @@ static void free_bus(struct cxl_bus *bus, struct list_head *head)
free(bus);
}
+static void free_protocol_error(struct cxl_protocol_error *perror,
+ struct list_head *head)
+{
+ if (head)
+ list_del_from(head, &perror->list);
+ free(perror);
+}
+
/**
* cxl_get_userdata - retrieve stored data pointer from library context
* @ctx: cxl library context
@@ -314,6 +324,7 @@ CXL_EXPORT int cxl_new(struct cxl_ctx **ctx)
*ctx = c;
list_head_init(&c->memdevs);
list_head_init(&c->buses);
+ list_head_init(&c->perrors);
c->kmod_ctx = kmod_ctx;
c->daxctl_ctx = daxctl_ctx;
c->udev = udev;
@@ -355,6 +366,7 @@ CXL_EXPORT struct cxl_ctx *cxl_ref(struct cxl_ctx *ctx)
*/
CXL_EXPORT void cxl_unref(struct cxl_ctx *ctx)
{
+ struct cxl_protocol_error *perror, *_p;
struct cxl_memdev *memdev, *_d;
struct cxl_bus *bus, *_b;
@@ -370,6 +382,9 @@ CXL_EXPORT void cxl_unref(struct cxl_ctx *ctx)
list_for_each_safe(&ctx->buses, bus, _b, port.list)
free_bus(bus, &ctx->buses);
+ list_for_each_safe(&ctx->perrors, perror, _p, list)
+ free_protocol_error(perror, &ctx->perrors);
+
udev_queue_unref(ctx->udev_queue);
udev_unref(ctx->udev);
kmod_unref(ctx->kmod_ctx);
@@ -3402,6 +3417,184 @@ CXL_EXPORT int cxl_port_decoders_committed(struct cxl_port *port)
return port->decoders_committed;
}
+const struct cxl_protocol_error cxl_protocol_errors[] = {
+ CXL_PROTOCOL_ERROR(12, "cache-correctable"),
+ CXL_PROTOCOL_ERROR(13, "cache-uncorrectable"),
+ CXL_PROTOCOL_ERROR(14, "cache-fatal"),
+ CXL_PROTOCOL_ERROR(15, "mem-correctable"),
+ CXL_PROTOCOL_ERROR(16, "mem-uncorrectable"),
+ CXL_PROTOCOL_ERROR(17, "mem-fatal")
+};
+
+static struct cxl_protocol_error *create_cxl_protocol_error(struct cxl_ctx *ctx,
+ unsigned int n)
+{
+ struct cxl_protocol_error *perror;
+
+ for (unsigned long i = 0; i < ARRAY_SIZE(cxl_protocol_errors); i++) {
+ if (n != BIT(cxl_protocol_errors[i].num))
+ continue;
+
+ perror = calloc(1, sizeof(*perror));
+ if (!perror)
+ return NULL;
+
+ *perror = cxl_protocol_errors[i];
+ perror->ctx = ctx;
+ return perror;
+ }
+
+ return NULL;
+}
+
+static void cxl_add_protocol_errors(struct cxl_ctx *ctx)
+{
+ struct cxl_protocol_error *perror;
+ char buf[SYSFS_ATTR_SIZE];
+ char *path, *num, *save;
+ size_t path_len, len;
+ unsigned long n;
+ int rc = 0;
+
+ if (!ctx->debugfs)
+ return;
+
+ path_len = strlen(ctx->debugfs) + 100;
+ path = calloc(1, path_len);
+ if (!path)
+ return;
+
+ len = snprintf(path, path_len, "%s/cxl/einj_types", ctx->debugfs);
+ if (len >= path_len) {
+ err(ctx, "Buffer too small\n");
+ goto err;
+ }
+
+ rc = access(path, F_OK);
+ if (rc) {
+ err(ctx, "failed to access %s: %s\n", path, strerror(errno));
+ goto err;
+ }
+
+ rc = sysfs_read_attr(ctx, path, buf);
+ if (rc) {
+ err(ctx, "failed to read %s: %s\n", path, strerror(-rc));
+ goto err;
+ }
+
+ /*
+ * The format of the output of the einj_types attr is:
+ * <Error number in hex 1> <Error name 1>
+ * <Error number in hex 2> <Error name 2>
+ * ...
+ *
+ * We only need the number, so parse that and skip the rest of
+ * the line.
+ */
+ num = strtok_r(buf, " \n", &save);
+ while (num) {
+ n = strtoul(num, NULL, 16);
+ perror = create_cxl_protocol_error(ctx, n);
+ if (perror)
+ list_add(&ctx->perrors, &perror->list);
+
+ num = strtok_r(NULL, "\n", &save);
+ if (!num)
+ break;
+
+ num = strtok_r(NULL, " \n", &save);
+ }
+
+err:
+ free(path);
+}
+
+static void cxl_protocol_errors_init(struct cxl_ctx *ctx)
+{
+ if (ctx->perrors_init)
+ return;
+
+ ctx->perrors_init = 1;
+ cxl_add_protocol_errors(ctx);
+}
+
+CXL_EXPORT struct cxl_protocol_error *
+cxl_protocol_error_get_first(struct cxl_ctx *ctx)
+{
+ cxl_protocol_errors_init(ctx);
+
+ return list_top(&ctx->perrors, struct cxl_protocol_error, list);
+}
+
+CXL_EXPORT struct cxl_protocol_error *
+cxl_protocol_error_get_next(struct cxl_protocol_error *perror)
+{
+ struct cxl_ctx *ctx = perror->ctx;
+
+ return list_next(&ctx->perrors, perror, list);
+}
+
+CXL_EXPORT unsigned int
+cxl_protocol_error_get_num(struct cxl_protocol_error *perror)
+{
+ return perror->num;
+}
+
+CXL_EXPORT const char *
+cxl_protocol_error_get_str(struct cxl_protocol_error *perror)
+{
+ return perror->string;
+}
+
+/**
+ * cxl_dport_protocol_error_inject - inject a CXL protocol error at a dport
+ * @dport: downstream port whose einj_inject attribute is written
+ * @error: bit position of the error type; the value written is BIT(@error)
+ *
+ * Builds "<debugfs>/cxl/<dport devname>/einj_inject", checks that it exists,
+ * and writes the error type to it as a hex string.
+ *
+ * Returns 0 on success. Returns -ENOENT when the context has no debugfs
+ * path, -ENOMEM when the path buffer cannot be allocated or a buffer is
+ * too small, the negated errno from access() when the attribute cannot be
+ * reached, or the non-zero value returned by sysfs_write_attr() (handled
+ * here as a negative errno, as the strerror(-rc) logging does).
+ */
+CXL_EXPORT int cxl_dport_protocol_error_inject(struct cxl_dport *dport,
+					       unsigned int error)
+{
+	struct cxl_ctx *ctx = dport->port->ctx;
+	char buf[32] = { 0 };
+	size_t path_len, len;
+	char *path;
+	int rc;
+
+	if (!ctx->debugfs)
+		return -ENOENT;
+
+	path_len = strlen(ctx->debugfs) + 100;
+	path = calloc(path_len, sizeof(char));
+	if (!path)
+		return -ENOMEM;
+
+	len = snprintf(path, path_len, "%s/cxl/%s/einj_inject", ctx->debugfs,
+		       cxl_dport_get_devname(dport));
+	if (len >= path_len) {
+		err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if (access(path, F_OK)) {
+		/* save errno first: the logging call may overwrite it */
+		rc = -errno;
+		err(ctx, "failed to access %s: %s\n", path, strerror(-rc));
+		goto out;
+	}
+
+	len = snprintf(buf, sizeof(buf), "0x%lx\n", BIT(error));
+	if (len >= sizeof(buf)) {
+		err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* propagate the write result itself rather than a possibly stale errno */
+	rc = sysfs_write_attr(ctx, path, buf);
+	if (rc)
+		err(ctx, "failed to write %s: %s\n", path, strerror(-rc));
+
+out:
+	/* single exit so path is released on success as well as on failure */
+	free(path);
+	return rc;
+}
+
static void *add_cxl_bus(void *parent, int id, const char *cxlbus_base)
{
const char *devname = devpath_to_devname(cxlbus_base);
diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
index e01a676..02d5119 100644
--- a/cxl/lib/libcxl.sym
+++ b/cxl/lib/libcxl.sym
@@ -299,4 +299,9 @@ global:
LIBCXL_10 {
global:
cxl_memdev_is_port_ancestor;
+ cxl_protocol_error_get_first;
+ cxl_protocol_error_get_next;
+ cxl_protocol_error_get_num;
+ cxl_protocol_error_get_str;
+ cxl_dport_protocol_error_inject;
} LIBCXL_9;
diff --git a/cxl/lib/private.h b/cxl/lib/private.h
index 7d5a1bc..8860669 100644
--- a/cxl/lib/private.h
+++ b/cxl/lib/private.h
@@ -108,6 +108,20 @@ struct cxl_port {
struct list_head dports;
};
+struct cxl_protocol_error {
+ unsigned int num;
+ const char *string;
+ struct cxl_ctx *ctx;
+ struct list_node list;
+};
+
+#define CXL_PROTOCOL_ERROR(n, str) \
+ ((struct cxl_protocol_error){ \
+ .num = (n), \
+ .string = (str), \
+ .ctx = NULL, \
+ })
+
struct cxl_bus {
struct cxl_port port;
};
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index 54bc025..adb5716 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -496,6 +496,19 @@ int cxl_cmd_alert_config_set_enable_alert_actions(struct cxl_cmd *cmd,
int enable);
struct cxl_cmd *cxl_cmd_new_set_alert_config(struct cxl_memdev *memdev);
+struct cxl_protocol_error;
+struct cxl_protocol_error *cxl_protocol_error_get_first(struct cxl_ctx *ctx);
+struct cxl_protocol_error *
+cxl_protocol_error_get_next(struct cxl_protocol_error *perror);
+unsigned int cxl_protocol_error_get_num(struct cxl_protocol_error *perror);
+const char *cxl_protocol_error_get_str(struct cxl_protocol_error *perror);
+int cxl_dport_protocol_error_inject(struct cxl_dport *dport,
+ unsigned int error);
+
+#define cxl_protocol_error_foreach(ctx, perror) \
+ for (perror = cxl_protocol_error_get_first(ctx); perror != NULL; \
+ perror = cxl_protocol_error_get_next(perror))
+
#ifdef __cplusplus
} /* extern "C" */
#endif
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* Re: [PATCH v5 2/7] libcxl: Add CXL protocol errors
2025-12-15 21:36 ` [PATCH v5 2/7] libcxl: Add CXL protocol errors Ben Cheatham
@ 2025-12-17 4:31 ` Alison Schofield
2025-12-17 19:55 ` Cheatham, Benjamin
2025-12-19 4:38 ` Alison Schofield
1 sibling, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:31 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:25PM -0600, Ben Cheatham wrote:
snip
> diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
> index e01a676..02d5119 100644
> --- a/cxl/lib/libcxl.sym
> +++ b/cxl/lib/libcxl.sym
> @@ -299,4 +299,9 @@ global:
> LIBCXL_10 {
> global:
> cxl_memdev_is_port_ancestor;
> + cxl_protocol_error_get_first;
> + cxl_protocol_error_get_next;
> + cxl_protocol_error_get_num;
> + cxl_protocol_error_get_str;
> + cxl_dport_protocol_error_inject;
> } LIBCXL_9;
Please rebase on pending [1] so this merges nicely. These new symbols will
land in LIBCXL_11 with the new ELC symbol, all destined for the ndctl v84
release.
[1] https://github.com/pmem/ndctl/tree/pending
Thanks!
snip
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 2/7] libcxl: Add CXL protocol errors
2025-12-17 4:31 ` Alison Schofield
@ 2025-12-17 19:55 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:55 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:31 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:25PM -0600, Ben Cheatham wrote:
>
> snip
>
>> diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
>> index e01a676..02d5119 100644
>> --- a/cxl/lib/libcxl.sym
>> +++ b/cxl/lib/libcxl.sym
>> @@ -299,4 +299,9 @@ global:
>> LIBCXL_10 {
>> global:
>> cxl_memdev_is_port_ancestor;
>> + cxl_protocol_error_get_first;
>> + cxl_protocol_error_get_next;
>> + cxl_protocol_error_get_num;
>> + cxl_protocol_error_get_str;
>> + cxl_dport_protocol_error_inject;
>> } LIBCXL_9;
>
> Please rebase on pending [1] so this merges nicely. These new symbols will
> land in LIBCXL_11 with the new ELC symbol, all destined for the ndctl v84
> release.
Sorry about that. I'll rebase on pending for v6 (assuming v84 isn't out by then).
>
> [1] https://github.com/pmem/ndctl/tree/pending
>
> Thanks!
>
> snip
>
>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v5 2/7] libcxl: Add CXL protocol errors
2025-12-15 21:36 ` [PATCH v5 2/7] libcxl: Add CXL protocol errors Ben Cheatham
2025-12-17 4:31 ` Alison Schofield
@ 2025-12-19 4:38 ` Alison Schofield
2026-01-05 21:00 ` Cheatham, Benjamin
1 sibling, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-19 4:38 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:25PM -0600, Ben Cheatham wrote:
snip
>
> +const struct cxl_protocol_error cxl_protocol_errors[] = {
> + CXL_PROTOCOL_ERROR(12, "cache-correctable"),
> + CXL_PROTOCOL_ERROR(13, "cache-uncorrectable"),
> + CXL_PROTOCOL_ERROR(14, "cache-fatal"),
> + CXL_PROTOCOL_ERROR(15, "mem-correctable"),
> + CXL_PROTOCOL_ERROR(16, "mem-uncorrectable"),
> + CXL_PROTOCOL_ERROR(17, "mem-fatal")
> +};
Can the above 'num' fields be the same nums as sysfs emits?
ie. s/12/0x00001000
Then no BIT(X) needed in the look ups and reads as more obvious
mapping from sysfs, where it looks like this:
0x00001000 CXL.cache Protocol Correctable
0x00002000 CXL.cache Protocol Uncorrectable non-fatal
0x00004000 CXL.cache Protocol Uncorrectable fatal
0x00008000 CXL.mem Protocol Correctable
0x00010000 CXL.mem Protocol Uncorrectable non-fatal
0x00020000 CXL.mem Protocol Uncorrectable fatal
A spec reference for those would be useful too.
I notice that the cxl list emit of einj_types reverses the order that
is presented in sysfs. Would be nice to match.
snip
> +
> +CXL_EXPORT int cxl_dport_protocol_error_inject(struct cxl_dport *dport,
> + unsigned int error)
> +{
> + struct cxl_ctx *ctx = dport->port->ctx;
> + char buf[32] = { 0 };
> + size_t path_len, len;
> + char *path;
> + int rc;
> +
> + if (!ctx->debugfs)
> + return -ENOENT;
> +
> + path_len = strlen(ctx->debugfs) + 100;
> + path = calloc(path_len, sizeof(char));
> + if (!path)
> + return -ENOMEM;
> +
> + len = snprintf(path, path_len, "%s/cxl/%s/einj_inject", ctx->debugfs,
> + cxl_dport_get_devname(dport));
> + if (len >= path_len) {
> + err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
> + free(path);
> + return -ENOMEM;
> + }
> +
> + rc = access(path, F_OK);
> + if (rc) {
> + err(ctx, "failed to access %s: %s\n", path, strerror(errno));
> + free(path);
> + return -errno;
> + }
> +
> + len = snprintf(buf, sizeof(buf), "0x%lx\n", BIT(error));
> + if (len >= sizeof(buf)) {
> + err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
> + free(path);
> + return -ENOMEM;
> + }
> +
> + rc = sysfs_write_attr(ctx, path, buf);
> + if (rc) {
> + err(ctx, "failed to write %s: %s\n", path, strerror(-rc));
> + free(path);
> + return -errno;
> + }
Coverity scan reports missing free(path) before return.
> +
> + return 0;
> +}
> +
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 2/7] libcxl: Add CXL protocol errors
2025-12-19 4:38 ` Alison Schofield
@ 2026-01-05 21:00 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2026-01-05 21:00 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/18/2025 10:38 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:25PM -0600, Ben Cheatham wrote:
>
> snip
>
>>
>> +const struct cxl_protocol_error cxl_protocol_errors[] = {
>> + CXL_PROTOCOL_ERROR(12, "cache-correctable"),
>> + CXL_PROTOCOL_ERROR(13, "cache-uncorrectable"),
>> + CXL_PROTOCOL_ERROR(14, "cache-fatal"),
>> + CXL_PROTOCOL_ERROR(15, "mem-correctable"),
>> + CXL_PROTOCOL_ERROR(16, "mem-uncorrectable"),
>> + CXL_PROTOCOL_ERROR(17, "mem-fatal")
>> +};
>
> Can the above 'num' fields be the same nums as sysfs emits?
> ie. s/12/0x00001000
Sure! I'll change it.
>
> Then no BIT(X) needed in the look ups and reads as more obvious
> mapping from sysfs, where it looks like this:
>
> 0x00001000 CXL.cache Protocol Correctable
> 0x00002000 CXL.cache Protocol Uncorrectable non-fatal
> 0x00004000 CXL.cache Protocol Uncorrectable fatal
> 0x00008000 CXL.mem Protocol Correctable
> 0x00010000 CXL.mem Protocol Uncorrectable non-fatal
> 0x00020000 CXL.mem Protocol Uncorrectable fatal
>
> A spec reference for those would be useful too.
Will add.
>
> I notice that the cxl list emit of einj_types reverses the order that
> is presented in sysfs. Would be nice to match.
>
Yeah, I'll update it.
>
> snip
>> +
>> +CXL_EXPORT int cxl_dport_protocol_error_inject(struct cxl_dport *dport,
>> + unsigned int error)
>> +{
>> + struct cxl_ctx *ctx = dport->port->ctx;
>> + char buf[32] = { 0 };
>> + size_t path_len, len;
>> + char *path;
>> + int rc;
>> +
>> + if (!ctx->debugfs)
>> + return -ENOENT;
>> +
>> + path_len = strlen(ctx->debugfs) + 100;
>> + path = calloc(path_len, sizeof(char));
>> + if (!path)
>> + return -ENOMEM;
>> +
>> + len = snprintf(path, path_len, "%s/cxl/%s/einj_inject", ctx->debugfs,
>> + cxl_dport_get_devname(dport));
>> + if (len >= path_len) {
>> + err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
>> + free(path);
>> + return -ENOMEM;
>> + }
>> +
>> + rc = access(path, F_OK);
>> + if (rc) {
>> + err(ctx, "failed to access %s: %s\n", path, strerror(errno));
>> + free(path);
>> + return -errno;
>> + }
>> +
>> + len = snprintf(buf, sizeof(buf), "0x%lx\n", BIT(error));
>> + if (len >= sizeof(buf)) {
>> + err(ctx, "%s: buffer too small\n", cxl_dport_get_devname(dport));
>> + free(path);
>> + return -ENOMEM;
>> + }
>> +
>> + rc = sysfs_write_attr(ctx, path, buf);
>> + if (rc) {
>> + err(ctx, "failed to write %s: %s\n", path, strerror(-rc));
>> + free(path);
>> + return -errno;
>> + }
>
> Coverity scan reports missing free(path) before return.
Yep, forgot the success case :/.
Thanks,
Ben
>
>
>> +
>> + return 0;
>> +}
>> +
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH v5 3/7] libcxl: Add poison injection support
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
2025-12-15 21:36 ` [PATCH v5 1/7] libcxl: Add debugfs path to CXL context Ben Cheatham
2025-12-15 21:36 ` [PATCH v5 2/7] libcxl: Add CXL protocol errors Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-15 21:36 ` [PATCH v5 4/7] cxl: Add inject-error command Ben Cheatham
` (5 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Add a library API for clearing and injecting poison into a CXL memory
device through the CXL debugfs.
This API will be used by the 'cxl-inject-error' and 'cxl-clear-error'
commands in later commits.
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
cxl/lib/libcxl.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++
cxl/lib/libcxl.sym | 3 ++
cxl/libcxl.h | 3 ++
3 files changed, 89 insertions(+)
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index af34db0..655eef0 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -5024,3 +5024,86 @@ CXL_EXPORT struct cxl_cmd *cxl_cmd_new_set_alert_config(struct cxl_memdev *memde
{
return cxl_cmd_new_generic(memdev, CXL_MEM_COMMAND_ID_SET_ALERT_CONFIG);
}
+
+CXL_EXPORT bool cxl_memdev_has_poison_injection(struct cxl_memdev *memdev)
+{
+ struct cxl_ctx *ctx = memdev->ctx;
+ size_t path_len, len;
+ bool exists = true;
+ char *path;
+ int rc;
+
+ if (!ctx->debugfs)
+ return false;
+
+ path_len = strlen(ctx->debugfs) + 100;
+ path = calloc(path_len, sizeof(char));
+ if (!path)
+ return false;
+
+ len = snprintf(path, path_len, "%s/cxl/%s/inject_poison", ctx->debugfs,
+ cxl_memdev_get_devname(memdev));
+ if (len >= path_len) {
+ err(ctx, "%s: buffer too small\n",
+ cxl_memdev_get_devname(memdev));
+ free(path);
+ return false;
+ }
+
+ rc = access(path, F_OK);
+ if (rc)
+ exists = false;
+
+ free(path);
+ return exists;
+}
+
+/*
+ * cxl_memdev_poison_action - inject or clear poison at a device address
+ * @memdev: memory device whose debugfs attribute is written
+ * @dpa: address value written to the attribute, formatted as hex
+ * @clear: true writes to "clear_poison", false writes to "inject_poison"
+ *
+ * Builds "<debugfs>/cxl/<memdev devname>/<attribute>" and writes @dpa to it.
+ *
+ * Returns -ENOENT when the context has no debugfs path, -ENOMEM when the
+ * path buffer cannot be allocated or a buffer is too small, otherwise the
+ * value returned by sysfs_write_attr().
+ */
+static int cxl_memdev_poison_action(struct cxl_memdev *memdev, size_t dpa,
+				    bool clear)
+{
+	struct cxl_ctx *ctx = memdev->ctx;
+	size_t path_len, len;
+	char addr[32];
+	char *path;
+	int rc;
+
+	if (!ctx->debugfs)
+		return -ENOENT;
+
+	path_len = strlen(ctx->debugfs) + 100;
+	path = calloc(path_len, sizeof(char));
+	if (!path)
+		return -ENOMEM;
+
+	len = snprintf(path, path_len, "%s/cxl/%s/%s", ctx->debugfs,
+		       cxl_memdev_get_devname(memdev),
+		       clear ? "clear_poison" : "inject_poison");
+	if (len >= path_len) {
+		err(ctx, "%s: buffer too small\n",
+		    cxl_memdev_get_devname(memdev));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* dpa is a size_t, so it takes the 'z' length modifier, not 'l' */
+	len = snprintf(addr, sizeof(addr), "0x%zx\n", dpa);
+	if (len >= sizeof(addr)) {
+		err(ctx, "%s: buffer too small\n",
+		    cxl_memdev_get_devname(memdev));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = sysfs_write_attr(ctx, path, addr);
+
+out:
+	free(path);
+	return rc;
+}
+
+CXL_EXPORT int cxl_memdev_inject_poison(struct cxl_memdev *memdev, size_t addr)
+{
+ return cxl_memdev_poison_action(memdev, addr, false);
+}
+
+CXL_EXPORT int cxl_memdev_clear_poison(struct cxl_memdev *memdev, size_t addr)
+{
+ return cxl_memdev_poison_action(memdev, addr, true);
+}
diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
index 02d5119..3bce60d 100644
--- a/cxl/lib/libcxl.sym
+++ b/cxl/lib/libcxl.sym
@@ -304,4 +304,7 @@ global:
cxl_protocol_error_get_num;
cxl_protocol_error_get_str;
cxl_dport_protocol_error_inject;
+ cxl_memdev_has_poison_injection;
+ cxl_memdev_inject_poison;
+ cxl_memdev_clear_poison;
} LIBCXL_9;
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index adb5716..56cba8f 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -105,6 +105,9 @@ int cxl_memdev_read_label(struct cxl_memdev *memdev, void *buf, size_t length,
size_t offset);
int cxl_memdev_write_label(struct cxl_memdev *memdev, void *buf, size_t length,
size_t offset);
+bool cxl_memdev_has_poison_injection(struct cxl_memdev *memdev);
+int cxl_memdev_inject_poison(struct cxl_memdev *memdev, size_t dpa);
+int cxl_memdev_clear_poison(struct cxl_memdev *memdev, size_t dpa);
struct cxl_cmd *cxl_cmd_new_get_fw_info(struct cxl_memdev *memdev);
unsigned int cxl_cmd_fw_info_get_num_slots(struct cxl_cmd *cmd);
unsigned int cxl_cmd_fw_info_get_active_slot(struct cxl_cmd *cmd);
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH v5 4/7] cxl: Add inject-error command
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (2 preceding siblings ...)
2025-12-15 21:36 ` [PATCH v5 3/7] libcxl: Add poison injection support Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-17 4:36 ` Alison Schofield
2025-12-15 21:36 ` [PATCH v5 5/7] cxl: Add clear-error command Ben Cheatham
` (4 subsequent siblings)
8 siblings, 1 reply; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Add the 'cxl-inject-error' command. This command will provide CXL
protocol error injection for CXL VH root ports and CXL RCH downstream
ports, as well as poison injection for CXL memory devices.
Add util_cxl_dport_filter() to find downstream ports by either dport id
or device name.
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
cxl/builtin.h | 1 +
cxl/cxl.c | 1 +
cxl/filter.c | 26 +++++++
cxl/filter.h | 2 +
cxl/inject-error.c | 189 +++++++++++++++++++++++++++++++++++++++++++++
cxl/meson.build | 1 +
6 files changed, 220 insertions(+)
create mode 100644 cxl/inject-error.c
diff --git a/cxl/builtin.h b/cxl/builtin.h
index c483f30..e82fcb5 100644
--- a/cxl/builtin.h
+++ b/cxl/builtin.h
@@ -25,6 +25,7 @@ int cmd_create_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_enable_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_disable_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_destroy_region(int argc, const char **argv, struct cxl_ctx *ctx);
+int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx);
#ifdef ENABLE_LIBTRACEFS
int cmd_monitor(int argc, const char **argv, struct cxl_ctx *ctx);
#else
diff --git a/cxl/cxl.c b/cxl/cxl.c
index 1643667..a98bd6b 100644
--- a/cxl/cxl.c
+++ b/cxl/cxl.c
@@ -80,6 +80,7 @@ static struct cmd_struct commands[] = {
{ "disable-region", .c_fn = cmd_disable_region },
{ "destroy-region", .c_fn = cmd_destroy_region },
{ "monitor", .c_fn = cmd_monitor },
+ { "inject-error", .c_fn = cmd_inject_error },
};
int main(int argc, const char **argv)
diff --git a/cxl/filter.c b/cxl/filter.c
index b135c04..8c7dc6e 100644
--- a/cxl/filter.c
+++ b/cxl/filter.c
@@ -171,6 +171,32 @@ util_cxl_endpoint_filter_by_port(struct cxl_endpoint *endpoint,
return NULL;
}
+struct cxl_dport *util_cxl_dport_filter(struct cxl_dport *dport,
+ const char *__ident)
+{
+
+ char *ident, *save;
+ const char *arg;
+
+ if (!__ident)
+ return dport;
+
+ ident = strdup(__ident);
+ if (!ident)
+ return NULL;
+
+ for (arg = strtok_r(ident, which_sep(__ident), &save); arg;
+ arg = strtok_r(NULL, which_sep(__ident), &save)) {
+ if (strcmp(arg, cxl_dport_get_devname(dport)) == 0)
+ break;
+ }
+
+ free(ident);
+ if (arg)
+ return dport;
+ return NULL;
+}
+
static struct cxl_decoder *
util_cxl_decoder_filter_by_port(struct cxl_decoder *decoder, const char *ident,
enum cxl_port_filter_mode mode)
diff --git a/cxl/filter.h b/cxl/filter.h
index 956a46e..70463c4 100644
--- a/cxl/filter.h
+++ b/cxl/filter.h
@@ -55,6 +55,8 @@ enum cxl_port_filter_mode {
struct cxl_port *util_cxl_port_filter(struct cxl_port *port, const char *ident,
enum cxl_port_filter_mode mode);
+struct cxl_dport *util_cxl_dport_filter(struct cxl_dport *dport,
+ const char *__ident);
struct cxl_bus *util_cxl_bus_filter(struct cxl_bus *bus, const char *__ident);
struct cxl_endpoint *util_cxl_endpoint_filter(struct cxl_endpoint *endpoint,
const char *__ident);
diff --git a/cxl/inject-error.c b/cxl/inject-error.c
new file mode 100644
index 0000000..c0a9eeb
--- /dev/null
+++ b/cxl/inject-error.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 AMD. All rights reserved. */
+#include <util/parse-options.h>
+#include <cxl/libcxl.h>
+#include <cxl/filter.h>
+#include <util/log.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <limits.h>
+
+/* set by --debug; the option is only registered when ENABLE_DEBUG is defined */
+static bool debug;
+
+/* raw option strings, written by parse_options() via inject_options[] */
+static struct inject_params {
+	const char *type;	/* -t/--type */
+	const char *address;	/* -a/--address */
+} inj_param;
+
+static const struct option inject_options[] = {
+	OPT_STRING('t', "type", &inj_param.type, "Error type",
+		   "Error type to inject into <device>"),
+	OPT_STRING('a', "address", &inj_param.address, "Address for poison injection",
+		   "Device physical address for poison injection in hex or decimal"),
+#ifdef ENABLE_DEBUG
+	OPT_BOOLEAN(0, "debug", &debug, "turn on debug output"),
+#endif
+	OPT_END(),
+};
+
+/* log context used by every helper in this file */
+static struct log_ctx iel;
+
+/*
+ * Look up the protocol error whose string form equals @type.
+ * Logs an error and returns NULL when nothing matches.
+ */
+static struct cxl_protocol_error *find_cxl_proto_err(struct cxl_ctx *ctx,
+						      const char *type)
+{
+	struct cxl_protocol_error *entry;
+
+	cxl_protocol_error_foreach(ctx, entry) {
+		const char *name = cxl_protocol_error_get_str(entry);
+
+		if (!strcmp(type, name))
+			return entry;
+	}
+
+	log_err(&iel, "Invalid CXL protocol error type: %s\n", type);
+	return NULL;
+}
+
+/*
+ * Return the first dport accepted by util_cxl_dport_filter() for @devname,
+ * visiting each bus in @ctx, the ports yielded by cxl_port_foreach_all()
+ * for that bus's port, and each port's dports in turn.
+ * Logs an error and returns NULL when no dport is accepted.
+ */
+static struct cxl_dport *find_cxl_dport(struct cxl_ctx *ctx, const char *devname)
+{
+	struct cxl_bus *bus;
+	struct cxl_port *port;
+	struct cxl_dport *candidate;
+
+	cxl_bus_foreach(ctx, bus) {
+		cxl_port_foreach_all(cxl_bus_get_port(bus), port) {
+			cxl_dport_foreach(port, candidate) {
+				if (util_cxl_dport_filter(candidate, devname) != NULL)
+					return candidate;
+			}
+		}
+	}
+
+	log_err(&iel, "Downstream port \"%s\" not found\n", devname);
+	return NULL;
+}
+
+/*
+ * Return the first memdev in @ctx accepted by util_cxl_memdev_filter()
+ * for @filter. Logs an error and returns NULL when none is accepted.
+ */
+static struct cxl_memdev *find_cxl_memdev(struct cxl_ctx *ctx,
+					  const char *filter)
+{
+	struct cxl_memdev *candidate;
+
+	cxl_memdev_foreach(ctx, candidate) {
+		if (util_cxl_memdev_filter(candidate, filter, NULL) != NULL)
+			return candidate;
+	}
+
+	log_err(&iel, "Memdev \"%s\" not found\n", filter);
+	return NULL;
+}
+
+/*
+ * inject_proto_err() - inject a CXL protocol error into a downstream port
+ * @ctx: libcxl context used for the dport lookup
+ * @devname: dport name filter; must not be NULL
+ * @perror: protocol error to inject
+ *
+ * Returns 0 on success, -EINVAL when @devname is NULL, -ENODEV when no dport
+ * matches, otherwise the value of cxl_dport_protocol_error_inject().
+ */
+static int inject_proto_err(struct cxl_ctx *ctx, const char *devname,
+			    struct cxl_protocol_error *perror)
+{
+	struct cxl_dport *target;
+	int err;
+
+	if (devname == NULL) {
+		log_err(&iel, "No downstream port specified for injection\n");
+		return -EINVAL;
+	}
+
+	target = find_cxl_dport(ctx, devname);
+	if (target == NULL)
+		return -ENODEV;
+
+	err = cxl_dport_protocol_error_inject(target,
+					      cxl_protocol_error_get_num(perror));
+	if (err == 0)
+		log_info(&iel, "injected %s protocol error.\n",
+			 cxl_protocol_error_get_str(perror));
+
+	return err;
+}
+
+/*
+ * poison_action() - inject poison at a device physical address
+ * @ctx: libcxl context used for the memdev lookup
+ * @filter: memdev filter string handed to util_cxl_memdev_filter()
+ * @addr_str: address as text; strtoull() base 0 accepts a 0x prefix for hex,
+ *	      a leading 0 for octal, and decimal otherwise
+ *
+ * Returns 0 on success, -ENODEV when no memdev matches @filter, -EINVAL when
+ * the memdev lacks poison injection or the address is missing or malformed,
+ * otherwise the value of cxl_memdev_inject_poison().
+ */
+static int poison_action(struct cxl_ctx *ctx, const char *filter,
+			 const char *addr_str)
+{
+	struct cxl_memdev *memdev;
+	unsigned long long addr;
+	char *end;
+	int rc;
+
+	memdev = find_cxl_memdev(ctx, filter);
+	if (!memdev)
+		return -ENODEV;
+
+	if (!cxl_memdev_has_poison_injection(memdev)) {
+		log_err(&iel, "%s does not support error injection\n",
+			cxl_memdev_get_devname(memdev));
+		return -EINVAL;
+	}
+
+	if (!addr_str) {
+		log_err(&iel, "no address provided\n");
+		return -EINVAL;
+	}
+
+	/* strtoull() only writes errno on failure, so clear any stale value */
+	errno = 0;
+	addr = strtoull(addr_str, &end, 0);
+	/*
+	 * Reject out-of-range values, strings with no digits (which would
+	 * otherwise parse as address 0) and trailing garbage.
+	 */
+	if (errno || end == addr_str || *end != '\0') {
+		log_err(&iel, "invalid address %s\n", addr_str);
+		return -EINVAL;
+	}
+
+	rc = cxl_memdev_inject_poison(memdev, addr);
+	if (rc)
+		log_err(&iel, "failed to inject poison at %s:%s: %s\n",
+			cxl_memdev_get_devname(memdev), addr_str, strerror(-rc));
+	else
+		log_info(&iel, "poison injected at %s:%s\n",
+			 cxl_memdev_get_devname(memdev), addr_str);
+
+	return rc;
+}
+
+/*
+ * inject_action() - parse the command line and dispatch the injection
+ * @argc: argument count as passed to the command
+ * @argv: argument vector; after option parsing argv[0] is the device
+ * @ctx: libcxl context
+ * @options: option table handed to parse_options()
+ * @usage: usage string shown on bad syntax
+ *
+ * A type of "poison" is routed to poison_action(); any other type is looked
+ * up as a CXL protocol error and injected through inject_proto_err().
+ *
+ * Returns 0 on success. Returns -EINVAL on bad syntax, a missing --type or
+ * an unknown protocol error type, otherwise the error from the chosen
+ * injection path.
+ */
+static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
+			 const struct option *options, const char *usage)
+{
+	struct cxl_protocol_error *perr;
+	const char * const u[] = {
+		usage,
+		NULL
+	};
+	int rc = -EINVAL;
+
+	log_init(&iel, "cxl inject-error", "CXL_INJECT_LOG");
+	argc = parse_options(argc, argv, options, u, 0);
+
+	if (debug) {
+		cxl_set_log_priority(ctx, LOG_DEBUG);
+		iel.log_priority = LOG_DEBUG;
+	} else {
+		iel.log_priority = LOG_INFO;
+	}
+
+	/* exactly one positional argument (the device) is expected */
+	if (argc != 1) {
+		usage_with_options(u, options);
+		return rc;
+	}
+
+	/* --type stays NULL when omitted; strcmp() on it below would crash */
+	if (!inj_param.type) {
+		log_err(&iel, "no error type specified\n");
+		usage_with_options(u, options);
+		return rc;
+	}
+
+	if (strcmp(inj_param.type, "poison") == 0) {
+		rc = poison_action(ctx, argv[0], inj_param.address);
+		return rc;
+	}
+
+	perr = find_cxl_proto_err(ctx, inj_param.type);
+	if (perr) {
+		rc = inject_proto_err(ctx, argv[0], perr);
+		if (rc)
+			log_err(&iel, "Failed to inject error: %d\n", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Entry point for 'cxl inject-error': runs inject_action() with the
+ * inject option table and maps its result onto a process exit status.
+ */
+int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx)
+{
+	if (inject_action(argc, argv, ctx, inject_options,
+			  "inject-error <device> [<options>]") != 0)
+		return EXIT_FAILURE;
+
+	return EXIT_SUCCESS;
+}
diff --git a/cxl/meson.build b/cxl/meson.build
index b9924ae..92031b5 100644
--- a/cxl/meson.build
+++ b/cxl/meson.build
@@ -7,6 +7,7 @@ cxl_src = [
'memdev.c',
'json.c',
'filter.c',
+ 'inject-error.c',
'../daxctl/json.c',
'../daxctl/filter.c',
]
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* Re: [PATCH v5 4/7] cxl: Add inject-error command
2025-12-15 21:36 ` [PATCH v5 4/7] cxl: Add inject-error command Ben Cheatham
@ 2025-12-17 4:36 ` Alison Schofield
2025-12-17 19:56 ` Cheatham, Benjamin
0 siblings, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:36 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:27PM -0600, Ben Cheatham wrote:
> Add the 'cxl-inject-error' command. This command will provide CXL
> protocol error injection for CXL VH root ports and CXL RCH downstream
> ports, as well as poison injection for CXL memory devices.
>
> Add util_cxl_dport_filter() to find downstream ports by either dport id
> or device name.
Does above comment match code? Does util_cxl_dport_filter() match by
'id' or 'name' ?
snip
> +#define EINJ_TYPES_BUF_SIZE 512
above appears unused
snip
> +static int poison_action(struct cxl_ctx *ctx, const char *filter,
> + const char *addr_str)
> +{
> + struct cxl_memdev *memdev;
> + size_t addr;
> + int rc;
> +
> + memdev = find_cxl_memdev(ctx, filter);
> + if (!memdev)
> + return -ENODEV;
> +
> + if (!cxl_memdev_has_poison_injection(memdev)) {
> + log_err(&iel, "%s does not support error injection\n",
> + cxl_memdev_get_devname(memdev));
> + return -EINVAL;
> + }
> +
> + if (!addr_str) {
> + log_err(&iel, "no address provided\n");
> + return -EINVAL;
> + }
> +
> + addr = strtoull(addr_str, NULL, 0);
> + if (addr == ULLONG_MAX && errno == ERANGE) {
> + log_err(&iel, "invalid address %s", addr_str);
> + return -EINVAL;
> + }
errno best set to 0 before strtoull
there is a type mismatch btw addr of size_t and strtoull
snip
> +static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
> + const struct option *options, const char *usage)
> +{
> + struct cxl_protocol_error *perr;
> + const char * const u[] = {
> + usage,
> + NULL
> + };
> + int rc = -EINVAL;
> +
> + log_init(&iel, "cxl inject-error", "CXL_INJECT_LOG");
> + argc = parse_options(argc, argv, options, u, 0);
> +
> + if (debug) {
> + cxl_set_log_priority(ctx, LOG_DEBUG);
> + iel.log_priority = LOG_DEBUG;
> + } else {
> + iel.log_priority = LOG_INFO;
> + }
> +
> + if (argc != 1) {
> + usage_with_options(u, options);
> + return rc;
> + }
The above catches bad syntax like this where I omit type:
# cxl inject-error mem10 -t -a 0x0
We also need to catch this where I omit the option altogether:
# cxl inject-error mem10 -a 0x0
Segmentation fault (core dumped)
> +
> + if (strcmp(inj_param.type, "poison") == 0) {
> + rc = poison_action(ctx, argv[0], inj_param.address);
> + return rc;
> + }
snip
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 4/7] cxl: Add inject-error command
2025-12-17 4:36 ` Alison Schofield
@ 2025-12-17 19:56 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:56 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:36 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:27PM -0600, Ben Cheatham wrote:
>> Add the 'cxl-inject-error' command. This command will provide CXL
>> protocol error injection for CXL VH root ports and CXL RCH downstream
>> ports, as well as poison injection for CXL memory devices.
>>
>> Add util_cxl_dport_filter() to find downstream ports by either dport id
>> or device name.
>
> Does above comment match code? Does util_cxl_dport_filter() match by
> 'id' or 'name' ?
It should just be 'name'; I forgot to update the commit message from v4.
>
>
> snip
>
>> +#define EINJ_TYPES_BUF_SIZE 512
>
> above appears unused
It is, I replaced it with SYSFS_ATTR_SIZE IIRC. I'll remove it.
>
>
> snip
>
>> +static int poison_action(struct cxl_ctx *ctx, const char *filter,
>> + const char *addr_str)
>> +{
>> + struct cxl_memdev *memdev;
>> + size_t addr;
>> + int rc;
>> +
>> + memdev = find_cxl_memdev(ctx, filter);
>> + if (!memdev)
>> + return -ENODEV;
>> +
>> + if (!cxl_memdev_has_poison_injection(memdev)) {
>> + log_err(&iel, "%s does not support error injection\n",
>> + cxl_memdev_get_devname(memdev));
>> + return -EINVAL;
>> + }
>> +
>> + if (!addr_str) {
>> + log_err(&iel, "no address provided\n");
>> + return -EINVAL;
>> + }
>> +
>> + addr = strtoull(addr_str, NULL, 0);
>> + if (addr == ULLONG_MAX && errno == ERANGE) {
>> + log_err(&iel, "invalid address %s", addr_str);
>> + return -EINVAL;
>> + }
>
> errno best set to 0 before strtoull
> there is a type mismatch btw addr of size_t and strtoull
Got it, I'll fix those.
>
> snip
>
>> +static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
>> + const struct option *options, const char *usage)
>> +{
>> + struct cxl_protocol_error *perr;
>> + const char * const u[] = {
>> + usage,
>> + NULL
>> + };
>> + int rc = -EINVAL;
>> +
>> + log_init(&iel, "cxl inject-error", "CXL_INJECT_LOG");
>> + argc = parse_options(argc, argv, options, u, 0);
>> +
>> + if (debug) {
>> + cxl_set_log_priority(ctx, LOG_DEBUG);
>> + iel.log_priority = LOG_DEBUG;
>> + } else {
>> + iel.log_priority = LOG_INFO;
>> + }
>> +
>> + if (argc != 1) {
>> + usage_with_options(u, options);
>> + return rc;
>> + }
>
> The above catches bad syntax like this where I omit type:
> # cxl inject-error mem10 -t -a 0x0
>
> We also need to catch this where I omit the option altogether:
> # cxl inject-error mem10 -a 0x0
> Segmentation fault (core dumped)
Good point. I'll change it to catch omitted required options as well.
>
>> +
>> + if (strcmp(inj_param.type, "poison") == 0) {
>> + rc = poison_action(ctx, argv[0], inj_param.address);
>> + return rc;
>> + }
>
> snip
>
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH v5 5/7] cxl: Add clear-error command
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (3 preceding siblings ...)
2025-12-15 21:36 ` [PATCH v5 4/7] cxl: Add inject-error command Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-17 4:39 ` Alison Schofield
2025-12-15 21:36 ` [PATCH v5 6/7] cxl/list: Add injectable errors in output Ben Cheatham
` (3 subsequent siblings)
8 siblings, 1 reply; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Add the 'cxl-clear-error' command. This command allows the user to clear
device poison from CXL memory devices.
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
cxl/builtin.h | 1 +
cxl/cxl.c | 1 +
cxl/inject-error.c | 70 ++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/cxl/builtin.h b/cxl/builtin.h
index e82fcb5..68ed1de 100644
--- a/cxl/builtin.h
+++ b/cxl/builtin.h
@@ -26,6 +26,7 @@ int cmd_enable_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_disable_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_destroy_region(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx);
+int cmd_clear_error(int argc, const char **argv, struct cxl_ctx *ctx);
#ifdef ENABLE_LIBTRACEFS
int cmd_monitor(int argc, const char **argv, struct cxl_ctx *ctx);
#else
diff --git a/cxl/cxl.c b/cxl/cxl.c
index a98bd6b..e1740b5 100644
--- a/cxl/cxl.c
+++ b/cxl/cxl.c
@@ -81,6 +81,7 @@ static struct cmd_struct commands[] = {
{ "destroy-region", .c_fn = cmd_destroy_region },
{ "monitor", .c_fn = cmd_monitor },
{ "inject-error", .c_fn = cmd_inject_error },
+ { "clear-error", .c_fn = cmd_clear_error },
};
int main(int argc, const char **argv)
diff --git a/cxl/inject-error.c b/cxl/inject-error.c
index c0a9eeb..4ba3de0 100644
--- a/cxl/inject-error.c
+++ b/cxl/inject-error.c
@@ -19,6 +19,10 @@ static struct inject_params {
const char *address;
} inj_param;
+/* option storage for 'cxl clear-error'; address is set via clear_options[] */
+static struct clear_params {
+ const char *address;
+} clear_param;
+
static const struct option inject_options[] = {
OPT_STRING('t', "type", &inj_param.type, "Error type",
"Error type to inject into <device>"),
@@ -30,6 +34,15 @@ static const struct option inject_options[] = {
OPT_END(),
};
+/*
+ * Options accepted by 'cxl clear-error'. The --debug entry is only
+ * compiled in when ENABLE_DEBUG is defined.
+ */
+static const struct option clear_options[] = {
+ OPT_STRING('a', "address", &clear_param.address, "Address for poison clearing",
+ "Device physical address to clear poison from in hex or decimal"),
+#ifdef ENABLE_DEBUG
+ OPT_BOOLEAN(0, "debug", &debug, "turn on debug output"),
+#endif
+ OPT_END(),
+};
+
static struct log_ctx iel;
static struct cxl_protocol_error *find_cxl_proto_err(struct cxl_ctx *ctx,
@@ -102,7 +115,7 @@ static int inject_proto_err(struct cxl_ctx *ctx, const char *devname,
}
static int poison_action(struct cxl_ctx *ctx, const char *filter,
- const char *addr_str)
+ const char *addr_str, bool clear)
{
struct cxl_memdev *memdev;
size_t addr;
@@ -129,12 +142,18 @@ static int poison_action(struct cxl_ctx *ctx, const char *filter,
return -EINVAL;
}
- rc = cxl_memdev_inject_poison(memdev, addr);
+ if (clear)
+ rc = cxl_memdev_clear_poison(memdev, addr);
+ else
+ rc = cxl_memdev_inject_poison(memdev, addr);
+
if (rc)
- log_err(&iel, "failed to inject poison at %s:%s: %s\n",
+ log_err(&iel, "failed to %s %s:%s: %s\n",
+ clear ? "clear poison at" : "inject point at",
cxl_memdev_get_devname(memdev), addr_str, strerror(-rc));
else
- log_info(&iel, "poison injected at %s:%s\n",
+ log_info(&iel,
+ "poison %s at %s:%s\n", clear ? "cleared" : "injected",
cxl_memdev_get_devname(memdev), addr_str);
return rc;
@@ -166,7 +185,7 @@ static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
}
if (strcmp(inj_param.type, "poison") == 0) {
- rc = poison_action(ctx, argv[0], inj_param.address);
+ rc = poison_action(ctx, argv[0], inj_param.address, false);
return rc;
}
@@ -187,3 +206,44 @@ int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx)
return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}
+
+/*
+ * clear_action() - parse the command line and clear poison from a memdev
+ * @argc: argument count as passed to the command
+ * @argv: argument vector; after option parsing argv[0] is the device
+ * @ctx: libcxl context
+ * @options: option table handed to parse_options()
+ * @usage: usage string shown on bad syntax
+ *
+ * Returns 0 on success, -EINVAL on bad syntax, otherwise the error returned
+ * by poison_action().
+ */
+static int clear_action(int argc, const char **argv, struct cxl_ctx *ctx,
+			const struct option *options, const char *usage)
+{
+	const char * const u[] = {
+		usage,
+		NULL
+	};
+	int rc = -EINVAL;
+
+	log_init(&iel, "cxl clear-error", "CXL_CLEAR_LOG");
+	argc = parse_options(argc, argv, options, u, 0);
+
+	if (debug) {
+		cxl_set_log_priority(ctx, LOG_DEBUG);
+		iel.log_priority = LOG_DEBUG;
+	} else {
+		iel.log_priority = LOG_INFO;
+	}
+
+	/* exactly one positional argument (the device) is expected */
+	if (argc != 1) {
+		usage_with_options(u, options);
+		return rc;
+	}
+
+	rc = poison_action(ctx, argv[0], clear_param.address, true);
+	if (rc)
+		log_err(&iel, "Failed to clear poison from %s: %s\n",
+			argv[0], strerror(-rc));
+
+	return rc;
+}
+
+/*
+ * Entry point for 'cxl clear-error': runs clear_action() with the clear
+ * option table and maps its result onto a process exit status.
+ */
+int cmd_clear_error(int argc, const char **argv, struct cxl_ctx *ctx)
+{
+	if (clear_action(argc, argv, ctx, clear_options,
+			 "clear-error <device> [<options>]") != 0)
+		return EXIT_FAILURE;
+
+	return EXIT_SUCCESS;
+}
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* Re: [PATCH v5 5/7] cxl: Add clear-error command
2025-12-15 21:36 ` [PATCH v5 5/7] cxl: Add clear-error command Ben Cheatham
@ 2025-12-17 4:39 ` Alison Schofield
2025-12-17 19:56 ` Cheatham, Benjamin
0 siblings, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:39 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:28PM -0600, Ben Cheatham wrote:
snip
> static int poison_action(struct cxl_ctx *ctx, const char *filter,
> - const char *addr_str)
> + const char *addr_str, bool clear)
> {
> struct cxl_memdev *memdev;
> size_t addr;
> @@ -129,12 +142,18 @@ static int poison_action(struct cxl_ctx *ctx, const char *filter,
> return -EINVAL;
> }
>
> - rc = cxl_memdev_inject_poison(memdev, addr);
> + if (clear)
> + rc = cxl_memdev_clear_poison(memdev, addr);
> + else
> + rc = cxl_memdev_inject_poison(memdev, addr);
> +
> if (rc)
> - log_err(&iel, "failed to inject poison at %s:%s: %s\n",
> + log_err(&iel, "failed to %s %s:%s: %s\n",
> + clear ? "clear poison at" : "inject point at",
s/point/poison
snip
> +static int clear_action(int argc, const char **argv, struct cxl_ctx *ctx,
> + const struct option *options, const char *usage)
> +{
> + const char * const u[] = {
> + usage,
> + NULL
> + };
> + int rc = -EINVAL;
> +
> + log_init(&iel, "cxl clear-error", "CXL_CLEAR_LOG");
> + argc = parse_options(argc, argv, options, u, 0);
> +
> + if (debug) {
> + cxl_set_log_priority(ctx, LOG_DEBUG);
> + iel.log_priority = LOG_DEBUG;
> + } else {
> + iel.log_priority = LOG_INFO;
> + }
> +
> + if (argc != 1) {
> + usage_with_options(u, options);
> + return rc;
> + }
> +
> + rc = poison_action(ctx, argv[0], clear_param.address, true);
> + if (rc) {
> + log_err(&iel, "Failed to inject poison into %s: %s\n",
s/inject/clear
> + argv[0], strerror(-rc));
> + return rc;
> + }
> +
> + return rc;
> +}
snip
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 5/7] cxl: Add clear-error command
2025-12-17 4:39 ` Alison Schofield
@ 2025-12-17 19:56 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:56 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:39 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:28PM -0600, Ben Cheatham wrote:
>
> snip
>
>> static int poison_action(struct cxl_ctx *ctx, const char *filter,
>> - const char *addr_str)
>> + const char *addr_str, bool clear)
>> {
>> struct cxl_memdev *memdev;
>> size_t addr;
>> @@ -129,12 +142,18 @@ static int poison_action(struct cxl_ctx *ctx, const char *filter,
>> return -EINVAL;
>> }
>>
>> - rc = cxl_memdev_inject_poison(memdev, addr);
>> + if (clear)
>> + rc = cxl_memdev_clear_poison(memdev, addr);
>> + else
>> + rc = cxl_memdev_inject_poison(memdev, addr);
>> +
>> if (rc)
>> - log_err(&iel, "failed to inject poison at %s:%s: %s\n",
>> + log_err(&iel, "failed to %s %s:%s: %s\n",
>> + clear ? "clear poison at" : "inject point at",
>
> s/point/poison
Good catch, don't know how I did that :/.
>
> snip
>
>> +static int clear_action(int argc, const char **argv, struct cxl_ctx *ctx,
>> + const struct option *options, const char *usage)
>> +{
>> + const char * const u[] = {
>> + usage,
>> + NULL
>> + };
>> + int rc = -EINVAL;
>> +
>> + log_init(&iel, "cxl clear-error", "CXL_CLEAR_LOG");
>> + argc = parse_options(argc, argv, options, u, 0);
>> +
>> + if (debug) {
>> + cxl_set_log_priority(ctx, LOG_DEBUG);
>> + iel.log_priority = LOG_DEBUG;
>> + } else {
>> + iel.log_priority = LOG_INFO;
>> + }
>> +
>> + if (argc != 1) {
>> + usage_with_options(u, options);
>> + return rc;
>> + }
>> +
>> + rc = poison_action(ctx, argv[0], clear_param.address, true);
>> + if (rc) {
>> + log_err(&iel, "Failed to inject poison into %s: %s\n",
>
> s/inject/clear
Will fix.
>
>
>> + argv[0], strerror(-rc));
>> + return rc;
>> + }
>> +
>> + return rc;
>> +}
>
> snip
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH v5 6/7] cxl/list: Add injectable errors in output
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (4 preceding siblings ...)
2025-12-15 21:36 ` [PATCH v5 5/7] cxl: Add clear-error command Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-15 21:36 ` [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands Ben Cheatham
` (2 subsequent siblings)
8 siblings, 0 replies; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Add the "--injectable-errors"/"-N" option to show injectable error
information for CXL devices. The applicable devices are CXL memory
devices and CXL busses.
For CXL memory devices the option reports whether the device supports
poison injection (the "--media-errors"/"-L" option shows injected
poison).
For CXL busses the option shows injectable CXL protocol error types. The
information will be the same across busses because the error types are
system-wide. The information is presented under the bus for easier
filtering.
Update the man page for 'cxl-list' to show the usage of the new option.
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
Documentation/cxl/cxl-list.txt | 35 +++++++++++++++++++++++++++++++++-
cxl/filter.h | 3 +++
cxl/json.c | 30 +++++++++++++++++++++++++++++
cxl/list.c | 3 +++
util/json.h | 1 +
5 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
index 0595638..35ff542 100644
--- a/Documentation/cxl/cxl-list.txt
+++ b/Documentation/cxl/cxl-list.txt
@@ -471,6 +471,38 @@ The media-errors option is only available with '-Dlibtracefs=enabled'.
}
----
+-N::
+--injectable-errors::
+ Include injectable error information in the output. For CXL memory devices
+ this includes whether poison is injectable through the kernel debug filesystem.
+ The types of CXL protocol errors available for injection into downstream ports
+ are listed as part of a CXL bus object.
+
+----
+# cxl list -NB
+[
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "injectable_protocol_errors":[
+ "mem-correctable",
+ "mem-fatal"
+ ]
+ }
+]
+
+# cxl list -N
+[
+ {
+ "memdev":"mem0",
+ "pmem_size":268435456,
+ "ram_size":268435456,
+ "serial":2,
+ "poison_injectable":true
+ }
+]
+
+----
-v::
--verbose::
Increase verbosity of the output. This can be specified
@@ -487,7 +519,8 @@ The media-errors option is only available with '-Dlibtracefs=enabled'.
devices with --idle.
- *-vvv*
Everything *-vv* provides, plus enable
- --health, --partition, and --media-errors.
+ --health, --partition, --media-errors, and
+ --injectable-errors.
--debug::
If the cxl tool was built with debug enabled, turn on debug
diff --git a/cxl/filter.h b/cxl/filter.h
index 70463c4..6c5fe68 100644
--- a/cxl/filter.h
+++ b/cxl/filter.h
@@ -31,6 +31,7 @@ struct cxl_filter_params {
bool alert_config;
bool dax;
bool media_errors;
+ bool inj_errors;
int verbose;
struct log_ctx ctx;
};
@@ -93,6 +94,8 @@ static inline unsigned long cxl_filter_to_flags(struct cxl_filter_params *param)
flags |= UTIL_JSON_DAX | UTIL_JSON_DAX_DEVS;
if (param->media_errors)
flags |= UTIL_JSON_MEDIA_ERRORS;
+ if (param->inj_errors)
+ flags |= UTIL_JSON_INJ_ERRORS;
return flags;
}
diff --git a/cxl/json.c b/cxl/json.c
index bde4589..2917477 100644
--- a/cxl/json.c
+++ b/cxl/json.c
@@ -675,6 +675,12 @@ struct json_object *util_cxl_memdev_to_json(struct cxl_memdev *memdev,
json_object_object_add(jdev, "firmware", jobj);
}
+ if (flags & UTIL_JSON_INJ_ERRORS) {
+ jobj = json_object_new_boolean(cxl_memdev_has_poison_injection(memdev));
+ if (jobj)
+ json_object_object_add(jdev, "poison_injectable", jobj);
+ }
+
if (flags & UTIL_JSON_MEDIA_ERRORS) {
jobj = util_cxl_poison_list_to_json(NULL, memdev, flags);
if (jobj)
@@ -750,6 +756,8 @@ struct json_object *util_cxl_bus_to_json(struct cxl_bus *bus,
unsigned long flags)
{
const char *devname = cxl_bus_get_devname(bus);
+ struct cxl_ctx *ctx = cxl_bus_get_ctx(bus);
+ struct cxl_protocol_error *perror;
struct json_object *jbus, *jobj;
jbus = json_object_new_object();
@@ -765,6 +773,28 @@ struct json_object *util_cxl_bus_to_json(struct cxl_bus *bus,
json_object_object_add(jbus, "provider", jobj);
json_object_set_userdata(jbus, bus, NULL);
+
+ if (flags & UTIL_JSON_INJ_ERRORS) {
+ jobj = json_object_new_array();
+ if (!jobj)
+ return jbus;
+
+ cxl_protocol_error_foreach(ctx, perror)
+ {
+ struct json_object *jerr_str;
+ const char *perror_str;
+
+ perror_str = cxl_protocol_error_get_str(perror);
+
+ jerr_str = json_object_new_string(perror_str);
+ if (jerr_str)
+ json_object_array_add(jobj, jerr_str);
+ }
+
+ json_object_object_add(jbus, "injectable_protocol_errors",
+ jobj);
+ }
+
return jbus;
}
diff --git a/cxl/list.c b/cxl/list.c
index 0b25d78..a505ed6 100644
--- a/cxl/list.c
+++ b/cxl/list.c
@@ -59,6 +59,8 @@ static const struct option options[] = {
"include alert configuration information"),
OPT_BOOLEAN('L', "media-errors", &param.media_errors,
"include media-error information "),
+ OPT_BOOLEAN('N', "injectable-errors", &param.inj_errors,
+ "include injectable error information"),
OPT_INCR('v', "verbose", &param.verbose, "increase output detail"),
#ifdef ENABLE_DEBUG
OPT_BOOLEAN(0, "debug", &debug, "debug list walk"),
@@ -124,6 +126,7 @@ int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
param.alert_config = true;
param.dax = true;
param.media_errors = true;
+ param.inj_errors = true;
/* fallthrough */
case 2:
param.idle = true;
diff --git a/util/json.h b/util/json.h
index 560f845..57278cb 100644
--- a/util/json.h
+++ b/util/json.h
@@ -21,6 +21,7 @@ enum util_json_flags {
UTIL_JSON_TARGETS = (1 << 11),
UTIL_JSON_PARTITION = (1 << 12),
UTIL_JSON_ALERT_CONFIG = (1 << 13),
+ UTIL_JSON_INJ_ERRORS = (1 << 14),
};
void util_display_json_array(FILE *f_out, struct json_object *jarray,
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (5 preceding siblings ...)
2025-12-15 21:36 ` [PATCH v5 6/7] cxl/list: Add injectable errors in output Ben Cheatham
@ 2025-12-15 21:36 ` Ben Cheatham
2025-12-17 4:42 ` Alison Schofield
2025-12-19 4:52 ` Alison Schofield
2025-12-17 4:47 ` [ndctl PATCH v5 0/7] Add error injection support Alison Schofield
2025-12-17 5:00 ` [ndctl PATCH] cxl/test: use inject and clear cmds in cxl-poison.sh Alison Schofield
8 siblings, 2 replies; 28+ messages in thread
From: Ben Cheatham @ 2025-12-15 21:36 UTC (permalink / raw)
To: nvdimm; +Cc: linux-cxl, alison.schofield, dave.jiang, benjamin.cheatham
Add man pages for the 'cxl-inject-error' and 'cxl-clear-error' commands.
These man pages show usage and examples for each of their use cases.
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
Documentation/cxl/cxl-clear-error.txt | 67 +++++++++++++
Documentation/cxl/cxl-inject-error.txt | 129 +++++++++++++++++++++++++
Documentation/cxl/meson.build | 2 +
3 files changed, 198 insertions(+)
create mode 100644 Documentation/cxl/cxl-clear-error.txt
create mode 100644 Documentation/cxl/cxl-inject-error.txt
diff --git a/Documentation/cxl/cxl-clear-error.txt b/Documentation/cxl/cxl-clear-error.txt
new file mode 100644
index 0000000..ccb0e63
--- /dev/null
+++ b/Documentation/cxl/cxl-clear-error.txt
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+cxl-clear-error(1)
+==================
+
+NAME
+----
+cxl-clear-error - Clear CXL errors from CXL devices
+
+SYNOPSIS
+--------
+[verse]
+'cxl clear-error' <device name> [<options>]
+
+Clear an error from a CXL device. The types of devices supported are:
+
+"memdevs":: A CXL memory device. Memory devices are specified by device
+name ("mem0"), device id ("0") and/or host device name ("0000:35:00.0").
+
+Only device poison (viewable using the '-L'/'--media-errors' option of
+'cxl-list') can be cleared from a device using this command. For example:
+
+----
+
+# cxl list -m mem0 -L -u
+{
+ "memdev":"mem0",
+ "ram_size":"1024.00 MiB (1073.74 MB)",
+ "ram_qos_class":42,
+ "serial":"0x0",
+ "numa_node":1,
+ "host":"0000:35:00.0",
+ "media_errors":[
+ {
+ "offset":"0x1000",
+ "length":64,
+ "source":"Injected"
+ }
+ ]
+}
+
+# cxl clear-error mem0 -a 0x1000
+poison cleared at mem0:0x1000
+
+# cxl list -m mem0 -L -u
+{
+ "memdev":"mem0",
+ "ram_size":"1024.00 MiB (1073.74 MB)",
+ "ram_qos_class":42,
+ "serial":"0x0",
+ "numa_node":1,
+ "host":"0000:35:00.0",
+ "media_errors":[
+ ]
+}
+
+----
+
+OPTIONS
+-------
+-a::
+--address::
+ Device physical address (DPA) to clear poison from. Address can be specified
+ in hex or decimal. Required for clearing poison.
+
+--debug::
+ Enable debug output
diff --git a/Documentation/cxl/cxl-inject-error.txt b/Documentation/cxl/cxl-inject-error.txt
new file mode 100644
index 0000000..e1bebd7
--- /dev/null
+++ b/Documentation/cxl/cxl-inject-error.txt
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+
+cxl-inject-error(1)
+===================
+
+NAME
+----
+cxl-inject-error - Inject CXL errors into CXL devices
+
+SYNOPSIS
+--------
+[verse]
+'cxl inject-error' <device name> [<options>]
+
+Inject an error into a CXL device. The types of errors supported depend on the
+device specified. The types of devices supported are:
+
+"Downstream Ports":: A CXL RCH downstream port (dport) or a CXL VH root port.
+Eligible CXL 2.0+ ports are dports of ports at depth 1 in the output of cxl-list.
+Dports are specified by host name ("0000:0e:01.1").
+"memdevs":: A CXL memory device. Memory devices are specified by device name
+("mem0"), device id ("0"), and/or host device name ("0000:35:00.0").
+
+There are two types of errors which can be injected: CXL protocol errors
+and device poison.
+
+CXL protocol errors can only be used with downstream ports (as defined above).
+Protocol errors follow the format of "<protocol>-<severity>". For example,
+a "mem-fatal" error is a CXL.mem fatal protocol error. Protocol errors can be
+found with the '-N' option of 'cxl-list' under a CXL bus object. For example:
+
+----
+
+# cxl list -NB
+[
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "injectable_protocol_errors":[
+ "mem-correctable",
+ "mem-fatal"
+ ]
+ }
+]
+
+----
+
+CXL protocol (CXL.cache/mem) error injection requires the platform to support
+ACPI v6.5+ error injection (EINJ). In addition to platform support, the
+CONFIG_ACPI_APEI_EINJ and CONFIG_ACPI_APEI_EINJ_CXL kernel configuration options
+will need to be enabled. For more information, view the Linux kernel documentation
+on EINJ.
+
+Device poison can only be used with CXL memory devices. A device physical address
+(DPA) is required to do poison injection. DPAs range from 0 to the size of the
+device's memory, which can be found using 'cxl-list'. An example injection:
+
+----
+
+# cxl inject-error mem0 -t poison -a 0x1000
+poison injected at mem0:0x1000
+# cxl list -m mem0 -u --media-errors
+{
+ "memdev":"mem0",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0",
+ "host":"0000:0d:00.0",
+ "firmware_version":"BWFW VERSION 00",
+ "media_errors":[
+ {
+ "offset":"0x1000",
+ "length":64,
+ "source":"Injected"
+ }
+ ]
+}
+
+----
+
+Not all devices support poison injection. To see if a device supports poison injection
+through debugfs, use 'cxl-list' with the '-N' option and look for the "poison_injectable"
+attribute under the device. Example:
+
+----
+
+# cxl list -Nu -m mem0
+{
+ "memdev":"mem0",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0",
+ "host":"0000:0d:00.0",
+ "firmware_version":"BWFW VERSION 00",
+ "poison_injectable":true
+}
+
+----
+
+This command depends on the kernel debug filesystem (debugfs) to do CXL protocol
+error and device poison injection.
+
+OPTIONS
+-------
+-a::
+--address::
+ Device physical address (DPA) to use for poison injection. Address can
+ be specified in hex or decimal. Required for poison injection.
+
+-t::
+--type::
+ Type of error to inject into <device name>. The type of error is restricted
+ by device type. The following shows the possible types under their associated
+ device type(s):
+----
+
+Downstream Ports: ::
+ cache-correctable, cache-uncorrectable, cache-fatal, mem-correctable,
+ mem-fatal
+
+Memdevs: ::
+ poison
+
+----
+
+--debug::
+ Enable debug output
+
+SEE ALSO
+--------
+linkcxl:cxl-list[1]
diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
index 8085c1c..0b75eed 100644
--- a/Documentation/cxl/meson.build
+++ b/Documentation/cxl/meson.build
@@ -50,6 +50,8 @@ cxl_manpages = [
'cxl-update-firmware.txt',
'cxl-set-alert-config.txt',
'cxl-wait-sanitize.txt',
+ 'cxl-inject-error.txt',
+ 'cxl-clear-error.txt',
]
foreach man : cxl_manpages
--
2.52.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* Re: [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2025-12-15 21:36 ` [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands Ben Cheatham
@ 2025-12-17 4:42 ` Alison Schofield
2025-12-17 19:56 ` Cheatham, Benjamin
2025-12-19 4:52 ` Alison Schofield
1 sibling, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:42 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:30PM -0600, Ben Cheatham wrote:
snip
> +# cxl list -m mem0 -L -u
> +{
> + "memdev":"mem0",
> + "ram_size":"1024.00 MiB (1073.74 MB)",
> + "ram_qos_class":42,
> + "serial":"0x0",
> + "numa_node:1,
missing closing quote, should be:
"numa_node":1,
snip
> +cxl-inject-error(1)
> +===================
snip
> +Device poison can only by used with CXL memory devices. A device physical address
s/by/be
snip
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2025-12-17 4:42 ` Alison Schofield
@ 2025-12-17 19:56 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:56 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:42 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:30PM -0600, Ben Cheatham wrote:
>
> snip
>
>> +# cxl list -m mem0 -L -u
>> +{
>> + "memdev":"mem0",
>> + "ram_size":"1024.00 MiB (1073.74 MB)",
>> + "ram_qos_class":42,
>> + "serial":"0x0",
>> + "numa_node:1,
>
> missing closing quote, should be:
> "numa_node":1,
Sure.
>
> snip
>
>> +cxl-inject-error(1)
>> +===================
>
> snip
>
>> +Device poison can only by used with CXL memory devices. A device physical address
>
> s/by/be
Will fix.
>
> snip
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2025-12-15 21:36 ` [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands Ben Cheatham
2025-12-17 4:42 ` Alison Schofield
@ 2025-12-19 4:52 ` Alison Schofield
2026-01-05 21:13 ` Cheatham, Benjamin
1 sibling, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-19 4:52 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:30PM -0600, Ben Cheatham wrote:
> Add man pages for the 'cxl-inject-error' and 'cxl-clear-error' commands.
> These man pages show usage and examples for each of their use cases.
>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
> ---
> Documentation/cxl/cxl-clear-error.txt | 67 +++++++++++++
> Documentation/cxl/cxl-inject-error.txt | 129 +++++++++++++++++++++++++
> Documentation/cxl/meson.build | 2 +
> 3 files changed, 198 insertions(+)
> create mode 100644 Documentation/cxl/cxl-clear-error.txt
> create mode 100644 Documentation/cxl/cxl-inject-error.txt
snip
> diff --git a/Documentation/cxl/cxl-inject-error.txt b/Documentation/cxl/cxl-inject-error.txt
> new file mode 100644
> index 0000000..e1bebd7
> --- /dev/null
> +++ b/Documentation/cxl/cxl-inject-error.txt
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +cxl-inject-error(1)
> +===================
> +
> +NAME
> +----
> +cxl-inject-error - Inject CXL errors into CXL devices
> +
> +SYNOPSIS
> +--------
> +[verse]
> +'cxl inject-error' <device name> [<options>]
> +
> +Inject an error into a CXL device. The type of errors supported depend on the
> +device specified. The types of devices supported are:
> +
> +"Downstream Ports":: A CXL RCH downstream port (dport) or a CXL VH root port.
> +Eligible CXL 2.0+ ports are dports of ports at depth 1 in the output of cxl-list.
> +Dports are specified by host name ("0000:0e:01.1").
How are users to find that dport host?
Is there a cxl list "show me the dports where I can inject protocol errors"
incantation that we can recommend here?
I ended up looking at /sys/kernel/debug/cxl/ to find the hosts.
Would another attribute added to those dports make sense, be possible?
like is done for the poison injectable memdevs? ie 'protocol_injectable: true'
> +"memdevs":: A CXL memory device. Memory devices are specified by device name
> +("mem0"), device id ("0"), and/or host device name ("0000:35:00.0").
> +
> +There are two types of errors which can be injected: CXL protocol errors
> +and device poison.
> +
> +CXL protocol errors can only be used with downstream ports (as defined above).
> +Protocol errors follow the format of "<protocol>-<severity>". For example,
> +a "mem-fatal" error is a CXL.mem fatal protocol error. Protocol errors can be
> +found with the '-N' option of 'cxl-list' under a CXL bus object. For example:
> +
> +----
> +
> +# cxl list -NB
> +[
> + {
> + "bus":"root0",
> + "provider":"ACPI.CXL",
> + "injectable_protocol_errors":[
> + "mem-correctable",
> + "mem-fatal",
> + ]
> + }
> +]
> +
> +----
> +
> +CXL protocol (CXL.cache/mem) error injection requires the platform to support
> +ACPI v6.5+ error injection (EINJ). In addition to platform support, the
> +CONFIG_ACPI_APEI_EINJ and CONFIG_ACPI_APEI_EINJ_CXL kernel configuration options
> +will need to be enabled. For more information, view the Linux kernel documentation
> +on EINJ.
> +
> +Device poison can only by used with CXL memory devices. A device physical address
> +(DPA) is required to do poison injection. DPAs range from 0 to the size of
> +device's memory, which can be found using 'cxl-list'. An example injection:
> +
> +----
> +
> +# cxl inject-error mem0 -t poison -a 0x1000
> +poison injected at mem0:0x1000
> +# cxl list -m mem0 -u --media-errors
> +{
> + "memdev":"mem0",
> + "ram_size":"256.00 MiB (268.44 MB)",
> + "serial":"0",
> + "host":"0000:0d:00.0",
> + "firmware_version":"BWFW VERSION 00",
> + "media_errors":[
> + {
> + "offset":"0x1000",
> + "length":64,
> + "source":"Injected"
> + }
> + ]
> +}
> +
> +----
> +
> +Not all devices support poison injection. To see if a device supports poison injection
> +through debugfs, use 'cxl-list' with the '-N' option and look for the "poison-injectable"
> +attribute under the device. Example:
> +
> +----
> +
> +# cxl list -Nu -m mem0
> +{
> + "memdev":"mem0",
> + "ram_size":"256.00 MiB (268.44 MB)",
> + "serial":"0",
> + "host":"0000:0d:00.0",
> + "firmware_version":"BWFW VERSION 00",
> + "poison_injectable":true
> +}
> +
> +----
> +
> +This command depends on the kernel debug filesystem (debugfs) to do CXL protocol
> +error and device poison injection.
> +
> +OPTIONS
> +-------
> +-a::
> +--address::
> + Device physical address (DPA) to use for poison injection. Address can
> + be specified in hex or decimal. Required for poison injection.
> +
> +-t::
> +--type::
> + Type of error to inject into <device name>. The type of error is restricted
> + by device type. The following shows the possible types under their associated
> + device type(s):
> +----
> +
> +Downstream Ports: ::
> + cache-correctable, cache-uncorrectable, cache-fatal, mem-correctable,
> + mem-fatal
> +
> +Memdevs: ::
> + poison
> +
> +----
> +
> +--debug::
> + Enable debug output
> +
> +SEE ALSO
> +--------
> +linkcxl:cxl-list[1]
> diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
> index 8085c1c..0b75eed 100644
> --- a/Documentation/cxl/meson.build
> +++ b/Documentation/cxl/meson.build
> @@ -50,6 +50,8 @@ cxl_manpages = [
> 'cxl-update-firmware.txt',
> 'cxl-set-alert-config.txt',
> 'cxl-wait-sanitize.txt',
> + 'cxl-inject-error.txt',
> + 'cxl-clear-error.txt',
> ]
>
> foreach man : cxl_manpages
> --
> 2.52.0
>
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2025-12-19 4:52 ` Alison Schofield
@ 2026-01-05 21:13 ` Cheatham, Benjamin
2026-01-06 4:41 ` Alison Schofield
0 siblings, 1 reply; 28+ messages in thread
From: Cheatham, Benjamin @ 2026-01-05 21:13 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/18/2025 10:52 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:30PM -0600, Ben Cheatham wrote:
>> Add man pages for the 'cxl-inject-error' and 'cxl-clear-error' commands.
>> These man pages show usage and examples for each of their use cases.
>>
>> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
>> ---
>> Documentation/cxl/cxl-clear-error.txt | 67 +++++++++++++
>> Documentation/cxl/cxl-inject-error.txt | 129 +++++++++++++++++++++++++
>> Documentation/cxl/meson.build | 2 +
>> 3 files changed, 198 insertions(+)
>> create mode 100644 Documentation/cxl/cxl-clear-error.txt
>> create mode 100644 Documentation/cxl/cxl-inject-error.txt
>
> snip
>
>> diff --git a/Documentation/cxl/cxl-inject-error.txt b/Documentation/cxl/cxl-inject-error.txt
>> new file mode 100644
>> index 0000000..e1bebd7
>> --- /dev/null
>> +++ b/Documentation/cxl/cxl-inject-error.txt
>> @@ -0,0 +1,129 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +cxl-inject-error(1)
>> +===================
>> +
>> +NAME
>> +----
>> +cxl-inject-error - Inject CXL errors into CXL devices
>> +
>> +SYNOPSIS
>> +--------
>> +[verse]
>> +'cxl inject-error' <device name> [<options>]
>> +
>> +Inject an error into a CXL device. The type of errors supported depend on the
>> +device specified. The types of devices supported are:
>> +
>> +"Downstream Ports":: A CXL RCH downstream port (dport) or a CXL VH root port.
>> +Eligible CXL 2.0+ ports are dports of ports at depth 1 in the output of cxl-list.
>> +Dports are specified by host name ("0000:0e:01.1").
>
> How are users to find that dport host?
The user needs to know beforehand at the moment. More below.
>
> Is there a cxl list "show me the dports where i can inject protocol errors"
> incantation that we can recommend here.
>
> I ended up looking at /sys/kernel/debug/cxl/ to find the hosts.
>
> Would another attribute added to those dports make sense, be possible?
> like is done for the poison injectable memdevs? ie 'protocol_injectable: true'
Which ports support error injection depends on the CXL version of the host. For CXL 1.1
hosts it's any memory-mapped downstream port, while for 2.0+ it's only CXL root ports
(ACPI 6.5 Table 18-31).
The kernel adds a debugfs entry for all downstream ports regardless of those requirements IIRC.
Having the extra entries doesn't break anything since the platform firmware should reject invalid
injection targets, but it does add an extra hurdle for the user.
I think what I'll do here is submit a kernel patch to clean up the extra entries (needed to be done anyway)
and add a 'protocol_injectable' attribute for the downstream port when a debugfs entry exists. I'll probably
send out the kernel patch at the same time as v6.
Let me know if any of that sounds unreasonable or you'd rather I do something else!
Thanks,
Ben
>
>
>> +"memdevs":: A CXL memory device. Memory devices are specified by device name
>> +("mem0"), device id ("0"), and/or host device name ("0000:35:00.0").
>> +
>> +There are two types of errors which can be injected: CXL protocol errors
>> +and device poison.
>> +
>> +CXL protocol errors can only be used with downstream ports (as defined above).
>> +Protocol errors follow the format of "<protocol>-<severity>". For example,
>> +a "mem-fatal" error is a CXL.mem fatal protocol error. Protocol errors can be
>> +found with the '-N' option of 'cxl-list' under a CXL bus object. For example:
>> +
>> +----
>> +
>> +# cxl list -NB
>> +[
>> + {
>> + "bus":"root0",
>> + "provider":"ACPI.CXL",
>> + "injectable_protocol_errors":[
>> + "mem-correctable",
>> + "mem-fatal",
>> + ]
>> + }
>> +]
>> +
>> +----
>> +
>> +CXL protocol (CXL.cache/mem) error injection requires the platform to support
>> +ACPI v6.5+ error injection (EINJ). In addition to platform support, the
>> +CONFIG_ACPI_APEI_EINJ and CONFIG_ACPI_APEI_EINJ_CXL kernel configuration options
>> +will need to be enabled. For more information, view the Linux kernel documentation
>> +on EINJ.
>> +
>> +Device poison can only by used with CXL memory devices. A device physical address
>> +(DPA) is required to do poison injection. DPAs range from 0 to the size of
>> +device's memory, which can be found using 'cxl-list'. An example injection:
>> +
>> +----
>> +
>> +# cxl inject-error mem0 -t poison -a 0x1000
>> +poison injected at mem0:0x1000
>> +# cxl list -m mem0 -u --media-errors
>> +{
>> + "memdev":"mem0",
>> + "ram_size":"256.00 MiB (268.44 MB)",
>> + "serial":"0",
>> + "host":"0000:0d:00.0",
>> + "firmware_version":"BWFW VERSION 00",
>> + "media_errors":[
>> + {
>> + "offset":"0x1000",
>> + "length":64,
>> + "source":"Injected"
>> + }
>> + ]
>> +}
>> +
>> +----
>> +
>> +Not all devices support poison injection. To see if a device supports poison injection
>> +through debugfs, use 'cxl-list' with the '-N' option and look for the "poison-injectable"
>> +attribute under the device. Example:
>> +
>> +----
>> +
>> +# cxl list -Nu -m mem0
>> +{
>> + "memdev":"mem0",
>> + "ram_size":"256.00 MiB (268.44 MB)",
>> + "serial":"0",
>> + "host":"0000:0d:00.0",
>> + "firmware_version":"BWFW VERSION 00",
>> + "poison_injectable":true
>> +}
>> +
>> +----
>> +
>> +This command depends on the kernel debug filesystem (debugfs) to do CXL protocol
>> +error and device poison injection.
>> +
>> +OPTIONS
>> +-------
>> +-a::
>> +--address::
>> + Device physical address (DPA) to use for poison injection. Address can
>> + be specified in hex or decimal. Required for poison injection.
>> +
>> +-t::
>> +--type::
>> + Type of error to inject into <device name>. The type of error is restricted
>> + by device type. The following shows the possible types under their associated
>> + device type(s):
>> +----
>> +
>> +Downstream Ports: ::
>> + cache-correctable, cache-uncorrectable, cache-fatal, mem-correctable,
>> + mem-fatal
>> +
>> +Memdevs: ::
>> + poison
>> +
>> +----
>> +
>> +--debug::
>> + Enable debug output
>> +
>> +SEE ALSO
>> +--------
>> +linkcxl:cxl-list[1]
>> diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
>> index 8085c1c..0b75eed 100644
>> --- a/Documentation/cxl/meson.build
>> +++ b/Documentation/cxl/meson.build
>> @@ -50,6 +50,8 @@ cxl_manpages = [
>> 'cxl-update-firmware.txt',
>> 'cxl-set-alert-config.txt',
>> 'cxl-wait-sanitize.txt',
>> + 'cxl-inject-error.txt',
>> + 'cxl-clear-error.txt',
>> ]
>>
>> foreach man : cxl_manpages
>> --
>> 2.52.0
>>
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands
2026-01-05 21:13 ` Cheatham, Benjamin
@ 2026-01-06 4:41 ` Alison Schofield
0 siblings, 0 replies; 28+ messages in thread
From: Alison Schofield @ 2026-01-06 4:41 UTC (permalink / raw)
To: Cheatham, Benjamin; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Jan 05, 2026 at 03:13:03PM -0600, Cheatham, Benjamin wrote:
>
>
> On 12/18/2025 10:52 PM, Alison Schofield wrote:
> > On Mon, Dec 15, 2025 at 03:36:30PM -0600, Ben Cheatham wrote:
> >> Add man pages for the 'cxl-inject-error' and 'cxl-clear-error' commands.
> >> These man pages show usage and examples for each of their use cases.
> >>
> >> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> >> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
> >> ---
> >> Documentation/cxl/cxl-clear-error.txt | 67 +++++++++++++
> >> Documentation/cxl/cxl-inject-error.txt | 129 +++++++++++++++++++++++++
> >> Documentation/cxl/meson.build | 2 +
> >> 3 files changed, 198 insertions(+)
> >> create mode 100644 Documentation/cxl/cxl-clear-error.txt
> >> create mode 100644 Documentation/cxl/cxl-inject-error.txt
> >
> > snip
> >
> >> diff --git a/Documentation/cxl/cxl-inject-error.txt b/Documentation/cxl/cxl-inject-error.txt
> >> new file mode 100644
> >> index 0000000..e1bebd7
> >> --- /dev/null
> >> +++ b/Documentation/cxl/cxl-inject-error.txt
> >> @@ -0,0 +1,129 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +
> >> +cxl-inject-error(1)
> >> +===================
> >> +
> >> +NAME
> >> +----
> >> +cxl-inject-error - Inject CXL errors into CXL devices
> >> +
> >> +SYNOPSIS
> >> +--------
> >> +[verse]
> >> +'cxl inject-error' <device name> [<options>]
> >> +
> >> +Inject an error into a CXL device. The type of errors supported depend on the
> >> +device specified. The types of devices supported are:
> >> +
> >> +"Downstream Ports":: A CXL RCH downstream port (dport) or a CXL VH root port.
> >> +Eligible CXL 2.0+ ports are dports of ports at depth 1 in the output of cxl-list.
> >> +Dports are specified by host name ("0000:0e:01.1").
> >
> > How are users to find that dport host?
>
> The user needs to know beforehand at the moment. More below.
>
> >
> > Is there a cxl list "show me the dports where i can inject protocol errors"
> > incantation that we can recommend here.
> >
> > I ended up looking at /sys/kernel/debug/cxl/ to find the hosts.
> >
> > Would another attribute added to those dports make sense, be possible?
> > like is done for the poison injectable memdevs? ie 'protocol_injectable: true'
>
> Which ports support error injection depends on the CXL version of the host. For CXL 1.1
> hosts it's any memory-mapped downstream port, while for 2.0+ it's only CXL root ports
> (ACPI 6.5 Table 18-31).
>
> The kernel adds a debugfs entry for all downstream ports regardless of those requirements IIRC.
> Having the extra entries doesn't break anything since the platform firmware should reject invalid
> injection targets, but it does add an extra hurdle for the user.
>
> I think what I'll do here is submit a kernel patch to clean up the extra entries (needed to be done anyway)
> and add a 'protocol_injectable' attribute for the downstream port when a debugfs entry exists. I'll probably
> send out the kernel patch at the same time as v6.
>
> Let me know if any of that sounds unreasonable or you'd rather I do something else!
Ben,
That sounds good. I knew that using sysfs to help me figure out how
to use the cxl-cli command was a bad sign ;)
Don't hold back on examples in the man pages docs.
Thanks!
Alison
>
> Thanks,
> Ben
> >
> >
> >> +"memdevs":: A CXL memory device. Memory devices are specified by device name
> >> +("mem0"), device id ("0"), and/or host device name ("0000:35:00.0").
> >> +
> >> +There are two types of errors which can be injected: CXL protocol errors
> >> +and device poison.
> >> +
> >> +CXL protocol errors can only be used with downstream ports (as defined above).
> >> +Protocol errors follow the format of "<protocol>-<severity>". For example,
> >> +a "mem-fatal" error is a CXL.mem fatal protocol error. Protocol errors can be
> >> +found with the '-N' option of 'cxl-list' under a CXL bus object. For example:
> >> +
> >> +----
> >> +
> >> +# cxl list -NB
> >> +[
> >> + {
> >> + "bus":"root0",
> >> + "provider":"ACPI.CXL",
> >> + "injectable_protocol_errors":[
> >> + "mem-correctable",
> >> + "mem-fatal",
> >> + ]
> >> + }
> >> +]
> >> +
> >> +----
> >> +
> >> +CXL protocol (CXL.cache/mem) error injection requires the platform to support
> >> +ACPI v6.5+ error injection (EINJ). In addition to platform support, the
> >> +CONFIG_ACPI_APEI_EINJ and CONFIG_ACPI_APEI_EINJ_CXL kernel configuration options
> >> +will need to be enabled. For more information, view the Linux kernel documentation
> >> +on EINJ.
> >> +
> >> +Device poison can only by used with CXL memory devices. A device physical address
> >> +(DPA) is required to do poison injection. DPAs range from 0 to the size of
> >> +device's memory, which can be found using 'cxl-list'. An example injection:
> >> +
> >> +----
> >> +
> >> +# cxl inject-error mem0 -t poison -a 0x1000
> >> +poison injected at mem0:0x1000
> >> +# cxl list -m mem0 -u --media-errors
> >> +{
> >> + "memdev":"mem0",
> >> + "ram_size":"256.00 MiB (268.44 MB)",
> >> + "serial":"0",
> >> + "host":"0000:0d:00.0",
> >> + "firmware_version":"BWFW VERSION 00",
> >> + "media_errors":[
> >> + {
> >> + "offset":"0x1000",
> >> + "length":64,
> >> + "source":"Injected"
> >> + }
> >> + ]
> >> +}
> >> +
> >> +----
> >> +
> >> +Not all devices support poison injection. To see if a device supports poison injection
> >> +through debugfs, use 'cxl-list' with the '-N' option and look for the "poison-injectable"
> >> +attribute under the device. Example:
> >> +
> >> +----
> >> +
> >> +# cxl list -Nu -m mem0
> >> +{
> >> + "memdev":"mem0",
> >> + "ram_size":"256.00 MiB (268.44 MB)",
> >> + "serial":"0",
> >> + "host":"0000:0d:00.0",
> >> + "firmware_version":"BWFW VERSION 00",
> >> + "poison_injectable":true
> >> +}
> >> +
> >> +----
> >> +
> >> +This command depends on the kernel debug filesystem (debugfs) to do CXL protocol
> >> +error and device poison injection.
> >> +
> >> +OPTIONS
> >> +-------
> >> +-a::
> >> +--address::
> >> + Device physical address (DPA) to use for poison injection. Address can
> >> + be specified in hex or decimal. Required for poison injection.
> >> +
> >> +-t::
> >> +--type::
> >> + Type of error to inject into <device name>. The type of error is restricted
> >> + by device type. The following shows the possible types under their associated
> >> + device type(s):
> >> +----
> >> +
> >> +Downstream Ports: ::
> >> + cache-correctable, cache-uncorrectable, cache-fatal, mem-correctable,
> >> + mem-fatal
> >> +
> >> +Memdevs: ::
> >> + poison
> >> +
> >> +----
> >> +
> >> +--debug::
> >> + Enable debug output
> >> +
> >> +SEE ALSO
> >> +--------
> >> +linkcxl:cxl-list[1]
> >> diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
> >> index 8085c1c..0b75eed 100644
> >> --- a/Documentation/cxl/meson.build
> >> +++ b/Documentation/cxl/meson.build
> >> @@ -50,6 +50,8 @@ cxl_manpages = [
> >> 'cxl-update-firmware.txt',
> >> 'cxl-set-alert-config.txt',
> >> 'cxl-wait-sanitize.txt',
> >> + 'cxl-inject-error.txt',
> >> + 'cxl-clear-error.txt',
> >> ]
> >>
> >> foreach man : cxl_manpages
> >> --
> >> 2.52.0
> >>
>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [ndctl PATCH v5 0/7] Add error injection support
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (6 preceding siblings ...)
2025-12-15 21:36 ` [PATCH v5 7/7] Documentation: Add docs for inject/clear-error commands Ben Cheatham
@ 2025-12-17 4:47 ` Alison Schofield
2025-12-17 19:56 ` Cheatham, Benjamin
2025-12-17 5:00 ` [ndctl PATCH] cxl/test: use inject and clear cmds in cxl-poison.sh Alison Schofield
8 siblings, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 4:47 UTC (permalink / raw)
To: Ben Cheatham; +Cc: nvdimm, linux-cxl, dave.jiang
On Mon, Dec 15, 2025 at 03:36:23PM -0600, Ben Cheatham wrote:
> v5 Changes:
> - Use setmntent()/getmntent() instead of open-coding getting the
> debugfs path (Dave)
> - Use correct return code for sysfs_read_attr() (Dave)
>
> v4 Changes:
> - Variable renames for clarity (Dave)
> - Use errno instead of rc for access() calls (Dave)
> - Check returns for snprintf() (Dave)
> - Add util_cxl_dport_filter() (Dave)
> - Replace printf() calls with log_info() (Dave)
> - Write correct value to debugfs during protocol error injection
> (BIT(error) vs. error)
>
> v3 Changes:
> - Rebase on v83 release
> - Fix whitespace errors (Alison)
>
> v2 Changes:
> - Make the --clear option of 'inject-error' its own command (Alison)
> - Debugfs is now found using the /proc/mount entry instead of
> providing the path using a --debugfs option
> - Man page added for 'clear-error'
> - Reword commit descriptions for clarity
>
> This series adds support for injecting CXL protocol (CXL.cache/mem)
> errors[1] into CXL RCH Downstream ports and VH root ports[2] and
> poison into CXL memory devices through the CXL debugfs. Errors are
> injected using a new 'inject-error' command, while errors are reported
> using a new cxl-list "-N"/"--injectable-errors" option. Device poison
> can be cleared using the 'clear-error' command.
>
> The 'inject-error'/'clear-error' commands and "-N" option of cxl-list all
> require access to the CXL driver's debugfs.
>
> The documentation for the new cxl-inject-error command shows both usage
> and the possible device/error types, as well as how to retrieve them
> using cxl-list. The documentation for cxl-list has also been updated to
> show the usage of the new injectable errors option.
>
> [1]: ACPI v6.5 spec, section 18.6.4
> [2]: ACPI v6.5 spec, table 18.31
Hi Ben,
I did a patch by patch review but saved up a couple of usability things
to chat about here:
Consider removing the -N option and simply adding the new info to the
default memdev and bus listings. Both are only accessing debugfs files and
don't add much to the default listing, especially the memdev one.
For the protocol errors, the cxl list entry is always present, even when empty,
but the poison_injectable attribute is only present when true. Should that be
always present and true/false? Or maybe true/false/unknown, where unknown is
the status when CONFIG_DEBUG_FS is not enabled?
And, maybe something similar for protocol errors?
Please add more strong 'danger' warnings to the poison inject and clear
command man pages. See Documentation/ABI/testing/debugfs-cxl for the language
we converged on when adding the debugfs attributes.
I have no test for the protocol errors. Is there anything you can
share for that?
I'll send a separate reply asking you to append an updated cxl-poison
unit test patch to this set.
--Alison
> Ben Cheatham (7):
> libcxl: Add debugfs path to CXL context
> libcxl: Add CXL protocol errors
> libcxl: Add poison injection support
> cxl: Add inject-error command
> cxl: Add clear-error command
> cxl/list: Add injectable errors in output
> Documentation: Add docs for inject/clear-error commands
>
> Documentation/cxl/cxl-clear-error.txt | 67 ++++++
> Documentation/cxl/cxl-inject-error.txt | 129 +++++++++++
> Documentation/cxl/cxl-list.txt | 35 ++-
> Documentation/cxl/meson.build | 2 +
> cxl/builtin.h | 2 +
> cxl/cxl.c | 2 +
> cxl/filter.c | 26 +++
> cxl/filter.h | 5 +
> cxl/inject-error.c | 249 ++++++++++++++++++++
> cxl/json.c | 30 +++
> cxl/lib/libcxl.c | 302 +++++++++++++++++++++++++
> cxl/lib/libcxl.sym | 8 +
> cxl/lib/private.h | 14 ++
> cxl/libcxl.h | 16 ++
> cxl/list.c | 3 +
> cxl/meson.build | 1 +
> util/json.h | 1 +
> 17 files changed, 891 insertions(+), 1 deletion(-)
> create mode 100644 Documentation/cxl/cxl-clear-error.txt
> create mode 100644 Documentation/cxl/cxl-inject-error.txt
> create mode 100644 cxl/inject-error.c
>
> --
> 2.52.0
>
^ permalink raw reply [flat|nested] 28+ messages in thread* Re: [ndctl PATCH v5 0/7] Add error injection support
2025-12-17 4:47 ` [ndctl PATCH v5 0/7] Add error injection support Alison Schofield
@ 2025-12-17 19:56 ` Cheatham, Benjamin
2025-12-19 5:08 ` Alison Schofield
0 siblings, 1 reply; 28+ messages in thread
From: Cheatham, Benjamin @ 2025-12-17 19:56 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/16/2025 10:47 PM, Alison Schofield wrote:
> On Mon, Dec 15, 2025 at 03:36:23PM -0600, Ben Cheatham wrote:
>> v5 Changes:
>> - Use setmntent()/getmntent() instead of open-coding getting the
>> debugfs path (Dave)
>> - Use correct return code for sysfs_read_attr() (Dave)
>>
>> v4 Changes:
>> - Variable renames for clarity (Dave)
>> - Use errno instead of rc for access() calls (Dave)
>> - Check returns for snprintf() (Dave)
>> - Add util_cxl_dport_filter() (Dave)
>> - Replace printf() calls with log_info() (Dave)
>> - Write correct value to debugfs during protocol error injection
>> (BIT(error) vs. error)
>>
>> v3 Changes:
>> - Rebase on v83 release
>> - Fix whitespace errors (Alison)
>>
>> v2 Changes:
>> - Make the --clear option of 'inject-error' its own command (Alison)
>> - Debugfs is now found using the /proc/mount entry instead of
>> providing the path using a --debugfs option
>> - Man page added for 'clear-error'
>> - Reword commit descriptions for clarity
>>
>> This series adds support for injecting CXL protocol (CXL.cache/mem)
>> errors[1] into CXL RCH Downstream ports and VH root ports[2] and
>> poison into CXL memory devices through the CXL debugfs. Errors are
>> injected using a new 'inject-error' command, while errors are reported
>> using a new cxl-list "-N"/"--injectable-errors" option. Device poison
>> can be cleared using the 'clear-error' command.
>>
>> The 'inject-error'/'clear-error' commands and "-N" option of cxl-list all
>> require access to the CXL driver's debugfs.
>>
>> The documentation for the new cxl-inject-error command shows both usage
>> and the possible device/error types, as well as how to retrieve them
>> using cxl-list. The documentation for cxl-list has also been updated to
>> show the usage of the new injectable errors option.
>>
>> [1]: ACPI v6.5 spec, section 18.6.4
>> [2]: ACPI v6.5 spec, table 18.31
>
> Hi Ben,
>
> I did a patch by patch review but saved up a couple of usability things
> to chat about here:
>
> Consider removing the -N option and simply adding the new info to the
> default memdev and bus listings. Both are only accessing debugfs files and
> don't add much to the default listing, especially the memdev one.
That makes sense, I'll do that.
>
> For the protocol errors, the cxl list entry is always present, even when empty,
> but the poison_injectable attribute is only present when true. Should that be
> always present and true/false? Or maybe true/false/unknown, where unknown is
> the status when CONFIG_DEBUG_FS is not enabled?
> And, maybe something similar for protocol errors?
It's probably fine to have them be always present when CONFIG_DEBUG_FS is enabled.
I think it would be cool to have them be removed when CONFIG_DEBUG_FS is disabled,
but I'm not sure how that would work. If I can do that, that's what I'll do.
Otherwise, I'll just set poison_injectable to false. That's what makes sense to me
since poison injection and error injection into non-RCH ports aren't available
unless the debugfs files are there (AFAIK). For the protocol errors, I'll either do
"None", "N/A", or leave it as an empty list.
>
> Please add more strong 'danger' warnings to the poison inject and clear
> command man pages. See Documentation/ABI/testing/debugfs-cxl for the language
> we converged on when adding the debugfs attributes.
For sure, I'll take a look and update.
>
> I have no test for the protocol errors. Is there anything you can
> share for that?
I don't have any at the moment. My first idea for one is to modify the CXL test module(s)
to replace /sys/kernel/debug/cxl/einj_inject with a dummy that prints a message to the dmesg
then check for that message in the test after running the command. That would somewhat match
the real use case, but doesn't test any actual error injection. If you think that would
be useful let me know and I'll put something together.
>
> I'll send a separate reply asking if you to append an updated cxl-poison
> unit test patch to this set.
That sounds fine to me, I'll append it on v6.
Thanks,
Ben
>
> --Alison
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [ndctl PATCH v5 0/7] Add error injection support
2025-12-17 19:56 ` Cheatham, Benjamin
@ 2025-12-19 5:08 ` Alison Schofield
2026-01-05 21:14 ` Cheatham, Benjamin
0 siblings, 1 reply; 28+ messages in thread
From: Alison Schofield @ 2025-12-19 5:08 UTC (permalink / raw)
To: Cheatham, Benjamin; +Cc: nvdimm, linux-cxl, dave.jiang
On Wed, Dec 17, 2025 at 01:56:35PM -0600, Cheatham, Benjamin wrote:
> On 12/16/2025 10:47 PM, Alison Schofield wrote:
> > On Mon, Dec 15, 2025 at 03:36:23PM -0600, Ben Cheatham wrote:
> >
> > I have no test for the protocol errors. Is there anything you can
> > share for that?
>
> I don't have any at the moment. My first idea for one is to modify the CXL test module(s)
> to replace /sys/kernel/debug/cxl/einj_inject with a dummy that prints a message to the dmesg
> then check for that message in the test after running the command. That would somewhat match
> the real use case, but doesn't test any actual error injection. If you think that would
> be useful let me know and I'll put something together.
As you may have guessed from my other comments, I got this running on
a real system, and am able to sanity check the cxl-cli commands for the
protocol errors.
Does qemu have support for any of this?
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [ndctl PATCH v5 0/7] Add error injection support
2025-12-19 5:08 ` Alison Schofield
@ 2026-01-05 21:14 ` Cheatham, Benjamin
0 siblings, 0 replies; 28+ messages in thread
From: Cheatham, Benjamin @ 2026-01-05 21:14 UTC (permalink / raw)
To: Alison Schofield; +Cc: nvdimm, linux-cxl, dave.jiang
On 12/18/2025 11:08 PM, Alison Schofield wrote:
> On Wed, Dec 17, 2025 at 01:56:35PM -0600, Cheatham, Benjamin wrote:
>> On 12/16/2025 10:47 PM, Alison Schofield wrote:
>>> On Mon, Dec 15, 2025 at 03:36:23PM -0600, Ben Cheatham wrote:
>
>>>
>>> I have no test for the protocol errors. Is there anything you can
>>> share for that?
>>
>> I don't have any at the moment. My first idea for one is to modify the CXL test module(s)
>> to replace /sys/kernel/debug/cxl/einj_inject with a dummy that prints a message to the dmesg
>> then check for that message in the test after running the command. That would somewhat match
>> the real use case, but doesn't test any actual error injection. If you think that would
>> be useful let me know and I'll put something together.
>
> As you may have guessed from my other comments, I got this running on a
> real system, and am able to sanity check the cxl-cli commands for the
> protocol errors.
>
> Does qemu have support for any of this?
No, QEMU doesn't support EINJ. I think it's expected you do error injection through
the QEMU monitor instead.
^ permalink raw reply [flat|nested] 28+ messages in thread
* [ndctl PATCH] cxl/test: use inject and clear cmds in cxl-poison.sh
2025-12-15 21:36 [ndctl PATCH v5 0/7] Add error injection support Ben Cheatham
` (7 preceding siblings ...)
2025-12-17 4:47 ` [ndctl PATCH v5 0/7] Add error injection support Alison Schofield
@ 2025-12-17 5:00 ` Alison Schofield
8 siblings, 0 replies; 28+ messages in thread
From: Alison Schofield @ 2025-12-17 5:00 UTC (permalink / raw)
To: Ben Cheatham; +Cc: Alison Schofield, nvdimm, linux-cxl
cxl-cli commands were recently added for poison inject and clear
operations by memdev. Replace the writes to sysfs with the new
commands in the cxl-poison unit test.
Continue to use the sysfs writes for inject and clear poison
by region offset until that support arrives in cxl-cli.
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
Ben - Please append this to your in-flight set and suggest
anything else you'd like to see covered here.
test/cxl-poison.sh | 79 ++++++++++++++++++++++++----------------------
1 file changed, 42 insertions(+), 37 deletions(-)
diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
index 59e807ece932..ebad38c14e3e 100644
--- a/test/cxl-poison.sh
+++ b/test/cxl-poison.sh
@@ -41,32 +41,37 @@ find_auto_region()
echo "$region"
}
-# When cxl-cli support for inject and clear arrives, replace
-# the writes to /sys/kernel/debug with the new cxl commands.
-
-_do_poison_sysfs()
+_do_poison()
{
local action="$1" dev="$2" addr="$3"
local expect_fail=${4:-false}
- if "$expect_fail"; then
- if echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"; then
- echo "Expected ${action}_poison to fail for $addr"
- err "$LINENO"
- fi
- else
- echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"
+ # Regions use sysfs, memdevs use cxl-cli commands
+ if [[ "$dev" =~ ^region ]]; then
+ local sysfs_path="/sys/kernel/debug/cxl/$dev/${action}_poison"
+ "$expect_fail" && echo "$addr" > "$sysfs_path" && err "$LINENO"
+ "$expect_fail" || echo "$addr" > "$sysfs_path"
+ return
fi
+
+ case "$action" in
+ inject) local cmd=("$CXL" inject-error "$dev" -t poison -a "$addr") ;;
+ clear) local cmd=("$CXL" clear-error "$dev" -a "$addr") ;;
+ *) err "$LINENO" ;;
+ esac
+
+ "$expect_fail" && "${cmd[@]}" && err "$LINENO"
+ "$expect_fail" || "${cmd[@]}"
}
-inject_poison_sysfs()
+inject_poison()
{
- _do_poison_sysfs 'inject' "$@"
+ _do_poison 'inject' "$@"
}
-clear_poison_sysfs()
+clear_poison()
{
- _do_poison_sysfs 'clear' "$@"
+ _do_poison 'clear' "$@"
}
check_trace_entry()
@@ -119,27 +124,27 @@ validate_poison_found()
test_poison_by_memdev_by_dpa()
{
find_memdev
- inject_poison_sysfs "$memdev" "0x40000000"
- inject_poison_sysfs "$memdev" "0x40001000"
- inject_poison_sysfs "$memdev" "0x600"
- inject_poison_sysfs "$memdev" "0x0"
+ inject_poison "$memdev" "0x40000000"
+ inject_poison "$memdev" "0x40001000"
+ inject_poison "$memdev" "0x600"
+ inject_poison "$memdev" "0x0"
validate_poison_found "-m $memdev" 4
- clear_poison_sysfs "$memdev" "0x40000000"
- clear_poison_sysfs "$memdev" "0x40001000"
- clear_poison_sysfs "$memdev" "0x600"
- clear_poison_sysfs "$memdev" "0x0"
+ clear_poison "$memdev" "0x40000000"
+ clear_poison "$memdev" "0x40001000"
+ clear_poison "$memdev" "0x600"
+ clear_poison "$memdev" "0x0"
validate_poison_found "-m $memdev" 0
}
test_poison_by_region_by_dpa()
{
- inject_poison_sysfs "$mem0" "0"
- inject_poison_sysfs "$mem1" "0"
+ inject_poison "$mem0" "0"
+ inject_poison "$mem1" "0"
validate_poison_found "-r $region" 2
- clear_poison_sysfs "$mem0" "0"
- clear_poison_sysfs "$mem1" "0"
+ clear_poison "$mem0" "0"
+ clear_poison "$mem1" "0"
validate_poison_found "-r $region" 0
}
@@ -166,15 +171,15 @@ test_poison_by_region_offset()
# Inject at the offset and check result using the hpa
# ABI takes an offset, but recall the hpa to check trace event
- inject_poison_sysfs "$region" "$cache_size"
+ inject_poison "$region" "$cache_size"
check_trace_entry "$region" "$hpa1"
- inject_poison_sysfs "$region" "$((gran + cache_size))"
+ inject_poison "$region" "$((gran + cache_size))"
check_trace_entry "$region" "$hpa2"
validate_poison_found "-r $region" 2
- clear_poison_sysfs "$region" "$cache_size"
+ clear_poison "$region" "$cache_size"
check_trace_entry "$region" "$hpa1"
- clear_poison_sysfs "$region" "$((gran + cache_size))"
+ clear_poison "$region" "$((gran + cache_size))"
check_trace_entry "$region" "$hpa2"
validate_poison_found "-r $region" 0
}
@@ -194,21 +199,21 @@ test_poison_by_region_offset_negative()
if [[ $cache_size -gt 0 ]]; then
cache_offset=$((cache_size - 1))
echo "Testing offset within cache: $cache_offset (cache_size: $cache_size)"
- inject_poison_sysfs "$region" "$cache_offset" true
- clear_poison_sysfs "$region" "$cache_offset" true
+ inject_poison "$region" "$cache_offset" true
+ clear_poison "$region" "$cache_offset" true
else
echo "Skipping cache test - cache_size is 0"
fi
# Offset exceeds region size
exceed_offset=$((region_size))
- inject_poison_sysfs "$region" "$exceed_offset" true
- clear_poison_sysfs "$region" "$exceed_offset" true
+ inject_poison "$region" "$exceed_offset" true
+ clear_poison "$region" "$exceed_offset" true
# Offset exceeds region size by a lot
large_offset=$((region_size * 2))
- inject_poison_sysfs "$region" "$large_offset" true
- clear_poison_sysfs "$region" "$large_offset" true
+ inject_poison "$region" "$large_offset" true
+ clear_poison "$region" "$large_offset" true
}
run_poison_test()
--
2.37.3
^ permalink raw reply related [flat|nested] 28+ messages in thread