* [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-20 14:03 ` Jonathan Cameron via
2025-06-19 15:16 ` [PATCH v2 2/7] hw/cxl/events: Updates for rev3.2 general media event record shiju.jose--- via
` (5 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
CXL spec 3.2 section 8.2.9.2.1 Table 8-55, Common Event Record format has
been updated with Maintenance Operation Subclass, LD ID and ID of the
device head information.
Update the related CXL event reporting and the QMP commands used to inject
CXL events to reflect the above spec changes.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/cxl/cxl-mailbox-utils.c | 6 +++--
hw/mem/cxl_type3.c | 32 ++++++++++++++++++-------
hw/mem/cxl_type3_stubs.c | 12 +++++++---
include/hw/cxl/cxl_device.h | 4 +++-
include/hw/cxl/cxl_events.h | 15 +++++++++---
qapi/cxl.json | 48 ++++++++++++++++++++++++++++++++++---
6 files changed, 97 insertions(+), 20 deletions(-)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4c01b25110..4d0c0b3edc 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -3574,7 +3574,8 @@ static CXLRetCode cmd_fm_set_dc_region_config(const struct cxl_cmd *cmd,
&dynamic_capacity_uuid,
(1 << CXL_EVENT_TYPE_INFO),
sizeof(dcEvent),
- cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ cxl_device_get_timestamp(&ct3d->cxl_dstate),
+ 0, 0, 0, 0);
dcEvent.type = DC_EVENT_REGION_CONFIG_UPDATED;
dcEvent.validity_flags = 1;
dcEvent.host_id = 0;
@@ -3692,7 +3693,8 @@ static void cxl_mbox_create_dc_event_records_for_extents(CXLType3Dev *ct3d,
&dynamic_capacity_uuid,
(1 << CXL_EVENT_TYPE_INFO),
sizeof(event_rec),
- cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ cxl_device_get_timestamp(&ct3d->cxl_dstate),
+ 0, 0, 0, 0);
event_rec.type = type;
event_rec.validity_flags = 1;
event_rec.host_id = 0;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b5482f58a3..0787a9bfca 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1780,12 +1780,18 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
void cxl_assign_event_header(CXLEventRecordHdr *hdr,
const QemuUUID *uuid, uint32_t flags,
- uint8_t length, uint64_t timestamp)
+ uint8_t length, uint64_t timestamp,
+ uint8_t maint_class, uint8_t maint_subclass,
+ uint16_t ld_id, uint8_t head_id)
{
st24_le_p(&hdr->flags, flags);
hdr->length = length;
memcpy(&hdr->id, uuid, sizeof(hdr->id));
stq_le_p(&hdr->timestamp, timestamp);
+ hdr->maint_op_class = maint_class;
+ hdr->maint_op_subclass = maint_subclass;
+ hdr->ld_id = ld_id;
+ hdr->head_id = head_id;
}
static const QemuUUID gen_media_uuid = {
@@ -1825,7 +1831,9 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
}
/* Component ID is device specific. Define this as a string. */
void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
- uint8_t flags, uint64_t dpa,
+ uint32_t flags, uint8_t class,
+ uint8_t subclass, uint16_t ld_id,
+ uint8_t head_id, uint64_t dpa,
uint8_t descriptor, uint8_t type,
uint8_t transaction_type,
bool has_channel, uint8_t channel,
@@ -1863,7 +1871,8 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
memset(&gem, 0, sizeof(gem));
cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
- cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ cxl_device_get_timestamp(&ct3d->cxl_dstate),
+ class, subclass, ld_id, head_id);
stq_le_p(&gem.phys_addr, dpa);
gem.descriptor = descriptor;
@@ -1907,7 +1916,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
#define CXL_DRAM_VALID_COLUMN BIT(6)
#define CXL_DRAM_VALID_CORRECTION_MASK BIT(7)
-void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
+void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
+ uint8_t class, uint8_t subclass,
+ uint16_t ld_id, uint8_t head_id,
uint64_t dpa, uint8_t descriptor,
uint8_t type, uint8_t transaction_type,
bool has_channel, uint8_t channel,
@@ -1950,7 +1961,8 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
memset(&dram, 0, sizeof(dram));
cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
- cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ cxl_device_get_timestamp(&ct3d->cxl_dstate),
+ class, subclass, ld_id, head_id);
stq_le_p(&dram.phys_addr, dpa);
dram.descriptor = descriptor;
dram.type = type;
@@ -2010,7 +2022,9 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
}
void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
- uint8_t flags, uint8_t type,
+ uint32_t flags, uint8_t class,
+ uint8_t subclass, uint16_t ld_id,
+ uint8_t head_id, uint8_t type,
uint8_t health_status,
uint8_t media_status,
uint8_t additional_status,
@@ -2049,7 +2063,8 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
memset(&module, 0, sizeof(module));
cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
- cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ cxl_device_get_timestamp(&ct3d->cxl_dstate),
+ class, subclass, ld_id, head_id);
module.type = type;
module.health_status = health_status;
@@ -2284,7 +2299,8 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
* Event Log.
*/
cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
- cxl_device_get_timestamp(&dcd->cxl_dstate));
+ cxl_device_get_timestamp(&dcd->cxl_dstate),
+ 0, 0, 0, 0);
dCap.type = type;
/* FIXME: for now, validity flag is cleared */
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index c1a5e4a7c1..263d8b4609 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -14,7 +14,9 @@
#include "qapi/qapi-commands-cxl.h"
void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
- uint8_t flags, uint64_t dpa,
+ uint32_t flags, uint8_t class,
+ uint8_t subclass, uint16_t ld_id,
+ uint8_t head_id, uint64_t dpa,
uint8_t descriptor, uint8_t type,
uint8_t transaction_type,
bool has_channel, uint8_t channel,
@@ -23,7 +25,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
const char *component_id,
Error **errp) {}
-void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
+void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
+ uint8_t class, uint8_t subclass,
+ uint16_t ld_id, uint8_t head_id,
uint64_t dpa, uint8_t descriptor,
uint8_t type, uint8_t transaction_type,
bool has_channel, uint8_t channel,
@@ -38,7 +42,9 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
Error **errp) {}
void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
- uint8_t flags, uint8_t type,
+ uint32_t flags, uint8_t class,
+ uint8_t subclass, uint16_t ld_id,
+ uint8_t head_id, uint8_t type,
uint8_t health_status,
uint8_t media_status,
uint8_t additional_status,
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 831fdefbac..fc6ec82670 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -827,7 +827,9 @@ bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
uint64_t len);
void cxl_assign_event_header(CXLEventRecordHdr *hdr,
const QemuUUID *uuid, uint32_t flags,
- uint8_t length, uint64_t timestamp);
+ uint8_t length, uint64_t timestamp,
+ uint8_t maint_class, uint8_t maint_subclass,
+ uint16_t ld_id, uint8_t head_id);
bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
uint64_t dpa, uint64_t len);
bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 758b075a64..4d9cfdb621 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -29,9 +29,15 @@ typedef enum CXLEventLogType {
/*
* Common Event Record Format
- * CXL r3.1 section 8.2.9.2.1: Event Records; Table 8-43
+ * CXL r3.2 section 8.2.10.2.1: Event Records; Table 8-55
*/
-#define CXL_EVENT_REC_HDR_RES_LEN 0xf
+#define CXL_EVENT_REC_FLAGS_PERMANENT_COND BIT(2)
+#define CXL_EVENT_REC_FLAGS_MAINT_NEEDED BIT(3)
+#define CXL_EVENT_REC_FLAGS_PERF_DEGRADED BIT(4)
+#define CXL_EVENT_REC_FLAGS_HW_REPLACEMENT_NEEDED BIT(5)
+#define CXL_EVENT_REC_FLAGS_MAINT_OP_SUBCLASS_VALID BIT(6)
+#define CXL_EVENT_REC_FLAGS_LD_ID_VALID BIT(7)
+#define CXL_EVENT_REC_FLAGS_HEAD_ID_VALID BIT(8)
typedef struct CXLEventRecordHdr {
QemuUUID id;
uint8_t length;
@@ -40,7 +46,10 @@ typedef struct CXLEventRecordHdr {
uint16_t related_handle;
uint64_t timestamp;
uint8_t maint_op_class;
- uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
+ uint8_t maint_op_subclass;
+ uint16_t ld_id;
+ uint8_t head_id;
+ uint8_t reserved[0xb];
} QEMU_PACKED CXLEventRecordHdr;
#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 8f2e9237b1..c38585d3c8 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -42,6 +42,18 @@
# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
# Record Format, Event Record Flags for subfield definitions.
#
+# @class: Maintenance operation class the device requests to initiate.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @subclass: Maintenance operation subclass the device requests to
+# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @ld-id: LD ID of LD from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @head-id: ID of the device head from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
# @dpa: Device Physical Address (relative to @path device). Note
# lower bits include some flags. See CXL r3.0 Table 8-43 General
# Media Event Record, Physical Address.
@@ -73,7 +85,9 @@
# Since: 8.1
##
{ 'command': 'cxl-inject-general-media-event',
- 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+ 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
+ 'class':'uint8', 'subclass':'uint8',
+ 'ld-id':'uint16', 'head-id':'uint8',
'dpa': 'uint64', 'descriptor': 'uint8',
'type': 'uint8', 'transaction-type': 'uint8',
'*channel': 'uint8', '*rank': 'uint8',
@@ -93,6 +107,18 @@
# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
# Record Format, Event Record Flags for subfield definitions.
#
+# @class: Maintenance operation class the device requests to initiate.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @subclass: Maintenance operation subclass the device requests to
+# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @ld-id: LD ID of LD from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @head-id: ID of the device head from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
# @dpa: Device Physical Address (relative to @path device). Note
# lower bits include some flags. See CXL r3.0 Table 8-44 DRAM
# Event Record, Physical Address.
@@ -132,7 +158,9 @@
# Since: 8.1
##
{ 'command': 'cxl-inject-dram-event',
- 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+ 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
+ 'class':'uint8', 'subclass':'uint8',
+ 'ld-id':'uint16', 'head-id':'uint8',
'dpa': 'uint64', 'descriptor': 'uint8',
'type': 'uint8', 'transaction-type': 'uint8',
'*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
@@ -154,6 +182,18 @@
# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
# Record Format, Event Record Flags for subfield definitions.
#
+# @class: Maintenance operation class the device requests to initiate.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @subclass: Maintenance operation subclass the device requests to
+# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @ld-id: LD ID of LD from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
+# @head-id: ID of the device head from where the event originated.
+# See CXL r3.2 Table 8-55 Common Event Record Format.
+#
# @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module
# Event Record for bit definitions for bit definiions.
#
@@ -184,7 +224,9 @@
# Since: 8.1
##
{ 'command': 'cxl-inject-memory-module-event',
- 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
+ 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint32',
+ 'class':'uint8', 'subclass':'uint8',
+ 'ld-id':'uint16', 'head-id':'uint8',
'type': 'uint8', 'health-status': 'uint8',
'media-status': 'uint8', 'additional-status': 'uint8',
'life-used': 'uint8', 'temperature' : 'int16',
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format
2025-06-19 15:16 ` [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format shiju.jose--- via
@ 2025-06-20 14:03 ` Jonathan Cameron via
2025-06-24 9:10 ` Shiju Jose via
0 siblings, 1 reply; 14+ messages in thread
From: Jonathan Cameron via @ 2025-06-20 14:03 UTC (permalink / raw)
To: shiju.jose; +Cc: qemu-devel, linux-cxl, tanxiaofei, prime.zeng, linuxarm
On Thu, 19 Jun 2025 16:16:13 +0100
<shiju.jose@huawei.com> wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
>
> CXL spec 3.2 section 8.2.9.2.1 Table 8-55, Common Event Record
> format has updated with Maintenance Operation Subclass, LD ID and
> ID of the device head information.
Hi Shiju,
Wrap a little longer - this is sub 70 and should aim for about 75 for
commit descriptions.
>
> Add updates for the above spec changes in the related CXL events
> reporting and QMP command to inject CXL events.
Main comment in here is we need to keep these new parameters as optional
as we shouldn't go adding required stuff to qapi and perhaps more importantly
they are actually optional for most events.
Jonathan
>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b5482f58a3..0787a9bfca 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1780,12 +1780,18 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
>
> void cxl_assign_event_header(CXLEventRecordHdr *hdr,
> const QemuUUID *uuid, uint32_t flags,
> - uint8_t length, uint64_t timestamp)
> + uint8_t length, uint64_t timestamp,
> + uint8_t maint_class, uint8_t maint_subclass,
> + uint16_t ld_id, uint8_t head_id)
> {
> st24_le_p(&hdr->flags, flags);
> hdr->length = length;
> memcpy(&hdr->id, uuid, sizeof(hdr->id));
> stq_le_p(&hdr->timestamp, timestamp);
> + hdr->maint_op_class = maint_class;
> + hdr->maint_op_subclass = maint_subclass;
> + hdr->ld_id = ld_id;
2 bytes so
stw_le_p(&hdr->ld_id, ld_id);
> + hdr->head_id = head_id;
> }
>
> static const QemuUUID gen_media_uuid = {
> @@ -1825,7 +1831,9 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
> }
> /* Component ID is device specific. Define this as a string. */
> void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> - uint8_t flags, uint64_t dpa,
> + uint32_t flags, uint8_t class,
> + uint8_t subclass, uint16_t ld_id,
> + uint8_t head_id, uint64_t dpa,
> uint8_t descriptor, uint8_t type,
> uint8_t transaction_type,
> bool has_channel, uint8_t channel,
> @@ -1863,7 +1871,8 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
>
> memset(&gem, 0, sizeof(gem));
> cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
> + class, subclass, ld_id, head_id);
>
> stq_le_p(&gem.phys_addr, dpa);
> gem.descriptor = descriptor;
> @@ -1907,7 +1916,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> #define CXL_DRAM_VALID_COLUMN BIT(6)
> #define CXL_DRAM_VALID_CORRECTION_MASK BIT(7)
>
> -void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
> +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
> + uint8_t class, uint8_t subclass,
> + uint16_t ld_id, uint8_t head_id,
> uint64_t dpa, uint8_t descriptor,
> uint8_t type, uint8_t transaction_type,
> bool has_channel, uint8_t channel,
> @@ -1950,7 +1961,8 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
>
> memset(&dram, 0, sizeof(dram));
> cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
> + class, subclass, ld_id, head_id);
> stq_le_p(&dram.phys_addr, dpa);
> dram.descriptor = descriptor;
> dram.type = type;
> @@ -2010,7 +2022,9 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
> }
>
> void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
> - uint8_t flags, uint8_t type,
> + uint32_t flags, uint8_t class,
> + uint8_t subclass, uint16_t ld_id,
> + uint8_t head_id, uint8_t type,
> uint8_t health_status,
> uint8_t media_status,
> uint8_t additional_status,
> @@ -2049,7 +2063,8 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
>
> memset(&module, 0, sizeof(module));
> cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
> + class, subclass, ld_id, head_id);
>
> module.type = type;
> module.health_status = health_status;
> @@ -2284,7 +2299,8 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
> * Event Log.
> */
> cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> - cxl_device_get_timestamp(&dcd->cxl_dstate));
> + cxl_device_get_timestamp(&dcd->cxl_dstate),
> + 0, 0, 0, 0);
As below - we'll need to pass in the validity flags as well for subclass, ld-id and head-id
as we don't really want to do that bit of computing 'flags' at each caller.
>
> dCap.type = type;
> /* FIXME: for now, validity flag is cleared */
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> index c1a5e4a7c1..263d8b4609 100644
> --- a/hw/mem/cxl_type3_stubs.c
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -14,7 +14,9 @@
> #include "qapi/qapi-commands-cxl.h"
>
> void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> - uint8_t flags, uint64_t dpa,
> + uint32_t flags, uint8_t class,
> + uint8_t subclass, uint16_t ld_id,
> + uint8_t head_id, uint64_t dpa,
> uint8_t descriptor, uint8_t type,
> uint8_t transaction_type,
> bool has_channel, uint8_t channel,
> @@ -23,7 +25,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> const char *component_id,
> Error **errp) {}
>
> -void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
> +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
> + uint8_t class, uint8_t subclass,
> + uint16_t ld_id, uint8_t head_id,
> uint64_t dpa, uint8_t descriptor,
> uint8_t type, uint8_t transaction_type,
> bool has_channel, uint8_t channel,
> @@ -38,7 +42,9 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
> Error **errp) {}
>
> void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
> - uint8_t flags, uint8_t type,
> + uint32_t flags, uint8_t class,
> + uint8_t subclass, uint16_t ld_id,
> + uint8_t head_id, uint8_t type,
> uint8_t health_status,
> uint8_t media_status,
> uint8_t additional_status,
With suggestion below, this will gain bool has_xxx parameters like
qmp_cxl_inject_general_media already has.
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 831fdefbac..fc6ec82670 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -827,7 +827,9 @@ bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> uint64_t len);
> void cxl_assign_event_header(CXLEventRecordHdr *hdr,
> const QemuUUID *uuid, uint32_t flags,
> - uint8_t length, uint64_t timestamp);
> + uint8_t length, uint64_t timestamp,
> + uint8_t maint_class, uint8_t maint_subclass,
> + uint16_t ld_id, uint8_t head_id);
I think we need to add bools for the presence of maint_subclass, ld_id and head_id
or we have to modify flags at each caller which is rather ugly.
It's fine to just fill maint_class in with 0 if not set though as that has
no valid bit.
> bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
> uint64_t dpa, uint64_t len);
> bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
> index 758b075a64..4d9cfdb621 100644
> --- a/include/hw/cxl/cxl_events.h
> +++ b/include/hw/cxl/cxl_events.h
> @@ -29,9 +29,15 @@ typedef enum CXLEventLogType {
>
> /*
> * Common Event Record Format
> - * CXL r3.1 section 8.2.9.2.1: Event Records; Table 8-43
> + * CXL r3.2 section 8.2.10.2.1: Event Records; Table 8-55
> */
> -#define CXL_EVENT_REC_HDR_RES_LEN 0xf
> +#define CXL_EVENT_REC_FLAGS_PERMANENT_COND BIT(2)
> +#define CXL_EVENT_REC_FLAGS_MAINT_NEEDED BIT(3)
> +#define CXL_EVENT_REC_FLAGS_PERF_DEGRADED BIT(4)
> +#define CXL_EVENT_REC_FLAGS_HW_REPLACEMENT_NEEDED BIT(5)
> +#define CXL_EVENT_REC_FLAGS_MAINT_OP_SUBCLASS_VALID BIT(6)
> +#define CXL_EVENT_REC_FLAGS_LD_ID_VALID BIT(7)
> +#define CXL_EVENT_REC_FLAGS_HEAD_ID_VALID BIT(8)
As below - here we have the 3 valid bits.
> typedef struct CXLEventRecordHdr {
> QemuUUID id;
> uint8_t length;
> @@ -40,7 +46,10 @@ typedef struct CXLEventRecordHdr {
> uint16_t related_handle;
> uint64_t timestamp;
> uint8_t maint_op_class;
> - uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
> + uint8_t maint_op_subclass;
> + uint16_t ld_id;
> + uint8_t head_id;
> + uint8_t reserved[0xb];
> } QEMU_PACKED CXLEventRecordHdr;
>
> #define CXL_EVENT_RECORD_DATA_LENGTH 0x50
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 8f2e9237b1..c38585d3c8 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -42,6 +42,18 @@
> # @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
> # Record Format, Event Record Flags for subfield definitions.
> #
> +# @class: Maintenance operation class the device requests to initiate.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @subclass: Maintenance operation subclass the device requests to
> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @ld-id: LD ID of LD from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @head-id: ID of the device head from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> # @dpa: Device Physical Address (relative to @path device). Note
> # lower bits include some flags. See CXL r3.0 Table 8-43 General
> # Media Event Record, Physical Address.
> @@ -73,7 +85,9 @@
> # Since: 8.1
> ##
> { 'command': 'cxl-inject-general-media-event',
> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
> + 'class':'uint8', 'subclass':'uint8',
> + 'ld-id':'uint16', 'head-id':'uint8',
This is a bit of a problem as we should maintain backwards compatibility.
ld-id, head-id etc are optional anyway so should be "*ld-id" and we should
then check has_ld_id and set the valid bits appropriately.
For class and subclass we should make them optional in this interface
and follow the guidance to set them to 0 if they aren't explicitly set.
Actually on closer inspection subclass is also optional and has a valid flag
so the only one we are making optional in this interface that isn't in the
spec is class.
Also good to use fuller name to make it clear these are about maintenance
classes. Maybe maint-op-class and maint-op-subclass
Similar comments apply to the other cases.
> 'dpa': 'uint64', 'descriptor': 'uint8',
> 'type': 'uint8', 'transaction-type': 'uint8',
> '*channel': 'uint8', '*rank': 'uint8',
> @@ -93,6 +107,18 @@
> # @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
> # Record Format, Event Record Flags for subfield definitions.
> #
> +# @class: Maintenance operation class the device requests to initiate.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @subclass: Maintenance operation subclass the device requests to
> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @ld-id: LD ID of LD from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @head-id: ID of the device head from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> # @dpa: Device Physical Address (relative to @path device). Note
> # lower bits include some flags. See CXL r3.0 Table 8-44 DRAM
> # Event Record, Physical Address.
> @@ -132,7 +158,9 @@
> # Since: 8.1
> ##
> { 'command': 'cxl-inject-dram-event',
> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
> + 'class':'uint8', 'subclass':'uint8',
> + 'ld-id':'uint16', 'head-id':'uint8',
> 'dpa': 'uint64', 'descriptor': 'uint8',
> 'type': 'uint8', 'transaction-type': 'uint8',
> '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
> @@ -154,6 +182,18 @@
> # @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
> # Record Format, Event Record Flags for subfield definitions.
> #
> +# @class: Maintenance operation class the device requests to initiate.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @subclass: Maintenance operation subclass the device requests to
> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @ld-id: LD ID of LD from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> +# @head-id: ID of the device head from where the event originated.
> +# See CXL r3.2 Table 8-55 Common Event Record Format.
> +#
> # @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module
> # Event Record for bit definitions for bit definiions.
> #
> @@ -184,7 +224,9 @@
> # Since: 8.1
> ##
> { 'command': 'cxl-inject-memory-module-event',
> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint32',
> + 'class':'uint8', 'subclass':'uint8',
> + 'ld-id':'uint16', 'head-id':'uint8',
> 'type': 'uint8', 'health-status': 'uint8',
> 'media-status': 'uint8', 'additional-status': 'uint8',
> 'life-used': 'uint8', 'temperature' : 'int16',
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format
2025-06-20 14:03 ` Jonathan Cameron via
@ 2025-06-24 9:10 ` Shiju Jose via
0 siblings, 0 replies; 14+ messages in thread
From: Shiju Jose via @ 2025-06-24 9:10 UTC (permalink / raw)
To: Jonathan Cameron
Cc: qemu-devel@nongnu.org, linux-cxl@vger.kernel.org, tanxiaofei,
Zengtao (B), Linuxarm
>-----Original Message-----
>From: Jonathan Cameron <jonathan.cameron@huawei.com>
>Sent: 20 June 2025 15:04
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: qemu-devel@nongnu.org; linux-cxl@vger.kernel.org; tanxiaofei
><tanxiaofei@huawei.com>; Zengtao (B) <prime.zeng@hisilicon.com>; Linuxarm
><linuxarm@huawei.com>
>Subject: Re: [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event
>record format
>
>On Thu, 19 Jun 2025 16:16:13 +0100
><shiju.jose@huawei.com> wrote:
>
>> From: Shiju Jose <shiju.jose@huawei.com>
>>
>> CXL spec 3.2 section 8.2.9.2.1 Table 8-55, Common Event Record format
>> has updated with Maintenance Operation Subclass, LD ID and ID of the
>> device head information.
>Hi Shiju,
>
>Wrap a little longer - this is sub 70 and should aim for about 75 for commit
>descriptions.
>
>>
>> Add updates for the above spec changes in the related CXL events
>> reporting and QMP command to inject CXL events.
>
>Main comment in here is we need to keep these new parameters as optional as
>we shouldn't go adding required stuff to qapi and perhaps more importantly
>they are actually optional for most events.
>
Hi Jonathan,
Thanks for the feedback.
I will make these parameters optional in qapi as you suggested.
Thanks,
Shiju
>Jonathan
>
>>
>> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
>
>> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index
>> b5482f58a3..0787a9bfca 100644
>> --- a/hw/mem/cxl_type3.c
>> +++ b/hw/mem/cxl_type3.c
>> @@ -1780,12 +1780,18 @@ void qmp_cxl_inject_correctable_error(const
>> char *path, CxlCorErrorType type,
>>
>> void cxl_assign_event_header(CXLEventRecordHdr *hdr,
>> const QemuUUID *uuid, uint32_t flags,
>> - uint8_t length, uint64_t timestamp)
>> + uint8_t length, uint64_t timestamp,
>> + uint8_t maint_class, uint8_t maint_subclass,
>> + uint16_t ld_id, uint8_t head_id)
>> {
>> st24_le_p(&hdr->flags, flags);
>> hdr->length = length;
>> memcpy(&hdr->id, uuid, sizeof(hdr->id));
>> stq_le_p(&hdr->timestamp, timestamp);
>> + hdr->maint_op_class = maint_class;
>> + hdr->maint_op_subclass = maint_subclass;
>> + hdr->ld_id = ld_id;
>2 bytes so
>
> stw_le_p(&hdr->ld_id, ld_id);
>
>> + hdr->head_id = head_id;
>> }
>>
>> static const QemuUUID gen_media_uuid = { @@ -1825,7 +1831,9 @@ static
>> int ct3d_qmp_cxl_event_log_enc(CxlEventLog log) }
>> /* Component ID is device specific. Define this as a string. */
>> void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
>> - uint8_t flags, uint64_t dpa,
>> + uint32_t flags, uint8_t class,
>> + uint8_t subclass, uint16_t ld_id,
>> + uint8_t head_id, uint64_t
>> + dpa,
>> uint8_t descriptor, uint8_t type,
>> uint8_t transaction_type,
>> bool has_channel, uint8_t
>> channel, @@ -1863,7 +1871,8 @@ void
>> qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
>>
>> memset(&gem, 0, sizeof(gem));
>> cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
>> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
>> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
>> + class, subclass, ld_id, head_id);
>>
>> stq_le_p(&gem.phys_addr, dpa);
>> gem.descriptor = descriptor;
>> @@ -1907,7 +1916,9 @@ void qmp_cxl_inject_general_media_event(const
>char *path, CxlEventLog log,
>> #define CXL_DRAM_VALID_COLUMN BIT(6)
>> #define CXL_DRAM_VALID_CORRECTION_MASK BIT(7)
>>
>> -void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log,
>> uint8_t flags,
>> +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t
>flags,
>> + uint8_t class, uint8_t subclass,
>> + uint16_t ld_id, uint8_t head_id,
>> uint64_t dpa, uint8_t descriptor,
>> uint8_t type, uint8_t transaction_type,
>> bool has_channel, uint8_t channel, @@
>> -1950,7 +1961,8 @@ void qmp_cxl_inject_dram_event(const char *path,
>> CxlEventLog log, uint8_t flags,
>>
>> memset(&dram, 0, sizeof(dram));
>> cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
>> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
>> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
>> + class, subclass, ld_id, head_id);
>> stq_le_p(&dram.phys_addr, dpa);
>> dram.descriptor = descriptor;
>> dram.type = type;
>> @@ -2010,7 +2022,9 @@ void qmp_cxl_inject_dram_event(const char *path,
>> CxlEventLog log, uint8_t flags, }
>>
>> void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog
>log,
>> - uint8_t flags, uint8_t type,
>> + uint32_t flags, uint8_t class,
>> + uint8_t subclass, uint16_t ld_id,
>> + uint8_t head_id, uint8_t
>> + type,
>> uint8_t health_status,
>> uint8_t media_status,
>> uint8_t additional_status, @@
>> -2049,7 +2063,8 @@ void qmp_cxl_inject_memory_module_event(const char
>> *path, CxlEventLog log,
>>
>> memset(&module, 0, sizeof(module));
>> cxl_assign_event_header(hdr, &memory_module_uuid, flags,
>sizeof(module),
>> - cxl_device_get_timestamp(&ct3d->cxl_dstate));
>> + cxl_device_get_timestamp(&ct3d->cxl_dstate),
>> + class, subclass, ld_id, head_id);
>>
>> module.type = type;
>> module.health_status = health_status; @@ -2284,7 +2299,8 @@
>> static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
>> * Event Log.
>> */
>> cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
>> - cxl_device_get_timestamp(&dcd->cxl_dstate));
>> + cxl_device_get_timestamp(&dcd->cxl_dstate),
>> + 0, 0, 0, 0);
>As below - we'll need to pass in the validity flags as well for subclass, ld-id and
>head-id as we don't really want to do that bit of computing 'flags' at each caller.
>
>>
>> dCap.type = type;
>> /* FIXME: for now, validity flag is cleared */ diff --git
>> a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index
>> c1a5e4a7c1..263d8b4609 100644
>> --- a/hw/mem/cxl_type3_stubs.c
>> +++ b/hw/mem/cxl_type3_stubs.c
>> @@ -14,7 +14,9 @@
>> #include "qapi/qapi-commands-cxl.h"
>>
>> void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
>> - uint8_t flags, uint64_t dpa,
>> + uint32_t flags, uint8_t class,
>> + uint8_t subclass, uint16_t ld_id,
>> + uint8_t head_id, uint64_t
>> + dpa,
>> uint8_t descriptor, uint8_t type,
>> uint8_t transaction_type,
>> bool has_channel, uint8_t
>> channel, @@ -23,7 +25,9 @@ void
>qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
>> const char *component_id,
>> Error **errp) {}
>>
>> -void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log,
>> uint8_t flags,
>> +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t
>flags,
>> + uint8_t class, uint8_t subclass,
>> + uint16_t ld_id, uint8_t head_id,
>> uint64_t dpa, uint8_t descriptor,
>> uint8_t type, uint8_t transaction_type,
>> bool has_channel, uint8_t channel, @@
>> -38,7 +42,9 @@ void qmp_cxl_inject_dram_event(const char *path,
>CxlEventLog log, uint8_t flags,
>> Error **errp) {}
>>
>> void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog
>log,
>> - uint8_t flags, uint8_t type,
>> + uint32_t flags, uint8_t class,
>> + uint8_t subclass, uint16_t ld_id,
>> + uint8_t head_id, uint8_t
>> + type,
>> uint8_t health_status,
>> uint8_t media_status,
>> uint8_t additional_status,
>With suggestion below, this will gain bool has_xxx parameters like
>qmp_cxl_inject_general_media already has.
>
>> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
>> index 831fdefbac..fc6ec82670 100644
>> --- a/include/hw/cxl/cxl_device.h
>> +++ b/include/hw/cxl/cxl_device.h
>> @@ -827,7 +827,9 @@ bool ct3_test_region_block_backed(CXLType3Dev
>*ct3d, uint64_t dpa,
>> uint64_t len); void
>> cxl_assign_event_header(CXLEventRecordHdr *hdr,
>> const QemuUUID *uuid, uint32_t flags,
>> - uint8_t length, uint64_t timestamp);
>> + uint8_t length, uint64_t timestamp,
>> + uint8_t maint_class, uint8_t maint_subclass,
>> + uint16_t ld_id, uint8_t head_id);
>
>I think we need to add bools for the presence of maint_subclass, ld_id and
>head_id or we have to modify flags at each caller which is rather ugly.
>It's fine to just fill maint_class in with 0 if not set though as that has no valid bit.
>
>> bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
>> uint64_t dpa, uint64_t len);
>> bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
>> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
>> index 758b075a64..4d9cfdb621 100644
>> --- a/include/hw/cxl/cxl_events.h
>> +++ b/include/hw/cxl/cxl_events.h
>> @@ -29,9 +29,15 @@ typedef enum CXLEventLogType {
>>
>> /*
>> * Common Event Record Format
>> - * CXL r3.1 section 8.2.9.2.1: Event Records; Table 8-43
>> + * CXL r3.2 section 8.2.10.2.1: Event Records; Table 8-55
>> */
>> -#define CXL_EVENT_REC_HDR_RES_LEN 0xf
>> +#define CXL_EVENT_REC_FLAGS_PERMANENT_COND BIT(2)
>> +#define CXL_EVENT_REC_FLAGS_MAINT_NEEDED BIT(3)
>> +#define CXL_EVENT_REC_FLAGS_PERF_DEGRADED BIT(4)
>> +#define CXL_EVENT_REC_FLAGS_HW_REPLACEMENT_NEEDED BIT(5)
>> +#define CXL_EVENT_REC_FLAGS_MAINT_OP_SUBCLASS_VALID BIT(6)
>> +#define CXL_EVENT_REC_FLAGS_LD_ID_VALID BIT(7)
>> +#define CXL_EVENT_REC_FLAGS_HEAD_ID_VALID BIT(8)
>As below - here we have the 3 valid bits.
>
>> typedef struct CXLEventRecordHdr {
>> QemuUUID id;
>> uint8_t length;
>> @@ -40,7 +46,10 @@ typedef struct CXLEventRecordHdr {
>> uint16_t related_handle;
>> uint64_t timestamp;
>> uint8_t maint_op_class;
>> - uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
>> + uint8_t maint_op_subclass;
>> + uint16_t ld_id;
>> + uint8_t head_id;
>> + uint8_t reserved[0xb];
>> } QEMU_PACKED CXLEventRecordHdr;
>>
>> #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 diff --git a/qapi/cxl.json
>> b/qapi/cxl.json index 8f2e9237b1..c38585d3c8 100644
>> --- a/qapi/cxl.json
>> +++ b/qapi/cxl.json
>> @@ -42,6 +42,18 @@
>> # @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
>> # Record Format, Event Record Flags for subfield definitions.
>> #
>> +# @class: Maintenance operation class the device requests to initiate.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @subclass: Maintenance operation subclass the device requests to
>> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @ld-id: LD ID of LD from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @head-id: ID of the device head from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> # @dpa: Device Physical Address (relative to @path device). Note
>> # lower bits include some flags. See CXL r3.0 Table 8-43 General
>> # Media Event Record, Physical Address.
>> @@ -73,7 +85,9 @@
>> # Since: 8.1
>> ##
>> { 'command': 'cxl-inject-general-media-event',
>> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
>> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
>> + 'class':'uint8', 'subclass':'uint8',
>> + 'ld-id':'uint16', 'head-id':'uint8',
>
>This is a bit of a problem as we should maintain backwards compatibility.
>
>ld-id, head-id etc are optional anyway so should be "*ld-id" and we should then
>check has_ld_id and set the valid bits appropriately.
>
>For class and subclass we should make them optional in this interface and follow
>the guidance to set them to 0 if they aren't explicitly set.
>Actually on closer inspection subclass is also optional and has a valid flag so the
>only one we are making optional in this interface that isn't in the spec is class.
>
>Also good to use fuller name to make it clear these are about maintenance
>classes. Maybe maint-op-class and maint-op-subclass
>
>Similar comments apply to the other cases.
>
>
>> 'dpa': 'uint64', 'descriptor': 'uint8',
>> 'type': 'uint8', 'transaction-type': 'uint8',
>> '*channel': 'uint8', '*rank': 'uint8', @@ -93,6 +107,18
>> @@ # @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common
>> Event
>> # Record Format, Event Record Flags for subfield definitions.
>> #
>> +# @class: Maintenance operation class the device requests to initiate.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @subclass: Maintenance operation subclass the device requests to
>> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @ld-id: LD ID of LD from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @head-id: ID of the device head from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> # @dpa: Device Physical Address (relative to @path device). Note
>> # lower bits include some flags. See CXL r3.0 Table 8-44 DRAM
>> # Event Record, Physical Address.
>> @@ -132,7 +158,9 @@
>> # Since: 8.1
>> ##
>> { 'command': 'cxl-inject-dram-event',
>> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
>> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint32',
>> + 'class':'uint8', 'subclass':'uint8',
>> + 'ld-id':'uint16', 'head-id':'uint8',
>
>> 'dpa': 'uint64', 'descriptor': 'uint8',
>> 'type': 'uint8', 'transaction-type': 'uint8',
>> '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask':
>> 'uint32', @@ -154,6 +182,18 @@ # @flags: Event Record Flags. See CXL
>> r3.0 Table 8-42 Common Event
>> # Record Format, Event Record Flags for subfield definitions.
>> #
>> +# @class: Maintenance operation class the device requests to initiate.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @subclass: Maintenance operation subclass the device requests to
>> +# initiate. See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @ld-id: LD ID of LD from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> +# @head-id: ID of the device head from where the event originated.
>> +# See CXL r3.2 Table 8-55 Common Event Record Format.
>> +#
>> # @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module
>> # Event Record for bit definitions for bit definiions.
>> #
>> @@ -184,7 +224,9 @@
>> # Since: 8.1
>> ##
>> { 'command': 'cxl-inject-memory-module-event',
>> - 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
>> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint32',
>> + 'class':'uint8', 'subclass':'uint8',
>> + 'ld-id':'uint16', 'head-id':'uint8',
>> 'type': 'uint8', 'health-status': 'uint8',
>> 'media-status': 'uint8', 'additional-status': 'uint8',
>> 'life-used': 'uint8', 'temperature' : 'int16',
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 2/7] hw/cxl/events: Updates for rev3.2 general media event record
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-20 14:13 ` Jonathan Cameron via
2025-06-19 15:16 ` [PATCH v2 3/7] hw/cxl/events: Updates for rev3.2 DRAM " shiju.jose--- via
` (4 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
CXL spec rev3.2 section 8.2.10.2.1.1 Table 8-57, general media event
table has been updated with the following new fields.
1. Advanced Programmable Corrected Memory Error Threshold Event Flags
2. Corrected Memory Error Count at Event
3. Memory Event Sub-Type
Add updates for the above spec changes in the CXL general media event
reporting and QMP command to inject general media event.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/mem/cxl_type3.c | 9 +++++++++
hw/mem/cxl_type3_stubs.c | 3 +++
include/hw/cxl/cxl_events.h | 7 +++++--
qapi/cxl.json | 14 +++++++++++++-
4 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0787a9bfca..410ff445d0 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1813,6 +1813,7 @@ static const QemuUUID memory_module_uuid = {
#define CXL_GMER_VALID_RANK BIT(1)
#define CXL_GMER_VALID_DEVICE BIT(2)
#define CXL_GMER_VALID_COMPONENT BIT(3)
+#define CXL_GMER_VALID_COMPONENT_ID_FORMAT BIT(4)
static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
{
@@ -1840,6 +1841,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
bool has_rank, uint8_t rank,
bool has_device, uint32_t device,
const char *component_id,
+ uint8_t cme_ev_flags,
+ uint32_t cme_count,
+ uint8_t sub_type,
Error **errp)
{
Object *obj = object_resolve_path(path, NULL);
@@ -1898,10 +1902,15 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
strncpy((char *)gem.component_id, component_id,
sizeof(gem.component_id) - 1);
valid_flags |= CXL_GMER_VALID_COMPONENT;
+ valid_flags |= CXL_GMER_VALID_COMPONENT_ID_FORMAT;
}
stw_le_p(&gem.validity_flags, valid_flags);
+ gem.cme_ev_flags = cme_ev_flags;
+ st24_le_p(gem.cme_count, cme_count);
+ gem.sub_type = sub_type;
+
if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
cxl_event_irq_assert(ct3d);
}
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index 263d8b4609..b2a11bded8 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -23,6 +23,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
bool has_rank, uint8_t rank,
bool has_device, uint32_t device,
const char *component_id,
+ uint8_t cme_ev_flags,
+ uint32_t cme_count,
+ uint8_t sub_type,
Error **errp) {}
void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 4d9cfdb621..352f9891bd 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -115,10 +115,10 @@ typedef struct CXLEventInterruptPolicy {
/*
* General Media Event Record
- * CXL r3.1 Section 8.2.9.2.1.1; Table 8-45
+ * CXL r3.2 Section 8.2.10.2.1.1; Table 8-57
*/
#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
-#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e
+#define CXL_EVENT_GEN_MED_RES_SIZE 0x29
typedef struct CXLEventGenMedia {
CXLEventRecordHdr hdr;
uint64_t phys_addr;
@@ -130,6 +130,9 @@ typedef struct CXLEventGenMedia {
uint8_t rank;
uint8_t device[3];
uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ uint8_t cme_ev_flags;
+ uint8_t cme_count[3];
+ uint8_t sub_type;
uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
} QEMU_PACKED CXLEventGenMedia;
diff --git a/qapi/cxl.json b/qapi/cxl.json
index c38585d3c8..dd01d50c25 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -82,6 +82,16 @@
# @component-id: Device specific component identifier for the event.
# May describe a field replaceable sub-component of the device.
#
+# @cme-ev-flags: Advanced programmable corrected memory error
+# threshold event flags.
+# See CXL r3.2 Table 8-57 General Media Event Record.
+#
+# @cme-count: Corrected memory error count at event.
+# See CXL r3.2 Table 8-57 General Media Event Record.
+#
+# @sub-type: Memory event sub-type.
+# See CXL r3.2 Table 8-57 General Media Event Record.
+#
# Since: 8.1
##
{ 'command': 'cxl-inject-general-media-event',
@@ -91,7 +101,9 @@
'dpa': 'uint64', 'descriptor': 'uint8',
'type': 'uint8', 'transaction-type': 'uint8',
'*channel': 'uint8', '*rank': 'uint8',
- '*device': 'uint32', '*component-id': 'str' } }
+ '*device': 'uint32', '*component-id': 'str',
+ 'cme-ev-flags':'uint8', 'cme-count':'uint32',
+ 'sub-type':'uint8' } }
##
# @cxl-inject-dram-event:
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2 2/7] hw/cxl/events: Updates for rev3.2 general media event record
2025-06-19 15:16 ` [PATCH v2 2/7] hw/cxl/events: Updates for rev3.2 general media event record shiju.jose--- via
@ 2025-06-20 14:13 ` Jonathan Cameron via
0 siblings, 0 replies; 14+ messages in thread
From: Jonathan Cameron via @ 2025-06-20 14:13 UTC (permalink / raw)
To: shiju.jose; +Cc: qemu-devel, linux-cxl, tanxiaofei, prime.zeng, linuxarm
On Thu, 19 Jun 2025 16:16:14 +0100
<shiju.jose@huawei.com> wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
>
> CXL spec rev3.2 section 8.2.10.2.1.1 Table 8-57, general media event
> table has updated with following new fields.
> 1. Advanced Programmable Corrected Memory Error Threshold Event Flags
> 2. Corrected Memory Error Count at Event
> 3. Memory Event Sub-Type
>
> Add updates for the above spec changes in the CXL general media event
> reporting and QMP command to inject general media event.
>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
This has the same constraint of not changing the QEMU API
in a non-backwards-compatible way.
So make them optional parameters and fill them in with what
we'd get if the device didn't support cme. These only really
make sense if we have memory event type 5 anyway:
Advanced Programmable Corrected Memory Counter Expiration
We don't need to enforce that in the interface though.
I don't mind if we do the incompatible 'counter expiry'
and a count of 0 for a type 5 event. We could check for that
specific condition given the spec calls it out and reject the
injection but perhaps we are better leaving it fully flexible
so we can poke the OS with that invalid state. Any real
test paths will set it appropriately so the counter is non 0
when we signal the event.
> ---
> hw/mem/cxl_type3.c | 9 +++++++++
> hw/mem/cxl_type3_stubs.c | 3 +++
> include/hw/cxl/cxl_events.h | 7 +++++--
> qapi/cxl.json | 14 +++++++++++++-
> 4 files changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 0787a9bfca..410ff445d0 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1813,6 +1813,7 @@ static const QemuUUID memory_module_uuid = {
> #define CXL_GMER_VALID_RANK BIT(1)
> #define CXL_GMER_VALID_DEVICE BIT(2)
> #define CXL_GMER_VALID_COMPONENT BIT(3)
> +#define CXL_GMER_VALID_COMPONENT_ID_FORMAT BIT(4)
>
> static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
> {
> @@ -1840,6 +1841,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> bool has_rank, uint8_t rank,
> bool has_device, uint32_t device,
> const char *component_id,
> + uint8_t cme_ev_flags,
> + uint32_t cme_count,
> + uint8_t sub_type,
> Error **errp)
> {
> Object *obj = object_resolve_path(path, NULL);
> @@ -1898,10 +1902,15 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> strncpy((char *)gem.component_id, component_id,
> sizeof(gem.component_id) - 1);
> valid_flags |= CXL_GMER_VALID_COMPONENT;
> + valid_flags |= CXL_GMER_VALID_COMPONENT_ID_FORMAT;
> }
>
> stw_le_p(&gem.validity_flags, valid_flags);
>
> + gem.cme_ev_flags = cme_ev_flags;
> + st24_le_p(gem.cme_count, cme_count);
> + gem.sub_type = sub_type;
> +
> if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
> cxl_event_irq_assert(ct3d);
> }
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> index 263d8b4609..b2a11bded8 100644
> --- a/hw/mem/cxl_type3_stubs.c
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -23,6 +23,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
> bool has_rank, uint8_t rank,
> bool has_device, uint32_t device,
> const char *component_id,
> + uint8_t cme_ev_flags,
> + uint32_t cme_count,
> + uint8_t sub_type,
> Error **errp) {}
>
> void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
> index 4d9cfdb621..352f9891bd 100644
> --- a/include/hw/cxl/cxl_events.h
> +++ b/include/hw/cxl/cxl_events.h
> @@ -115,10 +115,10 @@ typedef struct CXLEventInterruptPolicy {
>
> /*
> * General Media Event Record
> - * CXL r3.1 Section 8.2.9.2.1.1; Table 8-45
> + * CXL r3.2 Section 8.2.10.2.1.1; Table 8-57
> */
> #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
> -#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e
> +#define CXL_EVENT_GEN_MED_RES_SIZE 0x29
> typedef struct CXLEventGenMedia {
> CXLEventRecordHdr hdr;
> uint64_t phys_addr;
> @@ -130,6 +130,9 @@ typedef struct CXLEventGenMedia {
> uint8_t rank;
> uint8_t device[3];
> uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
> + uint8_t cme_ev_flags;
> + uint8_t cme_count[3];
> + uint8_t sub_type;
> uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
> } QEMU_PACKED CXLEventGenMedia;
>
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index c38585d3c8..dd01d50c25 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -82,6 +82,16 @@
> # @component-id: Device specific component identifier for the event.
> # May describe a field replaceable sub-component of the device.
> #
> +# @cme-ev-flags: Advanced programmable corrected memory error
> +# threshold event flags.
> +# See CXL r3.2 Table 8-57 General Media Event Record.
> +#
> +# @cme-count: Corrected memory error count at event.
> +# See CXL r3.2 Table 8-57 General Media Event Record.
> +#
> +# @sub-type: Memory event sub-type.
> +# See CXL r3.2 Table 8-57 General Media Event Record.
> +#
> # Since: 8.1
> ##
> { 'command': 'cxl-inject-general-media-event',
> @@ -91,7 +101,9 @@
> 'dpa': 'uint64', 'descriptor': 'uint8',
> 'type': 'uint8', 'transaction-type': 'uint8',
> '*channel': 'uint8', '*rank': 'uint8',
> - '*device': 'uint32', '*component-id': 'str' } }
> + '*device': 'uint32', '*component-id': 'str',
> + 'cme-ev-flags':'uint8', 'cme-count':'uint32',
> + 'sub-type':'uint8' } }
>
> ##
> # @cxl-inject-dram-event:
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 3/7] hw/cxl/events: Updates for rev3.2 DRAM event record
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 1/7] hw/cxl/events: Update for rev3.2 common event record format shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 2/7] hw/cxl/events: Updates for rev3.2 general media event record shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 4/7] hw/cxl/events: Updates for rev3.2 memory module " shiju.jose--- via
` (3 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
CXL spec rev3.2 section 8.2.10.2.1.2 Table 8-58, DRAM event record
has been updated with the following new fields.
1. Component Identifier
2. Sub-channel of the memory event location
3. Advanced Programmable Corrected Memory Error Threshold Event Flags
4. Corrected Volatile Memory Error Count at Event
5. Memory Event Sub-Type
Add updates for the above spec changes in the CXL DRAM event
reporting and QMP command to inject DRAM event.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/mem/cxl_type3.c | 24 ++++++++++++++++++++++++
hw/mem/cxl_type3_stubs.c | 5 +++++
include/hw/cxl/cxl_events.h | 9 +++++++--
qapi/cxl.json | 22 +++++++++++++++++++++-
4 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 410ff445d0..b99a100fe0 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1924,6 +1924,9 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
#define CXL_DRAM_VALID_ROW BIT(5)
#define CXL_DRAM_VALID_COLUMN BIT(6)
#define CXL_DRAM_VALID_CORRECTION_MASK BIT(7)
+#define CXL_DRAM_VALID_COMPONENT BIT(8)
+#define CXL_DRAM_VALID_COMPONENT_ID_FORMAT BIT(9)
+#define CXL_DRAM_VALID_SUB_CHANNEL BIT(10)
void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags,
uint8_t class, uint8_t subclass,
@@ -1939,6 +1942,11 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
bool has_column, uint16_t column,
bool has_correction_mask,
uint64List *correction_mask,
+ const char *component_id,
+ bool has_sub_channel, uint8_t sub_channel,
+ uint8_t cme_ev_flags,
+ uint32_t cvme_count,
+ uint8_t sub_type,
Error **errp)
{
Object *obj = object_resolve_path(path, NULL);
@@ -2023,6 +2031,22 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
}
+ if (component_id) {
+ strncpy((char *)dram.component_id, component_id,
+ sizeof(dram.component_id) - 1);
+ valid_flags |= CXL_DRAM_VALID_COMPONENT;
+ valid_flags |= CXL_DRAM_VALID_COMPONENT_ID_FORMAT;
+ }
+
+ if (has_sub_channel) {
+ dram.sub_channel = sub_channel;
+ valid_flags |= CXL_DRAM_VALID_SUB_CHANNEL;
+ }
+
+ dram.cme_ev_flags = cme_ev_flags;
+ st24_le_p(dram.cvme_count, cvme_count);
+ dram.sub_type = sub_type;
+
stw_le_p(&dram.validity_flags, valid_flags);
if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index b2a11bded8..26161c086a 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -42,6 +42,11 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
bool has_column, uint16_t column,
bool has_correction_mask,
uint64List *correction_mask,
+ const char *component_id,
+ bool has_sub_channel, uint8_t sub_channel,
+ uint8_t cme_ev_flags,
+ uint32_t cvme_count,
+ uint8_t sub_type,
Error **errp) {}
void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 352f9891bd..a3c5f2ec20 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -138,7 +138,7 @@ typedef struct CXLEventGenMedia {
/*
* DRAM Event Record
- * CXL r3.1 Section 8.2.9.2.1.2: Table 8-46
+ * CXL r3.2 Section 8.2.10.2.1.2: Table 8-58
* All fields little endian.
*/
typedef struct CXLEventDram {
@@ -156,7 +156,12 @@ typedef struct CXLEventDram {
uint8_t row[3];
uint16_t column;
uint64_t correction_mask[4];
- uint8_t reserved[0x17];
+ uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ uint8_t sub_channel;
+ uint8_t cme_ev_flags;
+ uint8_t cvme_count[3];
+ uint8_t sub_type;
+ uint8_t reserved;
} QEMU_PACKED CXLEventDram;
/*
diff --git a/qapi/cxl.json b/qapi/cxl.json
index dd01d50c25..0ba3a8e0f3 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -167,6 +167,23 @@
# @correction-mask: Bits within each nibble. Used in order of bits
# set in the nibble-mask. Up to 4 nibbles may be covered.
#
+# @component-id: Device specific component identifier for the event.
+# May describe a field replaceable sub-component of the device.
+# See CXL r3.2 Table 8-58 DRAM Event Record.
+#
+# @sub-channel: The sub-channel of the memory event location.
+# See CXL r3.2 Table 8-58 DRAM Event Record.
+#
+# @cme-ev-flags: Advanced programmable corrected memory error
+# threshold event flags.
+# See CXL r3.2 Table 8-58 DRAM Event Record.
+#
+# @cvme-count: Corrected volatile memory error count at event.
+# See CXL r3.2 Table 8-58 DRAM Event Record.
+#
+# @sub-type: Memory event sub-type.
+# See CXL r3.2 Table 8-58 DRAM Event Record.
+#
# Since: 8.1
##
{ 'command': 'cxl-inject-dram-event',
@@ -177,7 +194,10 @@
'type': 'uint8', 'transaction-type': 'uint8',
'*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
'*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
- '*column': 'uint16', '*correction-mask': [ 'uint64' ]
+ '*column': 'uint16', '*correction-mask': [ 'uint64' ],
+ '*component-id': 'str', '*sub-channel':'uint8',
+ 'cme-ev-flags':'uint8', 'cvme-count':'uint32',
+ 'sub-type':'uint8'
}}
##
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2 4/7] hw/cxl/events: Updates for rev3.2 memory module event record
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
` (2 preceding siblings ...)
2025-06-19 15:16 ` [PATCH v2 3/7] hw/cxl/events: Updates for rev3.2 DRAM " shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 5/7] hw/cxl/cxl-mailbox-utils: Move declaration of scrub and ECS feature attributes in cmd_features_set_feature() shiju.jose--- via
` (2 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
CXL spec rev3.2 section 8.2.10.2.1.3 Table 8-59, Memory Module
Event Record, has been updated with the following new fields.
1. Validity Flags
2. Component Identifier
3. Device Event Sub-Type
Update the CXL memory module event reporting and the QMP command
that injects memory module events to reflect the above spec changes.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/mem/cxl_type3.c | 16 ++++++++++++++++
hw/mem/cxl_type3_stubs.c | 2 ++
include/hw/cxl/cxl_events.h | 7 +++++--
qapi/cxl.json | 10 +++++++++-
4 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b99a100fe0..81774bf4b9 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -2054,6 +2054,9 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
}
}
+#define CXL_MMER_VALID_COMPONENT BIT(0)
+#define CXL_MMER_VALID_COMPONENT_ID_FORMAT BIT(1)
+
void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
uint32_t flags, uint8_t class,
uint8_t subclass, uint16_t ld_id,
@@ -2066,11 +2069,14 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
uint32_t dirty_shutdown_count,
uint32_t corrected_volatile_error_count,
uint32_t corrected_persist_error_count,
+ const char *component_id,
+ uint8_t sub_type,
Error **errp)
{
Object *obj = object_resolve_path(path, NULL);
CXLEventMemoryModule module;
CXLEventRecordHdr *hdr = &module.hdr;
+ uint16_t valid_flags = 0;
CXLDeviceState *cxlds;
CXLType3Dev *ct3d;
uint8_t enc_log;
@@ -2111,6 +2117,16 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
stl_le_p(&module.corrected_persistent_error_count,
corrected_persist_error_count);
+ if (component_id) {
+ strncpy((char *)module.component_id, component_id,
+ sizeof(module.component_id) - 1);
+ valid_flags |= CXL_MMER_VALID_COMPONENT;
+ valid_flags |= CXL_MMER_VALID_COMPONENT_ID_FORMAT;
+ }
+ module.sub_type = sub_type;
+
+ stw_le_p(&module.validity_flags, valid_flags);
+
if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
cxl_event_irq_assert(ct3d);
}
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index 26161c086a..04c36ed262 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -61,6 +61,8 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
uint32_t dirty_shutdown_count,
uint32_t corrected_volatile_error_count,
uint32_t corrected_persist_error_count,
+ const char *component_id,
+ uint8_t sub_type,
Error **errp) {}
void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index a3c5f2ec20..4a7836ad72 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -166,7 +166,7 @@ typedef struct CXLEventDram {
/*
* Memory Module Event Record
- * CXL r3.1 Section 8.2.9.2.1.3: Table 8-47
+ * CXL r3.2 Section 8.2.10.2.1.3: Table 8-59
* All fields little endian.
*/
typedef struct CXLEventMemoryModule {
@@ -180,7 +180,10 @@ typedef struct CXLEventMemoryModule {
uint32_t dirty_shutdown_count;
uint32_t corrected_volatile_error_count;
uint32_t corrected_persistent_error_count;
- uint8_t reserved[0x3d];
+ uint16_t validity_flags;
+ uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ uint8_t sub_type;
+ uint8_t reserved[0x2a];
} QEMU_PACKED CXLEventMemoryModule;
/*
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 0ba3a8e0f3..0a40596a30 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -253,6 +253,13 @@
# @corrected-persistent-error-count: Total number of correctable
# errors in persistent memory
#
+# @component-id: Device specific component identifier for the event.
+# May describe a field replaceable sub-component of the device.
+# See CXL r3.2 Table 8-59 Memory Module Event Record.
+#
+# @sub-type: Device event sub-type.
+# See CXL r3.2 Table 8-59 Memory Module Event Record.
+#
# Since: 8.1
##
{ 'command': 'cxl-inject-memory-module-event',
@@ -264,7 +271,8 @@
'life-used': 'uint8', 'temperature' : 'int16',
'dirty-shutdown-count': 'uint32',
'corrected-volatile-error-count': 'uint32',
- 'corrected-persistent-error-count': 'uint32'
+ 'corrected-persistent-error-count': 'uint32',
+ '*component-id': 'str', 'sub-type':'uint8'
}}
##
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2 5/7] hw/cxl/cxl-mailbox-utils: Move declaration of scrub and ECS feature attributes in cmd_features_set_feature()
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
` (3 preceding siblings ...)
2025-06-19 15:16 ` [PATCH v2 4/7] hw/cxl/events: Updates for rev3.2 memory module " shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-20 14:16 ` Jonathan Cameron via
2025-06-19 15:16 ` [PATCH v2 6/7] hw/cxl: Add Maintenance support shiju.jose--- via
2025-06-19 15:16 ` [PATCH v2 7/7] hw/cxl: Add emulation for memory sparing control feature shiju.jose--- via
6 siblings, 1 reply; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
Move the declaration of scrub and ECS feature attributes in cmd_features_set_feature()
to the local scope where they are used.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/cxl/cxl-mailbox-utils.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4d0c0b3edc..83668d7d93 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1459,10 +1459,6 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
CXLCCI *cci)
{
CXLSetFeatureInHeader *hdr = (void *)payload_in;
- CXLMemPatrolScrubWriteAttrs *ps_write_attrs;
- CXLMemPatrolScrubSetFeature *ps_set_feature;
- CXLMemECSWriteAttrs *ecs_write_attrs;
- CXLMemECSSetFeature *ecs_set_feature;
CXLSetFeatureInfo *set_feat_info;
uint16_t bytes_to_copy = 0;
uint8_t data_transfer_flag;
@@ -1508,8 +1504,9 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
return CXL_MBOX_UNSUPPORTED;
}
- ps_set_feature = (void *)payload_in;
- ps_write_attrs = &ps_set_feature->feat_data;
+ CXLMemPatrolScrubSetFeature *ps_set_feature = (void *)payload_in;
+ CXLMemPatrolScrubWriteAttrs *ps_write_attrs =
+ &ps_set_feature->feat_data;
if ((uint32_t)hdr->offset + bytes_to_copy >
sizeof(ct3d->patrol_scrub_wr_attrs)) {
@@ -1535,8 +1532,8 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
return CXL_MBOX_UNSUPPORTED;
}
- ecs_set_feature = (void *)payload_in;
- ecs_write_attrs = ecs_set_feature->feat_data;
+ CXLMemECSSetFeature *ecs_set_feature = (void *)payload_in;
+ CXLMemECSWriteAttrs *ecs_write_attrs = ecs_set_feature->feat_data;
if ((uint32_t)hdr->offset + bytes_to_copy >
sizeof(ct3d->ecs_wr_attrs)) {
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2 5/7] hw/cxl/cxl-mailbox-utils: Move declaration of scrub and ECS feature attributes in cmd_features_set_feature()
2025-06-19 15:16 ` [PATCH v2 5/7] hw/cxl/cxl-mailbox-utils: Move declaration of scrub and ECS feature attributes in cmd_features_set_feature() shiju.jose--- via
@ 2025-06-20 14:16 ` Jonathan Cameron via
0 siblings, 0 replies; 14+ messages in thread
From: Jonathan Cameron via @ 2025-06-20 14:16 UTC (permalink / raw)
To: shiju.jose; +Cc: qemu-devel, linux-cxl, tanxiaofei, prime.zeng, linuxarm
On Thu, 19 Jun 2025 16:16:17 +0100
<shiju.jose@huawei.com> wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
>
> Move the declaration of scrub and ECS feature attributes in cmd_features_set_feature()
> to the local scope where they are used.
>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Local scope is fine, but I'm fairly sure that, style-wise, these
files always put variable declarations at the start of scope (except
when g_autofree is in use, where it gets more complex).
> ---
> hw/cxl/cxl-mailbox-utils.c | 13 +++++--------
> 1 file changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 4d0c0b3edc..83668d7d93 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -1459,10 +1459,6 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> CXLCCI *cci)
> {
> CXLSetFeatureInHeader *hdr = (void *)payload_in;
> - CXLMemPatrolScrubWriteAttrs *ps_write_attrs;
> - CXLMemPatrolScrubSetFeature *ps_set_feature;
> - CXLMemECSWriteAttrs *ecs_write_attrs;
> - CXLMemECSSetFeature *ecs_set_feature;
> CXLSetFeatureInfo *set_feat_info;
> uint16_t bytes_to_copy = 0;
> uint8_t data_transfer_flag;
> @@ -1508,8 +1504,9 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> return CXL_MBOX_UNSUPPORTED;
> }
>
> - ps_set_feature = (void *)payload_in;
> - ps_write_attrs = &ps_set_feature->feat_data;
> + CXLMemPatrolScrubSetFeature *ps_set_feature = (void *)payload_in;
Move the declaration to the start of the scope.
You can do the assignment there as well, as I don't think we care if we cast
them to the wrong type when the header version isn't what we think it should be.
> + CXLMemPatrolScrubWriteAttrs *ps_write_attrs =
> + &ps_set_feature->feat_data;
>
> if ((uint32_t)hdr->offset + bytes_to_copy >
> sizeof(ct3d->patrol_scrub_wr_attrs)) {
> @@ -1535,8 +1532,8 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> return CXL_MBOX_UNSUPPORTED;
> }
>
> - ecs_set_feature = (void *)payload_in;
> - ecs_write_attrs = ecs_set_feature->feat_data;
> + CXLMemECSSetFeature *ecs_set_feature = (void *)payload_in;
> + CXLMemECSWriteAttrs *ecs_write_attrs = ecs_set_feature->feat_data;
>
> if ((uint32_t)hdr->offset + bytes_to_copy >
> sizeof(ct3d->ecs_wr_attrs)) {
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 6/7] hw/cxl: Add Maintenance support
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
` (4 preceding siblings ...)
2025-06-19 15:16 ` [PATCH v2 5/7] hw/cxl/cxl-mailbox-utils: Move declaration of scrub and ECS feature attributes in cmd_features_set_feature() shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-20 14:40 ` Jonathan Cameron via
2025-06-19 15:16 ` [PATCH v2 7/7] hw/cxl: Add emulation for memory sparing control feature shiju.jose--- via
6 siblings, 1 reply; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Davidlohr Bueso <dave@stgolabs.net>
This adds initial support for the Maintenance command, specifically
the soft and hard PPR operations on a DPA. The implementation allows
them to be executed at runtime; semantically, therefore, data is retained
and CXL.mem requests are correctly processed.
Track pending maintenance requests when a general media or DRAM event is injected with the maintenance-needed flag set.
Post Package Repair (PPR) maintenance operations may be supported by CXL
devices that implement CXL.mem protocol. A PPR maintenance operation
requests the CXL device to perform a repair operation on its media.
For example, a CXL device with DRAM components that support PPR features
may implement PPR Maintenance operations. DRAM components may support two
types of PPR, hard PPR (hPPR), for a permanent row repair, and Soft PPR
(sPPR), for a temporary row repair. Soft PPR is much faster than hPPR,
but the repair is lost with a power cycle.
CXL spec 3.2 section 8.2.10.7.1.2 describes the device's sPPR (soft PPR)
maintenance operation and section 8.2.10.7.1.3 describes the device's
hPPR (hard PPR) maintenance operation feature.
CXL spec 3.2 section 8.2.10.7.2.1 describes the sPPR feature discovery and
configuration.
CXL spec 3.2 section 8.2.10.7.2.2 describes the hPPR feature discovery and
configuration.
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/cxl/cxl-mailbox-utils.c | 191 ++++++++++++++++++++++++++++++++++++
hw/mem/cxl_type3.c | 57 +++++++++++
include/hw/cxl/cxl_device.h | 88 +++++++++++++++++
3 files changed, 336 insertions(+)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 83668d7d93..87c5df83b0 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -89,6 +89,8 @@ enum {
#define GET_SUPPORTED 0x0
#define GET_FEATURE 0x1
#define SET_FEATURE 0x2
+ MAINTENANCE = 0x06,
+ #define PERFORM 0x0
IDENTIFY = 0x40,
#define MEMORY_DEVICE 0x0
CCLS = 0x41,
@@ -1239,6 +1241,8 @@ typedef struct CXLSupportedFeatureEntry {
enum CXL_SUPPORTED_FEATURES_LIST {
CXL_FEATURE_PATROL_SCRUB = 0,
CXL_FEATURE_ECS,
+ CXL_FEATURE_SPPR,
+ CXL_FEATURE_HPPR,
CXL_FEATURE_MAX
};
@@ -1280,6 +1284,28 @@ enum CXL_SET_FEATURE_FLAG_DATA_TRANSFER {
};
#define CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET BIT(3)
+/* CXL r3.2 section 8.2.10.7.2.1: sPPR Feature Discovery and Configuration */
+static const QemuUUID soft_ppr_uuid = {
+ .data = UUID(0x892ba475, 0xfad8, 0x474e, 0x9d, 0x3e,
+ 0x69, 0x2c, 0x91, 0x75, 0x68, 0xbb)
+};
+
+typedef struct CXLMemSoftPPRSetFeature {
+ CXLSetFeatureInHeader hdr;
+ CXLMemSoftPPRWriteAttrs feat_data;
+} QEMU_PACKED QEMU_ALIGNED(16) CXLMemSoftPPRSetFeature;
+
+/* CXL r3.2 section 8.2.10.7.2.2: hPPR Feature Discovery and Configuration */
+static const QemuUUID hard_ppr_uuid = {
+ .data = UUID(0x80ea4521, 0x786f, 0x4127, 0xaf, 0xb1,
+ 0xec, 0x74, 0x59, 0xfb, 0x0e, 0x24)
+};
+
+typedef struct CXLMemHardPPRSetFeature {
+ CXLSetFeatureInHeader hdr;
+ CXLMemHardPPRWriteAttrs feat_data;
+} QEMU_PACKED QEMU_ALIGNED(16) CXLMemHardPPRSetFeature;
+
/* CXL r3.1 section 8.2.9.9.11.1: Device Patrol Scrub Control Feature */
static const QemuUUID patrol_scrub_uuid = {
.data = UUID(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33,
@@ -1343,6 +1369,38 @@ static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd,
for (entry = 0, index = get_feats_in->start_index;
entry < req_entries; index++) {
switch (index) {
+ case CXL_FEATURE_SPPR:
+ /* Fill supported feature entry for soft-PPR */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = soft_ppr_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemSoftPPRReadAttrs),
+ .set_feat_size = sizeof(CXLMemSoftPPRWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_SPPR_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_SPPR_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ case CXL_FEATURE_HPPR:
+ /* Fill supported feature entry for hard-PPR */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = hard_ppr_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemHardPPRReadAttrs),
+ .set_feat_size = sizeof(CXLMemHardPPRWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_HPPR_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_HPPR_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
case CXL_FEATURE_PATROL_SCRUB:
/* Fill supported feature entry for device patrol scrub control */
get_feats_out->feat_entries[entry++] =
@@ -1441,6 +1499,26 @@ static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd,
memcpy(payload_out,
(uint8_t *)&ct3d->ecs_attrs + get_feature->offset,
bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &soft_ppr_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemSoftPPRReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemSoftPPRReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->soft_ppr_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &hard_ppr_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemHardPPRReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemHardPPRReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->hard_ppr_attrs + get_feature->offset,
+ bytes_to_copy);
} else {
return CXL_MBOX_UNSUPPORTED;
}
@@ -1552,6 +1630,42 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
ct3d->ecs_wr_attrs.fru_attrs[count].ecs_config & 0x1F;
}
}
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &soft_ppr_uuid)) {
+ if (hdr->version != CXL_MEMDEV_SPPR_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemSoftPPRSetFeature *sppr_set_feature = (void *)payload_in;
+ CXLMemSoftPPRWriteAttrs *sppr_write_attrs =
+ &sppr_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->soft_ppr_wr_attrs + hdr->offset,
+ sppr_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->soft_ppr_attrs.op_mode = ct3d->soft_ppr_wr_attrs.op_mode;
+ ct3d->soft_ppr_attrs.sppr_op_mode = ct3d->soft_ppr_wr_attrs.sppr_op_mode;
+ }
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &hard_ppr_uuid)) {
+ if (hdr->version != CXL_MEMDEV_HPPR_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemHardPPRSetFeature *hppr_set_feature = (void *)payload_in;
+ CXLMemHardPPRWriteAttrs *hppr_write_attrs =
+ &hppr_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->hard_ppr_wr_attrs + hdr->offset,
+ hppr_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->hard_ppr_attrs.op_mode = ct3d->hard_ppr_wr_attrs.op_mode;
+ ct3d->hard_ppr_attrs.hppr_op_mode = ct3d->hard_ppr_wr_attrs.hppr_op_mode;
+ }
} else {
return CXL_MBOX_UNSUPPORTED;
}
@@ -1564,7 +1678,12 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size);
} else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) {
memset(&ct3d->ecs_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &soft_ppr_uuid)) {
+ memset(&ct3d->soft_ppr_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &hard_ppr_uuid)) {
+ memset(&ct3d->hard_ppr_wr_attrs, 0, set_feat_info->data_size);
}
+
set_feat_info->data_transfer_flag = 0;
set_feat_info->data_saved_across_reset = false;
set_feat_info->data_offset = 0;
@@ -1574,6 +1693,72 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+static void cxl_perform_ppr(CXLType3Dev *ct3d, uint64_t dpa)
+{
+ CXLMaintenance *ent, *next;
+
+ QLIST_FOREACH_SAFE(ent, &ct3d->maint_list, node, next) {
+ if (dpa == ent->dpa) {
+ QLIST_REMOVE(ent, node);
+ g_free(ent);
+ break;
+ }
+ }
+ /* TODO: produce a Memory Sparing Event Record */
+}
+
+/* CXL r3.2 section 8.2.10.7.1 - Perform Maintenance (Opcode 0600h) */
+#define MAINTENANCE_PPR_QUERY_RESOURCES BIT(0)
+
+static CXLRetCode cmd_media_perform_maintenance(const struct cxl_cmd *cmd,
+ uint8_t *payload_in, size_t len_in,
+ uint8_t *payload_out, size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t class;
+ uint8_t subclass;
+ union {
+ struct {
+ uint8_t flags;
+ uint64_t dpa;
+ uint8_t nibble_mask[3];
+ } QEMU_PACKED ppr;
+ };
+ } QEMU_PACKED *maint_in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+
+ if (maintenance_running(cci)) {
+ return CXL_MBOX_BUSY;
+ }
+
+ switch (maint_in->class) {
+ case 0:
+ return CXL_MBOX_SUCCESS; /* nop */
+ case 1:
+ if (maint_in->ppr.flags & MAINTENANCE_PPR_QUERY_RESOURCES) {
+ return CXL_MBOX_SUCCESS;
+ }
+
+ switch (maint_in->subclass) {
+ case 0: /* soft ppr */
+ case 1: /* hard ppr */
+ cxl_perform_ppr(ct3d, ldq_le_p(&maint_in->ppr.dpa));
+ return CXL_MBOX_SUCCESS;
+ default:
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ break;
+ case 2:
+ case 3:
+ return CXL_MBOX_UNSUPPORTED;
+ default:
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ return CXL_MBOX_SUCCESS;
+}
+
/* CXL r3.1 Section 8.2.9.9.1.1: Identify Memory Device (Opcode 4000h) */
static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
uint8_t *payload_in,
@@ -3902,6 +4087,12 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
CXL_MBOX_IMMEDIATE_POLICY_CHANGE |
CXL_MBOX_IMMEDIATE_LOG_CHANGE |
CXL_MBOX_SECURITY_STATE_CHANGE)},
+ [MAINTENANCE][PERFORM] = { "MAINTENANCE_PERFORM",
+ cmd_media_perform_maintenance, ~0,
+ CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
+ CXL_MBOX_IMMEDIATE_DATA_CHANGE |
+ CXL_MBOX_IMMEDIATE_LOG_CHANGE |
+ CXL_MBOX_BACKGROUND_OPERATION },
[IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE",
cmd_identify_memory_device, 0, 0 },
[CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 81774bf4b9..965ad3402d 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1205,6 +1205,30 @@ void ct3_realize(PCIDevice *pci_dev, Error **errp)
ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
}
+ /* Set default values for soft-PPR attributes */
+ ct3d->soft_ppr_attrs = (CXLMemSoftPPRReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_PPR_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_SPPR_MAINT_SUBCLASS,
+ .sppr_flags = CXL_MEMDEV_SPPR_DPA_SUPPORT_FLAG,
+ .restriction_flags = 0,
+ .sppr_op_mode = 0
+ };
+
+ /* Set default value for hard-PPR attributes */
+ ct3d->hard_ppr_attrs = (CXLMemHardPPRReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_PPR_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_HPPR_MAINT_SUBCLASS,
+ .hppr_flags = CXL_MEMDEV_HPPR_DPA_SUPPORT_FLAG,
+ .restriction_flags = 0,
+ .hppr_op_mode = 0
+ };
+
return;
err_release_cdat:
@@ -1830,6 +1854,21 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
return -EINVAL;
}
}
+
+static void cxl_maintenance_insert(CXLType3Dev *ct3d, uint64_t dpa)
+{
+ CXLMaintenance *ent, *m;
+
+ QLIST_FOREACH(ent, &ct3d->maint_list, node) {
+ if (dpa == ent->dpa) {
+ return;
+ }
+ }
+ m = g_new0(CXLMaintenance, 1);
+ m->dpa = dpa;
+ QLIST_INSERT_HEAD(&ct3d->maint_list, m, node);
+}
+
/* Component ID is device specific. Define this as a string. */
void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
uint32_t flags, uint8_t class,
@@ -1871,6 +1910,11 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
error_setg(errp, "Unhandled error log type");
return;
}
+ if (rc == CXL_EVENT_TYPE_INFO &&
+ (flags & CXL_EVENT_REC_FLAGS_MAINT_NEEDED)) {
+ error_setg(errp, "Informational event cannot require maintenance");
+ return;
+ }
enc_log = rc;
memset(&gem, 0, sizeof(gem));
@@ -1914,6 +1958,10 @@ void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
cxl_event_irq_assert(ct3d);
}
+
+ if (flags & CXL_EVENT_REC_FLAGS_MAINT_NEEDED) {
+ cxl_maintenance_insert(ct3d, dpa);
+ }
}
#define CXL_DRAM_VALID_CHANNEL BIT(0)
@@ -1974,6 +2022,11 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
error_setg(errp, "Unhandled error log type");
return;
}
+ if (rc == CXL_EVENT_TYPE_INFO &&
+ (flags & CXL_EVENT_REC_FLAGS_MAINT_NEEDED)) {
+ error_setg(errp, "Informational event cannot require maintenance");
+ return;
+ }
enc_log = rc;
memset(&dram, 0, sizeof(dram));
@@ -2052,6 +2105,10 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint32_t flags
if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
cxl_event_irq_assert(ct3d);
}
+
+ if (flags & CXL_EVENT_REC_FLAGS_MAINT_NEEDED) {
+ cxl_maintenance_insert(ct3d, dpa);
+ }
}
#define CXL_MMER_VALID_COMPONENT BIT(0)
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index fc6ec82670..b0e13b02b5 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -491,6 +491,12 @@ static inline bool cxl_dev_media_disabled(CXLDeviceState *cxl_dstate)
uint64_t dev_status_reg = cxl_dstate->mbox_reg_state64[R_CXL_MEM_DEV_STS];
return FIELD_EX64(dev_status_reg, CXL_MEM_DEV_STS, MEDIA_STATUS) == 0x3;
}
+
+static inline bool maintenance_running(CXLCCI *cci)
+{
+ return cci->bg.runtime && cci->bg.opcode == 0x0600;
+}
+
static inline bool scan_media_running(CXLCCI *cci)
{
return !!cci->bg.runtime && cci->bg.opcode == 0x4304;
@@ -504,6 +510,13 @@ typedef struct CXLError {
typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
+typedef struct CXLMaintenance {
+ uint64_t dpa;
+ QLIST_ENTRY(CXLMaintenance) node;
+} CXLMaintenance;
+
+typedef QLIST_HEAD(, CXLMaintenance) CXLMaintenanceList;
+
typedef struct CXLPoison {
uint64_t start, length;
uint8_t type;
@@ -516,6 +529,73 @@ typedef struct CXLPoison {
typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
#define CXL_POISON_LIST_LIMIT 256
+/* CXL memory Post Package Repair control attributes */
+#define CXL_MEMDEV_PPR_MAINT_CLASS 0x1
+#define CXL_MEMDEV_SPPR_MAINT_SUBCLASS 0x0
+#define CXL_MEMDEV_HPPR_MAINT_SUBCLASS 0x1
+
+/*
+ * CXL r3.2 section 8.2.10.7.2.1, Table 8-128 and 8-129:
+ * sPPR Feature Readable/Writable Attributes
+ */
+typedef struct CXLMemSoftPPRReadAttrs {
+ uint8_t max_maint_latency;
+ uint16_t op_caps;
+ uint16_t op_mode;
+ uint8_t maint_op_class;
+ uint8_t maint_op_subclass;
+ uint8_t rsvd[9];
+ uint8_t sppr_flags;
+ uint16_t restriction_flags;
+ uint8_t sppr_op_mode;
+} QEMU_PACKED CXLMemSoftPPRReadAttrs;
+
+typedef struct CXLMemSoftPPRWriteAttrs {
+ uint16_t op_mode;
+ uint8_t sppr_op_mode;
+} QEMU_PACKED CXLMemSoftPPRWriteAttrs;
+
+#define CXL_MEMDEV_SPPR_GET_FEATURE_VERSION 0x03
+#define CXL_MEMDEV_SPPR_SET_FEATURE_VERSION 0x03
+#define CXL_MEMDEV_SPPR_DPA_SUPPORT_FLAG BIT(0)
+#define CXL_MEMDEV_SPPR_NIBBLE_SUPPORT_FLAG BIT(1)
+#define CXL_MEMDEV_SPPR_MEM_SPARING_EV_REC_CAP_FLAG BIT(2)
+#define CXL_MEMDEV_SPPR_DEV_INITIATED_AT_BOOT_CAP_FLAG BIT(3)
+
+#define CXL_MEMDEV_SPPR_OP_MODE_MEM_SPARING_EV_REC_EN BIT(0)
+#define CXL_MEMDEV_SPPR_OP_MODE_DEV_INITIATED_AT_BOOT BIT(1)
+
+/*
+ * CXL r3.2 section 8.2.10.7.2.2, Table 8-131 and 8-132:
+ * hPPR Feature Readable/Writable Attributes
+ */
+typedef struct CXLMemHardPPRReadAttrs {
+ uint8_t max_maint_latency;
+ uint16_t op_caps;
+ uint16_t op_mode;
+ uint8_t maint_op_class;
+ uint8_t maint_op_subclass;
+ uint8_t rsvd[9];
+ uint8_t hppr_flags;
+ uint16_t restriction_flags;
+ uint8_t hppr_op_mode;
+} QEMU_PACKED CXLMemHardPPRReadAttrs;
+
+typedef struct CXLMemHardPPRWriteAttrs {
+ uint16_t op_mode;
+ uint8_t hppr_op_mode;
+} QEMU_PACKED CXLMemHardPPRWriteAttrs;
+
+#define CXL_MEMDEV_HPPR_GET_FEATURE_VERSION 0x03
+#define CXL_MEMDEV_HPPR_SET_FEATURE_VERSION 0x03
+#define CXL_MEMDEV_HPPR_DPA_SUPPORT_FLAG BIT(0)
+#define CXL_MEMDEV_HPPR_NIBBLE_SUPPORT_FLAG BIT(1)
+#define CXL_MEMDEV_HPPR_MEM_SPARING_EVENT_REC_CAP_FLAG BIT(2)
+#define CXL_MEMDEV_HPPR_DEV_INITIATED_AT_BOOT_CAP_FLAG BIT(3)
+
+#define CXL_MEMDEV_HPPR_OP_MODE_MEM_SPARING_EV_REC_EN BIT(0)
+#define CXL_MEMDEV_HPPR_OP_MODE_DEV_INITIATED_AT_BOOT BIT(1)
+
/* CXL memory device patrol scrub control attributes */
typedef struct CXLMemPatrolScrubReadAttrs {
uint8_t scrub_cycle_cap;
@@ -686,6 +766,9 @@ struct CXLType3Dev {
/* Error injection */
CXLErrorList error_list;
+ /* Keep track of maintenance requests */
+ CXLMaintenanceList maint_list;
+
/* Poison Injection - cache */
CXLPoisonList poison_list;
unsigned int poison_list_cnt;
@@ -698,6 +781,11 @@ struct CXLType3Dev {
CXLSetFeatureInfo set_feat_info;
+ /* PPR control attributes */
+ CXLMemSoftPPRReadAttrs soft_ppr_attrs;
+ CXLMemSoftPPRWriteAttrs soft_ppr_wr_attrs;
+ CXLMemHardPPRReadAttrs hard_ppr_attrs;
+ CXLMemHardPPRWriteAttrs hard_ppr_wr_attrs;
/* Patrol scrub control attributes */
CXLMemPatrolScrubReadAttrs patrol_scrub_attrs;
CXLMemPatrolScrubWriteAttrs patrol_scrub_wr_attrs;
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2 6/7] hw/cxl: Add Maintenance support
2025-06-19 15:16 ` [PATCH v2 6/7] hw/cxl: Add Maintenance support shiju.jose--- via
@ 2025-06-20 14:40 ` Jonathan Cameron via
0 siblings, 0 replies; 14+ messages in thread
From: Jonathan Cameron via @ 2025-06-20 14:40 UTC (permalink / raw)
To: shiju.jose; +Cc: qemu-devel, linux-cxl, tanxiaofei, prime.zeng, linuxarm
On Thu, 19 Jun 2025 16:16:18 +0100
<shiju.jose@huawei.com> wrote:
> From: Davidlohr Bueso <dave@stgolabs.net>
>
> This adds initial support for the Maintenance command, specifically
> the soft and hard PPR operations on a dpa. The implementation allows
> to be executed at runtime, therefore semantically, data is retained
> and CXL.mem requests are correctly processed.
>
> Keep track of the requests upon a general media or DRAM event.
>
> Post Package Repair (PPR) maintenance operations may be supported by CXL
> devices that implement CXL.mem protocol. A PPR maintenance operation
> requests the CXL device to perform a repair operation on its media.
> For example, a CXL device with DRAM components that support PPR features
> may implement PPR Maintenance operations. DRAM components may support two
> types of PPR, hard PPR (hPPR), for a permanent row repair, and Soft PPR
> (sPPR), for a temporary row repair. Soft PPR is much faster than hPPR,
> but the repair is lost with a power cycle.
>
> CXL spec 3.2 section 8.2.10.7.1.2 describes the device's sPPR (soft PPR)
> maintenance operation and section 8.2.10.7.1.3 describes the device's
> hPPR (hard PPR) maintenance operation feature.
>
> CXL spec 3.2 section 8.2.10.7.2.1 describes the sPPR feature discovery and
> configuration.
>
> CXL spec 3.2 section 8.2.10.7.2.2 describes the hPPR feature discovery and
> configuration.
>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Hi.
Various minor comments inline.
> ---
> hw/cxl/cxl-mailbox-utils.c | 191 ++++++++++++++++++++++++++++++++++++
> hw/mem/cxl_type3.c | 57 +++++++++++
> include/hw/cxl/cxl_device.h | 88 +++++++++++++++++
> 3 files changed, 336 insertions(+)
>
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 83668d7d93..87c5df83b0 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -89,6 +89,8 @@ enum {
> #define GET_SUPPORTED 0x0
> #define GET_FEATURE 0x1
> #define SET_FEATURE 0x2
> + MAINTENANCE = 0x06,
> + #define PERFORM 0x0
> IDENTIFY = 0x40,
> #define MEMORY_DEVICE 0x0
> CCLS = 0x41,
> @@ -1239,6 +1241,8 @@ typedef struct CXLSupportedFeatureEntry {
> enum CXL_SUPPORTED_FEATURES_LIST {
> CXL_FEATURE_PATROL_SCRUB = 0,
> CXL_FEATURE_ECS,
> + CXL_FEATURE_SPPR,
> + CXL_FEATURE_HPPR,
> CXL_FEATURE_MAX
> };
> @@ -1441,6 +1499,26 @@ static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd,
> memcpy(payload_out,
> (uint8_t *)&ct3d->ecs_attrs + get_feature->offset,
> bytes_to_copy);
> + } else if (qemu_uuid_is_equal(&get_feature->uuid, &soft_ppr_uuid)) {
> + if (get_feature->offset >= sizeof(CXLMemSoftPPRReadAttrs)) {
> + return CXL_MBOX_INVALID_INPUT;
> + }
> + bytes_to_copy = sizeof(CXLMemSoftPPRReadAttrs) -
> + get_feature->offset;
> + bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
> + memcpy(payload_out,
> + (uint8_t *)&ct3d->soft_ppr_attrs + get_feature->offset,
> + bytes_to_copy);
> + } else if (qemu_uuid_is_equal(&get_feature->uuid, &hard_ppr_uuid)) {
> + if (get_feature->offset >= sizeof(CXLMemHardPPRReadAttrs)) {
> + return CXL_MBOX_INVALID_INPUT;
> + }
> + bytes_to_copy = sizeof(CXLMemHardPPRReadAttrs) -
> + get_feature->offset;
This indent style doesn't match what we do elsewhere. Either put it
after the = or 4 spaces in from the line above.
> + bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
> + memcpy(payload_out,
> + (uint8_t *)&ct3d->hard_ppr_attrs + get_feature->offset,
> + bytes_to_copy);
> } else {
> return CXL_MBOX_UNSUPPORTED;
> }
> @@ -1552,6 +1630,42 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> ct3d->ecs_wr_attrs.fru_attrs[count].ecs_config & 0x1F;
> }
> }
> + } else if (qemu_uuid_is_equal(&hdr->uuid, &soft_ppr_uuid)) {
> + if (hdr->version != CXL_MEMDEV_SPPR_SET_FEATURE_VERSION) {
> + return CXL_MBOX_UNSUPPORTED;
> + }
> +
> + CXLMemSoftPPRSetFeature *sppr_set_feature = (void *)payload_in;
> + CXLMemSoftPPRWriteAttrs *sppr_write_attrs =
> + &sppr_set_feature->feat_data;
> + memcpy((uint8_t *)&ct3d->soft_ppr_wr_attrs + hdr->offset,
> + sppr_write_attrs,
> + bytes_to_copy);
> + set_feat_info->data_size += bytes_to_copy;
> +
> + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
> + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
> + ct3d->soft_ppr_attrs.op_mode = ct3d->soft_ppr_wr_attrs.op_mode;
> + ct3d->soft_ppr_attrs.sppr_op_mode = ct3d->soft_ppr_wr_attrs.sppr_op_mode;
> + }
> + } else if (qemu_uuid_is_equal(&hdr->uuid, &hard_ppr_uuid)) {
> + if (hdr->version != CXL_MEMDEV_HPPR_SET_FEATURE_VERSION) {
> + return CXL_MBOX_UNSUPPORTED;
> + }
> +
> + CXLMemHardPPRSetFeature *hppr_set_feature = (void *)payload_in;
> + CXLMemHardPPRWriteAttrs *hppr_write_attrs =
> + &hppr_set_feature->feat_data;
As in the earlier patch - I'd just do this before checking hdr->version.
It should be safe as we are just casting to potentially wrong structure
definitions, not using those until after the header check.
> + memcpy((uint8_t *)&ct3d->hard_ppr_wr_attrs + hdr->offset,
> + hppr_write_attrs,
> + bytes_to_copy);
> + set_feat_info->data_size += bytes_to_copy;
> +
> + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
> + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
> + ct3d->hard_ppr_attrs.op_mode = ct3d->hard_ppr_wr_attrs.op_mode;
> + ct3d->hard_ppr_attrs.hppr_op_mode = ct3d->hard_ppr_wr_attrs.hppr_op_mode;
> + }
> } else {
> return CXL_MBOX_UNSUPPORTED;
> }
> @@ -1564,7 +1678,12 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size);
> } else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) {
> memset(&ct3d->ecs_wr_attrs, 0, set_feat_info->data_size);
> + } else if (qemu_uuid_is_equal(&hdr->uuid, &soft_ppr_uuid)) {
> + memset(&ct3d->soft_ppr_wr_attrs, 0, set_feat_info->data_size);
> + } else if (qemu_uuid_is_equal(&hdr->uuid, &hard_ppr_uuid)) {
> + memset(&ct3d->hard_ppr_wr_attrs, 0, set_feat_info->data_size);
> }
> +
> set_feat_info->data_transfer_flag = 0;
> set_feat_info->data_saved_across_reset = false;
> set_feat_info->data_offset = 0;
> @@ -1574,6 +1693,72 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
> return CXL_MBOX_SUCCESS;
> }
>
> +static void cxl_perform_ppr(CXLType3Dev *ct3d, uint64_t dpa)
> +{
> + CXLMaintenance *ent, *next;
> +
> + QLIST_FOREACH_SAFE(ent, &ct3d->maint_list, node, next) {
> + if (dpa == ent->dpa) {
> + QLIST_REMOVE(ent, node);
> + g_free(ent);
> + break;
> + }
> + }
> + /* TODO: produce a Memory Sparing Event Record */
This todo is one we should resolve as it means we can then
comply with the spec that requires these to be possible for the feature
version we are claiming to support. They might not be turned on though
so we'll need to check for that as well.
> +}
> +
> +/* CXL r3.2 section 8.2.10.7.1 - Perform Maintenance (Opcode 0600h) */
> +#define MAINTENANCE_PPR_QUERY_RESOURCES BIT(0)
> +
> +static CXLRetCode cmd_media_perform_maintenance(const struct cxl_cmd *cmd,
> + uint8_t *payload_in, size_t len_in,
> + uint8_t *payload_out, size_t *len_out,
> + CXLCCI *cci)
> +{
> + struct {
> + uint8_t class;
> + uint8_t subclass;
> + union {
> + struct {
> + uint8_t flags;
> + uint64_t dpa;
> + uint8_t nibble_mask[3];
> + } QEMU_PACKED ppr;
> + };
> + } QEMU_PACKED *maint_in = (void *)payload_in;
> + CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +
> + if (maintenance_running(cci)) {
> + return CXL_MBOX_BUSY;
> + }
> +
> + switch (maint_in->class) {
> + case 0:
> + return CXL_MBOX_SUCCESS; /* nop */
> + case 1:
There are already defines for these and the subclass. Good
to use them here as well. Might need to add a define for 0 as well.
> + if (maint_in->ppr.flags & MAINTENANCE_PPR_QUERY_RESOURCES) {
> + return CXL_MBOX_SUCCESS;
> + }
> +
> + switch (maint_in->subclass) {
> + case 0: /* soft ppr */
> + case 1: /* hard ppr */
> + cxl_perform_ppr(ct3d, ldq_le_p(&maint_in->ppr.dpa));
> + return CXL_MBOX_SUCCESS;
> + default:
> + return CXL_MBOX_INVALID_INPUT;
> + }
> + break;
> + case 2:
> + case 3:
> + return CXL_MBOX_UNSUPPORTED;
That's interesting. I'm not sure we can differentiate between unsupported
and invalid as it depends which spec people are reading + what ECNs etc.
So I'd return CXL_MBOX_INVALID_INPUT for these as well.
The reasoning being that Unsupported is specifically that the command
is not supported, not particular parameters like these.
> + default:
> + return CXL_MBOX_INVALID_INPUT;
> + }
> +
> + return CXL_MBOX_SUCCESS;
> +}
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 81774bf4b9..965ad3402d 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1205,6 +1205,30 @@ void ct3_realize(PCIDevice *pci_dev, Error **errp)
> ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
> }
>
> + /* Set default values for soft-PPR attributes */
> + ct3d->soft_ppr_attrs = (CXLMemSoftPPRReadAttrs) {
> + .max_maint_latency = 0x5, /* 100 ms */
> + .op_caps = 0, /* require host involvement */
> + .op_mode = 0,
> + .maint_op_class = CXL_MEMDEV_PPR_MAINT_CLASS,
> + .maint_op_subclass = CXL_MEMDEV_SPPR_MAINT_SUBCLASS,
> + .sppr_flags = CXL_MEMDEV_SPPR_DPA_SUPPORT_FLAG,
Also CXL_MEMDEV_SPPR_MEM_SPARING_EV_REC_CAP I think
as it is required for version 2 and above.
There is a todo comment so maybe fine to leave for now.
Hopefully no one assumes this is set based on the version alone.
Perhaps that's the next thing to enable, as if we do put
out the records I think this feature could be considered fully
emulated whereas now it is sort of half done.
> + .restriction_flags = 0,
> + .sppr_op_mode = 0
> + };
> +
> + /* Set default value for hard-PPR attributes */
> + ct3d->hard_ppr_attrs = (CXLMemHardPPRReadAttrs) {
> + .max_maint_latency = 0x5, /* 100 ms */
> + .op_caps = 0, /* require host involvement */
> + .op_mode = 0,
> + .maint_op_class = CXL_MEMDEV_PPR_MAINT_CLASS,
> + .maint_op_subclass = CXL_MEMDEV_HPPR_MAINT_SUBCLASS,
> + .hppr_flags = CXL_MEMDEV_HPPR_DPA_SUPPORT_FLAG,
As above. I think we need to send the event records on completion
if they are enabled.
> + .restriction_flags = 0,
> + .hppr_op_mode = 0
> + };
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 7/7] hw/cxl: Add emulation for memory sparing control feature
2025-06-19 15:16 [PATCH v2 0/7] hw/cxl: Update CXL events to rev3.2 and add maintenance support for memory repair features shiju.jose--- via
` (5 preceding siblings ...)
2025-06-19 15:16 ` [PATCH v2 6/7] hw/cxl: Add Maintenance support shiju.jose--- via
@ 2025-06-19 15:16 ` shiju.jose--- via
2025-06-20 14:48 ` Jonathan Cameron via
6 siblings, 1 reply; 14+ messages in thread
From: shiju.jose--- via @ 2025-06-19 15:16 UTC (permalink / raw)
To: qemu-devel, linux-cxl, jonathan.cameron
Cc: tanxiaofei, prime.zeng, linuxarm, shiju.jose
From: Shiju Jose <shiju.jose@huawei.com>
Memory sparing is defined as a repair function that replaces a portion of
memory with a portion of functional memory at that same DPA. The subclasses
for this operation vary in terms of the scope of the sparing being
performed. The Cacheline sparing subclass refers to a sparing action that
can replace a full cacheline. Row sparing is provided as an alternative to
PPR sparing functions and its scope is that of a single DDR row. Bank
sparing allows an entire bank to be replaced. Rank sparing is defined as
an operation in which an entire DDR rank is replaced.
Memory sparing maintenance operations may be supported by CXL devices
that implement CXL.mem protocol. A sparing maintenance operation requests
the CXL device to perform a repair operation on its media.
For example, a CXL device with DRAM components that support memory sparing
features may implement sparing Maintenance operations.
The host may issue a query command by setting Query Resources flag in the
Input Payload (CXL Spec 3.2 Table 8-120) to determine availability of
sparing resources for a given address. In response to a query request,
the device shall report the resource availability by producing the Memory
Sparing Event Record (CXL Spec 3.2 Table 8-60) in which the Channel, Rank,
Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields are a copy
of the values specified in the request.
During the execution of a sparing maintenance operation, a CXL memory device:
- May or may not retain data
- May or may not be able to process CXL.mem requests correctly.
These CXL memory device capabilities are specified by restriction flags
in the memory sparing feature readable attributes.
When a CXL device identifies an error on a memory component, the device
may inform the host about the need for a memory sparing maintenance
operation by using a DRAM event record, in which the 'maintenance needed'
flag may be set. The event record contains some of the DPA, Channel, Rank,
Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields that
should be repaired. The userspace tool requests a maintenance operation
if the 'maintenance needed' flag is set in the CXL DRAM error record.
CXL spec 3.2 section 8.2.10.7.2.3 describes the memory sparing feature
discovery and configuration.
CXL spec 3.2 section 8.2.10.7.1.4 describes the device's memory sparing
maintenance operation feature.
Add emulation for CXL memory device memory sparing control feature
and memory sparing maintenance operation command.
TODO: The following tasks are pending, though it is not yet clear how to implement them.
1. Add emulation for memory sparing maintenance operation.
2. On query, report memory sparing resource availability in a memory sparing
event record if required in the future.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
hw/cxl/cxl-mailbox-utils.c | 295 ++++++++++++++++++++++++++++++++++++
hw/mem/cxl_type3.c | 44 ++++++
include/hw/cxl/cxl_device.h | 40 +++++
3 files changed, 379 insertions(+)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 87c5df83b0..a97c0b2757 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1243,6 +1243,10 @@ enum CXL_SUPPORTED_FEATURES_LIST {
CXL_FEATURE_ECS,
CXL_FEATURE_SPPR,
CXL_FEATURE_HPPR,
+ CXL_FEATURE_CACHELINE_SPARING,
+ CXL_FEATURE_ROW_SPARING,
+ CXL_FEATURE_BANK_SPARING,
+ CXL_FEATURE_RANK_SPARING,
CXL_FEATURE_MAX
};
@@ -1331,6 +1335,35 @@ typedef struct CXLMemECSSetFeature {
CXLMemECSWriteAttrs feat_data[];
} QEMU_PACKED QEMU_ALIGNED(16) CXLMemECSSetFeature;
+/*
+ * CXL r3.2 section 8.2.10.7.2.3:
+ * Memory Sparing Features Discovery and Configuration
+ */
+static const QemuUUID cacheline_sparing_uuid = {
+ .data = UUID(0x96C33386, 0x91dd, 0x44c7, 0x9e, 0xcb,
+ 0xfd, 0xaf, 0x65, 0x03, 0xba, 0xc4)
+};
+
+static const QemuUUID row_sparing_uuid = {
+ .data = UUID(0x450ebf67, 0xb135, 0x4f97, 0xa4, 0x98,
+ 0xc2, 0xd5, 0x7f, 0x27, 0x9b, 0xed)
+};
+
+static const QemuUUID bank_sparing_uuid = {
+ .data = UUID(0x78b79636, 0x90ac, 0x4b64, 0xa4, 0xef,
+ 0xfa, 0xac, 0x5d, 0x18, 0xa8, 0x63)
+};
+
+static const QemuUUID rank_sparing_uuid = {
+ .data = UUID(0x34dbaff5, 0x0552, 0x4281, 0x8f, 0x76,
+ 0xda, 0x0b, 0x5e, 0x7a, 0x76, 0xa7)
+};
+
+typedef struct CXLMemSparingSetFeature {
+ CXLSetFeatureInHeader hdr;
+ CXLMemSparingWriteAttrs feat_data;
+} QEMU_PACKED QEMU_ALIGNED(16) CXLMemSparingSetFeature;
+
/* CXL r3.1 section 8.2.9.6.1: Get Supported Features (Opcode 0500h) */
static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd,
uint8_t *payload_in,
@@ -1431,6 +1464,70 @@ static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd,
CXL_FEAT_ENTRY_SFE_CEL_VALID,
};
break;
+ case CXL_FEATURE_CACHELINE_SPARING:
+ /* Fill supported feature entry for Cacheline Memory Sparing */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = cacheline_sparing_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemSparingReadAttrs),
+ .set_feat_size = sizeof(CXLMemSparingWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_SPARING_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_SPARING_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ case CXL_FEATURE_ROW_SPARING:
+ /* Fill supported feature entry for Row Memory Sparing */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = row_sparing_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemSparingReadAttrs),
+ .set_feat_size = sizeof(CXLMemSparingWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_SPARING_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_SPARING_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ case CXL_FEATURE_BANK_SPARING:
+ /* Fill supported feature entry for Bank Memory Sparing */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = bank_sparing_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemSparingReadAttrs),
+ .set_feat_size = sizeof(CXLMemSparingWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_SPARING_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_SPARING_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ case CXL_FEATURE_RANK_SPARING:
+ /* Fill supported feature entry for Rank Memory Sparing */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = rank_sparing_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemSparingReadAttrs),
+ .set_feat_size = sizeof(CXLMemSparingWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE |
+ CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION,
+ .get_feat_version = CXL_MEMDEV_SPARING_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_SPARING_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
default:
__builtin_unreachable();
}
@@ -1519,6 +1616,47 @@ static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd,
memcpy(payload_out,
(uint8_t *)&ct3d->hard_ppr_attrs + get_feature->offset,
bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid,
+ &cacheline_sparing_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemSparingReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemSparingReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->cacheline_sparing_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &row_sparing_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemSparingReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemSparingReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->row_sparing_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &bank_sparing_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemSparingReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemSparingReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->bank_sparing_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &rank_sparing_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemSparingReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemSparingReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->rank_sparing_attrs + get_feature->offset,
+ bytes_to_copy);
} else {
return CXL_MBOX_UNSUPPORTED;
}
@@ -1666,6 +1804,78 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
ct3d->hard_ppr_attrs.op_mode = ct3d->hard_ppr_wr_attrs.op_mode;
ct3d->hard_ppr_attrs.hppr_op_mode = ct3d->hard_ppr_wr_attrs.hppr_op_mode;
}
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &cacheline_sparing_uuid)) {
+ if (hdr->version != CXL_MEMDEV_SPARING_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemSparingSetFeature *mem_sparing_set_feature = (void *)payload_in;
+ CXLMemSparingWriteAttrs *mem_sparing_write_attrs =
+ &mem_sparing_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->cacheline_sparing_wr_attrs + hdr->offset,
+ mem_sparing_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->cacheline_sparing_attrs.op_mode =
+ ct3d->cacheline_sparing_wr_attrs.op_mode;
+ }
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &row_sparing_uuid)) {
+ if (hdr->version != CXL_MEMDEV_SPARING_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemSparingSetFeature *mem_sparing_set_feature = (void *)payload_in;
+ CXLMemSparingWriteAttrs *mem_sparing_write_attrs =
+ &mem_sparing_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->row_sparing_wr_attrs + hdr->offset,
+ mem_sparing_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->row_sparing_attrs.op_mode =
+ ct3d->row_sparing_wr_attrs.op_mode;
+ }
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &bank_sparing_uuid)) {
+ if (hdr->version != CXL_MEMDEV_SPARING_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemSparingSetFeature *mem_sparing_set_feature = (void *)payload_in;
+ CXLMemSparingWriteAttrs *mem_sparing_write_attrs =
+ &mem_sparing_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->bank_sparing_wr_attrs + hdr->offset,
+ mem_sparing_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->bank_sparing_attrs.op_mode =
+ ct3d->bank_sparing_wr_attrs.op_mode;
+ }
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &rank_sparing_uuid)) {
+ if (hdr->version != CXL_MEMDEV_SPARING_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ CXLMemSparingSetFeature *mem_sparing_set_feature = (void *)payload_in;
+ CXLMemSparingWriteAttrs *mem_sparing_write_attrs =
+ &mem_sparing_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->rank_sparing_wr_attrs + hdr->offset,
+ mem_sparing_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->rank_sparing_attrs.op_mode =
+ ct3d->rank_sparing_wr_attrs.op_mode;
+ }
} else {
return CXL_MBOX_UNSUPPORTED;
}
@@ -1682,6 +1892,15 @@ static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
memset(&ct3d->soft_ppr_wr_attrs, 0, set_feat_info->data_size);
} else if (qemu_uuid_is_equal(&hdr->uuid, &hard_ppr_uuid)) {
memset(&ct3d->hard_ppr_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &cacheline_sparing_uuid)) {
+ memset(&ct3d->cacheline_sparing_wr_attrs, 0,
+ set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &row_sparing_uuid)) {
+ memset(&ct3d->row_sparing_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &bank_sparing_uuid)) {
+ memset(&ct3d->bank_sparing_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &rank_sparing_uuid)) {
+ memset(&ct3d->rank_sparing_wr_attrs, 0, set_feat_info->data_size);
}
set_feat_info->data_transfer_flag = 0;
@@ -1707,8 +1926,64 @@ static void cxl_perform_ppr(CXLType3Dev *ct3d, uint64_t dpa)
/* TODO: produce a Memory Sparing Event Record */
}
+typedef struct CXLMemSparingMaintInPayload {
+ uint8_t flags;
+ uint8_t channel;
+ uint8_t rank;
+ uint8_t nibble_mask[3];
+ uint8_t bank_group;
+ uint8_t bank;
+ uint8_t row[3];
+ uint16_t column;
+ uint8_t sub_channel;
+} QEMU_PACKED CXLMemSparingMaintInPayload;
+
+static CXLRetCode cxl_perform_mem_sparing(CXLType3Dev *ct3d, uint8_t sub_class,
+ void *maint_pi)
+{
+ CXLMemSparingMaintInPayload *sparing_maint_pi = (void *)maint_pi;
+
+ qemu_log_mask(LOG_UNIMP, "Memory Sparing Maintenance Input Payload...\n");
+ qemu_log_mask(LOG_UNIMP, "flags = %u\n", sparing_maint_pi->flags);
+ qemu_log_mask(LOG_UNIMP, "channel= %u\n", sparing_maint_pi->channel);
+ qemu_log_mask(LOG_UNIMP, "rank = %u\n", sparing_maint_pi->rank);
+ qemu_log_mask(LOG_UNIMP, "nibble_mask[0] = 0x%x\n",
+ sparing_maint_pi->nibble_mask[0]);
+ qemu_log_mask(LOG_UNIMP, "nibble_mask[1] = 0x%x\n",
+ sparing_maint_pi->nibble_mask[1]);
+ qemu_log_mask(LOG_UNIMP, "nibble_mask[2] = 0x%x\n",
+ sparing_maint_pi->nibble_mask[2]);
+ qemu_log_mask(LOG_UNIMP, "bank_group = %u\n",
+ sparing_maint_pi->bank_group);
+ qemu_log_mask(LOG_UNIMP, "bank = %u\n", sparing_maint_pi->bank);
+ qemu_log_mask(LOG_UNIMP, "row[0] = 0x%x\n", sparing_maint_pi->row[0]);
+ qemu_log_mask(LOG_UNIMP, "row[1] = 0x%x\n", sparing_maint_pi->row[1]);
+ qemu_log_mask(LOG_UNIMP, "row[2] = 0x%x\n", sparing_maint_pi->row[2]);
+ qemu_log_mask(LOG_UNIMP, "column = %u\n", sparing_maint_pi->column);
+ qemu_log_mask(LOG_UNIMP, "sub_channel = %u\n",
+ sparing_maint_pi->sub_channel);
+
+ switch (sub_class) {
+ case 0: /* Cacheline Memory Sparing */
+ qemu_log("Cacheline Memory Sparing\n");
+ return CXL_MBOX_SUCCESS;
+ case 1: /* Row Memory Sparing */
+ qemu_log("Row Memory Sparing\n");
+ return CXL_MBOX_SUCCESS;
+ case 2: /* Bank Memory Sparing */
+ qemu_log("Bank Memory Sparing\n");
+ return CXL_MBOX_SUCCESS;
+ case 3: /* Rank Memory Sparing */
+ qemu_log("Rank Memory Sparing\n");
+ return CXL_MBOX_SUCCESS;
+ default:
+ return CXL_MBOX_UNSUPPORTED;
+ }
+}
+
/* CXL r3.2 section 8.2.10.7.1 - Perform Maintenance (Opcode 0600h) */
#define MAINTENANCE_PPR_QUERY_RESOURCES BIT(0)
+#define MAINTENANCE_MEM_SPARING_QUERY_RESOURCES BIT(0)
static CXLRetCode cmd_media_perform_maintenance(const struct cxl_cmd *cmd,
uint8_t *payload_in, size_t len_in,
@@ -1724,6 +1999,7 @@ static CXLRetCode cmd_media_perform_maintenance(const struct cxl_cmd *cmd,
uint64_t dpa;
uint8_t nibble_mask[3];
} QEMU_PACKED ppr;
+ CXLMemSparingMaintInPayload mem_sparing_pi;
};
} QEMU_PACKED *maint_in = (void *)payload_in;
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
@@ -1750,6 +2026,25 @@ static CXLRetCode cmd_media_perform_maintenance(const struct cxl_cmd *cmd,
}
break;
case 2:
+ if (maint_in->ppr.flags & MAINTENANCE_MEM_SPARING_QUERY_RESOURCES) {
+ /*
+ * CXL r3.2 sect 8.2.10.7.1.4 - Memory Sparing Maintenance Operation
+ * TODO: Produce Memory Sparing Event record to report resource
+ * availability if needed. Not sure how to support this.
+ */
+ return CXL_MBOX_SUCCESS;
+ }
+
+ switch (maint_in->subclass) {
+ case 0: /* Cacheline Memory Sparing */
+ case 1: /* Row Memory Sparing */
+ case 2: /* Bank Memory Sparing */
+ case 3: /* Rank Memory Sparing */
+ return cxl_perform_mem_sparing(ct3d, maint_in->subclass,
+ &maint_in->mem_sparing_pi);
+ default:
+ return CXL_MBOX_INVALID_INPUT;
+ }
case 3:
return CXL_MBOX_UNSUPPORTED;
default:
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 965ad3402d..b1bafd6c67 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1229,6 +1229,50 @@ void ct3_realize(PCIDevice *pci_dev, Error **errp)
.hppr_op_mode = 0
};
+ /* Set default value for Cacheline Memory Sparing attributes */
+ ct3d->cacheline_sparing_attrs = (CXLMemSparingReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_SPARING_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_CACHELINE_SPARING_MAINT_SUBCLASS,
+ .restriction_flags = CXL_MEMDEV_HARD_SPARING_SUPPORT_FLAG |
+ CXL_MEMDEV_SOFT_SPARING_SUPPORT_FLAG,
+ };
+
+ /* Set default value for Row Memory Sparing attributes */
+ ct3d->row_sparing_attrs = (CXLMemSparingReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_SPARING_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_ROW_SPARING_MAINT_SUBCLASS,
+ .restriction_flags = CXL_MEMDEV_HARD_SPARING_SUPPORT_FLAG |
+ CXL_MEMDEV_SOFT_SPARING_SUPPORT_FLAG,
+ };
+
+ /* Set default value for Bank Memory Sparing attributes */
+ ct3d->bank_sparing_attrs = (CXLMemSparingReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_SPARING_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_BANK_SPARING_MAINT_SUBCLASS,
+ .restriction_flags = CXL_MEMDEV_HARD_SPARING_SUPPORT_FLAG |
+ CXL_MEMDEV_SOFT_SPARING_SUPPORT_FLAG,
+ };
+
+ /* Set default value for Rank Memory Sparing attributes */
+ ct3d->rank_sparing_attrs = (CXLMemSparingReadAttrs) {
+ .max_maint_latency = 0x5, /* 100 ms */
+ .op_caps = 0, /* require host involvement */
+ .op_mode = 0,
+ .maint_op_class = CXL_MEMDEV_SPARING_MAINT_CLASS,
+ .maint_op_subclass = CXL_MEMDEV_RANK_SPARING_MAINT_SUBCLASS,
+ .restriction_flags = CXL_MEMDEV_HARD_SPARING_SUPPORT_FLAG |
+ CXL_MEMDEV_SOFT_SPARING_SUPPORT_FLAG,
+ };
+
return;
err_release_cdat:
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index b0e13b02b5..34614792a3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -645,6 +645,37 @@ typedef struct CXLMemECSWriteAttrs {
CXLMemECSFRUWriteAttrs fru_attrs[CXL_ECS_NUM_MEDIA_FRUS];
} QEMU_PACKED CXLMemECSWriteAttrs;
+/* CXL Memory Sparing Repair control attributes */
+#define CXL_MEMDEV_SPARING_MAINT_CLASS 0x2
+#define CXL_MEMDEV_CACHELINE_SPARING_MAINT_SUBCLASS 0x0
+#define CXL_MEMDEV_ROW_SPARING_MAINT_SUBCLASS 0x1
+#define CXL_MEMDEV_BANK_SPARING_MAINT_SUBCLASS 0x2
+#define CXL_MEMDEV_RANK_SPARING_MAINT_SUBCLASS 0x3
+
+/*
+ * CXL r3.2 section 8.2.10.7.2.3, Table 8-134 and 8-135:
+ * Memory Sparing Feature Readable/Writable Attributes
+ */
+typedef struct CXLMemSparingReadAttrs {
+ uint8_t max_maint_latency;
+ uint16_t op_caps;
+ uint16_t op_mode;
+ uint8_t maint_op_class;
+ uint8_t maint_op_subclass;
+ uint8_t rsvd[10];
+ uint16_t restriction_flags;
+} QEMU_PACKED CXLMemSparingReadAttrs;
+
+typedef struct CXLMemSparingWriteAttrs {
+ uint16_t op_mode;
+} QEMU_PACKED CXLMemSparingWriteAttrs;
+
+#define CXL_MEMDEV_SPARING_GET_FEATURE_VERSION 0x01
+#define CXL_MEMDEV_SPARING_SET_FEATURE_VERSION 0x01
+#define CXL_MEMDEV_SPARING_SAFE_IN_USE_FLAG BIT(0)
+#define CXL_MEMDEV_HARD_SPARING_SUPPORT_FLAG BIT(1)
+#define CXL_MEMDEV_SOFT_SPARING_SUPPORT_FLAG BIT(2)
+
#define DCD_MAX_NUM_REGION 8
typedef struct CXLDCExtentRaw {
@@ -792,6 +823,15 @@ struct CXLType3Dev {
/* ECS control attributes */
CXLMemECSReadAttrs ecs_attrs;
CXLMemECSWriteAttrs ecs_wr_attrs;
+ /* Memory Sparing control attributes */
+ CXLMemSparingReadAttrs cacheline_sparing_attrs;
+ CXLMemSparingWriteAttrs cacheline_sparing_wr_attrs;
+ CXLMemSparingReadAttrs row_sparing_attrs;
+ CXLMemSparingWriteAttrs row_sparing_wr_attrs;
+ CXLMemSparingReadAttrs bank_sparing_attrs;
+ CXLMemSparingWriteAttrs bank_sparing_wr_attrs;
+ CXLMemSparingReadAttrs rank_sparing_attrs;
+ CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
struct dynamic_capacity {
HostMemoryBackend *host_dc;
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2 7/7] hw/cxl: Add emulation for memory sparing control feature
2025-06-19 15:16 ` [PATCH v2 7/7] hw/cxl: Add emulation for memory sparing control feature shiju.jose--- via
@ 2025-06-20 14:48 ` Jonathan Cameron via
0 siblings, 0 replies; 14+ messages in thread
From: Jonathan Cameron via @ 2025-06-20 14:48 UTC (permalink / raw)
To: shiju.jose; +Cc: qemu-devel, linux-cxl, tanxiaofei, prime.zeng, linuxarm
On Thu, 19 Jun 2025 16:16:19 +0100
<shiju.jose@huawei.com> wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
>
> Memory sparing is defined as a repair function that replaces a portion of
> memory with a portion of functional memory at that same DPA. The subclasses
> for this operation vary in terms of the scope of the sparing being
> performed. The Cacheline sparing subclass refers to a sparing action that
> can replace a full cacheline. Row sparing is provided as an alternative to
> PPR sparing functions and its scope is that of a single DDR row. Bank
> sparing allows an entire bank to be replaced. Rank sparing is defined as
> an operation in which an entire DDR rank is replaced.
>
> Memory sparing maintenance operations may be supported by CXL devices
> that implement CXL.mem protocol. A sparing maintenance operation requests
> the CXL device to perform a repair operation on its media.
> For example, a CXL device with DRAM components that support memory sparing
> features may implement sparing Maintenance operations.
>
> The host may issue a query command by setting Query Resources flag in the
> Input Payload (CXL Spec 3.2 Table 8-120) to determine availability of
> sparing resources for a given address. In response to a query request,
> the device shall report the resource availability by producing the Memory
> Sparing Event Record (CXL Spec 3.2 Table 8-60) in which the Channel, Rank,
> Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields are a copy
> of the values specified in the request.
>
> During the execution of a sparing maintenance operation, a CXL memory device:
> - May or may not retain data
> - May or may not be able to process CXL.mem requests correctly.
> These CXL memory device capabilities are specified by restriction flags
> in the memory sparing feature readable attributes.
>
> When a CXL device identifies an error on a memory component, the device
> may inform the host about the need for a memory sparing maintenance
> operation by using a DRAM event record, in which the 'maintenance needed'
> flag may be set. The event record contains some of the DPA, Channel, Rank,
> Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields that
> should be repaired. The userspace tool requests a maintenance operation
> if the 'maintenance needed' flag is set in the CXL DRAM error record.
>
> CXL spec 3.2 section 8.2.10.7.2.3 describes the memory sparing feature
> discovery and configuration.
>
> CXL spec 3.2 section 8.2.10.7.1.4 describes the device's memory sparing
> maintenance operation feature.
>
> Add emulation for CXL memory device memory sparing control feature
> and memory sparing maintenance operation command.
>
> TODO: Following are the pending tasks, though not sure how to implement.
> 1. Add emulation for memory sparing maintenance operation.
At most wipe the data if advertising that it won't be retained.
No need to actually do anything.
> 2. On query, report memory sparing resource availability in a memory sparing
> event record if required in the future.
I'd go with a per device, per type set of counters.
Let's just say we have X of them on a device - once used up they are gone.
No need to worry too much on what X is. Just pick some values so we have
something to test against. 4 may be enough for testing?
Some comments on previous patch feed through to here. A few more things inline.
Jonathan
>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> ---
> hw/cxl/cxl-mailbox-utils.c | 295 ++++++++++++++++++++++++++++++++++++
> hw/mem/cxl_type3.c | 44 ++++++
> include/hw/cxl/cxl_device.h | 40 +++++
> 3 files changed, 379 insertions(+)
>
>
> +typedef struct CXLMemSparingMaintInPayload {
> + uint8_t flags;
> + uint8_t channel;
> + uint8_t rank;
> + uint8_t nibble_mask[3];
> + uint8_t bank_group;
> + uint8_t bank;
> + uint8_t row[3];
> + uint16_t column;
> + uint8_t sub_channel;
> +} QEMU_PACKED CXLMemSparingMaintInPayload;
> +
> +static CXLRetCode cxl_perform_mem_sparing(CXLType3Dev *ct3d, uint8_t sub_class,
> + void *maint_pi)
> +{
> + CXLMemSparingMaintInPayload *sparing_maint_pi = (void *)maint_pi;
Odd spacing
> +
> + qemu_log_mask(LOG_UNIMP, "Memory Sparing Maintenance Input Payload...\n");
> + qemu_log_mask(LOG_UNIMP, "flags = %u\n", sparing_maint_pi->flags);
> + qemu_log_mask(LOG_UNIMP, "channel= %u\n", sparing_maint_pi->channel);
> + qemu_log_mask(LOG_UNIMP, "rank = %u\n", sparing_maint_pi->rank);
> + qemu_log_mask(LOG_UNIMP, "nibble_mask[0] = 0x%x\n",
> + sparing_maint_pi->nibble_mask[0]);
> + qemu_log_mask(LOG_UNIMP, "nibble_mask[1] = 0x%x\n",
> + sparing_maint_pi->nibble_mask[1]);
> + qemu_log_mask(LOG_UNIMP, "nibble_mask[2] = 0x%x\n",
> + sparing_maint_pi->nibble_mask[2]);
> + qemu_log_mask(LOG_UNIMP, "bank_group = %u\n",
> + sparing_maint_pi->bank_group);
> + qemu_log_mask(LOG_UNIMP, "bank = %u\n", sparing_maint_pi->bank);
> + qemu_log_mask(LOG_UNIMP, "row[0] = 0x%x\n", sparing_maint_pi->row[0]);
> + qemu_log_mask(LOG_UNIMP, "row[1] = 0x%x\n", sparing_maint_pi->row[1]);
> + qemu_log_mask(LOG_UNIMP, "row[2] = 0x%x\n", sparing_maint_pi->row[2]);
> + qemu_log_mask(LOG_UNIMP, "column = %u\n", sparing_maint_pi->column);
> + qemu_log_mask(LOG_UNIMP, "sub_channel = %u\n",
> + sparing_maint_pi->sub_channel);
LOG_UNIMP is a bit odd given there is nothing to do really.
> +
> + switch (sub_class) {
> + case 0: /* Cacheline Memory Sparing */
> + qemu_log("Cacheline Memory Sparing\n");
> + return CXL_MBOX_SUCCESS;
> + case 1: /* Row Memory Sparing */
> + qemu_log("Row Memory Sparing\n");
> + return CXL_MBOX_SUCCESS;
> + case 2: /* Bank Memory Sparing */
> + qemu_log("Bank Memory Sparing\n");
> + return CXL_MBOX_SUCCESS;
> + case 3: /* Rank Memory Sparing */
> + qemu_log("Rank Memory Sparing\n");
> + return CXL_MBOX_SUCCESS;
> + default:
> + return CXL_MBOX_UNSUPPORTED;
As previously - I think this should be invalid parameter, as the command is
supported, just not the sub_class.
> + }
> +}
> +
^ permalink raw reply [flat|nested] 14+ messages in thread