From: <shiju.jose@huawei.com>
To: <linux-edac@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
<mchehab@kernel.org>, <dave.jiang@intel.com>,
<dan.j.williams@intel.com>, <jonathan.cameron@huawei.com>,
<alison.schofield@intel.com>, <nifan.cxl@gmail.com>,
<vishal.l.verma@intel.com>, <ira.weiny@intel.com>,
<dave@stgolabs.net>
Cc: <linux-kernel@vger.kernel.org>, <linuxarm@huawei.com>,
<tanxiaofei@huawei.com>, <prime.zeng@hisilicon.com>,
<shiju.jose@huawei.com>
Subject: [PATCH v2 14/14] rasdaemon: ras-mc-ctl: Update logging of CXL memory module data to align with CXL spec rev 3.1
Date: Fri, 10 Jan 2025 12:26:40 +0000 [thread overview]
Message-ID: <20250110122641.1668-15-shiju.jose@huawei.com> (raw)
In-Reply-To: <20250110122641.1668-1-shiju.jose@huawei.com>
From: Shiju Jose <shiju.jose@huawei.com>
In CXL spec rev 3.1, section 8.2.9.2.1.3, Table 8-47, the Memory Module
Event Record has been updated with the following new fields, and with new
values for the Device Event Type and Device Health Information fields.
1. Validity Flags
2. Component Identifier
3. Device Event Sub-Type
This update modifies ras-mc-ctl to parse and log CXL memory module event
data stored in the RAS SQLite database table, reflecting the
specification changes introduced in revision 3.1.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 46 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 41 insertions(+), 5 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 86ea48e..ba48660 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1439,11 +1439,12 @@ sub get_cxl_transaction_type
return $types[$_[0]];
}
+# CXL rev 3.1 section 8.2.9.2.1.3; Table 8-47
sub get_cxl_dev_event_type
{
my @types;
- if ($_[0] < 0 || $_[0] > 5) {
+ if ($_[0] < 0 || $_[0] > 8) {
return "unknown-type";
}
@@ -1452,15 +1453,37 @@ sub get_cxl_dev_event_type
"Life Used Change",
"Temperature Change",
"Data Path Error",
- "LSA Error");
+ "LSA Error",
+ "Unrecoverable Internal Sideband Bus Error",
+ "Memory Media FRU Error",
+ "Power Management Fault");
return $types[$_[0]];
}
+# CXL rev 3.1 section 8.2.9.2.1.3; Table 8-47 (Device Event Sub-Type)
+sub get_cxl_dev_event_sub_type
+{
+ my @types;
+
+ if ($_[0] < 0 || $_[0] > 3) {
+ return "unknown-type";
+ }
+
+ @types = ("Not Reported",
+ "Invalid Config Data",
+ "Unsupported Config Data",
+ "Unsupported Memory Media FRU");
+
+ return $types[$_[0]];
+}
+
+# CXL rev 3.1 section 8.2.9.9.3.1; Table 8-133
use constant {
CXL_DHI_HS_MAINTENANCE_NEEDED => 0x0001,
CXL_DHI_HS_PERFORMANCE_DEGRADED => 0x0002,
CXL_DHI_HS_HW_REPLACEMENT_NEEDED => 0x0004,
+ CXL_DHI_HS_MEM_CAPACITY_DEGRADED => 0x0008,
};
sub get_cxl_health_status_text
@@ -1477,6 +1500,9 @@ sub get_cxl_health_status_text
if ($flags & CXL_DHI_HS_HW_REPLACEMENT_NEEDED) {
push @out, (sprintf "\'REPLACEMENT_NEEDED\' ");
}
+ if ($flags & CXL_DHI_HS_MEM_CAPACITY_DEGRADED) {
+ push @out, (sprintf "\'MEM_CAPACITY_DEGRADED\' ");
+ }
return join (", ", @out);
}
@@ -1821,7 +1847,7 @@ sub errors
my ($hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $hdr_maint_op_sub_class, $data);
my ($dpa_flags, $descriptor, $mem_event_type, $mem_event_sub_type, $transaction_type, $channel, $rank, $device, $comp_id, $pldm_entity_id, $pldm_res_id);
my ($nibble_mask, $bank_group, $row, $column, $cor_mask);
- my ($event_type, $health_status, $media_status, $life_used, $dirty_shutdown_cnt, $cor_vol_err_cnt, $cor_per_err_cnt, $device_temp, $add_status);
+ my ($event_type, $event_sub_type, $health_status, $media_status, $life_used, $dirty_shutdown_cnt, $cor_vol_err_cnt, $cor_per_err_cnt, $device_temp, $add_status);
my ($sub_type, $sub_channel, $cme_threshold_ev_flags, $cme_count, $cvme_count);
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
@@ -2155,10 +2181,10 @@ sub errors
}
# CXL memory module errors
- $query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, hdr_maint_op_sub_class, event_type, health_status, media_status, life_used, dirty_shutdown_cnt, cor_vol_err_cnt, cor_per_err_cnt, device_temp, add_status from cxl_memory_module_event$conf{opt}{since} order by id";
+ $query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, hdr_maint_op_sub_class, event_type, health_status, media_status, life_used, dirty_shutdown_cnt, cor_vol_err_cnt, cor_per_err_cnt, device_temp, add_status, event_sub_type, comp_id, pldm_entity_id, pldm_resource_id from cxl_memory_module_event$conf{opt}{since} order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $hdr_maint_op_sub_class, $event_type, $health_status, $media_status, $life_used, $dirty_shutdown_cnt, $cor_vol_err_cnt, $cor_per_err_cnt, $device_temp, $add_status));
+ $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $hdr_maint_op_sub_class, $event_type, $health_status, $media_status, $life_used, $dirty_shutdown_cnt, $cor_vol_err_cnt, $cor_per_err_cnt, $device_temp, $add_status, $event_sub_type, $comp_id, $pldm_entity_id, $pldm_res_id));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $timestamp error: ";
@@ -2175,6 +2201,7 @@ sub errors
$out .= sprintf "hdr_maint_op_class=%u, ", $hdr_maint_op_class if (defined $hdr_maint_op_class && length $hdr_maint_op_class);
$out .= sprintf "hdr_maint_op_sub_class=%u, ", $hdr_maint_op_sub_class if (defined $hdr_maint_op_sub_class && length $hdr_maint_op_sub_class);
$out .= sprintf "event_type: %s, ", get_cxl_dev_event_type($event_type) if (defined $event_type && length $event_type);
+ $out .= sprintf "event_sub_type: %s, ", get_cxl_dev_event_sub_type($event_sub_type) if (defined $event_sub_type && length $event_sub_type);
$out .= sprintf "health_status: %s, ", get_cxl_health_status_text($health_status) if (defined $health_status && length $health_status);
$out .= sprintf "media_status: %s, ", get_cxl_media_status($media_status) if (defined $media_status && length $media_status);
$out .= sprintf "life_used=%u, ", $life_used if (defined $life_used && length $life_used);
@@ -2183,6 +2210,15 @@ sub errors
$out .= sprintf "cor_per_err_cnt=%u, ", $cor_per_err_cnt if (defined $cor_per_err_cnt && length $cor_per_err_cnt);
$out .= sprintf "device_temp=%u, ", $device_temp if (defined $device_temp && length $device_temp);
$out .= sprintf "add_status=%u ", $add_status if (defined $add_status && length $add_status);
+ if (defined $comp_id && length $comp_id) {
+ print_cxl_dev_id("component_id", $comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE, $out);
+ }
+ if (defined $pldm_entity_id && length $pldm_entity_id) {
+ print_cxl_dev_id("pldm_entity_id", $pldm_entity_id, CXL_EVENT_GEN_PLDM_ENTITY_ID_SIZE, $out);
+ }
+ if (defined $pldm_res_id && length $pldm_res_id) {
+ print_cxl_dev_id("pldm_resource_id", $pldm_res_id, CXL_EVENT_GEN_PLDM_RES_ID_SIZE, $out);
+ }
$out .= "\n";
}
if ($out ne "") {
--
2.43.0
prev parent reply other threads:[~2025-01-10 12:27 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-10 12:26 [PATCH v2 00/14] rasdaemon: cxl: Update CXL event logging and recording to CXL spec rev 3.1 shiju.jose
2025-01-10 12:26 ` [PATCH v2 01/14] rasdaemon: Fix for parsing error when trace event's format file is larger than PAGE_SIZE shiju.jose
2025-01-10 16:02 ` Jonathan Cameron
2025-01-10 16:11 ` Shiju Jose
2025-01-10 12:26 ` [PATCH v2 02/14] rasdaemon: cxl: Fix logging of memory event type of DRAM trace event shiju.jose
2025-01-10 12:26 ` [PATCH v2 03/14] rasdaemon: cxl: Fix mismatch in region field's name with kernel " shiju.jose
2025-01-10 12:26 ` [PATCH v2 04/14] rasdaemon: cxl: Add automatic indexing for storing CXL fields in SQLite database shiju.jose
2025-01-10 12:26 ` [PATCH v2 05/14] rasdaemon: cxl: Update common event to CXL spec rev 3.1 shiju.jose
2025-01-10 12:26 ` [PATCH v2 06/14] rasdaemon: cxl: Add Component Identifier formatting for " shiju.jose
2025-01-10 12:26 ` [PATCH v2 07/14] rasdaemon: cxl: Update CXL general media event to " shiju.jose
2025-01-10 12:26 ` [PATCH v2 08/14] rasdaemon: cxl: Update CXL DRAM " shiju.jose
2025-01-10 12:26 ` [PATCH v2 09/14] rasdaemon: cxl: Update memory module " shiju.jose
2025-01-10 12:26 ` [PATCH v2 10/14] rasdaemon: ras-mc-ctl: Fix logging of memory event type in CXL DRAM error table shiju.jose
2025-01-10 12:26 ` [PATCH v2 11/14] rasdaemon: ras-mc-ctl: Update logging of common event data to align with CXL spec rev 3.1 shiju.jose
2025-01-10 12:26 ` [PATCH v2 12/14] rasdaemon: ras-mc-ctl: Update logging of CXL general media " shiju.jose
2025-01-10 12:26 ` [PATCH v2 13/14] rasdaemon: ras-mc-ctl: Update logging of CXL DRAM " shiju.jose
2025-01-10 12:26 ` shiju.jose [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250110122641.1668-15-shiju.jose@huawei.com \
--to=shiju.jose@huawei.com \
--cc=alison.schofield@intel.com \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=ira.weiny@intel.com \
--cc=jonathan.cameron@huawei.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxarm@huawei.com \
--cc=mchehab@kernel.org \
--cc=nifan.cxl@gmail.com \
--cc=prime.zeng@hisilicon.com \
--cc=tanxiaofei@huawei.com \
--cc=vishal.l.verma@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox