* [ndctl PATCH v3] test/cxl-poison.sh: test inject and clear poison by region offset
@ 2025-08-23 2:59 alison.schofield
2025-09-05 23:34 ` Dave Jiang
0 siblings, 1 reply; 3+ messages in thread
From: alison.schofield @ 2025-08-23 2:59 UTC (permalink / raw)
To: nvdimm, linux-cxl; +Cc: Alison Schofield
From: Alison Schofield <alison.schofield@intel.com>
The CXL kernel driver recently added support to inject and clear
poison in a region by specifying an offset. Add a test case to the
existing cxl-poison unit test that demonstrates how to use the new
debugfs attributes. Use the kernel trace log to validate the round
trip address translations.
SKIP, do not fail, if the new debugfs attributes are not present.
See the kernel ABI documentation for usage:
Documentation/ABI/testing/debugfs-cxl
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
Changes in v3:
Replace string compare with boolean value for expect_fail (Marc)
Add local declarations in new or modified funcs (Marc)
De-duplicate clear & poison funcs (Marc)
Remove stderr redirection (Marc)
Changes in v2:
Add test_poison_by_region_offset_negative set of test cases
test/cxl-poison.sh | 132 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 117 insertions(+), 15 deletions(-)
diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
index 6ed890bc666c..f941f3cbcffd 100644
--- a/test/cxl-poison.sh
+++ b/test/cxl-poison.sh
@@ -63,20 +63,58 @@ create_x2_region()
# When cxl-cli support for inject and clear arrives, replace
# the writes to /sys/kernel/debug with the new cxl commands.
+_do_poison_sysfs()
+{
+ local action="$1" dev="$2" addr="$3"
+ local expect_fail=${4:-false}
+
+ if "$expect_fail"; then
+ if echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"; then
+ echo "Expected ${action}_poison to fail for $addr"
+ err "$LINENO"
+ fi
+ else
+ echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"
+ fi
+}
+
inject_poison_sysfs()
{
- memdev="$1"
- addr="$2"
-
- echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
+ _do_poison_sysfs 'inject' "$@"
}
clear_poison_sysfs()
{
- memdev="$1"
- addr="$2"
+ _do_poison_sysfs 'clear' "$@"
+}
- echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
+check_trace_entry()
+{
+ local expected_region="$1"
+ local expected_hpa="$2"
+
+ local trace_line
+ trace_line=$(grep "cxl_poison" /sys/kernel/tracing/trace | tail -n 1)
+ if [[ -z "$trace_line" ]]; then
+ echo "No cxl_poison trace event found"
+ err "$LINENO"
+ fi
+
+ local trace_region trace_hpa
+ trace_region=$(echo "$trace_line" | grep -o 'region=[^ ]*' | cut -d= -f2)
+ trace_hpa=$(echo "$trace_line" | grep -o 'hpa=0x[0-9a-fA-F]\+' | cut -d= -f2)
+
+ if [[ "$trace_region" != "$expected_region" ]]; then
+ echo "Expected region $expected_region not found in trace"
+ echo "$trace_line"
+ err "$LINENO"
+ fi
+
+ if [[ "$trace_hpa" != "$expected_hpa" ]]; then
+ echo "Expected HPA $expected_hpa not found in trace"
+ echo "$trace_line"
+ err "$LINENO"
+ fi
}
validate_poison_found()
@@ -97,7 +135,7 @@ validate_poison_found()
fi
}
-test_poison_by_memdev()
+test_poison_by_memdev_by_dpa()
{
find_memdev
inject_poison_sysfs "$memdev" "0x40000000"
@@ -113,9 +151,8 @@ test_poison_by_memdev()
validate_poison_found "-m $memdev" 0
}
-test_poison_by_region()
+test_poison_by_region_by_dpa()
{
- create_x2_region
inject_poison_sysfs "$mem0" "0x40000000"
inject_poison_sysfs "$mem1" "0x40000000"
validate_poison_found "-r $region" 2
@@ -125,13 +162,78 @@ test_poison_by_region()
validate_poison_found "-r $region" 0
}
-# Turn tracing on. Note that 'cxl list --media-errors' toggles the tracing.
-# Turning it on here allows the test user to also view inject and clear
-# trace events.
+test_poison_by_region_offset()
+{
+ local base gran hpa1 hpa2
+ base=$(cat /sys/bus/cxl/devices/"$region"/resource)
+ gran=$(cat /sys/bus/cxl/devices/"$region"/interleave_granularity)
+
+ # Test two HPA addresses: base and base + granularity
+ # This hits the two memdevs in the region interleave.
+ hpa1=$(printf "0x%x" $((base)))
+ hpa2=$(printf "0x%x" $((base + gran)))
+
+ # Inject at each region offset and check the result using the HPAs.
+ # The ABI takes an offset, but the trace event is checked against the HPA.
+
+ inject_poison_sysfs "$region" 0
+ check_trace_entry "$region" "$hpa1"
+ inject_poison_sysfs "$region" "$gran"
+ check_trace_entry "$region" "$hpa2"
+ validate_poison_found "-r $region" 2
+
+ clear_poison_sysfs "$region" 0
+ check_trace_entry "$region" "$hpa1"
+ clear_poison_sysfs "$region" "$gran"
+ check_trace_entry "$region" "$hpa2"
+ validate_poison_found "-r $region" 0
+}
+
+test_poison_by_region_offset_negative()
+{
+ local region_size cache_size cache_offset exceed_offset large_offset
+ region_size=$(cat /sys/bus/cxl/devices/"$region"/size)
+ cache_size=0
+
+ # This case is a no-op until cxl-test ELC mocking arrives
+ # Try to get cache_size if the attribute exists
+ if [ -f "/sys/bus/cxl/devices/$region/cache_size" ]; then
+ cache_size=$(cat /sys/bus/cxl/devices/"$region"/cache_size)
+ fi
+
+ # Offset within extended linear cache (if cache_size > 0)
+ if [[ $cache_size -gt 0 ]]; then
+ cache_offset=$((cache_size - 1))
+ echo "Testing offset within cache: $cache_offset (cache_size: $cache_size)"
+ inject_poison_sysfs "$region" "$cache_offset" true
+ clear_poison_sysfs "$region" "$cache_offset" true
+ else
+ echo "Skipping cache test - cache_size is 0"
+ fi
+
+ # Offset exceeds region size
+ exceed_offset=$((region_size))
+ inject_poison_sysfs "$region" "$exceed_offset" true
+ clear_poison_sysfs "$region" "$exceed_offset" true
+
+ # Offset exceeds region size by a lot
+ large_offset=$((region_size * 2))
+ inject_poison_sysfs "$region" "$large_offset" true
+ clear_poison_sysfs "$region" "$large_offset" true
+}
+
+# Clear old trace events, enable cxl_poison, enable global tracing
+echo "" > /sys/kernel/tracing/trace
echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+echo 1 > /sys/kernel/tracing/tracing_on
-test_poison_by_memdev
-test_poison_by_region
+test_poison_by_memdev_by_dpa
+create_x2_region
+test_poison_by_region_by_dpa
+[ -f "/sys/kernel/debug/cxl/$region/inject_poison" ] ||
+ do_skip "test cases requires inject by region kernel support"
+test_poison_by_region_offset
+test_poison_by_region_offset_negative
check_dmesg "$LINENO"
--
2.37.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [ndctl PATCH v3] test/cxl-poison.sh: test inject and clear poison by region offset
2025-08-23 2:59 [ndctl PATCH v3] test/cxl-poison.sh: test inject and clear poison by region offset alison.schofield
@ 2025-09-05 23:34 ` Dave Jiang
2025-09-11 16:53 ` Alison Schofield
0 siblings, 1 reply; 3+ messages in thread
From: Dave Jiang @ 2025-09-05 23:34 UTC (permalink / raw)
To: alison.schofield, nvdimm, linux-cxl
On 8/22/25 7:59 PM, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
>
> The CXL kernel driver recently added support to inject and clear
> poison in a region by specifying an offset. Add a test case to the
> existing cxl-poison unit test that demonstrates how to use the new
> debugfs attributes. Use the kernel trace log to validate the round
> trip address translations.
>
> SKIP, do not fail, if the new debugfs attributes are not present.
>
> See the kernel ABI documentation for usage:
> Documentation/ABI/testing/debugfs-cxl
>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Not a bash expert, but LGTM.
> ---
>
> Changes in v3:
> Replace string compare with boolean value for expect_fail (Marc)
> Add local declarations in new or modified funcs (Marc)
> De-duplicate clear & poison funcs (Marc)
> Remove stderr redirection (Marc)
>
> Changes in v2:
> Add test_poison_by_region_offset_negative set of test cases
>
>
> test/cxl-poison.sh | 132 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> index 6ed890bc666c..f941f3cbcffd 100644
> --- a/test/cxl-poison.sh
> +++ b/test/cxl-poison.sh
> @@ -63,20 +63,58 @@ create_x2_region()
> # When cxl-cli support for inject and clear arrives, replace
> # the writes to /sys/kernel/debug with the new cxl commands.
>
> +_do_poison_sysfs()
> +{
> + local action="$1" dev="$2" addr="$3"
> + local expect_fail=${4:-false}
> +
> + if "$expect_fail"; then
> + if echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"; then
> + echo "Expected ${action}_poison to fail for $addr"
> + err "$LINENO"
> + fi
> + else
> + echo "$addr" > "/sys/kernel/debug/cxl/$dev/${action}_poison"
> + fi
> +}
> +
> inject_poison_sysfs()
> {
> - memdev="$1"
> - addr="$2"
> -
> - echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/inject_poison
> + _do_poison_sysfs 'inject' "$@"
> }
>
> clear_poison_sysfs()
> {
> - memdev="$1"
> - addr="$2"
> + _do_poison_sysfs 'clear' "$@"
> +}
>
> - echo "$addr" > /sys/kernel/debug/cxl/"$memdev"/clear_poison
> +check_trace_entry()
> +{
> + local expected_region="$1"
> + local expected_hpa="$2"
> +
> + local trace_line
> + trace_line=$(grep "cxl_poison" /sys/kernel/tracing/trace | tail -n 1)
> + if [[ -z "$trace_line" ]]; then
> + echo "No cxl_poison trace event found"
> + err "$LINENO"
> + fi
> +
> + local trace_region trace_hpa
> + trace_region=$(echo "$trace_line" | grep -o 'region=[^ ]*' | cut -d= -f2)
> + trace_hpa=$(echo "$trace_line" | grep -o 'hpa=0x[0-9a-fA-F]\+' | cut -d= -f2)
> +
> + if [[ "$trace_region" != "$expected_region" ]]; then
> + echo "Expected region $expected_region not found in trace"
> + echo "$trace_line"
> + err "$LINENO"
> + fi
> +
> + if [[ "$trace_hpa" != "$expected_hpa" ]]; then
> + echo "Expected HPA $expected_hpa not found in trace"
> + echo "$trace_line"
> + err "$LINENO"
> + fi
> }
>
> validate_poison_found()
> @@ -97,7 +135,7 @@ validate_poison_found()
> fi
> }
>
> -test_poison_by_memdev()
> +test_poison_by_memdev_by_dpa()
> {
> find_memdev
> inject_poison_sysfs "$memdev" "0x40000000"
> @@ -113,9 +151,8 @@ test_poison_by_memdev()
> validate_poison_found "-m $memdev" 0
> }
>
> -test_poison_by_region()
> +test_poison_by_region_by_dpa()
> {
> - create_x2_region
> inject_poison_sysfs "$mem0" "0x40000000"
> inject_poison_sysfs "$mem1" "0x40000000"
> validate_poison_found "-r $region" 2
> @@ -125,13 +162,78 @@ test_poison_by_region()
> validate_poison_found "-r $region" 0
> }
>
> -# Turn tracing on. Note that 'cxl list --media-errors' toggles the tracing.
> -# Turning it on here allows the test user to also view inject and clear
> -# trace events.
> +test_poison_by_region_offset()
> +{
> + local base gran hpa1 hpa2
> + base=$(cat /sys/bus/cxl/devices/"$region"/resource)
> + gran=$(cat /sys/bus/cxl/devices/"$region"/interleave_granularity)
> +
> + # Test two HPA addresses: base and base + granularity
> + # This hits the two memdevs in the region interleave.
> + hpa1=$(printf "0x%x" $((base)))
> + hpa2=$(printf "0x%x" $((base + gran)))
> +
> + # Inject at each region offset and check the result using the HPAs.
> + # The ABI takes an offset, but the trace event is checked against the HPA.
> +
> + inject_poison_sysfs "$region" 0
> + check_trace_entry "$region" "$hpa1"
> + inject_poison_sysfs "$region" "$gran"
> + check_trace_entry "$region" "$hpa2"
> + validate_poison_found "-r $region" 2
> +
> + clear_poison_sysfs "$region" 0
> + check_trace_entry "$region" "$hpa1"
> + clear_poison_sysfs "$region" "$gran"
> + check_trace_entry "$region" "$hpa2"
> + validate_poison_found "-r $region" 0
> +}
> +
> +test_poison_by_region_offset_negative()
> +{
> + local region_size cache_size cache_offset exceed_offset large_offset
> + region_size=$(cat /sys/bus/cxl/devices/"$region"/size)
> + cache_size=0
> +
> + # This case is a no-op until cxl-test ELC mocking arrives
> + # Try to get cache_size if the attribute exists
> + if [ -f "/sys/bus/cxl/devices/$region/cache_size" ]; then
> + cache_size=$(cat /sys/bus/cxl/devices/"$region"/cache_size)
> + fi
> +
> + # Offset within extended linear cache (if cache_size > 0)
> + if [[ $cache_size -gt 0 ]]; then
> + cache_offset=$((cache_size - 1))
> + echo "Testing offset within cache: $cache_offset (cache_size: $cache_size)"
> + inject_poison_sysfs "$region" "$cache_offset" true
> + clear_poison_sysfs "$region" "$cache_offset" true
> + else
> + echo "Skipping cache test - cache_size is 0"
> + fi
> +
> + # Offset exceeds region size
> + exceed_offset=$((region_size))
> + inject_poison_sysfs "$region" "$exceed_offset" true
> + clear_poison_sysfs "$region" "$exceed_offset" true
> +
> + # Offset exceeds region size by a lot
> + large_offset=$((region_size * 2))
> + inject_poison_sysfs "$region" "$large_offset" true
> + clear_poison_sysfs "$region" "$large_offset" true
> +}
> +
> +# Clear old trace events, enable cxl_poison, enable global tracing
> +echo "" > /sys/kernel/tracing/trace
> echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +echo 1 > /sys/kernel/tracing/tracing_on
>
> -test_poison_by_memdev
> -test_poison_by_region
> +test_poison_by_memdev_by_dpa
> +create_x2_region
> +test_poison_by_region_by_dpa
> +[ -f "/sys/kernel/debug/cxl/$region/inject_poison" ] ||
> + do_skip "test cases requires inject by region kernel support"
> +test_poison_by_region_offset
> +test_poison_by_region_offset_negative
>
> check_dmesg "$LINENO"
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [ndctl PATCH v3] test/cxl-poison.sh: test inject and clear poison by region offset
2025-09-05 23:34 ` Dave Jiang
@ 2025-09-11 16:53 ` Alison Schofield
0 siblings, 0 replies; 3+ messages in thread
From: Alison Schofield @ 2025-09-11 16:53 UTC (permalink / raw)
To: Dave Jiang; +Cc: nvdimm, linux-cxl
On Fri, Sep 05, 2025 at 04:34:46PM -0700, Dave Jiang wrote:
>
>
> On 8/22/25 7:59 PM, alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > The CXL kernel driver recently added support to inject and clear
> > poison in a region by specifying an offset. Add a test case to the
> > existing cxl-poison unit test that demonstrates how to use the new
> > debugfs attributes. Use the kernel trace log to validate the round
> > trip address translations.
> >
> > SKIP, do not fail, if the new debugfs attributes are not present.
> >
> > See the kernel ABI documentation for usage:
> > Documentation/ABI/testing/debugfs-cxl
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Thanks for the review!
Applied https://github.com/pmem/ndctl/commits/pending/
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-09-11 16:53 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-08-23 2:59 [ndctl PATCH v3] test/cxl-poison.sh: test inject and clear poison by region offset alison.schofield
2025-09-05 23:34 ` Dave Jiang
2025-09-11 16:53 ` Alison Schofield
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.