From: Riana Tauro <riana.tauro@intel.com>
To: Sk Anirban <sk.anirban@intel.com>, <igt-dev@lists.freedesktop.org>
Cc: <anshuman.gupta@intel.com>, <badal.nilawar@intel.com>,
<karthik.poosa@intel.com>, <raag.jadav@intel.com>
Subject: Re: [i-g-t, v4, 2/3] tests/intel/xe_survivability: Add new test runtime survivability
Date: Thu, 11 Sep 2025 14:42:16 +0530 [thread overview]
Message-ID: <e42dcdfc-105d-42e7-9367-ba5cdb49686f@intel.com> (raw)
In-Reply-To: <20250910192031.1908588-7-sk.anirban@intel.com>
Hi Anirban
On 9/11/2025 12:50 AM, Sk Anirban wrote:
> Inject a CSC error through uevent to cause the Xe device to enter a wedged
> state. To return the device to a normal state, reload the driver, as
> the wedged state can only be resolved by rebinding/reprobing the driver.
>
> Check survivability mode to validate that CSC error injection correctly
> triggers the survivability state transition.
>
> v3: Update commit message (Riana)
> Modify the uevents interrupt handling (Riana)
> v4: Move test to xe_survivability (Riana)
>
> Related kernel patch series:
> https://patchwork.freedesktop.org/series/149756/
Remove this. The kernel patch is already merged.
>
> Signed-off-by: Sk Anirban <sk.anirban@intel.com>
> ---
> tests/intel/xe_survivability.c | 123 +++++++++++++++++++++++++++++++--
> 1 file changed, 119 insertions(+), 4 deletions(-)
>
> diff --git a/tests/intel/xe_survivability.c b/tests/intel/xe_survivability.c
> index 6fb13b391..0fbdcfdcf 100644
> --- a/tests/intel/xe_survivability.c
> +++ b/tests/intel/xe_survivability.c
> @@ -4,6 +4,7 @@
> */
> #include <dirent.h>
> #include <fcntl.h>
> +#include <libudev.h>
> #include <limits.h>
>
> #include "igt.h"
> @@ -12,7 +13,11 @@
> #include "igt_fs.h"
> #include "igt_kmod.h"
> #include "igt_sysfs.h"
> +#include "intel_allocator.h"
> +#include "xe/xe_gt.h"
> +#include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> +#include "xe/xe_spin.h"
>
> /**
> * TEST: Comprehensive run-time survivability mode testing
> @@ -22,10 +27,69 @@
> * Functionality: survivability mode
> * Description: validate runtime survivability mode functionality
> * Test category: functionality test
> + *
> + * SUBTEST: i2c-functionality
> + * Description: Validate i2c adapter functionality in survivability mode
> + *
> + * SUBTEST: runtime-survivability
> + * Description: Force Xe device wedged after injecting a failure in CSC
> + * to test runtime survivability mode
> */
>
> static char bus_addr[NAME_MAX];
>
> +static void ignore_wedged_in_dmesg(void)
> +{
> + /* this is needed for igt_runner so it will ignore it */
> + igt_emit_ignore_dmesg_regex("CRITICAL: Xe has declared device [0-9A-Fa-f:.]* as wedged"
> + "|GT[0-9A-Fa-f]*: reset failed .-ECANCELED"
> + "|GT[0-9A-Fa-f]*: Failed to submit"
> + "|Modules linked in:"
> + "|__pfx___drm_");
> +}
> +
> +static int check_survivability_and_uevents(int fd, struct udev_monitor *mon)
> +{
> + struct udev_device *device;
> + struct pci_device *pci_dev;
> + char path[PATH_MAX];
> + const char *dev_path;
> + const char *wedged;
> + int dirfd;
> +
> + pci_dev = igt_device_get_pci_device(fd);
> + snprintf(bus_addr, sizeof(bus_addr), "%04x:%02x:%02x.%01x",
> + pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
> + snprintf(path, PATH_MAX, "/sys/bus/pci/devices/%s/survivability_mode", bus_addr);
> + dirfd = open(path, O_RDONLY);
This is used by both tests. Make it a separate function.
> +
> + if (mon) {
> + device = udev_monitor_receive_device(mon);
> + dev_path = udev_device_get_property_value(device, "DEVPATH");
> + wedged = udev_device_get_property_value(device, "WEDGED");
> + igt_cleanup_uevents(mon);
> +
> + igt_assert_f(wedged && !strcmp(wedged, "vendor-specific"),
> + "Expected WEDGED property to be 'vendor-specific', got '%s'",
> + wedged);
Fix the indentation here.
> +
> + igt_assert_f(dev_path && strstr(dev_path, bus_addr),
> + "Expected bus address '%s' to be part of DEVPATH '%s'",
> + bus_addr, dev_path);
> + }
> +
> + return dirfd;
> +}
> +
> +static void force_wedged_csc_error(int fd)
> +{
> + igt_debugfs_write(fd, "inject_csc_hw_error/probability", "100");
> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
> +
> + xe_force_gt_reset_sync(fd, 0);
> + sleep(1);
> +}
> +
> static int find_i2c_adapter(struct pci_device *pci_xe)
> {
> char device_path[PATH_MAX];
> @@ -70,10 +134,6 @@ static void set_survivability_mode(int configfs_device_fd, bool value)
> igt_kmod_bind("xe", bus_addr);
> }
>
> -/**
> - * SUBTEST: i2c-functionality
> - * Description: Validate i2c adapter functionality in survivability mode
> - */
Why is this change in this patch?
> static void test_i2c_functionality(int configfs_device_fd, struct pci_device *pci_xe)
> {
> int i2c_adapter_before = 1, i2c_adapter_after;
> @@ -122,10 +182,34 @@ static void set_bus_addr(int fd)
> pci_dev->domain, pci_dev->bus, pci_dev->dev, pci_dev->func);
> }
>
> +static void test_spinner_after_recovery(int fd)
> +{
> + struct drm_xe_engine_class_instance *hwe;
> + unsigned int exec_queue;
> + igt_spin_t *spin;
> + uint64_t ahnd;
> + uint32_t vm;
> +
> + xe_for_each_engine(fd, hwe) {
> + vm = xe_vm_create(fd, 0, 0);
> + exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
> + ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
> +
> + spin = igt_spin_new(fd, .ahnd = ahnd, .engine = exec_queue, .vm = vm);
> +
> + igt_spin_free(fd, spin);
> + xe_exec_queue_destroy(fd, exec_queue);
> + xe_vm_destroy(fd, vm);
> +
> + put_ahnd(ahnd);
> + }
You can just do a basic spin instead of iterating over all engines:
/*
 * Create one spinner on @fd using an allocator handle opened with
 * INTEL_ALLOCATOR_RELOC, then free the spinner and release the handle.
 */
static void spin_basic(int fd)
{
	uint64_t ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
	igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd);

	igt_spin_free(fd, spin);
	put_ahnd(ahnd);
}
> +}
> +
> igt_main
> {
> int fd, configfs_fd, configfs_device_fd;
> struct pci_device *pci_xe;
> + char pci_slot[NAME_MAX];
>
> igt_fixture {
> fd = drm_open_driver(DRIVER_XE);
> @@ -134,6 +218,7 @@ igt_main
> configfs_fd = igt_configfs_open("xe");
> igt_require(configfs_fd != -1);
> configfs_device_fd = create_device_configfs_group(configfs_fd);
> + igt_device_get_pci_slot_name(fd, pci_slot);
> igt_require(IS_BATTLEMAGE(intel_get_drm_devid(fd)));
> }
>
> @@ -143,10 +228,40 @@ igt_main
> test_i2c_functionality(configfs_device_fd, pci_xe);
> }
>
> + igt_subtest("runtime-survivability") {
> + struct udev_monitor *mon;
> +
> + igt_require(igt_debugfs_exists(fd, "inject_csc_hw_error/probability",
> + O_RDWR));
> +
> + igt_assert_f(check_survivability_and_uevents(fd, NULL) < 0,
> + "survivability_mode sysfs available");
> + igt_debugfs_write(fd, "inject_csc_hw_error/verbose", "1");
> +
> + ignore_wedged_in_dmesg();
> + mon = igt_watch_uevents();
> + force_wedged_csc_error(fd);
> +
> + igt_assert_f(check_survivability_and_uevents(fd, mon) >= 0,
> + "survivability_mode sysfs not available");
> +
> + drm_close_driver(fd);
> + igt_kmod_rebind("xe", pci_slot);
> + fd = drm_open_driver(DRIVER_XE);
> + test_spinner_after_recovery(fd);
> + }
> +
> igt_fixture {
> + if (igt_debugfs_exists(fd, "inject_csc_hw_error/probability", O_RDWR)) {
> + igt_debugfs_write(fd, "inject_csc_hw_error/probability", "0");
> + igt_debugfs_write(fd, "inject_csc_hw_error/times", "1");
> + }
> +
> igt_fs_remove_dir(configfs_fd, bus_addr);
> close(configfs_device_fd);
> close(configfs_fd);
> drm_close_driver(fd);
> + igt_kmod_rebind("xe", pci_slot);
> }
> }
next prev parent reply other threads:[~2025-09-11 9:12 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 19:20 [i-g-t, v4, 0/3] tests/intel/xe_survivability: Add xe_survivability test Sk Anirban
2025-09-10 19:20 ` [i-g-t, v3, 1/3] tests/intel/xe_survivability: Add new xe_survivability test for i2c adapter Sk Anirban
2025-09-11 9:21 ` [i-g-t,v3,1/3] " Riana Tauro
2025-09-10 19:20 ` [i-g-t, v4, 2/3] tests/intel/xe_survivability: Add new test runtime survivability Sk Anirban
2025-09-11 9:12 ` Riana Tauro [this message]
2025-09-10 19:20 ` [i-g-t,v4,3/3] HAX: Add xe_survivability tests to fast feedback list Sk Anirban
2025-09-10 20:03 ` ✓ Xe.CI.BAT: success for tests/intel/xe_survivability: Add xe_survivability test Patchwork
2025-09-10 20:20 ` ✓ i915.CI.BAT: " Patchwork
2025-09-11 3:44 ` ✓ Xe.CI.Full: " Patchwork
2025-09-11 16:22 ` ✓ i915.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e42dcdfc-105d-42e7-9367-ba5cdb49686f@intel.com \
--to=riana.tauro@intel.com \
--cc=anshuman.gupta@intel.com \
--cc=badal.nilawar@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=karthik.poosa@intel.com \
--cc=raag.jadav@intel.com \
--cc=sk.anirban@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.