From: Francois Dugast <francois.dugast@intel.com>
To: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <igt-dev@lists.freedesktop.org>,
Lucas De Marchi <lucas.demarchi@intel.com>,
Matthew Brost <matthew.brost@intel.com>,
"Michal Wajdeczko" <michal.wajdeczko@intel.com>
Subject: Re: [PATCH i-g-t] tests/intel/xe_fault_injection: Add new test for fault injection
Date: Tue, 24 Sep 2024 11:16:05 +0200 [thread overview]
Message-ID: <ZvKDVRI53vnLuOfn@fdugast-desk> (raw)
In-Reply-To: <ZvGu8qdBeuP6XeUk@intel.com>
On Mon, Sep 23, 2024 at 02:09:54PM -0400, Rodrigo Vivi wrote:
> On Fri, Sep 20, 2024 at 03:19:13PM +0200, Francois Dugast wrote:
> > Use the kernel fault injection infrastructure to test error handling
> > of xe at probe time.
> >
> > Add the following test:
> > * "function-fault-injection"
> >
> > Signed-off-by: Francois Dugast <francois.dugast@intel.com>
> > Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> > Cc: Matthew Brost <matthew.brost@intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> > tests/intel/xe_fault_injection.c | 229 +++++++++++++++++++++++++++++++
> > tests/meson.build | 1 +
> > 2 files changed, 230 insertions(+)
> > create mode 100644 tests/intel/xe_fault_injection.c
> >
> > diff --git a/tests/intel/xe_fault_injection.c b/tests/intel/xe_fault_injection.c
> > new file mode 100644
> > index 000000000..86fb6029d
> > --- /dev/null
> > +++ b/tests/intel/xe_fault_injection.c
>
> perhaps we should name this xe_probe_faults.c ?
The intention for this file is to exercise the newly introduced use of
fault-inject.h in the driver. For now the scope is indeed limited to
probe time (hence the name of the only test in this file), but there is
no reason to keep it limited to probe time in the future.
>
> > @@ -0,0 +1,229 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +/**
> > + * TEST: Check fault injection
> > + * Category: Core
> > + * Mega feature: General Core features
> > + * Sub-category: driver
> > + * Test category: fault injection
> > + */
> > +
> > +#include <regex.h>
> > +
> > +#include "igt.h"
> > +#include "igt_device.h"
> > +#include "igt_kmod.h"
> > +#include "igt_sysfs.h"
> > +
> > +#define MAX_LINE_SIZE 1024
> > +#define PATH_FUNCTIONS_INJECTABLE "/sys/kernel/debug/fail_function/injectable"
> > +#define PATH_FUNCTIONS_INJECT "/sys/kernel/debug/fail_function/inject"
> > +#define PATH_FUNCTIONS_RETVAL "/sys/kernel/debug/fail_function/%s/retval"
> > +#define REGEX_XE_FUNCTIONS "^(.+)\\[xe\\]"
> > +#define INJECT_ERRNO -ENOMEM
> > +
> > +enum sysfs_action {
> > + SYSFS_BIND,
> > + SYSFS_UNBIND,
> > +};
> > +
> > +enum injection_list_action {
> > + INJECTION_LIST_ADD,
> > + INJECTION_LIST_REMOVE,
> > +};
> > +
> > +/*
> > + * The injectable file requires CONFIG_FUNCTION_ERROR_INJECTION in kernel.
> > + */
> > +static bool function_error_injection_enabled(void)
> > +{
> > + FILE *file = fopen(PATH_FUNCTIONS_INJECTABLE, "rw");
> > +
> > + if (file) {
> > + fclose(file);
> > + return true;
> > + }
> > +
> > + return false;
> > +}
> > +
> > +static int sysfs_do(enum sysfs_action action, const char pci_slot[])
> > +{
> > + int sysfs;
> > + int ret;
> > +
> > + sysfs = open("/sys/bus/pci/drivers/xe", O_DIRECTORY);
> > + igt_assert(sysfs);
> > +
> > + switch(action) {
> > + case SYSFS_BIND:
> > + ret = igt_sysfs_set(sysfs, "bind", pci_slot);
> > + break;
> > + case SYSFS_UNBIND:
> > + ret = igt_sysfs_set(sysfs, "unbind", pci_slot);
> > + break;
>
> cool and clean fn, but we should probably add it to a library place.
> And then reused in common places like xe_wedge...
Sure, will do.
>
> > + default:
> > + igt_assert(!"missing");
> > + }
> > +
> > + close(sysfs);
> > +
> > + return ret;
> > +}
> > +
> > +static void injection_list_do(enum injection_list_action action, char function_name[])
> > +{
> > + FILE *file_inject;
> > +
> > + file_inject = fopen(PATH_FUNCTIONS_INJECT, "w");
> > + igt_assert(file_inject);
> > +
> > + switch(action) {
> > + case INJECTION_LIST_ADD:
> > + fprintf(file_inject, "%s", function_name);
> > + break;
> > + case INJECTION_LIST_REMOVE:
> > + fprintf(file_inject, "!%s", function_name);
> > + break;
> > + default:
> > + igt_assert(!"missing");
> > + }
> > +
> > + fclose(file_inject);
> > +}
> > +
> > +/*
> > + * See https://docs.kernel.org/fault-injection/fault-injection.html#application-examples
> > + */
> > +static void setup_injection_fault(void)
> > +{
> > + FILE *file;
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/task-filter", "w");
> > + igt_assert(file);
> > + fprintf(file, "N");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/probability", "w");
> > + igt_assert(file);
> > + fprintf(file, "100");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/interval", "w");
> > + igt_assert(file);
> > + fprintf(file, "0");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/times", "w");
> > + igt_assert(file);
> > + fprintf(file, "-1");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/space", "w");
> > + igt_assert(file);
> > + fprintf(file, "0");
> > + fclose(file);
> > +
> > + file = fopen("/sys/kernel/debug/fail_function/verbose", "w");
> > + igt_assert(file);
> > + fprintf(file, "1");
> > + fclose(file);
> > +}
> > +
> > +static void cleanup_injection_fault(void)
> > +{
> > + FILE *file;
> > +
> > + file = fopen(PATH_FUNCTIONS_INJECT, "w");
> > + igt_assert(file);
> > + fprintf(file, "\n");
> > + fclose(file);
> > +}
> > +
> > +static void set_retval(char function_name[], long long retval)
> > +{
> > + FILE *file_retval;
> > + char file_path[MAX_LINE_SIZE];
> > +
> > + sprintf(file_path, PATH_FUNCTIONS_RETVAL, function_name);
> > +
> > + file_retval = fopen(file_path, "w");
> > + igt_assert(file_retval);
> > +
> > + fprintf(file_retval, "%#016llx", retval);
> > + fclose(file_retval);
> > +}
> > +
> > +static void inject_fault_try_bind(char pci_slot[], char function_name[])
> > +{
> > + igt_info("Injecting error \"%s\" (%d) in function \"%s\"\n",
> > + strerror(-INJECT_ERRNO), INJECT_ERRNO, function_name);
> > +
> > + injection_list_do(INJECTION_LIST_ADD, function_name);
> > + set_retval(function_name, INJECT_ERRNO);
> > + sysfs_do(SYSFS_BIND, pci_slot);
> > + igt_assert_eq(-errno, INJECT_ERRNO);
> > + injection_list_do(INJECTION_LIST_REMOVE, function_name);
> > +}
> > +
> > +/**
> > + * SUBTEST: function-fault-injection-during-probe
> > + * Description: inject an error in each injectable function then reprobe driver
> > + */
> > +static void
> > +function_fault_injection_during_probe(void)
> > +{
> > + FILE *file_injectable;
> > + char line[MAX_LINE_SIZE];
> > + char function_name[MAX_LINE_SIZE];
> > + regex_t regex;
> > + regmatch_t pmatch[2];
> > + char pci_slot[MAX_LINE_SIZE];
> > + int fd;
> > +
> > + fd = drm_open_driver(DRIVER_XE);
> > + igt_device_get_pci_slot_name(fd, pci_slot);
> > + drm_close_driver(fd);
> > +
> > + igt_assert_eq(regcomp(&regex, REGEX_XE_FUNCTIONS, REG_EXTENDED), 0);
> > +
> > + file_injectable = fopen(PATH_FUNCTIONS_INJECTABLE, "r");
> > + igt_assert(file_injectable);
> > +
> > + sysfs_do(SYSFS_UNBIND, pci_slot);
> > +
> > + /*
> > + * Iterate over each error injectable function of the xe module
> > + */
> > + while ((fgets(line, MAX_LINE_SIZE, file_injectable)) != NULL) {
> > + if (regexec(&regex, line, 2, pmatch, 0) == 0) {
> > + strcpy(function_name, line);
> > + function_name[pmatch[1].rm_eo - 1] = '\0';
> > + inject_fault_try_bind(pci_slot, function_name);
> > + }
>
> hmmm... will this really work 100% of the cases? what about reset and
> wedged cases? will they get listed and attempt the rebind?
>
> But well, the good part of this is that any kernel addition doesn't
> require a change in igt... then perhaps the name of this test case
> file is right indeed...
This test covers 100% of the cases introduced by [1], where fault-inject.h
is used instead of the i915 macro to detect issues at probe time. In other
words, it covers only the functions marked as error injectable with the
ALLOW_ERROR_INJECTION() macro [2]; it does not cover other uses of
fault-inject.h, such as in the reset and wedged cases.
Yes, it is dynamic: no change is required in this test after adding a new
use of ALLOW_ERROR_INJECTION() in the driver, as long as the function is
meant to fail at probe time with error INJECT_ERRNO.
With the current proposal [1], execution of this test looks like this:
Starting subtest: function-fault-injection-during-probe
Opened device: /dev/dri/card0
Injecting error "Cannot allocate memory" (-12) in function "wait_for_lmem_ready"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_device_create"
Injecting error "Cannot allocate memory" (-12) in function "xe_ggtt_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_ads_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_ct_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_log_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_pm_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_tile_init_early"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_uc_fw_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_wa_init"
Injecting error "Cannot allocate memory" (-12) in function "xe_wopcm_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_guc_relay_init"
Injecting error "Cannot allocate memory" (-12) in function "fault_inject_sriov_init"
Subtest function-fault-injection-during-probe: SUCCESS (2.532s)
[1] https://patchwork.freedesktop.org/series/138654/
[2] https://docs.kernel.org/fault-injection/fault-injection.html#error-injectable-functions
Francois
>
> > + }
> > +
> > + fclose(file_injectable);
> > + regfree(&regex);
> > +}
> > +
> > +igt_main
> > +{
> > + igt_fixture {
> > + igt_require(function_error_injection_enabled());
> > + setup_injection_fault();
> > + }
> > +
> > + igt_subtest("function-fault-injection") {
> > + function_fault_injection_during_probe();
> > + }
> > +
> > + igt_fixture {
> > + cleanup_injection_fault();
> > + igt_xe_driver_unload();
> > + igt_xe_driver_load(NULL);
> > + }
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 00556c9d6..cf0d4486e 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -293,6 +293,7 @@ intel_xe_progs = [
> > 'xe_exec_store',
> > 'xe_exec_threads',
> > 'xe_exercise_blt',
> > + 'xe_fault_injection',
> > 'xe_gpgpu_fill',
> > 'xe_gt_freq',
> > 'xe_huc_copy',
> > --
> > 2.43.0
> >
prev parent reply other threads:[~2024-09-24 9:16 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-20 13:19 [PATCH i-g-t] tests/intel/xe_fault_injection: Add new test for fault injection Francois Dugast
2024-09-20 13:36 ` ✗ Fi.CI.BUILD: failure for " Patchwork
2024-09-20 14:01 ` ✗ GitLab.Pipeline: warning " Patchwork
2024-09-23 18:09 ` [PATCH i-g-t] " Rodrigo Vivi
2024-09-24 9:16 ` Francois Dugast [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZvKDVRI53vnLuOfn@fdugast-desk \
--to=francois.dugast@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=lucas.demarchi@intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.wajdeczko@intel.com \
--cc=rodrigo.vivi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox