* [PATCH v1 0/3] DRM RAS Fixes
@ 2026-05-14 20:28 Raag Jadav
2026-05-14 20:28 ` [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure Raag Jadav
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Raag Jadav @ 2026-05-14 20:28 UTC (permalink / raw)
To: intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, riana.tauro, maarten, airlied, simona, kuba,
Raag Jadav
Detailed description in commit message.
Raag Jadav (3):
drm/ras: Cancel and free message on get counter failure
drm/xe/drm_ras: Make counter allocation drm managed
drm/xe/drm_ras: Add per node cleanup action
drivers/gpu/drm/drm_ras.c | 5 +++-
drivers/gpu/drm/xe/xe_drm_ras.c | 47 +++++++++++++--------------------
2 files changed, 22 insertions(+), 30 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure
2026-05-14 20:28 [PATCH v1 0/3] DRM RAS Fixes Raag Jadav
@ 2026-05-14 20:28 ` Raag Jadav
2026-05-20 7:12 ` Tauro, Riana
2026-05-14 20:28 ` [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed Raag Jadav
2026-05-14 20:28 ` [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action Raag Jadav
2 siblings, 1 reply; 9+ messages in thread
From: Raag Jadav @ 2026-05-14 20:28 UTC (permalink / raw)
To: intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, riana.tauro, maarten, airlied, simona, kuba,
Raag Jadav
doit_reply_value() directly returns on get counter failure, which results
in stale sk_buff and genetlink header that aren't cleaned up. Fix it.
Fixes: c36218dc49f5 ("drm/ras: Introduce the DRM RAS infrastructure over generic netlink")
Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
drivers/gpu/drm/drm_ras.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index d6eab29a1394..262a75f0c493 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -207,8 +207,11 @@ static int doit_reply_value(struct genl_info *info, u32 node_id,
ret = get_node_error_counter(node_id, error_id,
&error_name, &value);
- if (ret)
+ if (ret) {
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
return ret;
+ }
ret = msg_reply_value(msg, error_id, error_name, value);
if (ret) {
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed
2026-05-14 20:28 [PATCH v1 0/3] DRM RAS Fixes Raag Jadav
2026-05-14 20:28 ` [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure Raag Jadav
@ 2026-05-14 20:28 ` Raag Jadav
2026-05-20 9:51 ` Tauro, Riana
2026-05-14 20:28 ` [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action Raag Jadav
2 siblings, 1 reply; 9+ messages in thread
From: Raag Jadav @ 2026-05-14 20:28 UTC (permalink / raw)
To: intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, riana.tauro, maarten, airlied, simona, kuba,
Raag Jadav
cleanup_node_param() is not registered in case of counter allocation
failure, which results in stale memory of previous node that isn't
cleaned up on unwind. Fix this using drm managed allocation, which is
guaranteed to be cleaned up on unwind.
Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
drivers/gpu/drm/xe/xe_drm_ras.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
index c21c8b428de6..89640ffb1c33 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.c
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -80,7 +80,7 @@ static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *x
struct xe_drm_ras_counter *counter;
int i;
- counter = kcalloc(DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
+ counter = drmm_kcalloc(&xe->drm, DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
if (!counter)
return ERR_PTR(-ENOMEM);
@@ -135,9 +135,6 @@ static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_err
{
struct drm_ras_node *node = &ras->node[severity];
- kfree(ras->info[severity]);
- ras->info[severity] = NULL;
-
kfree(node->device_name);
node->device_name = NULL;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action
2026-05-14 20:28 [PATCH v1 0/3] DRM RAS Fixes Raag Jadav
2026-05-14 20:28 ` [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure Raag Jadav
2026-05-14 20:28 ` [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed Raag Jadav
@ 2026-05-14 20:28 ` Raag Jadav
2026-05-20 7:23 ` Tauro, Riana
2 siblings, 1 reply; 9+ messages in thread
From: Raag Jadav @ 2026-05-14 20:28 UTC (permalink / raw)
To: intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, riana.tauro, maarten, airlied, simona, kuba,
Raag Jadav
cleanup_node_param() is not registered in case of counter allocation
failure, which results in stale memory of previous node that isn't
cleaned up on unwind. Add per node cleanup action which guarantees
cleanup on unwind and also simplifies the cleanup logic.
Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
drivers/gpu/drm/xe/xe_drm_ras.c | 42 +++++++++++++--------------------
1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
index 89640ffb1c33..40abde29a26f 100644
--- a/drivers/gpu/drm/xe/xe_drm_ras.c
+++ b/drivers/gpu/drm/xe/xe_drm_ras.c
@@ -131,14 +131,20 @@ static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
return 0;
}
-static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_error_severity severity)
+static void cleanup_node_param(struct drm_ras_node *node)
{
- struct drm_ras_node *node = &ras->node[severity];
-
kfree(node->device_name);
node->device_name = NULL;
}
+static void cleanup_node(struct drm_device *drm, void *arg)
+{
+ struct drm_ras_node *node = arg;
+
+ drm_ras_node_unregister(node);
+ cleanup_node_param(node);
+}
+
static int register_nodes(struct xe_device *xe)
{
struct xe_drm_ras *ras = &xe->ras;
@@ -150,13 +156,19 @@ static int register_nodes(struct xe_device *xe)
ret = assign_node_params(xe, node, i);
if (ret) {
- cleanup_node_param(ras, i);
+ cleanup_node_param(node);
return ret;
}
ret = drm_ras_node_register(node);
if (ret) {
- cleanup_node_param(ras, i);
+ cleanup_node_param(node);
+ return ret;
+ }
+
+ ret = drmm_add_action_or_reset(&xe->drm, cleanup_node, node);
+ if (ret) {
+ cleanup_node(&xe->drm, node);
return ret;
}
}
@@ -164,20 +176,6 @@ static int register_nodes(struct xe_device *xe)
return 0;
}
-static void xe_drm_ras_unregister_nodes(struct drm_device *device, void *arg)
-{
- struct xe_device *xe = arg;
- struct xe_drm_ras *ras = &xe->ras;
- int i;
-
- for_each_error_severity(i) {
- struct drm_ras_node *node = &ras->node[i];
-
- drm_ras_node_unregister(node);
- cleanup_node_param(ras, i);
- }
-}
-
/**
* xe_drm_ras_init() - Initialize DRM RAS
* @xe: xe device instance
@@ -204,11 +202,5 @@ int xe_drm_ras_init(struct xe_device *xe)
return err;
}
- err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes, xe);
- if (err) {
- drm_err(&xe->drm, "Failed to add action for Xe DRM RAS (%pe)\n", ERR_PTR(err));
- return err;
- }
-
return 0;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure
2026-05-14 20:28 ` [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure Raag Jadav
@ 2026-05-20 7:12 ` Tauro, Riana
0 siblings, 0 replies; 9+ messages in thread
From: Tauro, Riana @ 2026-05-20 7:12 UTC (permalink / raw)
To: Raag Jadav, intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, maarten, airlied, simona, kuba
On 5/15/2026 1:58 AM, Raag Jadav wrote:
> doit_reply_value() directly returns on get counter failure, which results
> in stale sk_buff and genetlink header that aren't cleaned up. Fix it.
>
> Fixes: c36218dc49f5 ("drm/ras: Introduce the DRM RAS infrastructure over generic netlink")
> Signed-off-by: Raag Jadav <raag.jadav@intel.com>
> ---
> drivers/gpu/drm/drm_ras.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
> index d6eab29a1394..262a75f0c493 100644
> --- a/drivers/gpu/drm/drm_ras.c
> +++ b/drivers/gpu/drm/drm_ras.c
> @@ -207,8 +207,11 @@ static int doit_reply_value(struct genl_info *info, u32 node_id,
>
> ret = get_node_error_counter(node_id, error_id,
> &error_name, &value);
> - if (ret)
> + if (ret) {
> + genlmsg_cancel(msg, hdr);
> + nlmsg_free(msg);
Use goto here. It's cleaner since it's used in multiple places.
Thanks
Riana
> return ret;
> + }
>
> ret = msg_reply_value(msg, error_id, error_name, value);
> if (ret) {
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action
2026-05-14 20:28 ` [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action Raag Jadav
@ 2026-05-20 7:23 ` Tauro, Riana
2026-05-20 10:39 ` Raag Jadav
0 siblings, 1 reply; 9+ messages in thread
From: Tauro, Riana @ 2026-05-20 7:23 UTC (permalink / raw)
To: Raag Jadav, intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, maarten, airlied, simona, kuba
On 5/15/2026 1:58 AM, Raag Jadav wrote:
> cleanup_node_param() is not registered in case of counter allocation
> failure, which results in stale memory of previous node that isn't
> cleaned up on unwind.
It is registered.
ret = assign_node_params(xe, node, i);
if (ret)
cleanup_node_param(ras, i);
> Add per node cleanup action which guarantees
> cleanup on unwind and also simplifies the cleanup logic.
>
> Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> Signed-off-by: Raag Jadav <raag.jadav@intel.com>
> ---
> drivers/gpu/drm/xe/xe_drm_ras.c | 42 +++++++++++++--------------------
> 1 file changed, 17 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> index 89640ffb1c33..40abde29a26f 100644
> --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> @@ -131,14 +131,20 @@ static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
> return 0;
> }
>
> -static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_error_severity severity)
> +static void cleanup_node_param(struct drm_ras_node *node)
> {
> - struct drm_ras_node *node = &ras->node[severity];
> -
> kfree(node->device_name);
> node->device_name = NULL;
> }
>
> +static void cleanup_node(struct drm_device *drm, void *arg)
> +{
> + struct drm_ras_node *node = arg;
> +
> + drm_ras_node_unregister(node);
> + cleanup_node_param(node);
> +}
> +
> static int register_nodes(struct xe_device *xe)
> {
> struct xe_drm_ras *ras = &xe->ras;
> @@ -150,13 +156,19 @@ static int register_nodes(struct xe_device *xe)
>
> ret = assign_node_params(xe, node, i);
> if (ret) {
> - cleanup_node_param(ras, i);
> + cleanup_node_param(node);
At this point drm_ras node is not registered.
> return ret;
> }
>
> ret = drm_ras_node_register(node);
> if (ret) {
> - cleanup_node_param(ras, i);
> + cleanup_node_param(node);
Ditto
Thanks
Riana
> + return ret;
> + }
> +
> + ret = drmm_add_action_or_reset(&xe->drm, cleanup_node, node);
> + if (ret) {
> + cleanup_node(&xe->drm, node);
> return ret;
> }
> }
> @@ -164,20 +176,6 @@ static int register_nodes(struct xe_device *xe)
> return 0;
> }
>
> -static void xe_drm_ras_unregister_nodes(struct drm_device *device, void *arg)
> -{
> - struct xe_device *xe = arg;
> - struct xe_drm_ras *ras = &xe->ras;
> - int i;
> -
> - for_each_error_severity(i) {
> - struct drm_ras_node *node = &ras->node[i];
> -
> - drm_ras_node_unregister(node);
> - cleanup_node_param(ras, i);
> - }
> -}
> -
> /**
> * xe_drm_ras_init() - Initialize DRM RAS
> * @xe: xe device instance
> @@ -204,11 +202,5 @@ int xe_drm_ras_init(struct xe_device *xe)
> return err;
> }
>
> - err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes, xe);
> - if (err) {
> - drm_err(&xe->drm, "Failed to add action for Xe DRM RAS (%pe)\n", ERR_PTR(err));
> - return err;
> - }
> -
> return 0;
> }
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed
2026-05-14 20:28 ` [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed Raag Jadav
@ 2026-05-20 9:51 ` Tauro, Riana
2026-05-20 10:36 ` Raag Jadav
0 siblings, 1 reply; 9+ messages in thread
From: Tauro, Riana @ 2026-05-20 9:51 UTC (permalink / raw)
To: Raag Jadav, intel-xe, dri-devel, netdev
Cc: rodrigo.vivi, maarten, airlied, simona, kuba
On 5/15/2026 1:58 AM, Raag Jadav wrote:
> cleanup_node_param() is not registered in case of counter allocation
> failure, which results in stale memory of previous node that isn't
> cleaned up on unwind. Fix this using drm managed allocation, which is
> guaranteed to be cleaned up on unwind.
>
> Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> Signed-off-by: Raag Jadav <raag.jadav@intel.com>
> ---
> drivers/gpu/drm/xe/xe_drm_ras.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> index c21c8b428de6..89640ffb1c33 100644
> --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> @@ -80,7 +80,7 @@ static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *x
> struct xe_drm_ras_counter *counter;
> int i;
>
> - counter = kcalloc(DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
> + counter = drmm_kcalloc(&xe->drm, DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
> if (!counter)
> return ERR_PTR(-ENOMEM);
The intention was to clean up nodes if there is a failure, to prevent
memory from persisting throughout the drm device lifecycle. We actually
discussed this offline afair.
So there was a change from v5 to v6 in the initial patch [v6,2/5]
drm/xe/xe_drm_ras: Add support for XE DRM RAS - Patchwork
<https://patchwork.freedesktop.org/patch/704873/?series=155188&rev=6>
Thanks
Riana
>
> @@ -135,9 +135,6 @@ static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_err
> {
> struct drm_ras_node *node = &ras->node[severity];
>
> - kfree(ras->info[severity]);
> - ras->info[severity] = NULL;
> -
> kfree(node->device_name);
> node->device_name = NULL;
> }
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed
2026-05-20 9:51 ` Tauro, Riana
@ 2026-05-20 10:36 ` Raag Jadav
0 siblings, 0 replies; 9+ messages in thread
From: Raag Jadav @ 2026-05-20 10:36 UTC (permalink / raw)
To: Tauro, Riana
Cc: intel-xe, dri-devel, netdev, rodrigo.vivi, maarten, airlied,
simona, kuba
On Wed, May 20, 2026 at 03:21:49PM +0530, Tauro, Riana wrote:
> On 5/15/2026 1:58 AM, Raag Jadav wrote:
> > cleanup_node_param() is not registered in case of counter allocation
> > failure, which results in stale memory of previous node that isn't
> > cleaned up on unwind. Fix this using drm managed allocation, which is
> > guaranteed to be cleaned up on unwind.
> >
> > Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> > Signed-off-by: Raag Jadav <raag.jadav@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_drm_ras.c | 5 +----
> > 1 file changed, 1 insertion(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> > index c21c8b428de6..89640ffb1c33 100644
> > --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> > +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> > @@ -80,7 +80,7 @@ static struct xe_drm_ras_counter *allocate_and_copy_counters(struct xe_device *x
> > struct xe_drm_ras_counter *counter;
> > int i;
> > - counter = kcalloc(DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
> > + counter = drmm_kcalloc(&xe->drm, DRM_XE_RAS_ERR_COMP_MAX, sizeof(*counter), GFP_KERNEL);
> > if (!counter)
> > return ERR_PTR(-ENOMEM);
>
> The intention was to clean up nodes if there is a failure, to prevent
> memory
> from persisting throughout the drm device lifecycle. We actually discussed
> this offline afair.
>
> So there was a change from from v5 to v6 in the initial patch [v6,2/5]
> drm/xe/xe_drm_ras: Add support for XE DRM RAS - Patchwork
> <https://patchwork.freedesktop.org/patch/704873/?series=155188&rev=6>
Yes, the idea was to prevent the driver from updating a stale counter
(which isn't exposed to the user). But thinking about it again, this
can be achieved by simply keeping info as NULL, I guess?
Raag
> > @@ -135,9 +135,6 @@ static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_err
> > {
> > struct drm_ras_node *node = &ras->node[severity];
> > - kfree(ras->info[severity]);
> > - ras->info[severity] = NULL;
> > -
> > kfree(node->device_name);
> > node->device_name = NULL;
> > }
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action
2026-05-20 7:23 ` Tauro, Riana
@ 2026-05-20 10:39 ` Raag Jadav
0 siblings, 0 replies; 9+ messages in thread
From: Raag Jadav @ 2026-05-20 10:39 UTC (permalink / raw)
To: Tauro, Riana
Cc: intel-xe, dri-devel, netdev, rodrigo.vivi, maarten, airlied,
simona, kuba
On Wed, May 20, 2026 at 12:53:52PM +0530, Tauro, Riana wrote:
> On 5/15/2026 1:58 AM, Raag Jadav wrote:
> > cleanup_node_param() is not registered in case of counter allocation
> > failure, which results in stale memory of previous node that isn't
> > cleaned up on unwind.
>
> It is registered.
>
> ret = assign_node_params(xe, node, i);
> if (ret)
> cleanup_node_param(ras, i);
Is that also true for the previous node's params (in case the second
node's registration fails)?
> > Add per node cleanup action which guarantees
> > cleanup on unwind and also simplifies the cleanup logic.
> >
> > Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> > Signed-off-by: Raag Jadav <raag.jadav@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_drm_ras.c | 42 +++++++++++++--------------------
> > 1 file changed, 17 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> > index 89640ffb1c33..40abde29a26f 100644
> > --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> > +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> > @@ -131,14 +131,20 @@ static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
> > return 0;
> > }
> > -static void cleanup_node_param(struct xe_drm_ras *ras, const enum drm_xe_ras_error_severity severity)
> > +static void cleanup_node_param(struct drm_ras_node *node)
> > {
> > - struct drm_ras_node *node = &ras->node[severity];
> > -
> > kfree(node->device_name);
> > node->device_name = NULL;
> > }
> > +static void cleanup_node(struct drm_device *drm, void *arg)
> > +{
> > + struct drm_ras_node *node = arg;
> > +
> > + drm_ras_node_unregister(node);
> > + cleanup_node_param(node);
> > +}
> > +
> > static int register_nodes(struct xe_device *xe)
> > {
> > struct xe_drm_ras *ras = &xe->ras;
> > @@ -150,13 +156,19 @@ static int register_nodes(struct xe_device *xe)
> > ret = assign_node_params(xe, node, i);
> > if (ret) {
> > - cleanup_node_param(ras, i);
> > + cleanup_node_param(node);
>
> At this point drm_ras node is not registered.
Yes, and I don't believe we're attempting to unregister here :)
> > return ret;
> > }
> > ret = drm_ras_node_register(node);
> > if (ret) {
> > - cleanup_node_param(ras, i);
> > + cleanup_node_param(node);
>
> Ditto
Ditto.
Raag
> > + return ret;
> > + }
> > +
> > + ret = drmm_add_action_or_reset(&xe->drm, cleanup_node, node);
> > + if (ret) {
> > + cleanup_node(&xe->drm, node);
> > return ret;
> > }
> > }
> > @@ -164,20 +176,6 @@ static int register_nodes(struct xe_device *xe)
> > return 0;
> > }
> > -static void xe_drm_ras_unregister_nodes(struct drm_device *device, void *arg)
> > -{
> > - struct xe_device *xe = arg;
> > - struct xe_drm_ras *ras = &xe->ras;
> > - int i;
> > -
> > - for_each_error_severity(i) {
> > - struct drm_ras_node *node = &ras->node[i];
> > -
> > - drm_ras_node_unregister(node);
> > - cleanup_node_param(ras, i);
> > - }
> > -}
> > -
> > /**
> > * xe_drm_ras_init() - Initialize DRM RAS
> > * @xe: xe device instance
> > @@ -204,11 +202,5 @@ int xe_drm_ras_init(struct xe_device *xe)
> > return err;
> > }
> > - err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes, xe);
> > - if (err) {
> > - drm_err(&xe->drm, "Failed to add action for Xe DRM RAS (%pe)\n", ERR_PTR(err));
> > - return err;
> > - }
> > -
> > return 0;
> > }
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2026-05-20 10:39 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-05-14 20:28 [PATCH v1 0/3] DRM RAS Fixes Raag Jadav
2026-05-14 20:28 ` [PATCH v1 1/3] drm/ras: Cancel and free message on get counter failure Raag Jadav
2026-05-20 7:12 ` Tauro, Riana
2026-05-14 20:28 ` [PATCH v1 2/3] drm/xe/drm_ras: Make counter allocation drm managed Raag Jadav
2026-05-20 9:51 ` Tauro, Riana
2026-05-20 10:36 ` Raag Jadav
2026-05-14 20:28 ` [PATCH v1 3/3] drm/xe/drm_ras: Add per node cleanup action Raag Jadav
2026-05-20 7:23 ` Tauro, Riana
2026-05-20 10:39 ` Raag Jadav
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox