Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v1 07/11] drm/xe/ras: Introduce correctable error handling
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

Add initial support for correctable error handling, which is serviced
using a system controller event. Currently we only log the errors in
dmesg, but this serves as a foundation for the RAS infrastructure and
will be further extended to facilitate other RAS features.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Mallesh Koujalagi <mallesh.koujalagi@intel.com>
---
 drivers/gpu/drm/xe/Makefile           |  1 +
 drivers/gpu/drm/xe/xe_ras.c           | 92 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_ras.h           | 15 +++++
 drivers/gpu/drm/xe/xe_ras_types.h     | 73 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_sysctrl_event.c |  3 +-
 5 files changed, 183 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/xe/xe_ras.c
 create mode 100644 drivers/gpu/drm/xe/xe_ras.h
 create mode 100644 drivers/gpu/drm/xe/xe_ras_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1c863b711ae9..22f17bd1082d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -114,6 +114,7 @@ xe-y += xe_bb.o \
 	xe_pxp_submit.o \
 	xe_query.o \
 	xe_range_fence.o \
+	xe_ras.o \
 	xe_reg_sr.o \
 	xe_reg_whitelist.o \
 	xe_ring_ops.o \
diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
new file mode 100644
index 000000000000..08e91348c459
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include "xe_printk.h"
+#include "xe_ras.h"
+#include "xe_ras_types.h"
+#include "xe_sysctrl.h"
+#include "xe_sysctrl_event_types.h"
+
+/* Severity of detected errors */
+enum xe_ras_severity {
+	XE_RAS_SEV_NOT_SUPPORTED = 0,
+	XE_RAS_SEV_CORRECTABLE,
+	XE_RAS_SEV_UNCORRECTABLE,
+	XE_RAS_SEV_INFORMATIONAL,
+	XE_RAS_SEV_MAX
+};
+
+/* Major IP blocks/components where errors can originate */
+enum xe_ras_component {
+	XE_RAS_COMP_NOT_SUPPORTED = 0,
+	XE_RAS_COMP_DEVICE_MEMORY,
+	XE_RAS_COMP_CORE_COMPUTE,
+	XE_RAS_COMP_RESERVED,
+	XE_RAS_COMP_PCIE,
+	XE_RAS_COMP_FABRIC,
+	XE_RAS_COMP_SOC_INTERNAL,
+	XE_RAS_COMP_MAX
+};
+
+static const char *const xe_ras_severities[] = {
+	[XE_RAS_SEV_NOT_SUPPORTED]		= "Not Supported",
+	[XE_RAS_SEV_CORRECTABLE]		= "Correctable Error",
+	[XE_RAS_SEV_UNCORRECTABLE]		= "Uncorrectable Error",
+	[XE_RAS_SEV_INFORMATIONAL]		= "Informational Error",
+};
+static_assert(ARRAY_SIZE(xe_ras_severities) == XE_RAS_SEV_MAX);
+
+static const char *const xe_ras_components[] = {
+	[XE_RAS_COMP_NOT_SUPPORTED]		= "Not Supported",
+	[XE_RAS_COMP_DEVICE_MEMORY]		= "Device Memory",
+	[XE_RAS_COMP_CORE_COMPUTE]		= "Core Compute",
+	[XE_RAS_COMP_RESERVED]			= "Reserved",
+	[XE_RAS_COMP_PCIE]			= "PCIe",
+	[XE_RAS_COMP_FABRIC]			= "Fabric",
+	[XE_RAS_COMP_SOC_INTERNAL]		= "SoC Internal",
+};
+static_assert(ARRAY_SIZE(xe_ras_components) == XE_RAS_COMP_MAX);
+
+static inline const char *sev_to_str(u8 sev)
+{
+	if (sev >= XE_RAS_SEV_MAX)
+		sev = XE_RAS_SEV_NOT_SUPPORTED;
+
+	return xe_ras_severities[sev];
+}
+
+static inline const char *comp_to_str(u8 comp)
+{
+	if (comp >= XE_RAS_COMP_MAX)
+		comp = XE_RAS_COMP_NOT_SUPPORTED;
+
+	return xe_ras_components[comp];
+}
+
+void xe_ras_counter_threshold_crossed(struct xe_device *xe,
+				      struct xe_sysctrl_event_response *response)
+{
+	struct xe_ras_threshold_crossed *pending = (void *)&response->data;
+	struct xe_ras_error_class *errors = pending->counters;
+	u32 counter_id, ncounters = pending->ncounters;
+
+	if (!ncounters || ncounters > XE_RAS_NUM_COUNTERS) {
+		xe_err(xe, "sysctrl: unexpected counter threshold crossed %u\n", ncounters);
+		return;
+	}
+
+	BUILD_BUG_ON(sizeof(response->data) < sizeof(*pending));
+	xe_warn(xe, "[RAS]: counter threshold crossed, %u new errors\n", ncounters);
+
+	for (counter_id = 0; counter_id < ncounters; counter_id++) {
+		u8 severity, component;
+
+		severity = errors[counter_id].common.severity;
+		component = errors[counter_id].common.component;
+
+		xe_warn(xe, "[RAS]: %s %s detected\n",
+			comp_to_str(component), sev_to_str(severity));
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
new file mode 100644
index 000000000000..ea90593b62dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ras.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_RAS_H_
+#define _XE_RAS_H_
+
+struct xe_device;
+struct xe_sysctrl_event_response;
+
+void xe_ras_counter_threshold_crossed(struct xe_device *xe,
+				      struct xe_sysctrl_event_response *response);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h
new file mode 100644
index 000000000000..4e63c67f806a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ras_types.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_RAS_TYPES_H_
+#define _XE_RAS_TYPES_H_
+
+#include <linux/types.h>
+
+#define XE_RAS_NUM_COUNTERS			16
+
+/**
+ * struct xe_ras_error_common - Error fields that are common across all products
+ */
+struct xe_ras_error_common {
+	/** @severity: Error severity */
+	u8 severity;
+	/** @component: IP block where error originated */
+	u8 component;
+} __packed;
+
+/**
+ * struct xe_ras_error_unit - Error unit information
+ */
+struct xe_ras_error_unit {
+	/** @tile: Tile identifier */
+	u8 tile;
+	/** @instance: Instance identifier specific to IP */
+	u32 instance;
+} __packed;
+
+/**
+ * struct xe_ras_error_cause - Error cause information
+ */
+struct xe_ras_error_cause {
+	/** @cause: Cause/checker */
+	u32 cause;
+	/** @reserved: For future use */
+	u8 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_error_product - Error fields that are specific to the product
+ */
+struct xe_ras_error_product {
+	/** @unit: Unit within IP block */
+	struct xe_ras_error_unit unit;
+	/** @cause: Cause/checker */
+	struct xe_ras_error_cause cause;
+} __packed;
+
+/**
+ * struct xe_ras_error_class - Combines common and product-specific parts
+ */
+struct xe_ras_error_class {
+	/** @common: Common error type and component */
+	struct xe_ras_error_common common;
+	/** @product: Product-specific unit and cause */
+	struct xe_ras_error_product product;
+} __packed;
+
+/**
+ * struct xe_ras_threshold_crossed - Data for threshold crossed event
+ */
+struct xe_ras_threshold_crossed {
+	/** @ncounters: Number of error counters that crossed thresholds */
+	u32 ncounters;
+	/** @counters: Array of error counters that crossed threshold */
+	struct xe_ras_error_class counters[XE_RAS_NUM_COUNTERS];
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_event.c b/drivers/gpu/drm/xe/xe_sysctrl_event.c
index 74163e0bafe2..e96af8be07a2 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_event.c
+++ b/drivers/gpu/drm/xe/xe_sysctrl_event.c
@@ -6,6 +6,7 @@
 #include "xe_device.h"
 #include "xe_irq.h"
 #include "xe_printk.h"
+#include "xe_ras.h"
 #include "xe_sysctrl.h"
 #include "xe_sysctrl_event_types.h"
 #include "xe_sysctrl_mailbox.h"
@@ -35,7 +36,7 @@ static void get_pending_event(struct xe_sysctrl *sc, struct xe_sysctrl_mailbox_c
 		}
 
 		if (response->event == XE_SYSCTRL_EVENT_THRESHOLD_CROSSED)
-			xe_warn(xe, "[RAS]: counter threshold crossed\n");
+			xe_ras_counter_threshold_crossed(xe, response);
 		else
 			xe_err(xe, "sysctrl: unexpected event %#x\n", response->event);
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 06/11] drm/xe/sysctrl: Add system controller event support
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

The system controller reports different types of events to the GFX
endpoint for different use cases. Add initial support for them. This will
be further extended to service those use cases.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Mallesh Koujalagi <mallesh.koujalagi@intel.com>
---
 drivers/gpu/drm/xe/Makefile                   |  1 +
 drivers/gpu/drm/xe/xe_sysctrl.c               | 11 +++
 drivers/gpu/drm/xe/xe_sysctrl.h               |  1 +
 drivers/gpu/drm/xe/xe_sysctrl_event.c         | 86 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_sysctrl_event_types.h   | 57 ++++++++++++
 drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h | 18 ++++
 drivers/gpu/drm/xe/xe_sysctrl_types.h         |  3 +
 7 files changed, 177 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_sysctrl_event.c
 create mode 100644 drivers/gpu/drm/xe/xe_sysctrl_event_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 3fceda259834..1c863b711ae9 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -126,6 +126,7 @@ xe-y += xe_bb.o \
 	xe_survivability_mode.o \
 	xe_sync.o \
 	xe_sysctrl.o \
+	xe_sysctrl_event.o \
 	xe_sysctrl_mailbox.o \
 	xe_tile.o \
 	xe_tile_sysfs.o \
diff --git a/drivers/gpu/drm/xe/xe_sysctrl.c b/drivers/gpu/drm/xe/xe_sysctrl.c
index 7de3e73bd8e0..6a7da5d2794a 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl.c
+++ b/drivers/gpu/drm/xe/xe_sysctrl.c
@@ -12,6 +12,7 @@
 #include "regs/xe_sysctrl_regs.h"
 #include "xe_device.h"
 #include "xe_mmio.h"
+#include "xe_pm.h"
 #include "xe_soc_remapper.h"
 #include "xe_sysctrl.h"
 #include "xe_sysctrl_mailbox.h"
@@ -39,6 +40,12 @@ static void sysctrl_fini(void *arg)
 
 static void xe_sysctrl_work(struct work_struct *work)
 {
+	struct xe_sysctrl *sc = container_of(work, struct xe_sysctrl, work);
+	struct xe_device *xe = sc_to_xe(sc);
+
+	guard(xe_pm_runtime)(xe);
+	guard(mutex)(&sc->work_lock);
+	xe_sysctrl_event(sc);
 }
 
 /**
@@ -74,6 +81,10 @@ int xe_sysctrl_init(struct xe_device *xe)
 	if (ret)
 		return ret;
 
+	ret = devm_mutex_init(xe->drm.dev, &sc->work_lock);
+	if (ret)
+		return ret;
+
 	xe->soc_remapper.set_sysctrl_region(xe, SYSCTRL_MAILBOX_INDEX);
 	xe_sysctrl_mailbox_init(sc);
 	INIT_WORK(&sc->work, xe_sysctrl_work);
diff --git a/drivers/gpu/drm/xe/xe_sysctrl.h b/drivers/gpu/drm/xe/xe_sysctrl.h
index f7469bfc9324..090dffb6d55f 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl.h
@@ -16,6 +16,7 @@ static inline struct xe_device *sc_to_xe(struct xe_sysctrl *sc)
 	return container_of(sc, struct xe_device, sc);
 }
 
+void xe_sysctrl_event(struct xe_sysctrl *sc);
 int xe_sysctrl_init(struct xe_device *xe);
 void xe_sysctrl_irq_handler(struct xe_device *xe, u32 master_ctl);
 void xe_sysctrl_pm_resume(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_event.c b/drivers/gpu/drm/xe/xe_sysctrl_event.c
new file mode 100644
index 000000000000..74163e0bafe2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sysctrl_event.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_irq.h"
+#include "xe_printk.h"
+#include "xe_sysctrl.h"
+#include "xe_sysctrl_event_types.h"
+#include "xe_sysctrl_mailbox.h"
+#include "xe_sysctrl_mailbox_types.h"
+
+static void get_pending_event(struct xe_sysctrl *sc, struct xe_sysctrl_mailbox_command *command)
+{
+	struct xe_sysctrl_event_response *response = command->data_out;
+	struct xe_device *xe = sc_to_xe(sc);
+	u32 count = XE_SYSCTRL_EVENT_FLOOD;
+	size_t len;
+	int ret;
+
+	do {
+		memset(response, 0, sizeof(*response));
+
+		ret = xe_sysctrl_send_command(sc, command, &len);
+		if (ret) {
+			xe_err(xe, "sysctrl: failed to get pending event %d\n", ret);
+			return;
+		}
+
+		if (len != sizeof(*response)) {
+			xe_err(xe, "sysctrl: unexpected event response length %zu (expected %zu)\n",
+			       len, sizeof(*response));
+			return;
+		}
+
+		if (response->event == XE_SYSCTRL_EVENT_THRESHOLD_CROSSED)
+			xe_warn(xe, "[RAS]: counter threshold crossed\n");
+		else
+			xe_err(xe, "sysctrl: unexpected event %#x\n", response->event);
+
+		if (!--count) {
+			xe_err(xe, "sysctrl: event flooding\n");
+			return;
+		}
+
+		xe_dbg(xe, "sysctrl: %u events pending\n", response->count);
+	} while (response->count);
+}
+
+static void event_request_prepare(struct xe_device *xe, struct xe_sysctrl_app_msg_hdr *header,
+				  struct xe_sysctrl_event_request *request)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	header->data = REG_FIELD_PREP(APP_HDR_GROUP_ID_MASK, XE_SYSCTRL_GROUP_GFSP) |
+		       REG_FIELD_PREP(APP_HDR_COMMAND_MASK, XE_SYSCTRL_CMD_GET_PENDING_EVENT);
+
+	request->vector = xe_device_has_msix(xe) ? XE_IRQ_DEFAULT_MSIX : 0;
+	request->fn = PCI_FUNC(pdev->devfn);
+}
+
+/**
+ * xe_sysctrl_event() - Handler for System Controller events
+ * @sc: System Controller instance
+ *
+ * Handle events generated by System Controller.
+ */
+void xe_sysctrl_event(struct xe_sysctrl *sc)
+{
+	struct xe_sysctrl_mailbox_command command = {};
+	struct xe_sysctrl_event_response response = {};
+	struct xe_sysctrl_event_request request = {};
+	struct xe_sysctrl_app_msg_hdr header = {};
+
+	xe_device_assert_mem_access(sc_to_xe(sc));
+	event_request_prepare(sc_to_xe(sc), &header, &request);
+
+	command.header = header;
+	command.data_in = &request;
+	command.data_in_len = sizeof(request);
+	command.data_out = &response;
+	command.data_out_len = sizeof(response);
+
+	get_pending_event(sc, &command);
+}
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_event_types.h b/drivers/gpu/drm/xe/xe_sysctrl_event_types.h
new file mode 100644
index 000000000000..4d444ba40b9b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sysctrl_event_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_SYSCTRL_EVENT_TYPES_H_
+#define _XE_SYSCTRL_EVENT_TYPES_H_
+
+#include <linux/types.h>
+
+#define XE_SYSCTRL_EVENT_DATA_LEN		59
+
+/* Modify as needed */
+#define XE_SYSCTRL_EVENT_FLOOD			16
+
+/**
+ * enum xe_sysctrl_event - Events reported by System Controller
+ *
+ * @XE_SYSCTRL_EVENT_THRESHOLD_CROSSED: Error counter threshold crossed
+ */
+enum xe_sysctrl_event {
+	XE_SYSCTRL_EVENT_THRESHOLD_CROSSED	= 0x01,
+};
+
+/**
+ * struct xe_sysctrl_event_request - Request structure for pending event
+ */
+struct xe_sysctrl_event_request {
+	/** @vector: MSI-X vector that was triggered */
+	u32 vector;
+	/** @fn: Function index (0-7) of PCIe device */
+	u32 fn:8;
+	/** @reserved: Reserved for future use */
+	u32 reserved:24;
+	/** @reserved2: Reserved for future use */
+	u32 reserved2[2];
+} __packed;
+
+/**
+ * struct xe_sysctrl_event_response - Response structure for pending event
+ */
+struct xe_sysctrl_event_response {
+	/** @count: Pending event count, decremented by fw on each response */
+	u32 count;
+	/** @event: Pending event type */
+	u32 event;
+	/** @timestamp: Timestamp of most recent event */
+	u64 timestamp;
+	/** @extended: Event has extended payload */
+	u32 extended:1;
+	/** @reserved: Reserved for future use */
+	u32 reserved:31;
+	/** @data: Generic event data */
+	u32 data[XE_SYSCTRL_EVENT_DATA_LEN];
+} __packed;
+
+#endif /* _XE_SYSCTRL_EVENT_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
index 89456aec6097..84d7c647e743 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
@@ -10,6 +10,24 @@
 
 #include "abi/xe_sysctrl_abi.h"
 
+/**
+ * enum xe_sysctrl_group - System Controller command groups
+ *
+ * @XE_SYSCTRL_GROUP_GFSP: GFSP group
+ */
+enum xe_sysctrl_group {
+	XE_SYSCTRL_GROUP_GFSP			= 0x01,
+};
+
+/**
+ * enum xe_sysctrl_gfsp_cmd - Commands supported by GFSP group
+ *
+ * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
+ */
+enum xe_sysctrl_gfsp_cmd {
+	XE_SYSCTRL_CMD_GET_PENDING_EVENT	= 0x07,
+};
+
 /**
  * struct xe_sysctrl_mailbox_command - System Controller mailbox command
  */
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_types.h b/drivers/gpu/drm/xe/xe_sysctrl_types.h
index 5f408d6491ef..95359af691c9 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_types.h
@@ -31,6 +31,9 @@ struct xe_sysctrl {
 
 	/** @work: Pending events worker */
 	struct work_struct work;
+
+	/** @work_lock: Mutex protecting pending events */
+	struct mutex work_lock;
 };
 
 #endif
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 05/11] drm/xe/sysctrl: Add system controller interrupt handler
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

Add a handler for the system controller interrupt, which is signalled by
bit 11 of the GFX master interrupt register. While at it, add a worker for
scheduling system controller work.

Co-developed-by: Soham Purkait <soham.purkait@intel.com>
Signed-off-by: Soham Purkait <soham.purkait@intel.com>
Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Mallesh Koujalagi <mallesh.koujalagi@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
---
 drivers/gpu/drm/xe/regs/xe_irq_regs.h |  1 +
 drivers/gpu/drm/xe/xe_irq.c           |  2 ++
 drivers/gpu/drm/xe/xe_sysctrl.c       | 35 +++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_sysctrl.h       |  1 +
 drivers/gpu/drm/xe/xe_sysctrl_types.h |  4 +++
 5 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
index 9d74f454d3ff..1d6b976c4de0 100644
--- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -22,6 +22,7 @@
 #define   DISPLAY_IRQ				REG_BIT(16)
 #define   SOC_H2DMEMINT_IRQ			REG_BIT(13)
 #define   I2C_IRQ				REG_BIT(12)
+#define   SYSCTRL_IRQ				REG_BIT(11)
 #define   GT_DW_IRQ(x)				REG_BIT(x)
 
 /*
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 9a775c6588dc..e9f0b3cad06d 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -24,6 +24,7 @@
 #include "xe_mmio.h"
 #include "xe_pxp.h"
 #include "xe_sriov.h"
+#include "xe_sysctrl.h"
 #include "xe_tile.h"
 
 /*
@@ -525,6 +526,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 				xe_heci_csc_irq_handler(xe, master_ctl);
 			xe_display_irq_handler(xe, master_ctl);
 			xe_i2c_irq_handler(xe, master_ctl);
+			xe_sysctrl_irq_handler(xe, master_ctl);
 			xe_mert_irq_handler(xe, master_ctl);
 			gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
 		}
diff --git a/drivers/gpu/drm/xe/xe_sysctrl.c b/drivers/gpu/drm/xe/xe_sysctrl.c
index 2bcef304eb9a..7de3e73bd8e0 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl.c
+++ b/drivers/gpu/drm/xe/xe_sysctrl.c
@@ -8,6 +8,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "regs/xe_irq_regs.h"
 #include "regs/xe_sysctrl_regs.h"
 #include "xe_device.h"
 #include "xe_mmio.h"
@@ -30,10 +31,16 @@
 static void sysctrl_fini(void *arg)
 {
 	struct xe_device *xe = arg;
+	struct xe_sysctrl *sc = &xe->sc;
 
+	disable_work_sync(&sc->work);
 	xe->soc_remapper.set_sysctrl_region(xe, 0);
 }
 
+static void xe_sysctrl_work(struct work_struct *work)
+{
+}
+
 /**
  * xe_sysctrl_init() - Initialize System Controller subsystem
  * @xe: xe device instance
@@ -55,12 +62,6 @@ int xe_sysctrl_init(struct xe_device *xe)
 	if (!xe->info.has_sysctrl)
 		return 0;
 
-	xe->soc_remapper.set_sysctrl_region(xe, SYSCTRL_MAILBOX_INDEX);
-
-	ret = devm_add_action_or_reset(xe->drm.dev, sysctrl_fini, xe);
-	if (ret)
-		return ret;
-
 	sc->mmio = devm_kzalloc(xe->drm.dev, sizeof(*sc->mmio), GFP_KERNEL);
 	if (!sc->mmio)
 		return -ENOMEM;
@@ -73,9 +74,29 @@ int xe_sysctrl_init(struct xe_device *xe)
 	if (ret)
 		return ret;
 
+	xe->soc_remapper.set_sysctrl_region(xe, SYSCTRL_MAILBOX_INDEX);
 	xe_sysctrl_mailbox_init(sc);
+	INIT_WORK(&sc->work, xe_sysctrl_work);
 
-	return 0;
+	return devm_add_action_or_reset(xe->drm.dev, sysctrl_fini, xe);
+}
+
+/**
+ * xe_sysctrl_irq_handler() - Handler for System Controller interrupts
+ * @xe: xe device instance
+ * @master_ctl: interrupt register
+ *
+ * Handle interrupts generated by System Controller.
+ */
+void xe_sysctrl_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+	struct xe_sysctrl *sc = &xe->sc;
+
+	if (!xe->info.has_sysctrl || !sc->work.func)
+		return;
+
+	if (master_ctl & SYSCTRL_IRQ)
+		schedule_work(&sc->work);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_sysctrl.h b/drivers/gpu/drm/xe/xe_sysctrl.h
index f3b0f3716b2f..f7469bfc9324 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl.h
@@ -17,6 +17,7 @@ static inline struct xe_device *sc_to_xe(struct xe_sysctrl *sc)
 }
 
 int xe_sysctrl_init(struct xe_device *xe);
+void xe_sysctrl_irq_handler(struct xe_device *xe, u32 master_ctl);
 void xe_sysctrl_pm_resume(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_types.h b/drivers/gpu/drm/xe/xe_sysctrl_types.h
index 8217f6befe70..5f408d6491ef 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_types.h
@@ -8,6 +8,7 @@
 
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <linux/workqueue_types.h>
 
 struct xe_mmio;
 
@@ -27,6 +28,9 @@ struct xe_sysctrl {
 
 	/** @phase_bit: Message boundary phase toggle bit (0 or 1) */
 	bool phase_bit;
+
+	/** @work: Pending events worker */
+	struct work_struct work;
 };
 
 #endif
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 04/11] drm/xe/uapi: Add additional error components to XE drm_ras
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

From: Riana Tauro <riana.tauro@intel.com>

Add additional error components supported by XE RAS (Reliability,
Availability and Serviceability).

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
---
 include/uapi/drm/xe_drm.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 48e9f1fdb78d..50c80af4ad4e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -2589,6 +2589,12 @@ enum drm_xe_ras_error_component {
 	DRM_XE_RAS_ERR_COMP_CORE_COMPUTE = 1,
 	/** @DRM_XE_RAS_ERR_COMP_SOC_INTERNAL: SoC Internal Error */
 	DRM_XE_RAS_ERR_COMP_SOC_INTERNAL,
+	/** @DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY: Device Memory Error */
+	DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY,
+	/** @DRM_XE_RAS_ERR_COMP_PCIE: PCIe Subsystem Error */
+	DRM_XE_RAS_ERR_COMP_PCIE,
+	/** @DRM_XE_RAS_ERR_COMP_FABRIC: Fabric Subsystem Error */
+	DRM_XE_RAS_ERR_COMP_FABRIC,
 	/** @DRM_XE_RAS_ERR_COMP_MAX: Max Error */
 	DRM_XE_RAS_ERR_COMP_MAX	/* non-ABI */
 };
@@ -2606,7 +2612,10 @@ enum drm_xe_ras_error_component {
  */
 #define DRM_XE_RAS_ERROR_COMPONENT_NAMES {				\
 	[DRM_XE_RAS_ERR_COMP_CORE_COMPUTE] = "core-compute",		\
-	[DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal"		\
+	[DRM_XE_RAS_ERR_COMP_SOC_INTERNAL] = "soc-internal",		\
+	[DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY] = "device-memory",		\
+	[DRM_XE_RAS_ERR_COMP_PCIE] = "pcie",				\
+	[DRM_XE_RAS_ERR_COMP_FABRIC] = "fabric",			\
 }
 
 #if defined(__cplusplus)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 03/11] drm/ras: Introduce set-error-threshold
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

Add support for the set-error-threshold command, which allows setting the
threshold value of an error. In the RAS context, threshold means the number
of errors the hardware is expected to accumulate before it raises them to
software. This gives fine-grained control over the error notifications
raised by the hardware.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
 Documentation/gpu/drm-ras.rst            |  9 +++++
 Documentation/netlink/specs/drm_ras.yaml | 12 ++++++
 drivers/gpu/drm/drm_ras.c                | 48 ++++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             | 14 +++++++
 drivers/gpu/drm/drm_ras_nl.h             |  2 +
 include/drm/drm_ras.h                    | 13 +++++++
 include/uapi/drm/drm_ras.h               |  1 +
 7 files changed, 99 insertions(+)

diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-ras.rst
index 6443dfd1677f..a819aa150604 100644
--- a/Documentation/gpu/drm-ras.rst
+++ b/Documentation/gpu/drm-ras.rst
@@ -54,6 +54,8 @@ User space tools can:
   ``node-id`` and ``error-id`` as parameters.
 * Query specific error threshold value with the ``get-error-threshold`` command, using both
   ``node-id`` and ``error-id`` as parameters.
+* Set specific error threshold value with the ``set-error-threshold`` command, using
+  ``node-id``, ``error-id`` and ``error-threshold`` as parameters.
 
 YAML-based Interface
 --------------------
@@ -109,3 +111,10 @@ Example: Query threshold value of a given error
 
     sudo ynl --family drm_ras --do get-error-threshold --json '{"node-id":0, "error-id":1}'
     {'error-id': 1, 'error-name': 'error_name1', 'error-threshold': 0}
+
+Example: Set threshold value of a given error
+
+.. code-block:: bash
+
+    sudo ynl --family drm_ras --do set-error-threshold --json '{"node-id":0, "error-id":1, "error-threshold":8}'
+    None
diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/netlink/specs/drm_ras.yaml
index 95a939fb987d..09824309cdff 100644
--- a/Documentation/netlink/specs/drm_ras.yaml
+++ b/Documentation/netlink/specs/drm_ras.yaml
@@ -150,3 +150,15 @@ operations:
             - error-id
             - error-name
             - error-threshold
+    -
+      name: set-error-threshold
+      doc: >-
+           Set threshold value of the error.
+      attribute-set: error-threshold-attrs
+      flags: [admin-perm]
+      do:
+        request:
+          attributes:
+            - node-id
+            - error-id
+            - error-threshold
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index d2d853d5d69c..e4ff6d87f824 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -41,6 +41,9 @@
  *    Userspace must provide Node ID and Error ID.
  *    Returns the threshold value of a specific error.
  *
+ * 4. SET_ERROR_THRESHOLD: Set threshold value of the error.
+ *    Userspace must provide Node ID, Error ID and Threshold value to be set.
+ *
  * Node registration:
  *
  * - drm_ras_node_register(): Registers a new node and assigns
@@ -72,6 +75,8 @@
  *   operation, fetching a counter value from a specific node.
  * - drm_ras_nl_get_error_threshold_doit(): Implements the GET_ERROR_THRESHOLD doit
  *   operation, fetching the threshold value of a specific error.
+ * - drm_ras_nl_set_error_threshold_doit(): Implements the SET_ERROR_THRESHOLD doit
+ *   operation, setting the threshold value of a specific error.
  */
 
 static DEFINE_XARRAY_ALLOC(drm_ras_xa);
@@ -184,6 +189,21 @@ static int get_node_error_threshold(u32 node_id, u32 error_id,
 	return node->query_error_threshold(node, error_id, name, value);
 }
 
+static int set_node_error_threshold(u32 node_id, u32 error_id, u32 value)
+{
+	struct drm_ras_node *node;
+
+	node = xa_load(&drm_ras_xa, node_id);
+	if (!node || !node->set_error_threshold)
+		return -ENOENT;
+
+	if (error_id < node->error_counter_range.first ||
+	    error_id > node->error_counter_range.last)
+		return -EINVAL;
+
+	return node->set_error_threshold(node, error_id, value);
+}
+
 static int msg_reply_counter_value(struct sk_buff *msg, u32 error_id,
 				   const char *error_name, u32 value)
 {
@@ -417,6 +437,34 @@ int drm_ras_nl_get_error_threshold_doit(struct sk_buff *skb,
 	return doit_reply_threshold_value(info, node_id, error_id);
 }
 
+/**
+ * drm_ras_nl_set_error_threshold_doit() - Set threshold value of the error
+ * @skb: Netlink message buffer
+ * @info: Generic Netlink info containing attributes of the request
+ *
+ * Extracts the node ID, error ID and threshold value from the netlink attributes
+ * and sets the threshold of the corresponding error.
+ *
+ * Return: 0 on success, or negative errno on failure.
+ */
+int drm_ras_nl_set_error_threshold_doit(struct sk_buff *skb,
+				      struct genl_info *info)
+{
+	u32 node_id, error_id, value;
+
+	if (!info->attrs ||
+	    GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID) ||
+	    GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID) ||
+	    GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD))
+		return -EINVAL;
+
+	node_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID]);
+	error_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID]);
+	value = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD]);
+
+	return set_node_error_threshold(node_id, error_id, value);
+}
+
 /**
  * drm_ras_node_register() - Register a new RAS node
  * @node: Node structure to register
diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
index 48e231734f4d..8b202d773dac 100644
--- a/drivers/gpu/drm/drm_ras_nl.c
+++ b/drivers/gpu/drm/drm_ras_nl.c
@@ -28,6 +28,13 @@ static const struct nla_policy drm_ras_get_error_threshold_nl_policy[DRM_RAS_A_E
 	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID] = { .type = NLA_U32, },
 };
 
+/* DRM_RAS_CMD_SET_ERROR_THRESHOLD - do */
+static const struct nla_policy drm_ras_set_error_threshold_nl_policy[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD + 1] = {
+	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID] = { .type = NLA_U32, },
+	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID] = { .type = NLA_U32, },
+	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD] = { .type = NLA_U32, },
+};
+
 /* Ops table for drm_ras */
 static const struct genl_split_ops drm_ras_nl_ops[] = {
 	{
@@ -56,6 +63,13 @@ static const struct genl_split_ops drm_ras_nl_ops[] = {
 		.maxattr	= DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
 	},
+	{
+		.cmd		= DRM_RAS_CMD_SET_ERROR_THRESHOLD,
+		.doit		= drm_ras_nl_set_error_threshold_doit,
+		.policy		= drm_ras_set_error_threshold_nl_policy,
+		.maxattr	= DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
 };
 
 struct genl_family drm_ras_nl_family __ro_after_init = {
diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
index 540fe22e2312..9db7f5d00201 100644
--- a/drivers/gpu/drm/drm_ras_nl.h
+++ b/drivers/gpu/drm/drm_ras_nl.h
@@ -20,6 +20,8 @@ int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
 					struct netlink_callback *cb);
 int drm_ras_nl_get_error_threshold_doit(struct sk_buff *skb,
 					struct genl_info *info);
+int drm_ras_nl_set_error_threshold_doit(struct sk_buff *skb,
+					struct genl_info *info);
 
 extern struct genl_family drm_ras_nl_family;
 
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
index 50cee70bd065..7a69821b8b78 100644
--- a/include/drm/drm_ras.h
+++ b/include/drm/drm_ras.h
@@ -71,6 +71,19 @@ struct drm_ras_node {
 	 */
 	int (*query_error_threshold)(struct drm_ras_node *node, u32 error_id,
 				     const char **name, u32 *val);
+	/**
+	 * @set_error_threshold:
+	 *
+	 * This callback is used by drm-ras to set threshold value of a specific
+	 * error.
+	 *
+	 * Driver should expect set_error_threshold() to be called with error_id
+	 * from `error_counter_range.first` to `error_counter_range.last`.
+	 *
+	 * Returns: 0 on success, negative error code on failure.
+	 */
+	int (*set_error_threshold)(struct drm_ras_node *node, u32 error_id,
+				   u32 val);
 
 	/** @priv: Driver private data */
 	void *priv;
diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
index 49c5ca497d73..8ff0311d0d63 100644
--- a/include/uapi/drm/drm_ras.h
+++ b/include/uapi/drm/drm_ras.h
@@ -52,6 +52,7 @@ enum {
 	DRM_RAS_CMD_LIST_NODES = 1,
 	DRM_RAS_CMD_GET_ERROR_COUNTER,
 	DRM_RAS_CMD_GET_ERROR_THRESHOLD,
+	DRM_RAS_CMD_SET_ERROR_THRESHOLD,
 
 	__DRM_RAS_CMD_MAX,
 	DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 02/11] drm/ras: Introduce get-error-threshold
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

Add get-error-threshold command support which allows querying threshold
value of the error. Threshold in RAS context means the number of errors
the hardware is expected to accumulate before it raises them to software.
This is to have a fine grained control over error notifications that are
raised by the hardware.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
 Documentation/gpu/drm-ras.rst            |   8 ++
 Documentation/netlink/specs/drm_ras.yaml |  37 ++++++++
 drivers/gpu/drm/drm_ras.c                | 103 +++++++++++++++++++++++
 drivers/gpu/drm/drm_ras_nl.c             |  13 +++
 drivers/gpu/drm/drm_ras_nl.h             |   2 +
 include/drm/drm_ras.h                    |  14 +++
 include/uapi/drm/drm_ras.h               |  11 +++
 7 files changed, 188 insertions(+)

diff --git a/Documentation/gpu/drm-ras.rst b/Documentation/gpu/drm-ras.rst
index 70b246a78fc8..6443dfd1677f 100644
--- a/Documentation/gpu/drm-ras.rst
+++ b/Documentation/gpu/drm-ras.rst
@@ -52,6 +52,8 @@ User space tools can:
   as a parameter.
 * Query specific error counter values with the ``get-error-counter`` command, using both
   ``node-id`` and ``error-id`` as parameters.
+* Query specific error threshold value with the ``get-error-threshold`` command, using both
+  ``node-id`` and ``error-id`` as parameters.
 
 YAML-based Interface
 --------------------
@@ -101,3 +103,9 @@ Example: Query an error counter for a given node
     sudo ynl --family drm_ras --do get-error-counter --json '{"node-id":0, "error-id":1}'
     {'error-id': 1, 'error-name': 'error_name1', 'error-value': 0}
 
+Example: Query threshold value of a given error
+
+.. code-block:: bash
+
+    sudo ynl --family drm_ras --do get-error-threshold --json '{"node-id":0, "error-id":1}'
+    {'error-id': 1, 'error-name': 'error_name1', 'error-threshold': 0}
diff --git a/Documentation/netlink/specs/drm_ras.yaml b/Documentation/netlink/specs/drm_ras.yaml
index 79af25dac3c5..95a939fb987d 100644
--- a/Documentation/netlink/specs/drm_ras.yaml
+++ b/Documentation/netlink/specs/drm_ras.yaml
@@ -69,6 +69,25 @@ attribute-sets:
         name: error-value
         type: u32
         doc: Current value of the requested error counter.
+  -
+    name: error-threshold-attrs
+    attributes:
+      -
+        name: node-id
+        type: u32
+        doc: Node ID targeted by this operation.
+      -
+        name: error-id
+        type: u32
+        doc: Unique identifier for a specific error within the node.
+      -
+        name: error-name
+        type: string
+        doc: Name of the error.
+      -
+        name: error-threshold
+        type: u32
+        doc: Threshold value of the error.
 
 operations:
   list:
@@ -113,3 +132,21 @@ operations:
             - node-id
         reply:
           attributes: *errorinfo
+    -
+      name: get-error-threshold
+      doc: >-
+           Retrieve threshold value of the error.
+           The response includes the id, the name, and current threshold
+           value of the error.
+      attribute-set: error-threshold-attrs
+      flags: [admin-perm]
+      do:
+        request:
+          attributes:
+            - node-id
+            - error-id
+        reply:
+          attributes:
+            - error-id
+            - error-name
+            - error-threshold
diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index 1f7435d60f11..d2d853d5d69c 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -37,6 +37,10 @@
  *    Returns all counters of a node if only Node ID is provided or specific
  *    error counters.
  *
+ * 3. GET_ERROR_THRESHOLD: Query threshold value of the error.
+ *    Userspace must provide Node ID and Error ID.
+ *    Returns the threshold value of a specific error.
+ *
  * Node registration:
  *
  * - drm_ras_node_register(): Registers a new node and assigns
@@ -66,6 +70,8 @@
  *   operation, fetching all counters from a specific node.
  * - drm_ras_nl_get_error_counter_doit(): Implements the GET_ERROR_COUNTER doit
  *   operation, fetching a counter value from a specific node.
+ * - drm_ras_nl_get_error_threshold_doit(): Implements the GET_ERROR_THRESHOLD doit
+ *   operation, fetching the threshold value of a specific error.
  */
 
 static DEFINE_XARRAY_ALLOC(drm_ras_xa);
@@ -162,6 +168,22 @@ static int get_node_error_counter(u32 node_id, u32 error_id,
 	return node->query_error_counter(node, error_id, name, value);
 }
 
+static int get_node_error_threshold(u32 node_id, u32 error_id,
+				    const char **name, u32 *value)
+{
+	struct drm_ras_node *ras = xa_load(&drm_ras_xa, node_id);
+
+	/* No such node, or the node has no threshold query callback */
+	if (!ras || !ras->query_error_threshold)
+		return -ENOENT;
+
+	if (error_id < ras->error_counter_range.first ||
+	    error_id > ras->error_counter_range.last)
+		return -EINVAL;
+
+	return ras->query_error_threshold(ras, error_id, name, value);
+}
+
 static int msg_reply_counter_value(struct sk_buff *msg, u32 error_id,
 				   const char *error_name, u32 value)
 {
@@ -180,6 +202,24 @@ static int msg_reply_counter_value(struct sk_buff *msg, u32 error_id,
 			   value);
 }
 
+static int msg_reply_threshold_value(struct sk_buff *msg, u32 error_id,
+				     const char *error_name, u32 value)
+{
+	int err;
+
+	/* Emit id, name and threshold in order; the first failure is returned */
+	err = nla_put_u32(msg, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID,
+			  error_id);
+	if (!err)
+		err = nla_put_string(msg, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_NAME,
+				     error_name);
+	if (!err)
+		err = nla_put_u32(msg, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD,
+				  value);
+
+	return err;
+}
+
 static int doit_reply_counter_value(struct genl_info *info, u32 node_id,
 				    u32 error_id)
 {
@@ -216,6 +256,42 @@ static int doit_reply_counter_value(struct genl_info *info, u32 node_id,
 	return genlmsg_reply(msg, info);
 }
 
+static int doit_reply_threshold_value(struct genl_info *info, u32 node_id,
+				      u32 error_id)
+{
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	const char *error_name;
+	u32 value;
+	int ret;
+
+	/* Query first: a failed lookup must not leave a reply skb allocated */
+	ret = get_node_error_threshold(node_id, error_id, &error_name, &value);
+	if (ret)
+		return ret;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_iput(msg, info);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return -EMSGSIZE;
+	}
+
+	ret = msg_reply_threshold_value(msg, error_id, error_name, value);
+	if (ret) {
+		genlmsg_cancel(msg, hdr);
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+}
+
 /**
  * drm_ras_nl_get_error_counter_dumpit() - Dump all Error Counters
  * @skb: Netlink message buffer
@@ -314,6 +390,33 @@ int drm_ras_nl_get_error_counter_doit(struct sk_buff *skb,
 	return doit_reply_counter_value(info, node_id, error_id);
 }
 
+/**
+ * drm_ras_nl_get_error_threshold_doit() - Query threshold value of the error
+ * @skb: Netlink message buffer
+ * @info: Generic Netlink info containing attributes of the request
+ *
+ * Extracts the node ID and error ID from the netlink attributes and
+ * retrieves the current threshold of the corresponding error. Sends the
+ * result back to the requesting user via the standard Genl reply.
+ *
+ * Return: 0 on success, -EINVAL on missing attributes, or other negative errno.
+ */
+int drm_ras_nl_get_error_threshold_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	u32 node_id, error_id;
+
+	if (!info->attrs ||
+	    GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID) ||
+	    GENL_REQ_ATTR_CHECK(info, DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID))
+		return -EINVAL;
+
+	node_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID]);
+	error_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID]);
+
+	return doit_reply_threshold_value(info, node_id, error_id);
+}
+
 /**
  * drm_ras_node_register() - Register a new RAS node
  * @node: Node structure to register
diff --git a/drivers/gpu/drm/drm_ras_nl.c b/drivers/gpu/drm/drm_ras_nl.c
index 16803d0c4a44..48e231734f4d 100644
--- a/drivers/gpu/drm/drm_ras_nl.c
+++ b/drivers/gpu/drm/drm_ras_nl.c
@@ -22,6 +22,12 @@ static const struct nla_policy drm_ras_get_error_counter_dump_nl_policy[DRM_RAS_
 	[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID] = { .type = NLA_U32, },
 };
 
+/* DRM_RAS_CMD_GET_ERROR_THRESHOLD - do */
+static const struct nla_policy drm_ras_get_error_threshold_nl_policy[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID + 1] = {
+	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID] = { .type = NLA_U32, },
+	[DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID] = { .type = NLA_U32, },
+};
+
 /* Ops table for drm_ras */
 static const struct genl_split_ops drm_ras_nl_ops[] = {
 	{
@@ -43,6 +49,13 @@ static const struct genl_split_ops drm_ras_nl_ops[] = {
 		.maxattr	= DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
 	},
+	{
+		.cmd		= DRM_RAS_CMD_GET_ERROR_THRESHOLD,
+		.doit		= drm_ras_nl_get_error_threshold_doit,
+		.policy		= drm_ras_get_error_threshold_nl_policy,
+		.maxattr	= DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
 };
 
 struct genl_family drm_ras_nl_family __ro_after_init = {
diff --git a/drivers/gpu/drm/drm_ras_nl.h b/drivers/gpu/drm/drm_ras_nl.h
index 06ccd9342773..540fe22e2312 100644
--- a/drivers/gpu/drm/drm_ras_nl.h
+++ b/drivers/gpu/drm/drm_ras_nl.h
@@ -18,6 +18,8 @@ int drm_ras_nl_get_error_counter_doit(struct sk_buff *skb,
 				      struct genl_info *info);
 int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
 					struct netlink_callback *cb);
+int drm_ras_nl_get_error_threshold_doit(struct sk_buff *skb,
+					struct genl_info *info);
 
 extern struct genl_family drm_ras_nl_family;
 
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h
index 5d50209e51db..50cee70bd065 100644
--- a/include/drm/drm_ras.h
+++ b/include/drm/drm_ras.h
@@ -57,6 +57,20 @@ struct drm_ras_node {
 	 */
 	int (*query_error_counter)(struct drm_ras_node *node, u32 error_id,
 				   const char **name, u32 *val);
+	/**
+	 * @query_error_threshold:
+	 *
+	 * This callback is used by drm-ras to query threshold value of a
+	 * specific error.
+	 *
+	 * Driver should expect query_error_threshold() to be called with
+	 * error_id from `error_counter_range.first` to
+	 * `error_counter_range.last`.
+	 *
+	 * Returns: 0 on success, negative error code on failure.
+	 */
+	int (*query_error_threshold)(struct drm_ras_node *node, u32 error_id,
+				     const char **name, u32 *val);
 
 	/** @priv: Driver private data */
 	void *priv;
diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h
index 5f40fa5b869d..49c5ca497d73 100644
--- a/include/uapi/drm/drm_ras.h
+++ b/include/uapi/drm/drm_ras.h
@@ -38,9 +38,20 @@ enum {
 	DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1)
 };
 
+enum {
+	DRM_RAS_A_ERROR_THRESHOLD_ATTRS_NODE_ID = 1,
+	DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_ID,
+	DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_NAME,
+	DRM_RAS_A_ERROR_THRESHOLD_ATTRS_ERROR_THRESHOLD,
+
+	__DRM_RAS_A_ERROR_THRESHOLD_ATTRS_MAX,
+	DRM_RAS_A_ERROR_THRESHOLD_ATTRS_MAX = (__DRM_RAS_A_ERROR_THRESHOLD_ATTRS_MAX - 1)
+};
+
 enum {
 	DRM_RAS_CMD_LIST_NODES = 1,
 	DRM_RAS_CMD_GET_ERROR_COUNTER,
+	DRM_RAS_CMD_GET_ERROR_THRESHOLD,
 
 	__DRM_RAS_CMD_MAX,
 	DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 01/11] drm/ras: Update counter helpers with counter naming
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav
In-Reply-To: <20260417211730.837345-1-raag.jadav@intel.com>

Counter helpers deal with counter values. Use the appropriate naming to
match with their functionality.

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
---
 drivers/gpu/drm/drm_ras.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_ras.c b/drivers/gpu/drm/drm_ras.c
index b2fa5ab86d87..1f7435d60f11 100644
--- a/drivers/gpu/drm/drm_ras.c
+++ b/drivers/gpu/drm/drm_ras.c
@@ -162,8 +162,8 @@ static int get_node_error_counter(u32 node_id, u32 error_id,
 	return node->query_error_counter(node, error_id, name, value);
 }
 
-static int msg_reply_value(struct sk_buff *msg, u32 error_id,
-			   const char *error_name, u32 value)
+static int msg_reply_counter_value(struct sk_buff *msg, u32 error_id,
+				   const char *error_name, u32 value)
 {
 	int ret;
 
@@ -180,8 +180,8 @@ static int msg_reply_value(struct sk_buff *msg, u32 error_id,
 			   value);
 }
 
-static int doit_reply_value(struct genl_info *info, u32 node_id,
-			    u32 error_id)
+static int doit_reply_counter_value(struct genl_info *info, u32 node_id,
+				    u32 error_id)
 {
 	struct sk_buff *msg;
 	struct nlattr *hdr;
@@ -204,7 +204,7 @@ static int doit_reply_value(struct genl_info *info, u32 node_id,
 	if (ret)
 		return ret;
 
-	ret = msg_reply_value(msg, error_id, error_name, value);
+	ret = msg_reply_counter_value(msg, error_id, error_name, value);
 	if (ret) {
 		genlmsg_cancel(msg, hdr);
 		nlmsg_free(msg);
@@ -272,7 +272,7 @@ int drm_ras_nl_get_error_counter_dumpit(struct sk_buff *skb,
 			break;
 		}
 
-		ret = msg_reply_value(skb, error_id, error_name, value);
+		ret = msg_reply_counter_value(skb, error_id, error_name, value);
 		if (ret) {
 			genlmsg_cancel(skb, hdr);
 			break;
@@ -311,7 +311,7 @@ int drm_ras_nl_get_error_counter_doit(struct sk_buff *skb,
 	node_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID]);
 	error_id = nla_get_u32(info->attrs[DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID]);
 
-	return doit_reply_value(info, node_id, error_id);
+	return doit_reply_counter_value(info, node_id, error_id);
 }
 
 /**
-- 
2.43.0


^ permalink raw reply related

* [PATCH v1 00/11] Introduce error threshold to drm_ras
From: Raag Jadav @ 2026-04-17 21:16 UTC (permalink / raw)
  To: intel-xe, dri-devel, netdev
  Cc: simona.vetter, airlied, kuba, lijo.lazar, Hawking.Zhang, davem,
	pabeni, edumazet, maarten, zachary.mckevitt, rodrigo.vivi,
	riana.tauro, michal.wajdeczko, matthew.d.roper,
	umesh.nerlige.ramappa, mallesh.koujalagi, soham.purkait,
	anoop.c.vijay, aravind.iddamsetty, Raag Jadav

This series reuses some pieces of [1] and [2] and introduces error
threshold to drm_ras infrastructure. This allows user to get and set
the threshold value of a specific error.

Detailed description in commit message and documentation.

[1] https://patchwork.freedesktop.org/series/164393/
[2] https://patchwork.freedesktop.org/series/160184/

Raag Jadav (9):
  drm/ras: Update counter helpers with counter naming
  drm/ras: Introduce get-error-threshold
  drm/ras: Introduce set-error-threshold
  drm/xe/sysctrl: Add system controller interrupt handler
  drm/xe/sysctrl: Add system controller event support
  drm/xe/ras: Introduce correctable error handling
  drm/xe/ras: Get error threshold support
  drm/xe/ras: Set error threshold support
  drm/xe/drm_ras: Wire up error threshold callbacks

Riana Tauro (2):
  drm/xe/uapi: Add additional error components to XE drm_ras
  drm/xe/ras: Add flag for Xe RAS

 Documentation/gpu/drm-ras.rst                 |  17 ++
 Documentation/netlink/specs/drm_ras.yaml      |  49 +++++
 drivers/gpu/drm/drm_ras.c                     | 165 +++++++++++++-
 drivers/gpu/drm/drm_ras_nl.c                  |  27 +++
 drivers/gpu/drm/drm_ras_nl.h                  |   4 +
 drivers/gpu/drm/xe/Makefile                   |   2 +
 drivers/gpu/drm/xe/regs/xe_irq_regs.h         |   1 +
 drivers/gpu/drm/xe/xe_device_types.h          |   2 +
 drivers/gpu/drm/xe/xe_drm_ras.c               |  29 ++-
 drivers/gpu/drm/xe/xe_hw_error.c              |   2 +-
 drivers/gpu/drm/xe/xe_irq.c                   |   2 +
 drivers/gpu/drm/xe/xe_pci.c                   |   3 +
 drivers/gpu/drm/xe/xe_pci_types.h             |   1 +
 drivers/gpu/drm/xe/xe_ras.c                   | 207 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_ras.h                   |  19 ++
 drivers/gpu/drm/xe/xe_ras_types.h             | 123 +++++++++++
 drivers/gpu/drm/xe/xe_sysctrl.c               |  46 +++-
 drivers/gpu/drm/xe/xe_sysctrl.h               |   2 +
 drivers/gpu/drm/xe/xe_sysctrl_event.c         |  87 ++++++++
 drivers/gpu/drm/xe/xe_sysctrl_event_types.h   |  57 +++++
 drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h |  22 ++
 drivers/gpu/drm/xe/xe_sysctrl_types.h         |   7 +
 include/drm/drm_ras.h                         |  27 +++
 include/uapi/drm/drm_ras.h                    |  12 +
 include/uapi/drm/xe_drm.h                     |  11 +-
 25 files changed, 906 insertions(+), 18 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_ras.c
 create mode 100644 drivers/gpu/drm/xe/xe_ras.h
 create mode 100644 drivers/gpu/drm/xe/xe_ras_types.h
 create mode 100644 drivers/gpu/drm/xe/xe_sysctrl_event.c
 create mode 100644 drivers/gpu/drm/xe/xe_sysctrl_event_types.h

-- 
2.43.0


^ permalink raw reply

* Re: [PATCH] net: hsr: avoid synchronize_net() in hsr_del_port() under rtnl_mutex
From: Shardul Bankar @ 2026-04-17 21:12 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, horms, netdev, liuhangbin, lukma, acsjakub,
	kees, xiaoliang.yang_1, fmancera, linux-kernel, janak,
	kalpan.jani, syzbot+f2fbf7478a35a94c8b7c
In-Reply-To: <CANn89i+Yug-M2HFnoz5LrffTaB4AHtC74sPKjLWA7cJ2fKF4-w@mail.gmail.com>

On Fri, 2026-04-17 at 11:18 -0700, Eric Dumazet wrote:
> On Fri, Apr 17, 2026 at 10:53 AM Shardul Bankar
> <shardulsb08@gmail.com> wrote:
> > 
> 
>  synchronize_rcu_expedited() should be quite fast...

Confirmed.  I ran a targeted HSR-only teardown stress on current
mainline (many HSR-bearing netns torn down concurrently, no WireGuard,
no syzkaller harness).  At N=1000, patched-vs-baseline cleanup_net
wall time is within run-to-run noise -- the patch has no measurable
benefit on the workload it was meant to address.

> Signature looks like bug fixed recently in wireguard.
> 
> commit 60a25ef8dacb3566b1a8c4de00572a498e2a3bf9

Agreed.  60a25ef8dacb covers what I was actually observing, and
standalone evidence for the HSR path on current mainline is not
strong enough to justify this change.

Please drop this patch.

Thanks,
Shardul

^ permalink raw reply

* Re: [PATCH 1/9] bitfield: add FIELD_GET_SIGNED()
From: Yury Norov @ 2026-04-17 21:09 UTC (permalink / raw)
  To: David Laight
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Andy Lutomirski, Peter Zijlstra, Jonathan Cameron,
	David Lechner, Nuno Sá, Andy Shevchenko, Ping-Ke Shih,
	Richard Cochran, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Alexandre Belloni, Yury Norov,
	Rasmus Villemoes, Hans de Goede, Linus Walleij, Sakari Ailus,
	Salah Triki, Achim Gratz, Ben Collins, linux-kernel, linux-iio,
	linux-wireless, netdev, linux-rtc
In-Reply-To: <20260417204355.37fd960d@pumpkin>

On Fri, Apr 17, 2026 at 08:43:55PM +0100, David Laight wrote:
> On Fri, 17 Apr 2026 13:36:12 -0400
> Yury Norov <ynorov@nvidia.com> wrote:
> 
> > The bitfields are designed in assumption that fields contain unsigned
> > integer values, thus extracting the values from the field implies
> > zero-extending.
> > 
> > Some drivers need to sign-extend their fields, and currently do it like:
> > 
> > 	dc_re += sign_extend32(FIELD_GET(0xfff000, tmp), 11);
> > 	dc_im += sign_extend32(FIELD_GET(0xfff, tmp), 11);
> > 
> > It's error-prone because it relies on user to provide the correct
> > index of the most significant bit and proper 32 vs 64 function flavor.
> > 
> > Thus, introduce a FIELD_GET_SIGNED() macro, which is more convenient
> > and compiles (on x86_64) to just a couple of instructions:
> > shl and sar.
> > 
> > Signed-off-by: Yury Norov <ynorov@nvidia.com>
> > ---
> >  include/linux/bitfield.h | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
> > index 54aeeef1f0ec..35ef63972810 100644
> > --- a/include/linux/bitfield.h
> > +++ b/include/linux/bitfield.h
> > @@ -178,6 +178,22 @@
> >  		__FIELD_GET(_mask, _reg, "FIELD_GET: ");		\
> >  	})
> >  
> > +/**
> > + * FIELD_GET_SIGNED() - extract a signed bitfield element
> > + * @mask: shifted mask defining the field's length and position
> > + * @reg:  value of entire bitfield
> > + *
> > + * Returns the sign-extended field specified by @mask from the
> > + * bitfield passed in as @reg by masking and shifting it down.
> > + */
> > +#define FIELD_GET_SIGNED(mask, reg)					\
> > +	({								\
> > +		__BF_FIELD_CHECK(mask, reg, 0U, "FIELD_GET_SIGNED: ");	\
> > +		 ((__signed_scalar_typeof(mask))((long long)(reg) <<	\
> > +		 __builtin_clzll(mask) >> (__builtin_clzll(mask) +	\
> > +						__builtin_ctzll(mask))));\
> 
> Have you looked at what that generates on a typical 32bit architecture?

Yes, for arm32:

  #define FIELD_GET_SIGNED(mask, reg)                           \
      ((long long)(reg) <<                                      \
         __builtin_clzll(mask) >> (__builtin_clzll(mask) +      \
          __builtin_ctzll(mask)))
  
  long long foo(long long reg)
  {
         return FIELD_GET_SIGNED(0x00f00000ULL, reg);
  }

generates:

  foo(long long):
        lsls    r1, r0, #8
        asrs    r0, r1, #28
        asrs    r1, r1, #31
        bx      lr

Just as good as x86_64.

https://godbolt.org/z/eMnKrnocq
 
> It's really a bad idea to use __signed_scalar_typeof() on anything that isn't
> a simple variable.
> The bloat from all this when 'mask' is an expansion of GENMASK() is horrid.
> Indeed both signed_scalar_typeof() and unsigned_scalar_typeof() should
> really not be used - there are generally much better ways.

David, it's not the first time you're throwing "bad idea, horrid bloat,
really not be used"-like rant with absolutely no evidence that people
do something wrong. Today I became another random victim of your style
of communication, and I don't think there's any benefit to tolerate it
for me or anybody else.

I encourage you to change your attitude, and use professional and
specific communication style in the kernel mailing list.

Starting from now, I'm not a free tester for your ideas anymore. If
you think that my patch is wrong, please prove it yourself. If you
think that 32-bit or whatever code generation is bad - please send
an example. If you believe that your implementation is any better -
please bother yourself to convince me.

I will continue receiving patches from you in my tree, but if your
patch is claimed to improve code generation, performance of any sort,
or similar things, and doesn't provide any numbers - I'll not waste
my time on it.

Thanks,
Yury

> In this case you can just write:
> 	({
> 		auto _mask = mask;
> 		unsigned int __sl = __builtin_clzll(_mask);
> 		unsigned int __sr = __sl + __builtin_ctzll(_mask);
> 		__builtin_choose_expr(sizeof(_mask) <= 4,
> 			(int)(reg) << __sl - 32 >> __sr - 32,
> 			((long long)(reg) << __sl >> __sr));
> 	})
> and let the compiler do any more integer promotions (etc).
> 
> I'm also not convinced that the checks __BF_FIELD_CHECK() does
> on 'reg' are in any sense worth the effort.
> 
> I have tried some simpler alternatives, eg:
> 	!__builtin_constant_p(reg) && statically_true((reg & mask) == 0)
> however that throws up some false positives due to some of the weird ways
> people have used FIELD_GET() where it is nothing like the simplest
> (or most obvious) way to do things.
> That might have been the code that split a 32bit value into bytes
> in a printf with:
> 	FIELD_GET(GENMASK(7, 0), val), FIELD_GET(GENMASK(15, 8), val),
> 	FIELD_GET(GENMASK(23, 16), val), FIELD_GET(GENMASK(31, 24), val),
> 
> 	David
> 
> > +	})
> > +
> >  /**
> >   * FIELD_MODIFY() - modify a bitfield element
> >   * @_mask: shifted mask defining the field's length and position

^ permalink raw reply

* [PATCH net] sctp: fix sockets_allocated imbalance after sk_clone()
From: Xin Long @ 2026-04-17 21:09 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Kuniyuki Iwashima

sk_clone() increments sockets_allocated and sets the socket refcount to 2.
SCTP performs additional accounting in sctp_clone_sock(), so the clone-time
increment must be undone to avoid double counting.

Note we cannot simply remove the SCTP-side increment, because the SCTP
destroy path in sctp_destroy_sock() only decrements sockets_allocated when
sp->ep is set, which may not be true for all failure paths in
sctp_clone_sock().

Fixes: 16942cf4d3e3 ("sctp: Use sk_clone() in sctp_accept().")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d2665bbd41a2..d0e7048230c0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4855,8 +4855,9 @@ static struct sock *sctp_clone_sock(struct sock *sk,
 	if (!newsk)
 		return ERR_PTR(err);

-	/* sk_clone() sets refcnt to 2 */
+	/* sk_clone() sets refcnt to 2 and increments sockets_allocated */
 	sock_put(newsk);
+	sk_sockets_allocated_dec(newsk);

 	newinet = inet_sk(newsk);
 	newsp = sctp_sk(newsk);
-- 
2.47.1

^ permalink raw reply related

* Re: [PATCH] tcp: fix orphan count order in __tcp_close()
From: Eric Dumazet @ 2026-04-17 20:54 UTC (permalink / raw)
  To: RubenKelevra
  Cc: netdev, ncardwell, kuniyu, davem, dsahern, kuba, pabeni, horms
In-Reply-To: <20260417202518.1354891-1-rubenkelevra@gmail.com>

On Fri, Apr 17, 2026 at 1:25 PM RubenKelevra <rubenkelevra@gmail.com> wrote:
>
> __tcp_close() calls sock_orphan(sk) first and drains the backlog with
> __release_sock(sk), which might call tcp_done() which decrements the
> tcp_orphan_count. After which we will increment tcp_orphan_count again.
>
> Since tcp_orphan_count is an unsigned int, we underflow to uint_max if we
> started with a 0 - at least on all current supported platforms.
>
> I could not locate a direct user of this value, and in
> tcp_orphan_count_sum() this underflow is contained by adding the unsigned
> int value into a signed int sum, causing it to behave like -1 on current
> supported platforms and then get clamped by max(n, 0) to 0.
>
> The impact therefore is currently limited to e.g. tcp_too_many_orphans()
> checking an artificially low value, if the cached sum is refreshed within
> this timeframe.
>
> This fix mirrors the previous fix I found while investigating: commit
> 75c2d9077c63 ("[TCP]: Fix sock_orphan dead lock")
>
> Later commit eb4dea585304 ("net: Fix percpu counters deadlock") moved the
> increment down for old percpu_counter reasons. commit 19757cebf0c5 ("tcp:
> switch orphan_count to bare per-cpu counters") changed orphan accounting to
> plain per-cpu counters, so that old reason no longer applies the same way
> now.

I find this patch rather confusing. Have you used an LLM to generate it?

You are mentioning old patches that are not relevant (bh disable/enable).

Given we advise only increasing tcp_max_orphans value (default being
262144 on modern hosts),
your patch has really no effect.

^ permalink raw reply

* [PATCH] tcp: fix orphan count order in __tcp_close()
From: RubenKelevra @ 2026-04-17 20:25 UTC (permalink / raw)
  To: netdev, edumazet, ncardwell
  Cc: kuniyu, davem, dsahern, kuba, pabeni, horms, RubenKelevra

__tcp_close() calls sock_orphan(sk) first and drains the backlog with
__release_sock(sk), which might call tcp_done() which decrements the
tcp_orphan_count. After which we will increment tcp_orphan_count again.

Since tcp_orphan_count is an unsigned int, we underflow to uint_max if we
started with a 0 - at least on all current supported platforms.

I could not locate a direct user of this value, and in
tcp_orphan_count_sum() this underflow is contained by adding the unsigned
int value into a signed int sum, causing it to behave like -1 on current
supported platforms and then get clamped by max(n, 0) to 0.

The impact therefore is currently limited to e.g. tcp_too_many_orphans()
checking an artificially low value, if the cached sum is refreshed within
this timeframe.

This fix mirrors the previous fix I found while investigating: commit
75c2d9077c63 ("[TCP]: Fix sock_orphan dead lock")

Later commit eb4dea585304 ("net: Fix percpu counters deadlock") moved the
increment down for old percpu_counter reasons. commit 19757cebf0c5 ("tcp:
switch orphan_count to bare per-cpu counters") changed orphan accounting to
plain per-cpu counters, so that old reason no longer applies the same way
now.

Fixes: 19757cebf0c5 ("tcp: switch orphan_count to bare per-cpu counters")
Signed-off-by: RubenKelevra <rubenkelevra@gmail.com>
---
 net/ipv4/tcp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2014a6408e93..1a91cb31b02f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3234,11 +3234,10 @@ void __tcp_close(struct sock *sk, long timeout)

 	local_bh_disable();
 	bh_lock_sock(sk);
+	tcp_orphan_count_inc();
 	/* remove backlog if any, without releasing ownership. */
 	__release_sock(sk);

-	tcp_orphan_count_inc();
-
 	/* Have we already been destroyed by a softirq or backlog? */
 	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
 		goto out;
-- 
2.53.0

^ permalink raw reply related

* Re: [PATCH] rds: zero per-item info buffer before handing it to visitors
From: Allison Henderson @ 2026-04-17 20:07 UTC (permalink / raw)
  To: Michael Bommarito, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Simon Horman, netdev, linux-rdma, rds-devel, linux-kernel
In-Reply-To: <20260417141916.494761-1-michael.bommarito@gmail.com>

On Fri, 2026-04-17 at 10:19 -0400, Michael Bommarito wrote:
> Yet another from my "clanker."  This only applies to people who
> don't use CONFIG_INIT_STACK_ALL_ZERO, but I presume that's
> still enough people that it's worth backporting since it can
> be chained through leaked addresses to defeat KASLR.
> 
> rds_for_each_conn_info() and rds_walk_conn_path_info() both hand a
> caller-allocated on-stack u64 buffer to a per-connection visitor and
> then copy the full item_len bytes back to user space via
> rds_info_copy() regardless of how much of the buffer the visitor
> actually wrote.
> 
> rds_ib_conn_info_visitor() and rds6_ib_conn_info_visitor() only
> write a subset of their output struct when the underlying
> rds_connection is not in state RDS_CONN_UP (src/dst addr, tos, sl
> and the two GIDs via explicit memsets).  Several u32 fields
> (max_send_wr, max_recv_wr, max_send_sge, rdma_mr_max, rdma_mr_size,
> cache_allocs) and the 2-byte alignment hole between sl and
> cache_allocs remain as whatever stack contents preceded the visitor
> call and are then memcpy_to_user()'d out to user space.
> 
> struct rds_info_rdma_connection and struct rds6_info_rdma_connection
> are the only rds_info_* structs in include/uapi/linux/rds.h that are
> not marked __attribute__((packed)), so they have a real alignment
> hole.  The other info visitors (rds_conn_info_visitor,
> rds6_conn_info_visitor, rds_tcp_tc_info, ...) write all fields of
> their packed output struct today and are not known to be vulnerable,
> but a future visitor that adds a conditional write-path would have
> the same bug.
> 
> Reproduction on a kernel built without CONFIG_INIT_STACK_ALL_ZERO=y:
> a local unprivileged user opens AF_RDS, sets SO_RDS_TRANSPORT=IB,
> binds to a local address on an RDMA-capable netdev (rxe soft-RoCE on
> any netdev is sufficient), sendto()'s any peer on the same subnet
> (fails cleanly but installs an rds_connection in the global hash in
> RDS_CONN_CONNECTING), then calls getsockopt(SOL_RDS,
> RDS_INFO_IB_CONNECTIONS).  The returned 68-byte item contains 26
> bytes of stack garbage including kernel text/data pointers:
> 
>     0..7   0a 63 00 01 0a 63 00 02     src=10.99.0.1 dst=10.99.0.2
>     8..39  00 ...                      gids (memset-zeroed)
>     40..47 e0 92 a3 81 ff ff ff ff     kernel pointer (max_send_wr)
>     48..55 7f 37 b5 81 ff ff ff ff     kernel pointer (rdma_mr_max)
>     56..59 01 00 08 00                 rdma_mr_size (garbage)
>     60..61 00 00                       tos, sl
>     62..63 00 00                       alignment padding
>     64..67 18 00 00 00                 cache_allocs (garbage)
> 
> Fix by zeroing the per-item buffer in both rds_for_each_conn_info()
> and rds_walk_conn_path_info() before invoking the visitor.  This
> covers the IPv4/IPv6 IB visitors and hardens all current and future
> visitors against the same class of bug.
> 
> No functional change for visitors that fully populate their output.
> 
> Fixes: ec16227e1414 ("RDS/IB: Infiniband transport")
> Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
> Assisted-by: Claude:claude-opus-4-7

Hi Michael,

The change looks fine to me.  Since this is a bug fix, you'll want to cc stable
tree stable@vger.kernel.org, and note the target tree and component in the
subject line like this:  

[PATCH net v2] net/rds: zero per-item info buffer before handing it to visitors

Other than that, the patch looks good to me.  Thanks Michael.

Reviewed-by: Allison Henderson <achender@kernel.org>

Allison

> ---
>  net/rds/connection.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/net/rds/connection.c b/net/rds/connection.c
> index 412441aaa298..c10b7ed06c49 100644
> --- a/net/rds/connection.c
> +++ b/net/rds/connection.c
> @@ -701,6 +701,13 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
>  	     i++, head++) {
>  		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
>  
> +			/* Zero the per-item buffer before handing it to the
> +			 * visitor so any field the visitor does not write -
> +			 * including implicit alignment padding - cannot leak
> +			 * stack contents to user space via rds_info_copy().
> +			 */
> +			memset(buffer, 0, item_len);
> +
>  			/* XXX no c_lock usage.. */
>  			if (!visitor(conn, buffer))
>  				continue;
> @@ -750,6 +757,13 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
>  			 */
>  			cp = conn->c_path;
>  
> +			/* Zero the per-item buffer for the same reason as
> +			 * rds_for_each_conn_info(): any byte the visitor
> +			 * does not write (including alignment padding) must
> +			 * not leak stack contents via rds_info_copy().
> +			 */
> +			memset(buffer, 0, item_len);
> +
>  			/* XXX no cp_lock usage.. */
>  			if (!visitor(cp, buffer))
>  				continue;


^ permalink raw reply

* Re: [PATCH net] net/packet: fix TOCTOU race on mmap'd vnet_hdr in tpacket_snd()
From: Willem de Bruijn @ 2026-04-17 20:01 UTC (permalink / raw)
  To: Zero Mark, Willem de Bruijn
  Cc: security, David S . Miller, Jakub Kicinski, Eric Dumazet, netdev,
	Zero Mark
In-Reply-To: <20260417133610.88158-1-patzilla007@gmail.com>

Zero Mark wrote:
> In tpacket_snd(), when PACKET_VNET_HDR is enabled, vnet_hdr points
> directly into the mmap'd TX ring buffer shared with userspace. The
> kernel validates the header via __packet_snd_vnet_parse() but then
> re-reads all fields later in virtio_net_hdr_to_skb(). A concurrent
> userspace thread can modify the vnet_hdr fields between validation
> and use, bypassing all safety checks.
> 
> The non-TPACKET path (packet_snd()) already correctly copies vnet_hdr
> to a stack-local variable. All other vnet_hdr consumers in the kernel
> (tun.c, tap.c, virtio_net.c) also use stack copies. The TPACKET TX
> path is the only caller of virtio_net_hdr_to_skb() that reads directly
> from user-controlled shared memory.
> 
> Fix this by copying vnet_hdr from the mmap'd ring buffer to a
> stack-local variable before validation and use, consistent with the
> approach used in packet_snd() and all other callers.
> 
> Fixes: 1d036d25e560 ("packet: tpacket_snd gso and checksum offload")
> Signed-off-by: Zero Mark <patzilla007@gmail.com>

Reviewed-by: Willem de Bruijn <willemb@google.com>

^ permalink raw reply

* [BUG] some temporary IPv6 address don't get regenerated
From: Łukasz Stelmach @ 2026-04-17 15:23 UTC (permalink / raw)
  To: netdev

Hi,

Apparently, something in addrconf.c can go wrong and temporary
addresses may not get regenerated, leaving users who wish to use them
with only the stable ones. Below, 2a01:110f:4321:1002:abcc:78d7:2055:94ec
while still valid is not preferred anymore. Even if it's not the only
global temporary address it is the only usable to contact hosts on the
Internet because the other temporary addresses are ULA.

Neither received RAs nor adding and removing an address manually (one
with a different prefix unrelated to these below), which as far as I
understand should trigger address maintenance code, helped.

I noticed this phenomenon once or twice before. It seems to be very
rare, yet quite undesirable I'd say. What might have triggered it today
is a reboot of my router and (possible?) change in lifetime values it
announced.

Even stranger is that there is a preferred fd89:: (ULA), but not 2a01::.

accept_ra as well as use_tempaddr are set to 2.

Of course, this may get fixed by a suspend/resume (no, it may not) or
manual ifdown/ifup (of course it helped), but nevertheless I thought it
was worth reporting.

--8<---------------cut here---------------start------------->8---
107: bond0: <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether b8:ca:3a:d4:1e:97 brd ff:ff:ff:ff:ff:ff
    inet 192.168.2.122/24 brd 192.168.2.255 scope global dynamic bond0
       valid_lft 165585sec preferred_lft 165585sec

    inet6 fd89:82bb:420:2:cab3:d2d6:aeeb:e250/64 scope global temporary dynamic 
       valid_lft 597079sec preferred_lft 78177sec

    inet6 2a01:110f:4321:1002:abcc:78d7:2055:94ec/64 scope global temporary deprecated dynamic 
       valid_lft 60815sec preferred_lft 0sec

    inet6 2a01:110f:4321:1002:abac:3aff:fed4:beef/64 scope global dynamic mngtmpaddr proto kernel_ra 
       valid_lft 60815sec preferred_lft 60815sec

    inet6 fd89:82bb:420:2:24bf:fe8f:5c9c:c753/64 scope global temporary deprecated dynamic 
       valid_lft 511186sec preferred_lft 0sec

    inet6 fd89:82bb:420:2:abac:3aff:fed4:beef/64 scope global dynamic mngtmpaddr proto kernel_ra 
       valid_lft 2591805sec preferred_lft 604605sec

    inet6 fe80::abac:3aff:fed4:beef/64 scope link proto kernel_ll 
       valid_lft forever preferred_lft forever
--8<---------------cut here---------------end--------------->8---

-- 
Kind regards,
Łukasz Stelmach

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH iwl-net v1] igc: set tx buffer type for SMD frames
From: Loktionov, Aleksandr @ 2026-04-17 19:48 UTC (permalink / raw)
  To: Kohei Enju, intel-wired-lan@lists.osuosl.org,
	netdev@vger.kernel.org
  Cc: Nguyen, Anthony L, Kitszel, Przemyslaw, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Choong Yong Liang, Vladimir Oltean, Gomes, Vinicius,
	Choong, Chwee Lin, Simon Horman
In-Reply-To: <20260417193223.291093-1-kohei@enjuk.jp>



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of Kohei Enju
> Sent: Friday, April 17, 2026 9:32 PM
> To: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org
> Cc: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>; Andrew Lunn
> <andrew+netdev@lunn.ch>; David S. Miller <davem@davemloft.net>; Eric
> Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo
> Abeni <pabeni@redhat.com>; Choong Yong Liang
> <yong.liang.choong@linux.intel.com>; Vladimir Oltean
> <vladimir.oltean@nxp.com>; Gomes, Vinicius <vinicius.gomes@intel.com>;
> Choong, Chwee Lin <chwee.lin.choong@intel.com>; Simon Horman
> <horms@kernel.org>; Kohei Enju <kohei@enjuk.jp>
> Subject: [Intel-wired-lan] [PATCH iwl-net v1] igc: set tx buffer type
> for SMD frames
> 
> Sashiko pointed out that igc_fpe_init_smd_frame() initializes
> igc_tx_buffer fields for an SMD skb, but does not set the buffer type:
> https://sashiko.dev/#/patchset/20260415025226.114115-1-
> kohei%40enjuk.jp
> 
> Since igc_tx_buffer entries are reused, a stale XDP or XSK type can
> remain and make TX completion use the wrong cleanup path.
> 
> Set the buffer type to IGC_TX_BUFFER_TYPE_SKB.
> 
> Fixes: 5422570c0010 ("igc: add support for frame preemption
> verification")
> Signed-off-by: Kohei Enju <kohei@enjuk.jp>
> ---
>  drivers/net/ethernet/intel/igc/igc_tsn.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c
> b/drivers/net/ethernet/intel/igc/igc_tsn.c
> index 02dd9f0290a3..52de2bcbadbe 100644
> --- a/drivers/net/ethernet/intel/igc/igc_tsn.c
> +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
> @@ -34,6 +34,7 @@ static int igc_fpe_init_smd_frame(struct igc_ring
> *ring,
>  		return -ENOMEM;
>  	}
> 
> +	buffer->type = IGC_TX_BUFFER_TYPE_SKB;
>  	buffer->skb = skb;
>  	buffer->protocol = 0;
>  	buffer->bytecount = skb->len;
> --
> 2.53.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

^ permalink raw reply

* Re: [PATCH 1/9] bitfield: add FIELD_GET_SIGNED()
From: David Laight @ 2026-04-17 19:43 UTC (permalink / raw)
  To: Yury Norov
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Andy Lutomirski, Peter Zijlstra, Jonathan Cameron,
	David Lechner, Nuno Sá, Andy Shevchenko, Ping-Ke Shih,
	Richard Cochran, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Alexandre Belloni, Yury Norov,
	Rasmus Villemoes, Hans de Goede, Linus Walleij, Sakari Ailus,
	Salah Triki, Achim Gratz, Ben Collins, linux-kernel, linux-iio,
	linux-wireless, netdev, linux-rtc
In-Reply-To: <20260417173621.368914-2-ynorov@nvidia.com>

On Fri, 17 Apr 2026 13:36:12 -0400
Yury Norov <ynorov@nvidia.com> wrote:

> The bitfields are designed in assumption that fields contain unsigned
> integer values, thus extracting the values from the field implies
> zero-extending.
> 
> Some drivers need to sign-extend their fields, and currently do it like:
> 
> 	dc_re += sign_extend32(FIELD_GET(0xfff000, tmp), 11);
> 	dc_im += sign_extend32(FIELD_GET(0xfff, tmp), 11);
> 
> It's error-prone because it relies on user to provide the correct
> index of the most significant bit and proper 32 vs 64 function flavor.
> 
> Thus, introduce a FIELD_GET_SIGNED() macro, which is the more
> convenient and compiles (on x86_64) to just a couple instructions:
> shl and sar.
> 
> Signed-off-by: Yury Norov <ynorov@nvidia.com>
> ---
>  include/linux/bitfield.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
> index 54aeeef1f0ec..35ef63972810 100644
> --- a/include/linux/bitfield.h
> +++ b/include/linux/bitfield.h
> @@ -178,6 +178,22 @@
>  		__FIELD_GET(_mask, _reg, "FIELD_GET: ");		\
>  	})
>  
> +/**
> + * FIELD_GET_SIGNED() - extract a signed bitfield element
> + * @mask: shifted mask defining the field's length and position
> + * @reg:  value of entire bitfield
> + *
> + * Returns the sign-extended field specified by @_mask from the
> + * bitfield passed in as @_reg by masking and shifting it down.
> + */
> +#define FIELD_GET_SIGNED(mask, reg)					\
> +	({								\
> +		__BF_FIELD_CHECK(mask, reg, 0U, "FIELD_GET_SIGNED: ");	\
> +		 ((__signed_scalar_typeof(mask))((long long)(reg) <<	\
> +		 __builtin_clzll(mask) >> (__builtin_clzll(mask) +	\
> +						__builtin_ctzll(mask))));\

Have you looked at what that generates on a typical 32bit architecture?

It is really a bad idea to use __signed_scalar_typeof() on anything that isn't
a simple variable.
The bloat from all this when 'mask' is an expansion of GENMASK() is horrid.
Indeed both signed_scalar_typeof() and unsigned_scalar_typeof() should
really not be used - there are generally much better ways.

In this case you can just write:
	({
		auto _mask = mask;
		unsigned int __sl = __builtin_clzll(_mask);
		unsigned int __sr = __sl + __builtin_ctzll(_mask);
		__builtin_choose_expr(sizeof(_mask) <= 4,
			(int)(reg) << __sl - 32 >> __sr - 32,
			((long long)(reg) << __sl >> __sr));
	})
and let the compiler do any more integer promotions (etc).

I'm also not convinced that the checks __BF_FIELD_CHECK() does
on 'reg' are in any sense worth the effort.

I have tried some simpler alternatives, eg:
	!__builtin_constant_p(reg) && statically_true((reg & mask) == 0)
however that throws up some false positives due to some of the weird ways
people have used FIELD_GET() where it is nothing like the simplest
(or most obvious) way to do things.
That might have been the code that split a 32bit value into bytes
in a printf with:
	FIELD_GET(GENMASK(7, 0), val), FIELD_GET(GENMASK(15, 8), val),
	FIELD_GET(GENMASK(23, 16), val), FIELD_GET(GENMASK(31, 24), val),

	David

> +	})
> +
>  /**
>   * FIELD_MODIFY() - modify a bitfield element
>   * @_mask: shifted mask defining the field's length and position


^ permalink raw reply

* [PATCH iwl-net v1] igc: set tx buffer type for SMD frames
From: Kohei Enju @ 2026-04-17 19:31 UTC (permalink / raw)
  To: intel-wired-lan, netdev
  Cc: Tony Nguyen, Przemek Kitszel, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Choong Yong Liang,
	Vladimir Oltean, Vinicius Costa Gomes, Chwee-Lin Choong,
	Simon Horman, Kohei Enju

Sashiko pointed out that igc_fpe_init_smd_frame() initializes
igc_tx_buffer fields for an SMD skb, but does not set the buffer type:
https://sashiko.dev/#/patchset/20260415025226.114115-1-kohei%40enjuk.jp

Since igc_tx_buffer entries are reused, a stale XDP or XSK type can
remain and make TX completion use the wrong cleanup path.

Set the buffer type to IGC_TX_BUFFER_TYPE_SKB.

Fixes: 5422570c0010 ("igc: add support for frame preemption verification")
Signed-off-by: Kohei Enju <kohei@enjuk.jp>
---
 drivers/net/ethernet/intel/igc/igc_tsn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 02dd9f0290a3..52de2bcbadbe 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -34,6 +34,7 @@ static int igc_fpe_init_smd_frame(struct igc_ring *ring,
 		return -ENOMEM;
 	}
 
+	buffer->type = IGC_TX_BUFFER_TYPE_SKB;
 	buffer->skb = skb;
 	buffer->protocol = 0;
 	buffer->bytecount = skb->len;
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH net v4 4/4] selftests: bonding: add test for lacp_strict mode
From: Jakub Kicinski @ 2026-04-17 19:27 UTC (permalink / raw)
  To: Louis Scalbert
  Cc: netdev, stephen, andrew+netdev, jv, edumazet, pabeni, fbl, andy,
	shemminger, maheshb
In-Reply-To: <20260417140505.3860237-5-louis.scalbert@6wind.com>

On Fri, 17 Apr 2026 16:05:05 +0200 Louis Scalbert wrote:
> +ip netns exec "${p_ns}" tc qdisc add dev eth0 root netem loss 100%

netem is not included in the config for bonding tests, so this fails
in the CI.

^ permalink raw reply

* Re: [PATCH 0/9] bitfield: add FIELD_GET_SIGNED()
From: Yury Norov @ 2026-04-17 19:21 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Andy Lutomirski, Peter Zijlstra, Jonathan Cameron,
	David Lechner, Nuno Sá, Andy Shevchenko, Ping-Ke Shih,
	Richard Cochran, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Alexandre Belloni, Yury Norov,
	Rasmus Villemoes, Hans de Goede, Linus Walleij, Sakari Ailus,
	Salah Triki, Achim Gratz, Ben Collins, linux-kernel, linux-iio,
	linux-wireless, netdev, linux-rtc
In-Reply-To: <aeJ6hnZSbo2DrLpi@ashevche-desk.local>

On Fri, Apr 17, 2026 at 09:23:02PM +0300, Andy Shevchenko wrote:
> On Fri, Apr 17, 2026 at 01:36:11PM -0400, Yury Norov wrote:
> > The bitfields are designed in assumption that fields contain unsigned
> > integer values, thus extracting the values from the field implies
> > zero-extending.
> > 
> > Some drivers need to sign-extend their fields, and currently do it like:
> > 
> > 	dc_re += sign_extend32(FIELD_GET(0xfff000, tmp), 11);
> > 	dc_im += sign_extend32(FIELD_GET(0xfff, tmp), 11);
> > 
> > It's error-prone because it relies on user to provide the correct
> > index of the most significant bit.
> > 
> > This series adds a signed version of FIELD_GET(), which is the more
> > convenient and compiles (on x86_64) to just a couple instructions:
> > shl and sar.
> > 
> > Patch #1 adds FIELD_GET_SIGNED(), and the rest of the series applies it
> > tree-wide.
> 
> Here the example is missing.

This series is full of examples... I'll add one here if you prefer, if
it comes to v2.
 
> Nevertheless, I looked at the implementation a bit and wondering how would it
> work for 64-bit mask of say GENMASK_ULL(63, 60)? Wouldn't it give an overflow?

In that case, the '<< __builtin_clzll(mask)' part becomes a NOP, and
the compiler only emits a single sar:

   long long foo(long long reg)
  {
    10:   f3 0f 1e fa             endbr64
          return FIELD_GET_SIGNED(GENMASK_ULL(63, 60), reg);
    14:   48 89 f8                mov    %rdi,%rax
    17:   48 c1 f8 3c             sar    $0x3c,%rax
  }

Just tested it with a real kernel build with gcc-15.2, and it works as
intended.

^ permalink raw reply

* Re: [PATCH net] eth: fbnic: fix double-free of PCS on phylink creation failure
From: Bobby Eshleman @ 2026-04-17 18:44 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Alexander Duyck, Jakub Kicinski, kernel-team, Andrew Lunn,
	David S. Miller, Eric Dumazet, Paolo Abeni, Russell King, netdev,
	linux-kernel, Bobby Eshleman
In-Reply-To: <49138791-0726-4065-a772-56fea43070b7@lunn.ch>

On Fri, Apr 17, 2026 at 01:48:38AM +0200, Andrew Lunn wrote:
> > diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> > index e3ca5fcfabef..2a6a73393732 100644
> > --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> > +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
> > @@ -818,7 +818,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
> >  	netif_tx_stop_all_queues(netdev);
> >  
> >  	if (fbnic_phylink_create(netdev)) {
> > -		fbnic_netdev_free(fbd);
> > +		free_netdev(netdev);
> > +		fbd->netdev = NULL;
> 
> Why set it to NULL? Setting pointers to NULL like this often suggests
> you are not confident the code is correct and you are being
> defensive. It is better to review the code and be sure it does the
> correct thing.
> 
> >  		return NULL;
> >  	}
> >  
> > diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
> > index 09c5225111be..50240e6c2ee9 100644
> > --- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
> > +++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
> > @@ -237,6 +237,7 @@ int fbnic_phylink_create(struct net_device *netdev)
> >  		dev_err(netdev->dev.parent,
> >  			"Failed to create Phylink interface, err: %d\n", err);
> >  		xpcs_destroy_pcs(pcs);
> > +		fbn->pcs = NULL;
> 
> Why set it to NULL? If it failed, you are unwinding and about to fail
> the probe. Nothing should be using it.
> 
> I would also say fbnic_phylink_destroy() is wrong or at least whoever
> wrote it is not confident in their own code. It should only be called
> if fbnic_phylink_create() was successful, so you know fbn->pcs is
> valid, so there is no need to test it. The same for fbn->phylink.
> 
>        Andrew

Fair points. I think it looks sound without resetting to NULL, but I'll
double check and remove if confident.

I'll look at the checks in _destroy() too.

Thanks,
Bobby

^ permalink raw reply

* [PATCH 4/4 nf] netfilter: xtables: fix L4 header parsing for non-first fragments
From: Fernando Fernandez Mancera @ 2026-04-17 18:34 UTC (permalink / raw)
  To: netfilter-devel
  Cc: netdev, coreteam, pablo, fw, phil, Fernando Fernandez Mancera
In-Reply-To: <20260417183433.4739-1-fmancera@suse.de>

The TPROXY target and osf match rely on the L4 header to operate. For
fragmented packets, every fragment carries the transport protocol
identifier, but only the first fragment contains the L4 header.

As the 'raw' table can be configured to run at priority -450 (before
defragmentation at -400), the target/match can be reached before
reassembly. In this case, non-first fragments have their payload
incorrectly parsed as a TCP/UDP header.

Add a fragment check to ensure TPROXY/osf only evaluates unfragmented
packets or the first fragment in the stream.

Fixes: 902d6a4c2a4f ("netfilter: nf_defrag: Skip defrag if NOTRACK is set")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
---
 net/netfilter/xt_TPROXY.c | 8 ++++++--
 net/netfilter/xt_osf.c    | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index e4bea1d346cf..ac4b011ce48c 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -40,6 +40,9 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
 	struct udphdr _hdr, *hp;
 	struct sock *sk;
 
+	if (ip_is_fragment(iph))
+		return NF_DROP;
+
 	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
 	if (hp == NULL)
 		return NF_DROP;
@@ -106,6 +109,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
+	unsigned short fragoff = 0;
 	struct udphdr _hdr, *hp;
 	struct sock *sk;
 	const struct in6_addr *laddr;
@@ -113,8 +117,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	int thoff = 0;
 	int tproto;
 
-	tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
-	if (tproto < 0)
+	tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL);
+	if (tproto < 0 || fragoff)
 		return NF_DROP;
 
 	hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index dc9485854002..889dff4daff0 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -27,6 +27,9 @@
 static bool
 xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
+	if (ip_is_fragment(ip_hdr(skb)))
+		return false;
+
 	return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
 			    xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers);
 }
-- 
2.53.0


^ permalink raw reply related

* [PATCH 3/4 nf] netfilter: nft_osf: skip evaluation for non-first fragments
From: Fernando Fernandez Mancera @ 2026-04-17 18:34 UTC (permalink / raw)
  To: netfilter-devel
  Cc: netdev, coreteam, pablo, fw, phil, Fernando Fernandez Mancera
In-Reply-To: <20260417183433.4739-1-fmancera@suse.de>

The osf expression extracts TCP options to match them against
fingerprints. For fragmented packets, every fragment carries the
transport protocol used but only the first fragment contains the TCP
header.

As nftables is not evaluating chain priority, an osf expression could be
attached to a PREROUTING chain with a priority lower than -400. This
would bypass defragmentation. In addition, nft_osf should be able to
work in stateless environments, therefore it can be used in situations
where defragmentation is not being performed.

Add a check for pkt->fragoff to ensure osf only evaluates unfragmented
packets or the first fragment in the stream.

Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
---
 net/netfilter/nft_osf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 1c0b493ef0a9..ceca87e405eb 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -28,7 +28,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	struct nf_osf_data data;
 	struct tcphdr _tcph;

-	if (pkt->tprot != IPPROTO_TCP) {
+	if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) {
 		regs->verdict.code = NFT_BREAK;
 		return;
 	}
-- 
2.53.0

^ permalink raw reply related

* [PATCH 2/4 nf] netfilter: nft_tproxy: skip evaluation for non-first fragments
From: Fernando Fernandez Mancera @ 2026-04-17 18:34 UTC (permalink / raw)
  To: netfilter-devel
  Cc: netdev, coreteam, pablo, fw, phil, Fernando Fernandez Mancera
In-Reply-To: <20260417183433.4739-1-fmancera@suse.de>

The tproxy expression relies on L4 ports to perform socket lookups. For
fragmented packets, every fragment carries the transport protocol used
but only the first fragment contains the L4 header.

As nftables is not evaluating chain priority, a tproxy expression could
be attached to a PREROUTING chain with a priority lower than -400. This
would bypass defragmentation.

Add a check for pkt->fragoff to ensure tproxy only evaluates
unfragmented packets or the first fragment in the stream.

Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
---
 net/netfilter/nft_tproxy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 50481280abd2..8080cbd878cd 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
 	__be16 tport = 0;
 	struct sock *sk;
 
-	if (pkt->tprot != IPPROTO_TCP &&
-	    pkt->tprot != IPPROTO_UDP) {
+	if ((pkt->tprot != IPPROTO_TCP &&
+	     pkt->tprot != IPPROTO_UDP) || pkt->fragoff) {
 		regs->verdict.code = NFT_BREAK;
 		return;
 	}
@@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
 
 	memset(&taddr, 0, sizeof(taddr));
 
-	if (pkt->tprot != IPPROTO_TCP &&
-	    pkt->tprot != IPPROTO_UDP) {
+	if ((pkt->tprot != IPPROTO_TCP &&
+	     pkt->tprot != IPPROTO_UDP) || pkt->fragoff) {
 		regs->verdict.code = NFT_BREAK;
 		return;
 	}
-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox