From: Shuai Xue <xueshuai@linux.alibaba.com>
To: rostedt@goodmis.org, lukas@wunner.de, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org,
linux-trace-kernel@vger.kernel.org, helgaas@kernel.org
Cc: bhelgaas@google.com, tony.luck@intel.com, bp@alien8.de,
xueshuai@linux.alibaba.com, mhiramat@kernel.org,
mathieu.desnoyers@efficios.com, oleg@redhat.com,
naveen@kernel.org, davem@davemloft.net,
anil.s.keshavamurthy@intel.com, mark.rutland@arm.com,
peterz@infradead.org, tianruidong@linux.alibaba.com
Subject: [PATCH v6] PCI: hotplug: Add a generic RAS tracepoint for hotplug event
Date: Wed, 15 Jan 2025 09:37:53 +0800 [thread overview]
Message-ID: <20250115013753.49126-1-xueshuai@linux.alibaba.com> (raw)
Hotplug events are critical indicators for analyzing hardware health,
particularly in AI supercomputers where surprise link downs can
significantly impact system performance and reliability.
To this end, define a new TRACE_SYSTEM named pci, add a generic RAS
tracepoint for hotplug events to aid health checks, and generate
tracepoints for PCIe hotplug events. Add enum pci_hotplug_event in
include/uapi/linux/pci.h so applications like rasdaemon can register
tracepoint event handlers for it.
The output looks like this:
$ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable
$ cat /sys/kernel/debug/tracing/trace_pipe
<...>-206 [001] ..... 40.373870: pci_hp_event: 0000:00:02.0 slot:10, event:Link Down
<...>-206 [001] ..... 40.374871: pci_hp_event: 0000:00:02.0 slot:10, event:Card not present
Suggested-by: Lukas Wunner <lukas@wunner.de>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
---
changes since v5:
- move the enum definition to include/uapi/linux/pci.h
---
drivers/pci/hotplug/pciehp_ctrl.c | 33 ++++++++++++++++++-----
drivers/pci/hotplug/trace.h | 45 +++++++++++++++++++++++++++++++
include/uapi/linux/pci.h | 31 +++++++++++++++++++++
3 files changed, 103 insertions(+), 6 deletions(-)
create mode 100644 drivers/pci/hotplug/trace.h
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index d603a7aa7483..f9beb4d3a9b8 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -23,6 +23,9 @@
#include "../pci.h"
#include "pciehp.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
/* The following routines constitute the bulk of the
hotplug controller logic
*/
@@ -244,12 +247,20 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
case ON_STATE:
ctrl->state = POWEROFF_STATE;
mutex_unlock(&ctrl->state_lock);
- if (events & PCI_EXP_SLTSTA_DLLSC)
+ if (events & PCI_EXP_SLTSTA_DLLSC) {
ctrl_info(ctrl, "Slot(%s): Link Down\n",
slot_name(ctrl));
- if (events & PCI_EXP_SLTSTA_PDC)
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_LINK_DOWN);
+ }
+ if (events & PCI_EXP_SLTSTA_PDC) {
ctrl_info(ctrl, "Slot(%s): Card not present\n",
slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_NOT_PRESENT);
+ }
pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
break;
default:
@@ -269,6 +280,9 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
INDICATOR_NOOP);
ctrl_info(ctrl, "Slot(%s): Card not present\n",
slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_NOT_PRESENT);
}
mutex_unlock(&ctrl->state_lock);
return;
@@ -281,12 +295,19 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
case OFF_STATE:
ctrl->state = POWERON_STATE;
mutex_unlock(&ctrl->state_lock);
- if (present)
+ if (present) {
ctrl_info(ctrl, "Slot(%s): Card present\n",
slot_name(ctrl));
- if (link_active)
- ctrl_info(ctrl, "Slot(%s): Link Up\n",
- slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_CARD_PRESENT);
+ }
+ if (link_active) {
+ ctrl_info(ctrl, "Slot(%s): Link Up\n", slot_name(ctrl));
+ trace_pci_hp_event(pci_name(ctrl->pcie->port),
+ slot_name(ctrl),
+ PCI_HOTPLUG_LINK_UP);
+ }
ctrl->request_result = pciehp_enable_slot(ctrl);
break;
default:
diff --git a/drivers/pci/hotplug/trace.h b/drivers/pci/hotplug/trace.h
new file mode 100644
index 000000000000..1415ac505cb5
--- /dev/null
+++ b/drivers/pci/hotplug/trace.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_HW_EVENT_PCI_HP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HW_EVENT_PCI_HP_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pci
+
+TRACE_EVENT(pci_hp_event,
+
+ TP_PROTO(const char *port_name,
+ const char *slot,
+ const int event),
+
+ TP_ARGS(port_name, slot, event),
+
+ TP_STRUCT__entry(
+ __string( port_name, port_name )
+ __string( slot, slot )
+ __field( int, event )
+ ),
+
+ TP_fast_assign(
+ __assign_str(port_name);
+ __assign_str(slot);
+ __entry->event = event;
+ ),
+
+ TP_printk("%s slot:%s, event:%s\n",
+ __get_str(port_name),
+ __get_str(slot),
+ __print_symbolic(__entry->event, PCI_HOTPLUG_EVENT)
+ )
+);
+
+#endif /* _TRACE_HW_EVENT_PCI_HP_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/pci/hotplug
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h
index a769eefc5139..58a8ad9389e3 100644
--- a/include/uapi/linux/pci.h
+++ b/include/uapi/linux/pci.h
@@ -19,6 +19,7 @@
#define _UAPILINUX_PCI_H
#include <linux/pci_regs.h> /* The pci register defines */
+#include <linux/tracepoint.h>
/*
* The PCI interface treats multi-function devices as independent
@@ -39,4 +40,34 @@
#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
+#define PCI_HOTPLUG_EVENT \
+ EM(PCI_HOTPLUG_LINK_UP, "Link Up") \
+ EM(PCI_HOTPLUG_LINK_DOWN, "Link Down") \
+ EM(PCI_HOTPLUG_CARD_PRESENT, "Card present") \
+ EMe(PCI_HOTPLUG_CARD_NOT_PRESENT, "Card not present")
+
+/* Enums must be exported to userspace so that user tools can parse them */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+enum pci_hotplug_event {
+ PCI_HOTPLUG_LINK_UP,
+ PCI_HOTPLUG_LINK_DOWN,
+ PCI_HOTPLUG_CARD_PRESENT,
+ PCI_HOTPLUG_CARD_NOT_PRESENT,
+};
+
+PCI_HOTPLUG_EVENT
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
+
#endif /* _UAPILINUX_PCI_H */
--
2.39.3
next reply other threads:[~2025-01-15 1:37 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-15 1:37 Shuai Xue [this message]
2025-01-15 2:41 ` [PATCH v6] PCI: hotplug: Add a generic RAS tracepoint for hotplug event Steven Rostedt
2025-01-15 3:59 ` Shuai Xue
2025-01-15 9:33 ` Lukas Wunner
2025-01-15 15:05 ` Steven Rostedt
2025-01-18 5:13 ` kernel test robot
2025-01-18 17:52 ` kernel test robot
2025-02-24 3:38 ` Shuai Xue
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250115013753.49126-1-xueshuai@linux.alibaba.com \
--to=xueshuai@linux.alibaba.com \
--cc=anil.s.keshavamurthy@intel.com \
--cc=bhelgaas@google.com \
--cc=bp@alien8.de \
--cc=davem@davemloft.net \
--cc=helgaas@kernel.org \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=lukas@wunner.de \
--cc=mark.rutland@arm.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=naveen@kernel.org \
--cc=oleg@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tianruidong@linux.alibaba.com \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox