From: Corey Minyard <corey@minyard.net>
To: Matt Fleming <mfleming@cloudflare.com>
Cc: openipmi-developer@lists.sourceforge.net,
Tony Camuso <tcamuso@redhat.com>,
linux-kernel@vger.kernel.org, kernel-team@cloudflare.com,
Corey Minyard <corey@minyard.net>,
stable@vger.kernel.org
Subject: [PATCH v3] ipmi: Add limits to event and receive message requests
Date: Tue, 28 Apr 2026 13:00:33 -0500 [thread overview]
Message-ID: <20260428180611.500258-1-corey@minyard.net> (raw)
The driver would just fetch events and receive messages until the
BMC said it was done. To avoid issues with BMCs that never say they are
done, add a limit of 10 fetches at a time.
In addition, an si interface has an attn state that the hardware can
return, which is supposed to cause a flag fetch to see if the driver
needs to fetch events or messages, or do a few other things. If the
attn bit gets stuck, it's a similar problem. So allow messages in
between flag fetches so the driver itself doesn't get stuck.
This is a more general fix than the previous fix for the specific bad
BMC, but should fix the more general issue of a BMC that won't stop
saying it has data.
This behavior has been there from the beginning of the driver. It's not
a bug per se, but it is accounting for bugs in BMCs.
Reported-by: Matt Fleming <mfleming@cloudflare.com>
Closes: https://lore.kernel.org/lkml/20260415115930.3428942-1-matt@readmodwrite.com/
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Corey Minyard <corey@minyard.net>
---
I have added this problem as a capability in the openipmi library
simulator so I can reproduce the issue and make sure everything works
properly.
drivers/char/ipmi/ipmi_si_intf.c | 54 +++++++++++++++++++++++++-------
drivers/char/ipmi/ipmi_ssif.c | 23 ++++++++++++--
2 files changed, 64 insertions(+), 13 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 08c208cc64c5..7c3c463e08da 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -168,6 +168,10 @@ struct smi_info {
OEM2_DATA_AVAIL)
unsigned char msg_flags;
+ /* When requesting events and messages, don't do it forever. */
+ unsigned int num_requests_in_a_row;
+ bool last_was_flag_fetch;
+
/* Does the BMC have an event buffer? */
bool has_event_buffer;
@@ -410,7 +414,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info)
start_new_msg(smi_info, smi_info->curr_msg->data,
smi_info->curr_msg->data_size);
- smi_info->si_state = SI_GETTING_MESSAGES;
+ if (smi_info->si_state != SI_GETTING_MESSAGES) {
+ smi_info->num_requests_in_a_row = 0;
+ smi_info->si_state = SI_GETTING_MESSAGES;
+ }
}
static void start_getting_events(struct smi_info *smi_info)
@@ -421,7 +428,10 @@ static void start_getting_events(struct smi_info *smi_info)
start_new_msg(smi_info, smi_info->curr_msg->data,
smi_info->curr_msg->data_size);
- smi_info->si_state = SI_GETTING_EVENTS;
+ if (smi_info->si_state != SI_GETTING_EVENTS) {
+ smi_info->num_requests_in_a_row = 0;
+ smi_info->si_state = SI_GETTING_EVENTS;
+ }
}
/*
@@ -595,6 +605,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
smi_info->si_state = SI_NORMAL;
} else {
smi_info->msg_flags = msg[3];
+ smi_info->last_was_flag_fetch = true;
handle_flags(smi_info);
}
break;
@@ -646,6 +657,11 @@ static void handle_transaction_done(struct smi_info *smi_info)
} else {
smi_inc_stat(smi_info, events);
+ smi_info->num_requests_in_a_row++;
+ if (smi_info->num_requests_in_a_row > 10)
+ /* Stop if we do this too many times. */
+ smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
+
/*
* Do this before we deliver the message
* because delivering the message releases the
@@ -684,6 +700,11 @@ static void handle_transaction_done(struct smi_info *smi_info)
} else {
smi_inc_stat(smi_info, incoming_messages);
+ smi_info->num_requests_in_a_row++;
+ if (smi_info->num_requests_in_a_row > 10)
+ /* Stop if we do this too many times. */
+ smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
+
/*
* Do this before we deliver the message
* because delivering the message releases the
@@ -825,6 +846,26 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
goto out;
}
+ /*
+ * If we are currently idle, or if the last thing that was
+ * done was a flag fetch and there is a message pending, try
+ * to start the next message.
+ *
+ * We do the waiting message check to avoid a stuck flag
+ * completely wedging the driver. Let a message through
+ * in between flag operations if that happens.
+ */
+ if (si_sm_result == SI_SM_IDLE ||
+ (si_sm_result == SI_SM_ATTN && smi_info->waiting_msg &&
+ smi_info->last_was_flag_fetch)) {
+ smi_info->last_was_flag_fetch = false;
+ smi_inc_stat(smi_info, idles);
+
+ si_sm_result = start_next_msg(smi_info);
+ if (si_sm_result != SI_SM_IDLE)
+ goto restart;
+ }
+
/*
* We prefer handling attn over new messages. But don't do
* this if there is not yet an upper layer to handle anything.
@@ -852,15 +893,6 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
}
}
- /* If we are currently idle, try to start the next message. */
- if (si_sm_result == SI_SM_IDLE) {
- smi_inc_stat(smi_info, idles);
-
- si_sm_result = start_next_msg(smi_info);
- if (si_sm_result != SI_SM_IDLE)
- goto restart;
- }
-
if ((si_sm_result == SI_SM_IDLE)
&& (atomic_read(&smi_info->req_events))) {
/*
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index b49500a1bd36..f3798f4e6a63 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -225,6 +225,9 @@ struct ssif_info {
bool has_event_buffer;
bool supports_alert;
+ /* When requesting events and messages, don't do it forever. */
+ unsigned int num_requests_in_a_row;
+
/*
* Used to tell what we should do with alerts. If we are
* waiting on a response, read the data immediately.
@@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
}
ssif_info->curr_msg = msg;
- ssif_info->ssif_state = SSIF_GETTING_EVENTS;
+ if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) {
+ ssif_info->num_requests_in_a_row = 0;
+ ssif_info->ssif_state = SSIF_GETTING_EVENTS;
+ }
ipmi_ssif_unlock_cond(ssif_info, flags);
msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
}
ssif_info->curr_msg = msg;
- ssif_info->ssif_state = SSIF_GETTING_MESSAGES;
+ if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) {
+ ssif_info->num_requests_in_a_row = 0;
+ ssif_info->ssif_state = SSIF_GETTING_MESSAGES;
+ }
ipmi_ssif_unlock_cond(ssif_info, flags);
msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
handle_flags(ssif_info, flags);
} else {
+ ssif_info->num_requests_in_a_row++;
+ if (ssif_info->num_requests_in_a_row > 10)
+ /* Stop if we do this too many times. */
+ ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
+
handle_flags(ssif_info, flags);
ssif_inc_stat(ssif_info, events);
deliver_recv_msg(ssif_info, msg);
@@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
handle_flags(ssif_info, flags);
} else {
+ ssif_info->num_requests_in_a_row++;
+ if (ssif_info->num_requests_in_a_row > 10)
+ /* Stop if we do this too many times. */
+ ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
+
ssif_inc_stat(ssif_info, incoming_messages);
handle_flags(ssif_info, flags);
deliver_recv_msg(ssif_info, msg);
--
2.43.0
reply other threads:[~2026-04-28 18:06 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260428180611.500258-1-corey@minyard.net \
--to=corey@minyard.net \
--cc=kernel-team@cloudflare.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mfleming@cloudflare.com \
--cc=openipmi-developer@lists.sourceforge.net \
--cc=stable@vger.kernel.org \
--cc=tcamuso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox