* [PATCH 0/3] More fixes for BMC failure handling
@ 2026-02-13 6:52 Corey Minyard
2026-02-13 6:52 ` [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up Corey Minyard
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Corey Minyard @ 2026-02-13 6:52 UTC (permalink / raw)
To: Rafael J . Wysocki
Cc: Jaroslav Pulchart, Guenter Roeck, Igor Raits, linux-acpi,
linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica, Huisong Li
These are in addition to the previous patches for this.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up
2026-02-13 6:52 [PATCH 0/3] More fixes for BMC failure handling Corey Minyard
@ 2026-02-13 6:52 ` Corey Minyard
2026-02-13 12:50 ` Rafael J. Wysocki
2026-02-13 6:52 ` [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender Corey Minyard
2026-02-13 6:52 ` [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC Corey Minyard
2 siblings, 1 reply; 7+ messages in thread
From: Corey Minyard @ 2026-02-13 6:52 UTC (permalink / raw)
To: Rafael J . Wysocki
Cc: Jaroslav Pulchart, Guenter Roeck, Igor Raits, linux-acpi,
linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica, Huisong Li,
Corey Minyard
If the BMC is in a bad state, don't bother waiting for queues messages
since there can't be any. Otherwise the unload is blocked until the
BMC is back in a good state.
Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Signed-off-by: Corey Minyard <corey@minyard.net>
---
drivers/char/ipmi/ipmi_si_intf.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 0049e3792ba1..3667033fcc51 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2234,7 +2234,8 @@ static void wait_msg_processed(struct smi_info *smi_info)
unsigned long jiffies_now;
long time_diff;
- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
+ while (smi_info->si_state != SI_HOSED &&
+ (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) {
jiffies_now = jiffies;
time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
* SI_USEC_PER_JIFFY);
--
2.43.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender
2026-02-13 6:52 [PATCH 0/3] More fixes for BMC failure handling Corey Minyard
2026-02-13 6:52 ` [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up Corey Minyard
@ 2026-02-13 6:52 ` Corey Minyard
2026-02-13 12:55 ` Rafael J. Wysocki
2026-02-13 6:52 ` [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC Corey Minyard
2 siblings, 1 reply; 7+ messages in thread
From: Corey Minyard @ 2026-02-13 6:52 UTC (permalink / raw)
To: Rafael J . Wysocki
Cc: Jaroslav Pulchart, Guenter Roeck, Igor Raits, linux-acpi,
linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica, Huisong Li,
Corey Minyard
It used to be, until recently, that the sender operation on the low
level interfaces would not fail. That's not the case any more with
recent changes.
So check the return value from the sender operation, and propagate it
back up from there and handle the errors in all places.
Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Signed-off-by: Corey Minyard <corey@minyard.net>
---
drivers/char/ipmi/ipmi_msghandler.c | 97 +++++++++++++++++++----------
1 file changed, 65 insertions(+), 32 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index a042b1596933..c209486bfee7 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1887,19 +1887,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf,
return smi_msg;
}
-static void smi_send(struct ipmi_smi *intf,
+static int smi_send(struct ipmi_smi *intf,
const struct ipmi_smi_handlers *handlers,
struct ipmi_smi_msg *smi_msg, int priority)
{
int run_to_completion = READ_ONCE(intf->run_to_completion);
unsigned long flags = 0;
+ int rv = 0;
ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
smi_msg = smi_add_send_msg(intf, smi_msg, priority);
ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
- if (smi_msg)
- handlers->sender(intf->send_info, smi_msg);
+ if (smi_msg) {
+ rv = handlers->sender(intf->send_info, smi_msg);
+ if (rv) {
+ ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
+ intf->curr_msg = NULL;
+ ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
+ /*
+ * Something may have been added to the transmit
+ * queue, so schedule a check for that.
+ */
+ queue_work(system_wq, &intf->smi_work);
+ }
+ }
+ return rv;
}
static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg)
@@ -2312,6 +2325,7 @@ static int i_ipmi_request(struct ipmi_user *user,
struct ipmi_recv_msg *recv_msg;
int run_to_completion = READ_ONCE(intf->run_to_completion);
int rv = 0;
+ bool in_seq_table = false;
if (supplied_recv) {
recv_msg = supplied_recv;
@@ -2365,33 +2379,47 @@ static int i_ipmi_request(struct ipmi_user *user,
rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg,
source_address, source_lun,
retries, retry_time_ms);
+ in_seq_table = true;
} else if (is_ipmb_direct_addr(addr)) {
rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg,
recv_msg, source_lun);
} else if (is_lan_addr(addr)) {
rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg,
source_lun, retries, retry_time_ms);
+ in_seq_table = true;
} else {
- /* Unknown address type. */
+ /* Unknown address type. */
ipmi_inc_stat(intf, sent_invalid_commands);
rv = -EINVAL;
}
- if (rv) {
-out_err:
- if (!supplied_smi)
- ipmi_free_smi_msg(smi_msg);
- if (!supplied_recv)
- ipmi_free_recv_msg(recv_msg);
- } else {
+ if (!rv) {
dev_dbg(intf->si_dev, "Send: %*ph\n",
smi_msg->data_size, smi_msg->data);
- smi_send(intf, intf->handlers, smi_msg, priority);
+ rv = smi_send(intf, intf->handlers, smi_msg, priority);
+ if (rv && in_seq_table) {
+ /*
+ * If it's in the sequence table, it will be
+ * retried later, so ignore errors.
+ */
+ rv = 0;
+ /* But we need to fix the timeout. */
+ intf_start_seq_timer(intf, smi_msg->msgid);
+ ipmi_free_smi_msg(smi_msg);
+ smi_msg = NULL;
+ }
}
+out_err:
if (!run_to_completion)
mutex_unlock(&intf->users_mutex);
+ if (rv) {
+ if (!supplied_smi)
+ ipmi_free_smi_msg(smi_msg);
+ if (!supplied_recv)
+ ipmi_free_recv_msg(recv_msg);
+ }
return rv;
}
@@ -3965,12 +3993,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
msg->data_size, msg->data);
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr;
@@ -4044,12 +4072,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
msg->data_size = 5;
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr;
@@ -4189,7 +4217,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
struct ipmi_smi_msg *msg)
{
struct cmd_rcvr *rcvr;
- int rv = 0;
+ int rv = 0; /* Free by default */
unsigned char netfn;
unsigned char cmd;
unsigned char chan;
@@ -4242,12 +4270,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
msg->data_size, msg->data);
- smi_send(intf, intf->handlers, msg, 0);
- /*
- * We used the message, so return the value that
- * causes it to not be freed or queued.
- */
- rv = -1;
+ if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
+ /*
+ * We used the message, so return the value that
+ * causes it to not be freed or queued.
+ */
+ rv = -1;
} else if (!IS_ERR(recv_msg)) {
/* Extract the source address from the data. */
lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr;
@@ -5056,7 +5084,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
ipmi_inc_stat(intf,
retransmitted_ipmb_commands);
- smi_send(intf, intf->handlers, smi_msg, 0);
+ /* If this fails we'll retry later or timeout. */
+ if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) {
+ /* But fix the timeout. */
+ intf_start_seq_timer(intf, smi_msg->msgid);
+ ipmi_free_smi_msg(smi_msg);
+ }
} else
ipmi_free_smi_msg(smi_msg);
--
2.43.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC
2026-02-13 6:52 [PATCH 0/3] More fixes for BMC failure handling Corey Minyard
2026-02-13 6:52 ` [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up Corey Minyard
2026-02-13 6:52 ` [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender Corey Minyard
@ 2026-02-13 6:52 ` Corey Minyard
2026-02-13 12:58 ` Rafael J. Wysocki
2 siblings, 1 reply; 7+ messages in thread
From: Corey Minyard @ 2026-02-13 6:52 UTC (permalink / raw)
To: Rafael J . Wysocki
Cc: Jaroslav Pulchart, Guenter Roeck, Igor Raits, linux-acpi,
linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica, Huisong Li,
Corey Minyard
There is a race on checking the state in the sender, it needs to be
checked under a lock. But you also need a check to avoid issues with
a misbehaving BMC for run to completion mode. So leave the check at
the beginning for run to completion, and add a check under the lock
to avoid the race.
Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
Signed-off-by: Corey Minyard <corey@minyard.net>
---
drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 3667033fcc51..6eda61664aaa 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -924,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
{
struct smi_info *smi_info = send_info;
unsigned long flags;
+ int rv = IPMI_CC_NO_ERROR;
debug_timestamp(smi_info, "Enqueue");
+ /*
+ * Check here for run to completion mode. A check under lock is
+ * later.
+ */
if (smi_info->si_state == SI_HOSED)
return IPMI_BUS_ERR;
@@ -940,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
}
spin_lock_irqsave(&smi_info->si_lock, flags);
- /*
- * The following two lines don't need to be under the lock for
- * the lock's sake, but they do need SMP memory barriers to
- * avoid getting things out of order. We are already claiming
- * the lock, anyway, so just do it under the lock to avoid the
- * ordering problem.
- */
- BUG_ON(smi_info->waiting_msg);
- smi_info->waiting_msg = msg;
- check_start_timer_thread(smi_info);
+ if (smi_info->si_state == SI_HOSED) {
+ rv = IPMI_BUS_ERR;
+ } else {
+ BUG_ON(smi_info->waiting_msg);
+ smi_info->waiting_msg = msg;
+ check_start_timer_thread(smi_info);
+ }
spin_unlock_irqrestore(&smi_info->si_lock, flags);
- return IPMI_CC_NO_ERROR;
+ return rv;
}
static void set_run_to_completion(void *send_info, bool i_run_to_completion)
--
2.43.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up
2026-02-13 6:52 ` [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up Corey Minyard
@ 2026-02-13 12:50 ` Rafael J. Wysocki
0 siblings, 0 replies; 7+ messages in thread
From: Rafael J. Wysocki @ 2026-02-13 12:50 UTC (permalink / raw)
To: Corey Minyard
Cc: Rafael J . Wysocki, Jaroslav Pulchart, Guenter Roeck, Igor Raits,
linux-acpi, linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica,
Huisong Li
On Fri, Feb 13, 2026 at 7:55 AM Corey Minyard <corey@minyard.net> wrote:
>
> If the BMC is in a bad state, don't bother waiting for queues messages
> since there can't be any. Otherwise the unload is blocked until the
> BMC is back in a good state.
>
> Reported-by: Rafael J. Wysocki <rafael@kernel.org>
> Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
> Signed-off-by: Corey Minyard <corey@minyard.net>
I have missed this case, but indeed it needs to be addressed.
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
> ---
> drivers/char/ipmi/ipmi_si_intf.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
> index 0049e3792ba1..3667033fcc51 100644
> --- a/drivers/char/ipmi/ipmi_si_intf.c
> +++ b/drivers/char/ipmi/ipmi_si_intf.c
> @@ -2234,7 +2234,8 @@ static void wait_msg_processed(struct smi_info *smi_info)
> unsigned long jiffies_now;
> long time_diff;
>
> - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
> + while (smi_info->si_state != SI_HOSED &&
> + (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) {
> jiffies_now = jiffies;
> time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
> * SI_USEC_PER_JIFFY);
> --
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender
2026-02-13 6:52 ` [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender Corey Minyard
@ 2026-02-13 12:55 ` Rafael J. Wysocki
0 siblings, 0 replies; 7+ messages in thread
From: Rafael J. Wysocki @ 2026-02-13 12:55 UTC (permalink / raw)
To: Corey Minyard
Cc: Rafael J . Wysocki, Jaroslav Pulchart, Guenter Roeck, Igor Raits,
linux-acpi, linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica,
Huisong Li
On Fri, Feb 13, 2026 at 7:55 AM Corey Minyard <corey@minyard.net> wrote:
>
> It used to be, until recently, that the sender operation on the low
> level interfaces would not fail. That's not the case any more with
> recent changes.
>
> So check the return value from the sender operation, and propagate it
> back up from there and handle the errors in all places.
>
> Reported-by: Rafael J. Wysocki <rafael@kernel.org>
> Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
> Signed-off-by: Corey Minyard <corey@minyard.net>
The changes in smi_send() are what I would do there, but then I
wouldn't know how to take the return value properly into account in
some cases.
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
> ---
> drivers/char/ipmi/ipmi_msghandler.c | 97 +++++++++++++++++++----------
> 1 file changed, 65 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
> index a042b1596933..c209486bfee7 100644
> --- a/drivers/char/ipmi/ipmi_msghandler.c
> +++ b/drivers/char/ipmi/ipmi_msghandler.c
> @@ -1887,19 +1887,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf,
> return smi_msg;
> }
>
> -static void smi_send(struct ipmi_smi *intf,
> +static int smi_send(struct ipmi_smi *intf,
> const struct ipmi_smi_handlers *handlers,
> struct ipmi_smi_msg *smi_msg, int priority)
> {
> int run_to_completion = READ_ONCE(intf->run_to_completion);
> unsigned long flags = 0;
> + int rv = 0;
>
> ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
> smi_msg = smi_add_send_msg(intf, smi_msg, priority);
> ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
>
> - if (smi_msg)
> - handlers->sender(intf->send_info, smi_msg);
> + if (smi_msg) {
> + rv = handlers->sender(intf->send_info, smi_msg);
> + if (rv) {
> + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags);
> + intf->curr_msg = NULL;
> + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags);
> + /*
> + * Something may have been added to the transmit
> + * queue, so schedule a check for that.
> + */
> + queue_work(system_wq, &intf->smi_work);
> + }
> + }
> + return rv;
> }
>
> static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg)
> @@ -2312,6 +2325,7 @@ static int i_ipmi_request(struct ipmi_user *user,
> struct ipmi_recv_msg *recv_msg;
> int run_to_completion = READ_ONCE(intf->run_to_completion);
> int rv = 0;
> + bool in_seq_table = false;
>
> if (supplied_recv) {
> recv_msg = supplied_recv;
> @@ -2365,33 +2379,47 @@ static int i_ipmi_request(struct ipmi_user *user,
> rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg,
> source_address, source_lun,
> retries, retry_time_ms);
> + in_seq_table = true;
> } else if (is_ipmb_direct_addr(addr)) {
> rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg,
> recv_msg, source_lun);
> } else if (is_lan_addr(addr)) {
> rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg,
> source_lun, retries, retry_time_ms);
> + in_seq_table = true;
> } else {
> - /* Unknown address type. */
> + /* Unknown address type. */
> ipmi_inc_stat(intf, sent_invalid_commands);
> rv = -EINVAL;
> }
>
> - if (rv) {
> -out_err:
> - if (!supplied_smi)
> - ipmi_free_smi_msg(smi_msg);
> - if (!supplied_recv)
> - ipmi_free_recv_msg(recv_msg);
> - } else {
> + if (!rv) {
> dev_dbg(intf->si_dev, "Send: %*ph\n",
> smi_msg->data_size, smi_msg->data);
>
> - smi_send(intf, intf->handlers, smi_msg, priority);
> + rv = smi_send(intf, intf->handlers, smi_msg, priority);
> + if (rv && in_seq_table) {
> + /*
> + * If it's in the sequence table, it will be
> + * retried later, so ignore errors.
> + */
> + rv = 0;
> + /* But we need to fix the timeout. */
> + intf_start_seq_timer(intf, smi_msg->msgid);
> + ipmi_free_smi_msg(smi_msg);
> + smi_msg = NULL;
> + }
> }
> +out_err:
> if (!run_to_completion)
> mutex_unlock(&intf->users_mutex);
>
> + if (rv) {
> + if (!supplied_smi)
> + ipmi_free_smi_msg(smi_msg);
> + if (!supplied_recv)
> + ipmi_free_recv_msg(recv_msg);
> + }
> return rv;
> }
>
> @@ -3965,12 +3993,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
> dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
> msg->data_size, msg->data);
>
> - smi_send(intf, intf->handlers, msg, 0);
> - /*
> - * We used the message, so return the value that
> - * causes it to not be freed or queued.
> - */
> - rv = -1;
> + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
> + /*
> + * We used the message, so return the value that
> + * causes it to not be freed or queued.
> + */
> + rv = -1;
> } else if (!IS_ERR(recv_msg)) {
> /* Extract the source address from the data. */
> ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr;
> @@ -4044,12 +4072,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
> msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
> msg->data_size = 5;
>
> - smi_send(intf, intf->handlers, msg, 0);
> - /*
> - * We used the message, so return the value that
> - * causes it to not be freed or queued.
> - */
> - rv = -1;
> + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
> + /*
> + * We used the message, so return the value that
> + * causes it to not be freed or queued.
> + */
> + rv = -1;
> } else if (!IS_ERR(recv_msg)) {
> /* Extract the source address from the data. */
> daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr;
> @@ -4189,7 +4217,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
> struct ipmi_smi_msg *msg)
> {
> struct cmd_rcvr *rcvr;
> - int rv = 0;
> + int rv = 0; /* Free by default */
> unsigned char netfn;
> unsigned char cmd;
> unsigned char chan;
> @@ -4242,12 +4270,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
> dev_dbg(intf->si_dev, "Invalid command: %*ph\n",
> msg->data_size, msg->data);
>
> - smi_send(intf, intf->handlers, msg, 0);
> - /*
> - * We used the message, so return the value that
> - * causes it to not be freed or queued.
> - */
> - rv = -1;
> + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR)
> + /*
> + * We used the message, so return the value that
> + * causes it to not be freed or queued.
> + */
> + rv = -1;
> } else if (!IS_ERR(recv_msg)) {
> /* Extract the source address from the data. */
> lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr;
> @@ -5056,7 +5084,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
> ipmi_inc_stat(intf,
> retransmitted_ipmb_commands);
>
> - smi_send(intf, intf->handlers, smi_msg, 0);
> + /* If this fails we'll retry later or timeout. */
> + if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) {
> + /* But fix the timeout. */
> + intf_start_seq_timer(intf, smi_msg->msgid);
> + ipmi_free_smi_msg(smi_msg);
> + }
> } else
> ipmi_free_smi_msg(smi_msg);
>
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC
2026-02-13 6:52 ` [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC Corey Minyard
@ 2026-02-13 12:58 ` Rafael J. Wysocki
0 siblings, 0 replies; 7+ messages in thread
From: Rafael J. Wysocki @ 2026-02-13 12:58 UTC (permalink / raw)
To: Corey Minyard
Cc: Rafael J . Wysocki, Jaroslav Pulchart, Guenter Roeck, Igor Raits,
linux-acpi, linux-hwmon, Daniel Secik, Zdenek Pesek, Jiri Jurica,
Huisong Li
On Fri, Feb 13, 2026 at 7:55 AM Corey Minyard <corey@minyard.net> wrote:
>
> There is a race on checking the state in the sender, it needs to be
> checked under a lock. But you also need a check to avoid issues with
> a misbehaving BMC for run to completion mode. So leave the check at
> the beginning for run to completion, and add a check under the lock
> to avoid the race.
>
> Reported-by: Rafael J. Wysocki <rafael@kernel.org>
> Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional")
> Signed-off-by: Corey Minyard <corey@minyard.net>
That's what I would do in sender(), more or less, so
Reviewed-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
> ---
> drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++-----------
> 1 file changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
> index 3667033fcc51..6eda61664aaa 100644
> --- a/drivers/char/ipmi/ipmi_si_intf.c
> +++ b/drivers/char/ipmi/ipmi_si_intf.c
> @@ -924,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
> {
> struct smi_info *smi_info = send_info;
> unsigned long flags;
> + int rv = IPMI_CC_NO_ERROR;
>
> debug_timestamp(smi_info, "Enqueue");
>
> + /*
> + * Check here for run to completion mode. A check under lock is
> + * later.
> + */
> if (smi_info->si_state == SI_HOSED)
> return IPMI_BUS_ERR;
>
> @@ -940,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg)
> }
>
> spin_lock_irqsave(&smi_info->si_lock, flags);
> - /*
> - * The following two lines don't need to be under the lock for
> - * the lock's sake, but they do need SMP memory barriers to
> - * avoid getting things out of order. We are already claiming
> - * the lock, anyway, so just do it under the lock to avoid the
> - * ordering problem.
> - */
> - BUG_ON(smi_info->waiting_msg);
> - smi_info->waiting_msg = msg;
> - check_start_timer_thread(smi_info);
> + if (smi_info->si_state == SI_HOSED) {
> + rv = IPMI_BUS_ERR;
> + } else {
> + BUG_ON(smi_info->waiting_msg);
> + smi_info->waiting_msg = msg;
> + check_start_timer_thread(smi_info);
> + }
> spin_unlock_irqrestore(&smi_info->si_lock, flags);
> - return IPMI_CC_NO_ERROR;
> + return rv;
> }
>
> static void set_run_to_completion(void *send_info, bool i_run_to_completion)
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2026-02-13 12:58 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-13 6:52 [PATCH 0/3] More fixes for BMC failure handling Corey Minyard
2026-02-13 6:52 ` [PATCH 1/3] ipmi:si: Don't block module unload if the BMC is messed up Corey Minyard
2026-02-13 12:50 ` Rafael J. Wysocki
2026-02-13 6:52 ` [PATCH 2/3] ipmi:msghandler: Handle error returns from the SMI sender Corey Minyard
2026-02-13 12:55 ` Rafael J. Wysocki
2026-02-13 6:52 ` [PATCH 3/3] ipmi:si: Fix check for a misbehaving BMC Corey Minyard
2026-02-13 12:58 ` Rafael J. Wysocki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox